Source code for histo_kit.utils.wsi

from PIL import Image
import numpy as np
from skimage import measure

def slide_info(slide, verbose=False):
    """
    Retrieve basic information about a whole-slide image (WSI).

    This function extracts metadata such as objective power, vendor, the
    dimensions of the largest layer, the level downsamples, the per-level
    magnifications and the slide resolution (MPP).

    Parameters
    ----------
    slide : OpenSlide object
        Whole-slide image loaded via OpenSlide. Only ``properties``,
        ``level_dimensions``, ``level_count`` and ``level_downsamples``
        are read.
    verbose : bool, default=False
        If True, prints slide information to the console.

    Returns
    -------
    width_level_0 : int
        Width of the slide at the highest resolution (level 0).
    height_level_0 : int
        Height of the slide at the highest resolution (level 0).
    obj_power : float
        Objective magnification of the slide. Falls back to ``99`` when the
        ``openslide.objective-power`` property is missing or not numeric.
    num_level : int
        Number of levels stored in the slide.
    vendor : str
        Slide vendor (``openslide.vendor`` property).
    down_levels : sequence of float
        Downsample factor of every level, as reported by the slide.
    level_mag : list of float
        Magnification of every level (``obj_power / downsample``).
    slide_mpp : float
        Microns per pixel taken from ``openslide.mpp-x``; approximated as
        ``10 / obj_power`` when that property is absent.

    Raises
    ------
    KeyError
        If the ``openslide.vendor`` property is missing.

    Examples
    --------
    >>> (width, height, obj_power, num_level, vendor,
    ...  down_levels, level_mag, slide_mpp) = slide_info(slide, verbose=True)
    """
    properties = slide.properties

    # Objective power: only a missing or malformed property triggers the
    # sentinel fallback; any other failure (e.g. a backend error) propagates.
    try:
        obj_power = float(properties["openslide.objective-power"])
    except (KeyError, ValueError, TypeError):
        obj_power = 99

    # Vendor
    vendor = properties["openslide.vendor"]

    # Dimensions of level 0 (highest resolution)
    width_level_0, height_level_0 = slide.level_dimensions[0][:2]

    # Number of levels and their downsample factors
    num_level = slide.level_count
    down_levels = slide.level_downsamples

    # Slide MPP; approximated from the objective power when not stored
    mpp_raw = properties.get("openslide.mpp-x", None)
    slide_mpp = float(mpp_raw) if mpp_raw is not None else 10 / obj_power

    # Magnification of every level
    level_mag = [obj_power / factor for factor in down_levels]

    if verbose:
        print("")
        print("Basic data about processed whole-slide image")
        print("")
        print("Vendor: ", vendor)
        print("Scan magnification: ", obj_power)
        print("Number of levels: ", num_level)
        print("Level downsamples: ", down_levels)
        print("Height: ", height_level_0)
        print("Width: ", width_level_0)
        print("MPP: ", slide_mpp)
        print("Level magnifications: ", level_mag)

    return (width_level_0, height_level_0, obj_power, num_level, vendor,
            down_levels, level_mag, slide_mpp)
def get_regions_location(bg_mask):
    """
    Extract bounding boxes and per-region masks of connected tissue regions.

    The input mask is labelled with ``skimage.measure.label`` and every
    connected component found by ``skimage.measure.regionprops`` contributes
    one bounding box and one region image.

    Parameters
    ----------
    bg_mask : ndarray of bool or int
        2D binary mask where non-zero values indicate tissue (foreground)
        and zero indicates background.

    Returns
    -------
    bbox_list : list of list of int
        One ``[y_min, x_min, y_max, x_max]`` entry per region, in the
        coordinate order used by ``skimage.measure.regionprops``.
    image_list : list of ndarray
        The ``image`` attribute of each region (the region's own mask
        cropped to its bounding box), in the same order as ``bbox_list``.

    Notes
    -----
    - The mask is expected to be 2D.
    - Both lists are empty when no region is found.

    Examples
    --------
    >>> mask = np.array([
    ...     [0, 1, 1, 0],
    ...     [0, 1, 1, 0],
    ...     [0, 0, 0, 0]
    ... ], dtype=bool)
    >>> bboxes, images = get_regions_location(mask)
    >>> bboxes
    [[0, 1, 2, 3]]
    """
    regions = measure.regionprops(measure.label(bg_mask))

    # Gather (bbox, image) pairs region by region, then split them into the
    # two parallel lists that are returned.
    pairs = [
        ([r.bbox[0], r.bbox[1], r.bbox[2], r.bbox[3]], r.image)  # y_min, x_min, y_max, x_max
        for r in regions
    ]
    bbox_list = [box for box, _ in pairs]
    image_list = [img for _, img in pairs]
    return bbox_list, image_list
def load_wsi_mag(wsi, desired_mag, rescale_method=Image.LANCZOS, verbose=False, allow_upscaling=True):
    """
    Load and rescale a whole-slide image (WSI) to a desired magnification.

    The whole slide is read at the level whose magnification equals the
    desired one. If no such level exists and the desired magnification is
    lower than the scan magnification, the level returned by
    ``wsi.get_best_level_for_downsample`` is read and resized down. For
    example, when the desired magnification is 5x and the WSI has 40x, 20x,
    10x and 2.5x, the image is rescaled from 10x to 5x. When the desired
    magnification is higher than the scan magnification, level 0 is read
    and upscaled (only if ``allow_upscaling`` is True).

    Parameters
    ----------
    wsi : OpenSlide object
        OpenSlide WSI object to load and rescale.
    desired_mag : float
        Desired slide magnification (e.g., 10, 20, 40). Must be positive.
    rescale_method : PIL.Image.Resampling or int, optional
        Resampling method used when resizing the image. Options include
        `Image.BICUBIC`, `Image.BILINEAR`, `Image.BOX`, `Image.HAMMING`,
        `Image.LANCZOS`, `Image.NEAREST`. Default is `Image.LANCZOS`.
    verbose : bool, optional
        If True, prints information about the rescaling process.
        Default is False.
    allow_upscaling : bool, optional
        If True, allows upscaling when the desired magnification is higher
        than the highest magnification available. Default is True.

    Returns
    -------
    region : PIL.Image.Image
        Whole slide at the desired magnification (converted to RGB).
    scale_val : float
        Ratio between the level-0 width and the width of ``region``. For
        example, if the highest level is 40x and the desired magnification
        is 10x, ``scale_val`` is about ``40/10 = 4``.
    info : str
        Message describing whether the desired magnification was available
        or if rescaling/upscaling was applied.
    mpp_slide : float
        Microns-per-pixel (MPP) of the slide as reported by ``slide_info``.
    ratio : sequence of float
        Downsample ratios of each WSI level.

    Raises
    ------
    ValueError
        If ``desired_mag`` is not positive, or if it exceeds the scan
        magnification while ``allow_upscaling`` is False.

    Notes
    -----
    - Level magnifications are rounded to two decimals before being compared
      with ``desired_mag``.
    - If the desired magnification is available among the WSI levels, no
      rescaling is performed.
    - The whole level is loaded into memory, so low magnifications are the
      intended use.
    - The function converts the RGBA image returned by OpenSlide to RGB.

    Examples
    --------
    >>> region, scale_val, info, mpp_slide, ratio = load_wsi_mag(wsi, desired_mag=10)
    >>> print(info)
    >>> region.show()
    """
    if desired_mag <= 0:
        raise ValueError("The desired magnification must be a positive number.")

    w0, h0, mag_l0, _, _, ratio, mag_layers, mpp_slide = slide_info(wsi)
    mag_layers = [round(m, 2) for m in mag_layers]

    if desired_mag in mag_layers:
        info = "Desired magnification is available"
        mag_idx = mag_layers.index(desired_mag)
        w, h = wsi.level_dimensions[mag_idx]
        region = wsi.read_region((0, 0), mag_idx, (w, h))
    elif desired_mag < mag_l0:
        info = "Desired resolution is not available, image will be rescaled from the best level for downsample."
        desired_ratio = mag_l0 / desired_mag
        level = wsi.get_best_level_for_downsample(desired_ratio)
        w, h = wsi.level_dimensions[level]
        region = wsi.read_region((0, 0), level, (w, h))
        resize_factor = desired_mag / mag_layers[level]
        # Clamp to 1 px so extreme downscaling cannot request an empty image.
        new_size = (max(1, int(w * resize_factor)), max(1, int(h * resize_factor)))
        region = region.resize(new_size, rescale_method)
    else:
        if not allow_upscaling:
            raise ValueError("The desired magnification is larger than the highest magnification available. "
                             "The parameter allow_upscaling is set to False, so the image will not be upscaled. "
                             "If you want to upscale the image, set the parameter allow_upscaling to True. ")
        info = "Desired resolution is larger than available, image will be rescaled from the highest magnification available."
        region = wsi.read_region((0, 0), 0, (w0, h0))
        resize_factor = desired_mag / mag_l0
        new_size = (max(1, int(w0 * resize_factor)), max(1, int(h0 * resize_factor)))
        region = region.resize(new_size, rescale_method)

    # convert RGBA to RGB
    region = region.convert("RGB")

    # Reported scale is measured on the final image so it reflects any
    # rounding of the resized dimensions.
    scale_val = w0 / region.size[0]

    if verbose:
        print(info)

    return region, scale_val, info, mpp_slide, ratio
def read_region(wsi, mask_file, region_idx, desired_mag, notation="python", allow_list=(1, 7),
                resampling_method=Image.Resampling.LANCZOS):
    """
    Read a masked region from a whole-slide image (WSI) at a desired magnification.

    The bounding box of the requested region is taken from ``mask_file``,
    converted to pixel coordinates at the desired magnification, read from
    the most suitable WSI level and, when that level is not exactly at the
    desired magnification, resized. Pixels outside the allowed mask labels
    are set to white.

    Parameters
    ----------
    wsi : OpenSlide object
        OpenSlide WSI object from which to read the region.
    mask_file : dict-like
        Dictionary or NumPy file providing ``"tiss_stats"`` (bounding
        boxes), ``"mask_art"`` (label masks, one per region) and
        ``"scale_val"`` (factor between level 0 and the mask resolution).
    region_idx : int
        Index of the region to read from the mask file.
    desired_mag : float
        Target magnification for the output region. Must be positive.
    notation : {'python', 'matlab'}, optional
        Specifies whether bounding boxes use Python (0-based) or MATLAB
        (1-based) indexing. Default is "python".
    allow_list : tuple of int, optional
        Labels of ``mask_art`` to keep. Default is ``(1, 7)``.
    resampling_method : PIL.Image.Resampling, optional
        Resampling method used when resizing the region. Default is
        `Image.Resampling.LANCZOS`.

    Returns
    -------
    region_masked : ndarray of shape (H, W, 3), dtype uint8
        Masked RGB region at the desired magnification. Background pixels
        are set to white ([255, 255, 255]).

    Raises
    ------
    ValueError
        If ``desired_mag`` is not positive or the scaled bounding box is
        empty.
    KeyError
        If the slide has no ``openslide.objective-power`` property.

    Notes
    -----
    - Bounding boxes are assumed to be ``[y_min, x_min, y_max, x_max]``
      (the order produced by ``get_regions_location``) -- confirm against
      the code that writes ``mask_file``.
    - Only the bounding box is read from the slide; parts of it that fall
      outside the slide are padded by OpenSlide and end up white.
    - Every pixel that is pure black after masking is turned white, which
      includes pure-black pixels inside allowed areas.

    Examples
    --------
    >>> region = read_region(wsi, mask_file, region_idx=0, desired_mag=10)
    >>> plt.imshow(region)
    >>> plt.show()
    """
    if desired_mag <= 0:
        raise ValueError("The desired magnification must be a positive number.")

    bbox = np.array(mask_file["tiss_stats"][region_idx])

    # change matlab indexing to python indexing
    if notation == "matlab":
        bbox = bbox - 1

    mag_l0 = float(wsi.properties["openslide.objective-power"])
    desired_ratio = mag_l0 / desired_mag

    # mask pixels -> pixels at the desired magnification
    scale_val = mask_file["scale_val"] / desired_ratio
    bbox = (bbox * scale_val).astype(int)
    y_min, x_min, y_max, x_max = (int(v) for v in bbox[:4])
    out_w, out_h = x_max - x_min, y_max - y_min
    if out_w <= 0 or out_h <= 0:
        raise ValueError("The bounding box of the requested region is empty at the desired magnification.")

    # Pick the level to read from: the exact one if the slide stores it,
    # otherwise the closest higher-resolution level, which is resized below.
    w0, h0 = wsi.level_dimensions[0]
    des_w, des_h = int(w0 / desired_ratio), int(h0 / desired_ratio)
    level_dims = list(wsi.level_dimensions)
    if (des_w, des_h) in level_dims:
        level = level_dims.index((des_w, des_h))
        level_scale = 1.0
    else:
        level = wsi.get_best_level_for_downsample(desired_ratio)
        # pixels at `level` per pixel at the desired magnification
        level_scale = desired_ratio / wsi.level_downsamples[level]

    # OpenSlide expects the location as (x, y) in the level-0 frame and the
    # size as (width, height) in pixels of the level being read.
    location = (int(x_min * desired_ratio), int(y_min * desired_ratio))
    read_size = (max(1, int(round(out_w * level_scale))),
                 max(1, int(round(out_h * level_scale))))
    tile = wsi.read_region(location, level, read_size).convert("RGB")
    if tile.size != (out_w, out_h):
        tile = tile.resize((out_w, out_h), resampling_method)
    region = np.array(tile)

    # load mask with artifacts and keep only the labels from the allow list;
    # uint8 keeps the mask convertible to a PIL image and the output in uint8
    mask_art = np.asarray(mask_file["mask_art"][region_idx])
    mask = np.isin(mask_art, list(allow_list)).astype(np.uint8)

    # Resize mask for desired resolution (nearest neighbour keeps it binary)
    mask = np.array(
        Image.fromarray(mask).resize((region.shape[1], region.shape[0]), Image.Resampling.NEAREST)
    )

    # get only areas defined by mask
    region_masked = region * mask[:, :, np.newaxis]

    # turn bg pixels to white
    region_masked[np.all(region_masked == 0, axis=-1)] = 255

    return region_masked