Source code for core_ct.importers

"""Methods that assist in importing CT scans of rock cores."""

from core_ct.core import Core
from core_ct.slice import Slice
from pydicom import dcmread
from pydicom.errors import InvalidDicomError
from os import listdir
import os.path
import numpy as np


[docs] def dicom( dir: str | None = None, files: list[str] | None = None, force: bool = False, ignore_hidden_files: bool = True, ignore_file_extensions: bool = False ) -> Core: """ Load a DICOM dataset into a `Core` object. This is used to load a set of images into one core object. All images must come from the same CT scan of the same core. Files containing the DICOM dataset can be specified by providing a directory or a list of files. If both `dir` and `files` are provided, `dir` will be ignored. When specifying a directory all files in that directory will be treated as part of the DICOM dataset. If this is undesirable, use `files` instead. Subfolders/directories are ignored. All dicom data files must be explicitly specified via `files` or located in the `dir` provided. Arguments --------- dir : str Path to directory containing DICOM dataset; ignored if `files` is specified files : list[str] List of filepaths belonging to DICOM dataset force : bool If set to `True`, files that produce errors will be ignored ignore_hidden_files : bool If set to `True`, hidden files (names starting with ".") will be ignored Raises ------ ValueError If no files are found. Caused by providing an empty directory (`dir`) or an empty `files` list. Also raised when files are missing header information. RuntimeError If no data was loaded. Happens when no files can be parsed and `force` is set to `True` pydicom.InvalidDicomError If `pydicom` fails to parse a file """ # if files was not provided, load files from the provided directory if files is None or len(files) == 0: # throw error if directory not provided if dir is None: raise ValueError("Must provide a directory (`dir`) when not using `files`") # get the list of files for the core files = [os.path.join(dir, file_name) for file_name in listdir(dir)] # remove invalid files for f in files: f_name = os.path.basename(f) # get the basename of the file and then check if it is a hidden file if ignore_hidden_files and f_name.startswith("."): files.remove(f) # ignore subdirectories if os.path.isdir(f): files.remove(f) if len(files) == 0: raise ValueError( "No files found. This could mean an empty directory (`dir`) was provided " "or `files` is empty.") # skip files with no SliceLocation information (should be a float) slices = [] skipped: list[str] = [] for f in files: # try to read slice try: ds = dcmread(f, force=force) except InvalidDicomError: if not force: # forward pydicom exception so the stack trace is more useful raise else: continue # make sure SliceLocation exists in the slice try: if isinstance(ds.SliceLocation, float): slices.append(ds) else: skipped.append(f) # in case SliceLocation isn't an attribute of ds except AttributeError: skipped.append(f) if not force and len(skipped) > 0: raise ValueError( f"Failed to load {len(skipped)} files, invalid or missing SliceLocation: " f"{skipped}" ) # make sure we actually loaded data if len(slices) == 0: raise RuntimeError( "No data loaded. This could mean no files could be parsed and `force` was " "set to `True`" ) # re-sort to put the slices in the right order slices = sorted(slices, key=lambda s: s.SliceLocation) # pixel dimensions, assuming all slices are the same x_dim: float = float(slices[0].PixelSpacing[0]) y_dim: float = float(slices[0].PixelSpacing[1]) z_dim: float = float(slices[0].SliceThickness) # create 3D array img_shape: list[int] = list(slices[0].pixel_array.shape) img_shape.append(len(slices)) img3d: np.typing.NDArray[np.float64] = np.zeros(img_shape) # fill 3D array with the images from the files for i, s in enumerate(slices): img2d = s.pixel_array img3d[:, :, i] = img2d return Core(data=img3d, pixel_dimensions=(x_dim, y_dim, z_dim))
[docs] def dicom_slice( file: str, force: bool = False, ) -> Slice: """ Load a DICOM dataset into a `Slice` object. This function should be used when you only want to load one image/scan instead of a collection. Arguments --------- file : str Path to file to load DICOM data from force : bool If set to `True`, files that produce errors will be ignored Raises ------ ValueError If the file is missing header information. pydicom.InvalidDicomError If `pydicom` fails to parse the file """ # try to read slice try: ds = dcmread(file, force=force) except InvalidDicomError: if not force: # forward pydicom exception so the stack trace is more useful raise # make sure SliceLocation exists in the slice try: if not isinstance(ds.SliceLocation, float): raise ValueError("File could not be parsed (incorrect SliceLocation type)") # in case SliceLocation isn't an attribute of ds except AttributeError as e: if not force: raise ValueError( "File does not contain SliceLocation in header, it may not be a valid " "dicom file" ) from e # pixel dimensions x_dim: float = float(ds.PixelSpacing[0]) y_dim: float = float(ds.PixelSpacing[1]) z_dim: float = float(ds.SliceThickness) return Slice(data=ds.pixel_array, pixel_dimensions=(x_dim, y_dim), thickness=z_dim)