Resample and Convert DICOM to Common Image Formats

Overview

This example illustrates the use of SimpleITK for converting a set of DICOM images to other file formats (tif, jpg, png,…). The output file format is specified by the user, and the output image width can also be specified by the user (height is determined from the width as resulting pixel sizes are required to be isotropic). Grayscale images with high dynamic range are rescaled to [0,255] before conversion to the new format. Output is written in the same location as the input image, or to a user specified output directory. An additional csv file mapping between original and converted file names is also written, either to the specified output directory or to the current working directory.

Code

import argparse
import csv
import functools
import itertools
import multiprocessing
import os
import sys

import SimpleITK as sitk


def convert_image(input_file_name, output_file_name, new_width=None):
    try:
        image_file_reader = sitk.ImageFileReader()
        # only read DICOM images
        image_file_reader.SetImageIO('GDCMImageIO')
        image_file_reader.SetFileName(input_file_name)
        image_file_reader.ReadImageInformation()
        image_size = list(image_file_reader.GetSize())
        if len(image_size) == 3 and image_size[2] == 1:
            image_size[2] = 0
        image_file_reader.SetExtractSize(image_size)
        image = image_file_reader.Execute()
        if new_width:
            original_size = image.GetSize()
            original_spacing = image.GetSpacing()
            new_spacing = [(original_size[0] - 1) * original_spacing[0]
                           / (new_width - 1)] * 2
            new_size = [new_width, int((original_size[1] - 1)
                                       * original_spacing[1] / new_spacing[1])]
            image = sitk.Resample(image1=image, size=new_size,
                                  transform=sitk.Transform(),
                                  interpolator=sitk.sitkLinear,
                                  outputOrigin=image.GetOrigin(),
                                  outputSpacing=new_spacing,
                                  outputDirection=image.GetDirection(),
                                  defaultPixelValue=0,
                                  outputPixelType=image.GetPixelID())
        # If a single channel image, rescale to [0,255]. Also modify the
        # intensity values based on the photometric interpretation. If
        # MONOCHROME2 (minimum should be displayed as black) we don't need to
        # do anything, if image has MONOCRHOME1 (minimum should be displayed as
        # white) we flip # the intensities. This is a constraint imposed by ITK
        # which always assumes MONOCHROME2.
        if image.GetNumberOfComponentsPerPixel() == 1:
            image = sitk.RescaleIntensity(image, 0, 255)
            if image_file_reader.GetMetaData('0028|0004').strip() \
                    == 'MONOCHROME1':
                image = sitk.InvertIntensity(image, maximum=255)
            image = sitk.Cast(image, sitk.sitkUInt8)
        sitk.WriteImage(image, output_file_name)
        return True
    except BaseException:
        return False


def convert_images(input_file_names, output_file_names, new_width):
    MAX_PROCESSES = 15
    with multiprocessing.Pool(processes=MAX_PROCESSES) as pool:
        return pool.starmap(functools.partial(convert_image,
                                              new_width=new_width),
                            zip(input_file_names, output_file_names))


def positive_int(int_str):
    value = int(int_str)
    if value <= 0:
        raise argparse.ArgumentTypeError(
            int_str + ' is not a positive integer value')
    return value


def directory(dir_name):
    if not os.path.isdir(dir_name):
        raise argparse.ArgumentTypeError(dir_name +
                                         ' is not a valid directory name')
    return dir_name


def main(argv=None):
    parser = argparse.ArgumentParser(
        description='Convert and resize DICOM files to common image types.')
    parser.add_argument('root_of_data_directory', type=directory,
                        help='Path to the topmost directory containing data.')
    parser.add_argument(
        'output_file_extension',
        help='Image file extension, this determines output file type '
             '(e.g. png) .')
    parser.add_argument('--w', type=positive_int,
                        help='Width of converted images.')
    parser.add_argument('--od', type=directory, help='Output directory.')
    args = parser.parse_args(argv)

    input_file_names = []
    for dir_name, subdir_names, file_names in os.walk(
            args.root_of_data_directory):
        input_file_names += [os.path.join(os.path.abspath(dir_name), fname)
                             for fname in file_names]
    if args.od:
        # if all output files are written to the same directory we need them
        # to have a unique name, so use an index.
        file_names = [os.path.join(os.path.abspath(args.od), str(i))
                      for i in range(len(input_file_names))]
    else:
        file_names = input_file_names
    output_file_names = [file_name + '.' + args.output_file_extension
                         for file_name in file_names]

    res = convert_images(input_file_names, output_file_names, args.w)
    input_file_names = list(itertools.compress(input_file_names, res))
    output_file_names = list(itertools.compress(output_file_names, res))

    # save csv file mapping input and output file names.
    # using csv module and not pandas so as not to create more dependencies
    # for the examples. pandas based code is more elegant/shorter.
    dir_name = args.od if args.od else os.getcwd()
    with open(os.path.join(dir_name, 'file_names.csv'), mode='w') as fp:
        fp_writer = csv.writer(fp, delimiter=',', quotechar='"',
                               quoting=csv.QUOTE_MINIMAL)
        fp_writer.writerow(['input file name', 'output file name'])
        for data in zip(input_file_names, output_file_names):
            fp_writer.writerow(data)


if __name__ == "__main__":
    sys.exit(main())
library(SimpleITK)
library(argparser)

convert_image <- function(input_file_name, output_file_name, new_width = NA)
{
  image_file_reader = ImageFileReader()
  # only read DICOM images
  image_file_reader$SetImageIO('GDCMImageIO')
  image_file_reader$SetFileName(input_file_name)
  try(image_file_reader$ReadImageInformation(), silent = TRUE)
  image_size <- image_file_reader$GetSize()
  #if this isn't a DICOM image return FALSE
  if(length(image_size) == 0) {
    return(FALSE)
  }
  if(length(image_size) == 3 & image_size[3] == 1) {
    image_size[3] <- 0
    image_file_reader$SetExtractSize(image_size)
  }
  image <- NULL
  try(image <- image_file_reader$Execute(), silent = TRUE)
  if(is.null(image))
  {
    return(FALSE)
  }
  if(!is.na(new_width))
  {
    original_size <- image$GetSize()
    original_spacing <- image$GetSpacing()
    new_spacing <- c((original_size[1]-1)*original_spacing[1]/(new_width-1),
                    (original_size[1]-1)*original_spacing[1]/(new_width-1))
    new_size <- c(new_width, as.integer((original_size[2]-1)*original_spacing[2]/new_spacing[2]))
    image <- Resample(image, new_size, Transform(), 'sitkLinear',
                      image$GetOrigin(), new_spacing, image$GetDirection(),0,
                      image$GetPixelID())
  }
  # If a single channel image, rescale to [0,255]. Also modify the intensity values based
  # on the photomertic interpretation. If MONOCHROME2 (minimum should be displayed as black) we
  # don't need to do anything, if image has MONOCRHOME1 (minimum should be displayed as white) we flip
  # the intensities. This is a constraint imposed by ITK which always assumes MONOCHROME2.
  if(image$GetNumberOfComponentsPerPixel() == 1)
  {
    image <- RescaleIntensity(image, 0, 255)
    if(trimws(image_file_reader$GetMetaData('0028|0004')) == 'MONOCHROME1')
    {
      image <-  InvertIntensity(image, maximum=255)
    }
    image <- Cast(image, 'sitkUInt8')
  }
  WriteImage(image, output_file_name)
  return(TRUE)
 }

parser<-arg_parser('Convert and resize DICOM files to common image types.')
parser<-add_argument(parser, 'root_of_data_directory', help='Path to the topmost directory containing data.')
parser<-add_argument(parser, 'output_file_extension', help='Image file extension, this determines output file type (e.g. png) .')
parser<-add_argument(parser, '--w', help='Width of converted images.')
parser<-add_argument(parser, '--od', help='Output directory.')
argv<-parse_args(parser)

input_file_names <- list.files(argv$root_of_data_directory, recursive=TRUE, full.names=TRUE)
if(!is.na(argv$od)) {
  # if all output files are written to the same directory we need them to have a unique name, so use an index.
  file_names <- lapply(seq_len(length(input_file_names)), function(new_file_name) return(file.path(argv$od, toString(new_file_name))))
} else {
  file_names <- input_file_names
}
#append the file extension to the output file names
output_file_names <- lapply(file_names, function(file_name) return(paste0(file_name,'.',argv$output_file_extension)))
res <- mapply(convert_image, input_file_names, output_file_names, new_width=as.integer(argv$w), SIMPLIFY=FALSE)
df <- data.frame(input_file_name = unlist(input_file_names[unlist(res)]),
                output_file_name = unlist(output_file_names[unlist(res)]))
dir_name <- if(!is.na(argv$od)) argv$od else getwd()
write.csv(df,file.path(dir_name, 'file_names.csv'), row.names=FALSE)