Resample and Convert DICOM to Common Image Formats


This example illustrates the use of SimpleITK for converting a set of DICOM images to other file formats (tif, jpg, png,…). The output file format is specified by the user, and the output image width can also be specified by the user (height is determined from the width as resulting pixel sizes are required to be isotropic). Grayscale images with high dynamic range are rescaled to [0,255] before conversion to the new format. Output is written in the same location as the input image, or to a user specified output directory. An additional csv file mapping between original and converted file names is also written, either to the specified output directory or to the current working directory.


import argparse
import csv
import functools
import itertools
import multiprocessing
import os
import sys

import SimpleITK as sitk

def convert_image(input_file_name, output_file_name, new_width=None):
        image_file_reader = sitk.ImageFileReader()
        # only read DICOM images
        image_size = list(image_file_reader.GetSize())
        if len(image_size) == 3 and image_size[2] == 1:
            image_size[2] = 0
        image = image_file_reader.Execute()
        if new_width:
            original_size = image.GetSize()
            original_spacing = image.GetSpacing()
            new_spacing = [(original_size[0] - 1) * original_spacing[0]
                           / (new_width - 1)] * 2
            new_size = [new_width, int((original_size[1] - 1)
                                       * original_spacing[1] / new_spacing[1])]
            image = sitk.Resample(image1=image, size=new_size,
        # If a single channel image, rescale to [0,255]. Also modify the
        # intensity values based on the photometric interpretation. If
        # MONOCHROME2 (minimum should be displayed as black) we don't need to
        # do anything, if image has MONOCRHOME1 (minimum should be displayed as
        # white) we flip # the intensities. This is a constraint imposed by ITK
        # which always assumes MONOCHROME2.
        if image.GetNumberOfComponentsPerPixel() == 1:
            image = sitk.RescaleIntensity(image, 0, 255)
            if image_file_reader.GetMetaData('0028|0004').strip() \
                    == 'MONOCHROME1':
                image = sitk.InvertIntensity(image, maximum=255)
            image = sitk.Cast(image, sitk.sitkUInt8)
        sitk.WriteImage(image, output_file_name)
        return True
    except BaseException:
        return False

def convert_images(input_file_names, output_file_names, new_width):
    with multiprocessing.Pool(processes=MAX_PROCESSES) as pool:
        return pool.starmap(functools.partial(convert_image,
                            zip(input_file_names, output_file_names))

def positive_int(int_str):
    value = int(int_str)
    if value <= 0:
        raise argparse.ArgumentTypeError(
            int_str + ' is not a positive integer value')
    return value

def directory(dir_name):
    if not os.path.isdir(dir_name):
        raise argparse.ArgumentTypeError(dir_name +
                                         ' is not a valid directory name')
    return dir_name

def main(argv=None):
    parser = argparse.ArgumentParser(
        description='Convert and resize DICOM files to common image types.')
    parser.add_argument('root_of_data_directory', type=directory,
                        help='Path to the topmost directory containing data.')
        help='Image file extension, this determines output file type '
             '(e.g. png) .')
    parser.add_argument('--w', type=positive_int,
                        help='Width of converted images.')
    parser.add_argument('--od', type=directory, help='Output directory.')
    args = parser.parse_args(argv)

    input_file_names = []
    for dir_name, subdir_names, file_names in os.walk(
        input_file_names += [os.path.join(os.path.abspath(dir_name), fname)
                             for fname in file_names]
    if args.od:
        # if all output files are written to the same directory we need them
        # to have a unique name, so use an index.
        file_names = [os.path.join(os.path.abspath(args.od), str(i))
                      for i in range(len(input_file_names))]
        file_names = input_file_names
    output_file_names = [file_name + '.' + args.output_file_extension
                         for file_name in file_names]

    res = convert_images(input_file_names, output_file_names, args.w)
    input_file_names = list(itertools.compress(input_file_names, res))
    output_file_names = list(itertools.compress(output_file_names, res))

    # save csv file mapping input and output file names.
    # using csv module and not pandas so as not to create more dependencies
    # for the examples. pandas based code is more elegant/shorter.
    dir_name = args.od if args.od else os.getcwd()
    with open(os.path.join(dir_name, 'file_names.csv'), mode='w') as fp:
        fp_writer = csv.writer(fp, delimiter=',', quotechar='"',
        fp_writer.writerow(['input file name', 'output file name'])
        for data in zip(input_file_names, output_file_names):

if __name__ == "__main__":