Resample and Convert DICOM to Common Image Formats
Overview
This example illustrates the use of SimpleITK for converting a set of DICOM images to other file formats (tif, jpg, png,…). The output file format is specified by the user, and the output image width can also be specified by the user (height is determined from the width as resulting pixel sizes are required to be isotropic). Grayscale images with high dynamic range are rescaled to [0,255] before conversion to the new format. Output is written in the same location as the input image, or to a user specified output directory. An additional csv file mapping between original and converted file names is also written, either to the specified output directory or to the current working directory.
Code
""" A SimpleITK example demonstrating how to convert and resize DICOM files
to common image types. """
import argparse
import csv
import functools
import itertools
import multiprocessing
import os
import sys
import SimpleITK as sitk
def convert_image(input_file_name, output_file_name, new_width=None):
""" Convert a single DICOM image to a common image type. """
try:
image_file_reader = sitk.ImageFileReader()
# only read DICOM images
image_file_reader.SetImageIO("GDCMImageIO")
image_file_reader.SetFileName(input_file_name)
image_file_reader.ReadImageInformation()
image_size = list(image_file_reader.GetSize())
if len(image_size) == 3 and image_size[2] == 1:
image_size[2] = 0
image_file_reader.SetExtractSize(image_size)
image = image_file_reader.Execute()
if new_width:
original_size = image.GetSize()
original_spacing = image.GetSpacing()
new_spacing = [
(original_size[0] - 1) * original_spacing[0] / (new_width - 1)
] * 2
new_size = [
new_width,
int((original_size[1] - 1) * original_spacing[1] / new_spacing[1]),
]
image = sitk.Resample(
image1=image,
size=new_size,
transform=sitk.Transform(),
interpolator=sitk.sitkLinear,
outputOrigin=image.GetOrigin(),
outputSpacing=new_spacing,
outputDirection=image.GetDirection(),
defaultPixelValue=0,
outputPixelType=image.GetPixelID(),
)
# If a single channel image, rescale to [0,255]. Also modify the
# intensity values based on the photometric interpretation. If
# MONOCHROME2 (minimum should be displayed as black) we don't need to
# do anything, if image has MONOCRHOME1 (minimum should be displayed as
# white) we flip # the intensities. This is a constraint imposed by ITK
# which always assumes MONOCHROME2.
if image.GetNumberOfComponentsPerPixel() == 1:
image = sitk.RescaleIntensity(image, 0, 255)
if image_file_reader.GetMetaData("0028|0004").strip() == "MONOCHROME1":
image = sitk.InvertIntensity(image, maximum=255)
image = sitk.Cast(image, sitk.sitkUInt8)
sitk.WriteImage(image, output_file_name)
return True
except RuntimeError:
return False
def convert_images(input_file_names, output_file_names, new_width):
""" Convert multiple DICOM images in parallel to a common image type. """
MAX_PROCESSES = 15
with multiprocessing.Pool(processes=MAX_PROCESSES) as pool:
return pool.starmap(
functools.partial(convert_image, new_width=new_width),
zip(input_file_names, output_file_names),
)
def positive_int(int_str):
""" Custom argparse type for positive integers. """
value = int(int_str)
if value <= 0:
raise argparse.ArgumentTypeError(int_str + " is not a positive integer value")
return value
def directory(dir_name):
""" Custom argparse type for directory. """
if not os.path.isdir(dir_name):
raise argparse.ArgumentTypeError(dir_name + " is not a valid directory name")
return dir_name
def main(argv=None):
""" Main function. """
parser = argparse.ArgumentParser(
description="Convert and resize DICOM files to common image types."
)
parser.add_argument(
"root_of_data_directory",
type=directory,
help="Path to the topmost directory containing data.",
)
parser.add_argument(
"output_file_extension",
help="Image file extension, this determines output file type " "(e.g. png) .",
)
parser.add_argument("--w", type=positive_int, help="Width of converted images.")
parser.add_argument("--od", type=directory, help="Output directory.")
args = parser.parse_args(argv)
input_file_names = []
for dir_name, _, file_names in os.walk(args.root_of_data_directory):
input_file_names += [
os.path.join(os.path.abspath(dir_name), fname) for fname in file_names
]
if args.od:
# if all output files are written to the same directory we need them
# to have a unique name, so use an index.
file_names = [
os.path.join(os.path.abspath(args.od), str(i))
for i in range(len(input_file_names))
]
else:
file_names = input_file_names
output_file_names = [
file_name + "." + args.output_file_extension for file_name in file_names
]
res = convert_images(input_file_names, output_file_names, args.w)
input_file_names = list(itertools.compress(input_file_names, res))
output_file_names = list(itertools.compress(output_file_names, res))
# save csv file mapping input and output file names.
# using csv module and not pandas so as not to create more dependencies
# for the examples. pandas based code is more elegant/shorter.
dir_name = args.od if args.od else os.getcwd()
with open(os.path.join(dir_name, "file_names.csv"), mode="w", encoding='utf-8') as fp:
fp_writer = csv.writer(
fp, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
)
fp_writer.writerow(["input file name", "output file name"])
for data in zip(input_file_names, output_file_names):
fp_writer.writerow(data)
if __name__ == "__main__":
sys.exit(main())
library(SimpleITK)
library(argparser)
convert_image <- function(input_file_name, output_file_name, new_width = NA)
{
image_file_reader = ImageFileReader()
# only read DICOM images
image_file_reader$SetImageIO('GDCMImageIO')
image_file_reader$SetFileName(input_file_name)
try(image_file_reader$ReadImageInformation(), silent = TRUE)
image_size <- image_file_reader$GetSize()
#if this isn't a DICOM image return FALSE
if(length(image_size) == 0) {
return(FALSE)
}
if(length(image_size) == 3 & image_size[3] == 1) {
image_size[3] <- 0
image_file_reader$SetExtractSize(image_size)
}
image <- NULL
try(image <- image_file_reader$Execute(), silent = TRUE)
if(is.null(image))
{
return(FALSE)
}
if(!is.na(new_width))
{
original_size <- image$GetSize()
original_spacing <- image$GetSpacing()
new_spacing <- c((original_size[1]-1)*original_spacing[1]/(new_width-1),
(original_size[1]-1)*original_spacing[1]/(new_width-1))
new_size <- c(new_width, as.integer((original_size[2]-1)*original_spacing[2]/new_spacing[2]))
image <- Resample(image, new_size, Transform(), 'sitkLinear',
image$GetOrigin(), new_spacing, image$GetDirection(),0,
image$GetPixelID())
}
# If a single channel image, rescale to [0,255]. Also modify the intensity values based
# on the photomertic interpretation. If MONOCHROME2 (minimum should be displayed as black) we
# don't need to do anything, if image has MONOCRHOME1 (minimum should be displayed as white) we flip
# the intensities. This is a constraint imposed by ITK which always assumes MONOCHROME2.
if(image$GetNumberOfComponentsPerPixel() == 1)
{
image <- RescaleIntensity(image, 0, 255)
if(trimws(image_file_reader$GetMetaData('0028|0004')) == 'MONOCHROME1')
{
image <- InvertIntensity(image, maximum=255)
}
image <- Cast(image, 'sitkUInt8')
}
WriteImage(image, output_file_name)
return(TRUE)
}
parser<-arg_parser('Convert and resize DICOM files to common image types.')
parser<-add_argument(parser, 'root_of_data_directory', help='Path to the topmost directory containing data.')
parser<-add_argument(parser, 'output_file_extension', help='Image file extension, this determines output file type (e.g. png) .')
parser<-add_argument(parser, '--w', help='Width of converted images.')
parser<-add_argument(parser, '--od', help='Output directory.')
argv<-parse_args(parser)
input_file_names <- list.files(argv$root_of_data_directory, recursive=TRUE, full.names=TRUE)
if(!is.na(argv$od)) {
# if all output files are written to the same directory we need them to have a unique name, so use an index.
file_names <- lapply(seq_len(length(input_file_names)), function(new_file_name) return(file.path(argv$od, toString(new_file_name))))
} else {
file_names <- input_file_names
}
#append the file extension to the output file names
output_file_names <- lapply(file_names, function(file_name) return(paste0(file_name,'.',argv$output_file_extension)))
res <- mapply(convert_image, input_file_names, output_file_names, new_width=as.integer(argv$w), SIMPLIFY=FALSE)
df <- data.frame(input_file_name = unlist(input_file_names[unlist(res)]),
output_file_name = unlist(output_file_names[unlist(res)]))
dir_name <- if(!is.na(argv$od)) argv$od else getwd()
write.csv(df,file.path(dir_name, 'file_names.csv'), row.names=FALSE)