Skip to content

Commit

Permalink
added support for specifying an xml file path when running mass_deidentify.py (#107)
Browse files Browse the repository at this point in the history
  • Loading branch information
cmadjar authored Apr 4, 2019
1 parent 1271b0a commit 977f531
Show file tree
Hide file tree
Showing 3 changed files with 279 additions and 7 deletions.
255 changes: 255 additions & 0 deletions dicat/data/fields_to_zap_for_open_science.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
<data>
    <!--
      List of DICOM fields to zap when de-identifying data for open science.
      The path to this file can be given to mass_deidentify.py through its
      "xmlfile" option (short form: x).

      Each <item> contains:
        <name>         DICOM tag written as "group,element" in hexadecimal
        <description>  label associated with the tag
        <editable>     "yes" or "no"
                       NOTE(review): presumably tells the tool whether a
                       replacement value may be supplied for the field;
                       confirm against the code that reads this file.

      Items wrapped in XML comments below are kept for reference but are
      not active.
    -->
    <item>
        <name>0010,0030</name>
        <description>PatientBirthDate</description>
        <editable>yes</editable>
    </item>
    <item>
        <name>0010,0040</name>
        <description>PatientSex</description>
        <editable>yes</editable>
    </item>
    <item>
        <name>0010,0020</name>
        <description>PatientID</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,0080</name>
        <description>InstitutionName</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,0081</name>
        <description>InstitutionAddress</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,1010</name>
        <description>StationName</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,1030</name>
        <description>StudyDescription</description>
        <editable>no</editable>
    </item>
    <!--
    <item>
    <name>0018,1000</name>
    <description>DeviceSerialNumber</description>
    <editable>no</editable>
    </item>
    -->
    <item>
        <name>0008,0090</name>
        <description>ReferringPhysicianName</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,1048</name>
        <description>PhysiciansOfRecord</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0010,0010</name>
        <description>PatientName</description>
        <editable>yes</editable>
    </item>
    <!--
    <item>
    <name>0020,000d</name>
    <description>StudyInstanceUID</description>
    <editable>no</editable>
    </item>
    -->
    <item>
        <name>0032,1032</name>
        <description>RequestingPhysician</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0032,1060</name>
        <description>RequestedProcedureDescription</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0040,0254</name>
        <description>PerformedProcedureStepDescription</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0040,0253</name>
        <description>PerformedProcedureStepID</description>
        <editable>no</editable>
    </item>
    <!-- Description of this field couldn't be found
    <item>
    <name>0023,0006</name>
    <description>0023,0006</description>
    <editable>no</editable>
    </item>
    -->
    <!-- Tag 0010,1001 is listed once only, under its standard keyword
         OtherPatientNames; a second entry for the same tag would be
         redundant. -->
    <item>
        <name>0010,1001</name>
        <description>OtherPatientNames</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0010,0021</name>
        <description>IssuerOfPatientID</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0010,1000</name>
        <description>OtherPatientIDs</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0010,1005</name>
        <description>PatientBirthName</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0010,1010</name>
        <description>PatientAge</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0010,1040</name>
        <description>PatientAddress</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0010,1060</name>
        <description>PatientMotherBirthName</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0010,21f0</name>
        <description>PatientReligiousPreference</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0010,2154</name>
        <description>PatientTelephoneNumbers</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,1040</name>
        <description>InstitutionalDepartmentName</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,1050</name>
        <description>PerformingPhysicianName</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,1070</name>
        <description>OperatorsName</description>
        <editable>no</editable>
    </item>

    <!-- Date and time fields, followed by two tags with no known description -->
    <item>
        <name>0008,0012</name>
        <description>InstanceCreationDate</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,0013</name>
        <description>InstanceCreationTime</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,0020</name>
        <description>StudyDate</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,0021</name>
        <description>SeriesDate</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,0022</name>
        <description>AcquisitionDate</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,0023</name>
        <description>ContentDate</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,0030</name>
        <description>StudyTime</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,0031</name>
        <description>SeriesTime</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,0032</name>
        <description>AcquisitionTime</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,0033</name>
        <description>ContentTime</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0040,0244</name>
        <description>PerformedProcedureStepStartDate</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0040,0245</name>
        <description>PerformedProcedureStepStartTime</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0029,1009</name>
        <description>Unknown Tag</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0029,1019</name>
        <description>Unknown Tag</description>
        <editable>no</editable>
    </item>

    <!-- Unique identifier (UID) fields -->
    <item>
        <name>0002,0003</name>
        <description>MediaStorageSOPInstanceUID</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,0018</name>
        <description>SOPInstanceUID</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0008,1155</name>
        <description>ReferencedSOPInstanceUID</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0020,000e</name>
        <description>SeriesInstanceUID</description>
        <editable>no</editable>
    </item>
    <item>
        <name>0020,0052</name>
        <description>FrameOfReferenceUID</description>
        <editable>no</editable>
    </item>
</data>
15 changes: 11 additions & 4 deletions dicat/lib/dicom_anonymizer_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ def read_csv(csv_file):
return dicom_dict_list


def mass_zapping(dicom_dict_list, verbose):
def mass_zapping(dicom_dict_list, verbose, xml_file_with_fields_to_zap):
"""
Function that deidentifies a given list of DICOM studies.
Expand All @@ -429,7 +429,7 @@ def mass_zapping(dicom_dict_list, verbose):
error_arr = []
no_valid_dicom = []
for row in dicom_dict_list:
field_dict = map_DICOM_fields(row)
field_dict = map_DICOM_fields(row, xml_file_with_fields_to_zap)
if not field_dict:
print 'No valid DICOM file was found in ' + row['dcm_dir']
no_valid_dicom.append(row['dcm_dir'])
Expand All @@ -453,21 +453,28 @@ def mass_zapping(dicom_dict_list, verbose):
return success_arr, error_arr, no_valid_dicom


def map_DICOM_fields(dicom_dict):
def map_DICOM_fields(dicom_dict, xml_file_with_fields_to_zap):
"""
Function that maps DICOM values with the new values provided in dicom_dict.
:param dicom_dict: DICOM dictionary with DICOM path, new patient name, dob
and sex information to modify into the DICOM files
:type dicom_dict: dict
:param xml_file_with_fields_to_zap: path to the XML file with the list of
DICOM fields to zap
:type xml_file_with_fields_to_zap: str
:return: field_dict -> dictionary of {field: values} to be replaced in DICOM
:rtype: dict
"""

# Read the XML file with the identifying DICOM fields
xml_file = load_xml('data/fields_to_zap.xml')
if xml_file_with_fields_to_zap is None:
xml_file = load_xml('data/fields_to_zap.xml')
else:
xml_file = xml_file_with_fields_to_zap
field_dict = grep_dicom_fields(xml_file)

# Read DICOM header and grep identifying DICOM field values
Expand Down
16 changes: 13 additions & 3 deletions dicat/mass_deidentify.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@

def main():
csv_file = ''
xml_zap_file = None
verbose = False
long_options = ["help", "csvfile=", "verbose"]
long_options = ["help", "csvfile=", "xmlfile=", "verbose"]
usage = (
'usage : mass_deidentify -c <csv_file>\n\n'
'options: \n'
'\t-c, --csvfile: CSV file with the following format\n'
'\t-x, --xmlfile: XML file with the list of DICOM fields to zap\n'
'\t-v, --verbose: if set, be verbose. Note: regardless of whether the\n'
'\t verbose option is set, a summary of success/failure\n'
'\t will be provided at the end of execution.\n\n'
Expand All @@ -28,7 +30,7 @@ def main():
)

try:
opts, args = getopt.getopt(sys.argv[1:], 'hc:v', long_options)
opts, args = getopt.getopt(sys.argv[1:], 'hx:c:v', long_options)
except getopt.GetoptError as err:
print usage
sys.exit(2)
Expand All @@ -39,13 +41,21 @@ def main():
sys.exit()
elif opt in ("-c", "--csvlist"):
csv_file = arg
if opt in ("-x", "--xmlfile"):
xml_zap_file = arg
if opt in ("-v", "--verbose"):
verbose = True

if xml_zap_file and not os.path.isfile(xml_zap_file):
message = 'ERROR: Could not find the XML file with the list of DICOM fields to zap'
print message
print usage
sys.exit(2)

if os.path.isfile(csv_file):
dicom_dict_list = methods.read_csv(csv_file)
(success_arr, error_arr, no_valid_dicom) = methods.mass_zapping(
dicom_dict_list, verbose
dicom_dict_list, verbose, xml_zap_file
)
methods.print_mass_summary(success_arr, error_arr, no_valid_dicom)
else:
Expand Down

0 comments on commit 977f531

Please sign in to comment.