From 45a2974b0a5ebe08f4fc331c44aabc5df06bdf66 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Fri, 21 Jun 2024 17:13:17 -0400 Subject: [PATCH 1/3] Apply isort This patch applies effects of the following patch that adds `isort` to CI. The patch-ordering is because adding to CI first would cause CI to fail on finding files need to be re-formatted by `pre-commit`. This also separates manual changes from automated changes. Disclaimer: Participation by NIST in the creation of the documentation of mentioned software is not intended to imply a recommendation or endorsement by the National Institute of Standards and Technology, nor is it intended to imply that any specific software is necessarily the best available for the purpose. Signed-off-by: Alex Nelson --- demos/demo_fiwalk_diskimage.py | 4 +++- demos/demo_mac_timeline.py | 1 - demos/demo_mac_timeline_iter.py | 1 + demos/demo_piecewise.py | 7 ++++--- demos/demo_plot_times.py | 7 +++---- demos/demo_readtimes.py | 8 +++---- demos/demo_registry_timeline.py | 1 - demos/demo_sizes.py | 4 ++-- demos/demo_spark.py | 7 ++++--- demos/spark/demo_spark.py | 7 ++++--- demos/vmstats/vmstats.py | 5 +++-- demos/vmstats/vmstats_decode.py | 18 +++++++++------- dfxml/__init__.py | 17 ++++++++------- dfxml/bin/Extractor.py | 6 +++--- dfxml/bin/allocation_counter.py | 8 +++---- dfxml/bin/break_out_diffs_by_anno.py | 6 ++++-- dfxml/bin/cat_fileobjects.py | 5 +++-- dfxml/bin/cat_partitions.py | 3 ++- dfxml/bin/corpus_sync.py | 6 ++++-- dfxml/bin/dedup.py | 6 +++++- dfxml/bin/deidentify_xml.py | 4 +++- dfxml/bin/dfxinfo.py | 2 +- dfxml/bin/dfxml_tool.py | 6 +++--- dfxml/bin/exp_slack.py | 3 ++- dfxml/bin/filesdb.py | 7 +++++-- dfxml/bin/hash_sectors.py | 5 +++-- dfxml/bin/iblkfind.py | 3 ++- dfxml/bin/icarvingtruth.py | 7 ++++--- dfxml/bin/idifference.py | 1 + dfxml/bin/idifference2.py | 4 ++-- dfxml/bin/iextract.py | 6 +++++- dfxml/bin/igrep.py | 5 +++-- dfxml/bin/ihistogram.py | 17 +++++++++------ dfxml/bin/imap.py | 3 +-- dfxml/bin/imicrosoft_redact.py | 10 +++++---- dfxml/bin/iredact.py | 11 ++++++---- dfxml/bin/ireport.py | 15 +++++++++---- dfxml/bin/iverify.py | 7 +++++-- dfxml/bin/mem_info.py | 5 +++-- dfxml/bin/nsrl_rds.py | 1 - dfxml/bin/rdifference.py | 10 +++++++-- dfxml/bin/report_silent_changes.py | 6 ++++-- dfxml/bin/summarize_differential_dfxml.py | 3 +-- dfxml/bin/tcpdiff.py | 9 +++++--- dfxml/bin/validate_dfxml.py | 6 ++++-- dfxml/bin/xdiff.py | 5 ++++- dfxml/bin/xmirror.py | 5 ++++- dfxml/bin/xml2body.py | 14 ++++++++----- dfxml/fiwalk.py | 11 ++++++---- dfxml/histogram.py | 3 +-- dfxml/objects.py | 11 +++++----- dfxml/writer.py | 21 +++++++++---------- setup.py | 1 + .../test_differential_dfxml.py | 2 +- tests/misc_bin_tests/dfxml_test.py | 1 + tests/misc_bin_tests/iexport_test.py | 3 ++- tests/misc_bin_tests/test_idifference.py | 4 +++- tests/misc_object_tests/ByteRun_test.py | 10 ++++----- tests/misc_object_tests/ByteRuns_test.py | 3 ++- tests/misc_object_tests/CellObject_test.py | 3 +-- .../DFXMLObject_program_test.py | 1 + .../misc_object_tests/DiskImageObject_test.py | 6 +++--- .../FileObject_allocation_test.py | 1 + .../FileObject_byte_run_facets_test.py | 2 +- .../FileObject_externals_test.py | 3 ++- .../FileObject_from_stat_test.py | 3 ++- tests/misc_object_tests/FileObject_test.py | 6 +++--- .../LibraryObject_read_test.py | 2 +- .../LibraryObject_write_test.py | 2 +- tests/misc_object_tests/Makefile_test.py | 3 ++- .../misc_object_tests/PartitionObject_test.py | 6 +++--- .../PartitionSystemObject_test.py | 6 +++--- tests/misc_object_tests/RegXMLObject_test.py | 5 +++-- .../VolumeObject_externals_test.py | 6 +++--- .../VolumeObject_hash_test.py | 3 ++- tests/misc_object_tests/VolumeObject_test.py | 6 +++--- .../diff_file_ignore_sample_dfxml_test.py | 1 + .../diff_file_ignore_test.py | 2 +- .../diffing_ByteRuns_test.py | 2 +- .../diffing_CellObject_test.py | 5 +++-- .../diffing_FileObject_test.py | 3 ++- .../diffing_HiveObject_test.py | 3 ++- .../diffing_TimestampObject_test.py | 5 +++-- .../diffing_VolumeObject_test.py | 3 ++- tests/misc_object_tests/error_test.py | 6 +++--- tests/misc_object_tests/libtest.py | 4 ++-- tests/misc_object_tests/objects_test.py | 5 +++-- .../misc_object_tests/storage_layers_test.py | 8 +++---- .../misc_object_tests/test_TCPFlowObjects.py | 3 ++- tests/test_objects.py | 1 + tests/test_reads.py | 1 + tests/test_version.py | 1 + 92 files changed, 297 insertions(+), 197 deletions(-) diff --git a/demos/demo_fiwalk_diskimage.py b/demos/demo_fiwalk_diskimage.py index de57bdb..8336d7b 100644 --- a/demos/demo_fiwalk_diskimage.py +++ b/demos/demo_fiwalk_diskimage.py @@ -6,10 +6,12 @@ if the required buffer size exceeds available RAM! """ -import sys import io +import sys + from dfxml import fiwalk + def writeDfxml(imageFile: str, outFile: str) -> None: """Generate filesystem metadata for disk image and and write resulting dfxml to file""" diff --git a/demos/demo_mac_timeline.py b/demos/demo_mac_timeline.py index 6ee6671..af6121b 100644 --- a/demos/demo_mac_timeline.py +++ b/demos/demo_mac_timeline.py @@ -7,7 +7,6 @@ sys.path.append( os.path.join(os.path.dirname(__file__), "..")) import dfxml - timeline = [] def process(fi): diff --git a/demos/demo_mac_timeline_iter.py b/demos/demo_mac_timeline_iter.py index 873de23..a5294d2 100644 --- a/demos/demo_mac_timeline_iter.py +++ b/demos/demo_mac_timeline_iter.py @@ -22,6 +22,7 @@ sys.path.append( os.path.join(os.path.dirname(__file__), "..")) import dfxml + def main(): if len(sys.argv) < 2: print("Usage: {} ".format(sys.argv[0])) diff --git a/demos/demo_piecewise.py b/demos/demo_piecewise.py index d9edef5..56941e6 100644 --- a/demos/demo_piecewise.py +++ b/demos/demo_piecewise.py @@ -9,10 +9,11 @@ import sys sys.path.append( os.path.join(os.path.dirname(__file__), "..")) -import dfxml - +import collections +import math +import sys -import math,sys,collections +import dfxml class SectorCorrelator: diff --git a/demos/demo_plot_times.py b/demos/demo_plot_times.py index 82b07bb..487a98d 100644 --- a/demos/demo_plot_times.py +++ b/demos/demo_plot_times.py @@ -1,14 +1,13 @@ #!/usr/bin/python -import fiwalk -import time - import os import sys +import time + +import fiwalk sys.path.append( os.path.join(os.path.dirname(__file__), "..")) import dfxml - if __name__=="__main__": import sys from optparse import OptionParser diff --git a/demos/demo_readtimes.py b/demos/demo_readtimes.py index c57f3b4..caeec94 100644 --- a/demos/demo_readtimes.py +++ b/demos/demo_readtimes.py @@ -1,15 +1,15 @@ #!/usr/bin/python """Reads an fiwalk XML file and reports how many of the files are still in the image...""" -import sys,os - import os import sys sys.path.append( os.path.join(os.path.dirname(__file__), "..")) +import time + import dfxml import dfxml.fiwalk as fiwalk -import time + def calc_jumps(fis,title): print(title) @@ -39,7 +39,7 @@ def calc_jumps(fis,title): if __name__=="__main__": import sys from optparse import OptionParser - from subprocess import Popen,PIPE + from subprocess import PIPE, Popen global options parser = OptionParser() diff --git a/demos/demo_registry_timeline.py b/demos/demo_registry_timeline.py index 656fb3b..9739c0d 100644 --- a/demos/demo_registry_timeline.py +++ b/demos/demo_registry_timeline.py @@ -5,7 +5,6 @@ sys.path.append( os.path.join(os.path.dirname(__file__), "..")) import dfxml - timeline = [] def process(co): diff --git a/demos/demo_sizes.py b/demos/demo_sizes.py index 5ddecce..7afee2d 100644 --- a/demos/demo_sizes.py +++ b/demos/demo_sizes.py @@ -4,10 +4,10 @@ # Demo program that shows how to calculate the average size of file objects in a DFXML file # +import collections import math -import sys import os -import collections +import sys sys.path.append( os.path.join(os.path.dirname(__file__), "..")) import dfxml diff --git a/demos/demo_spark.py b/demos/demo_spark.py index 10229cb..aa6fdfc 100644 --- a/demos/demo_spark.py +++ b/demos/demo_spark.py @@ -4,18 +4,19 @@ # This program runs spark if it is not already running -import sys import os +import sys sys.path.append("../python") from dfxml_writer import DFXMLWriter + def spark_demo(): """A small spark program. Must be run under spark""" - from pyspark import SparkConf - from pyspark import SparkContext import operator + from pyspark import SparkConf, SparkContext + conf = SparkConf() sc = SparkContext(conf=conf) m = 1000000 diff --git a/demos/spark/demo_spark.py b/demos/spark/demo_spark.py index 552931e..aa9f7ba 100644 --- a/demos/spark/demo_spark.py +++ b/demos/spark/demo_spark.py @@ -4,18 +4,19 @@ # This program runs Spark if it is not already running -import sys import os +import sys sys.path.append("../python") from dfxml_writer import DFXMLWriter + def spark_demo(): """A small Spark program. Must be run under Spark""" - from pyspark import SparkConf - from pyspark import SparkContext import operator + from pyspark import SparkConf, SparkContext + conf = SparkConf() sc = SparkContext(conf=conf) m = 1000000 diff --git a/demos/vmstats/vmstats.py b/demos/vmstats/vmstats.py index 4d3c375..2093307 100644 --- a/demos/vmstats/vmstats.py +++ b/demos/vmstats/vmstats.py @@ -6,9 +6,10 @@ import os import os.path import sys +import time import xml.etree.ElementTree as ET + import psutil -import time sys.path.append( os.path.join(os.path.dirname(__file__), "../../python") ) @@ -70,8 +71,8 @@ def write_process_dfxml_to_file(fname,prettyprint=False): f.write("\n") if __name__=="__main__": - from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter import time + from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument("fname",help="filename") parser.add_argument("--repeat",help="Number of times to repeat",type=int,default=1) diff --git a/demos/vmstats/vmstats_decode.py b/demos/vmstats/vmstats_decode.py index 2681efa..fe71fe7 100644 --- a/demos/vmstats/vmstats_decode.py +++ b/demos/vmstats/vmstats_decode.py @@ -2,20 +2,22 @@ # # plot vmstats output +import datetime +import json import os import os.path +import shutil +import statistics import sys +import time import xml.etree.ElementTree as ET + import psutil -import time -import json -import statistics -import shutil -import datetime sys.path.append( os.path.join(os.path.dirname(__file__), "../python") ) import dfxml + def get_dfxml(fname): # Given a file, return dfxml objects with open(fname,"r") as f: @@ -70,9 +72,10 @@ def html_filename(root): TEMPLATE_FILE = "vmstats_decode.html" def html_generate(root, *, prev_fname, next_fname): - import jinja2 import os.path + import jinja2 + stats = get_stats(root) ps_list = list( get_processes(root) ) @@ -91,8 +94,8 @@ def html_generate(root, *, prev_fname, next_fname): if __name__=="__main__": - from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter import time + from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument("fname",nargs="+", help="filename") parser.add_argument("--ps", help="Show the processes", action='store_true') @@ -170,6 +173,7 @@ def myconverter(o): if args.plot: import datetime + import matplotlib.pyplot as plt t0 = stats[0]['start_time'] when = [ (st['start_time']-t0).total_seconds() for st in stats] diff --git a/dfxml/__init__.py b/dfxml/__init__.py index 3823e1f..2c60fc7 100644 --- a/dfxml/__init__.py +++ b/dfxml/__init__.py @@ -45,15 +45,14 @@ __version__ = "1.0.2" -import sys -import re -from sys import stderr -from subprocess import Popen,PIPE import base64 +import datetime import hashlib import os - -import datetime +import re +import sys +from subprocess import PIPE, Popen +from sys import stderr #slg: removed this, because the dfxml module shouldn't define a logger. #I don't even think that objects should define a logger... @@ -90,7 +89,9 @@ def timestamp2iso8601(ts): import time return time.strftime("%FT%TZ",time.gmtime(ts)) -from datetime import tzinfo,timedelta +from datetime import timedelta, tzinfo + + class GMTMIN(tzinfo): def __init__(self,minoffset): # DST starts last Sunday in March self.minoffset = minoffset @@ -417,6 +418,7 @@ def iso8601(self): def timestamp(self): import time + # Do we have a cached representation? try: return self.timestamp_ @@ -434,6 +436,7 @@ def timestamp(self): def datetime(self): import datetime + # return the datetime from parsing either iso8601 or from parsing timestamp try: self.datetime_ = self.ts2datetime(self.timestamp_) diff --git a/dfxml/bin/Extractor.py b/dfxml/bin/Extractor.py index b438bea..a0b43fa 100644 --- a/dfxml/bin/Extractor.py +++ b/dfxml/bin/Extractor.py @@ -15,11 +15,11 @@ __version__ = "0.5.2" +import copy +import hashlib +import logging import os import sys -import logging -import hashlib -import copy import traceback _logger = logging.getLogger(os.path.basename(__file__)) diff --git a/dfxml/bin/allocation_counter.py b/dfxml/bin/allocation_counter.py index afecf00..949c515 100644 --- a/dfxml/bin/allocation_counter.py +++ b/dfxml/bin/allocation_counter.py @@ -22,14 +22,14 @@ # * Tabular output in HTML # * Tabular output in LaTeX -import dfxml.objects as Objects -import dfxml.bin.make_differential_dfxml - import collections import logging +import os import sys import xml.etree.ElementTree as ET -import os + +import dfxml.bin.make_differential_dfxml +import dfxml.objects as Objects _logger = logging.getLogger(os.path.basename(__file__)) diff --git a/dfxml/bin/break_out_diffs_by_anno.py b/dfxml/bin/break_out_diffs_by_anno.py index eb5b8e6..a77b8a1 100644 --- a/dfxml/bin/break_out_diffs_by_anno.py +++ b/dfxml/bin/break_out_diffs_by_anno.py @@ -22,9 +22,11 @@ __version__ = "0.1.0" -import dfxml.objects as Objects -import sys import collections +import sys + +import dfxml.objects as Objects + def main(): #Key: (annotation, histogram) diff --git a/dfxml/bin/cat_fileobjects.py b/dfxml/bin/cat_fileobjects.py index cf0e95e..af0c6ff 100644 --- a/dfxml/bin/cat_fileobjects.py +++ b/dfxml/bin/cat_fileobjects.py @@ -19,11 +19,12 @@ __version__ = "0.3.1" +import logging +import os import sys import xml.etree.ElementTree as ET + import dfxml -import logging -import os _logger = logging.getLogger(os.path.basename(__file__)) diff --git a/dfxml/bin/cat_partitions.py b/dfxml/bin/cat_partitions.py index ea1d20b..91c0745 100644 --- a/dfxml/bin/cat_partitions.py +++ b/dfxml/bin/cat_partitions.py @@ -29,12 +29,13 @@ __version__ = "0.2.2" -import dfxml.objects as Objects import logging import os import sys import xml.etree.ElementTree as ET +import dfxml.objects as Objects + _logger = logging.getLogger(os.path.basename(__file__)) def main(): diff --git a/dfxml/bin/corpus_sync.py b/dfxml/bin/corpus_sync.py index 5f849e8..6cc9458 100644 --- a/dfxml/bin/corpus_sync.py +++ b/dfxml/bin/corpus_sync.py @@ -2,9 +2,11 @@ # # sync corpus based on DFXML files +from collections import defaultdict + import dfxml import dfxml.fiwalk as fiwalk -from collections import defaultdict + class CorpusDB: def __init__(self): @@ -28,8 +30,8 @@ def __delitem__(self,fi): if __name__=="__main__": - from optparse import OptionParser from copy import deepcopy + from optparse import OptionParser parser = OptionParser() (options,args) = parser.parse_args() diff --git a/dfxml/bin/dedup.py b/dfxml/bin/dedup.py index 62ef989..705a501 100644 --- a/dfxml/bin/dedup.py +++ b/dfxml/bin/dedup.py @@ -2,7 +2,11 @@ # # dedup - detect and optionally remove duplicates based on a DFXML file -import os,dfxml,xml +import os +import xml + +import dfxml + class dedup: def __init__(self): diff --git a/dfxml/bin/deidentify_xml.py b/dfxml/bin/deidentify_xml.py index 3511619..72c809f 100644 --- a/dfxml/bin/deidentify_xml.py +++ b/dfxml/bin/deidentify_xml.py @@ -14,7 +14,9 @@ ok_top_paths = ok_top_paths_win + ok_top_paths_mac + ['$orphanfiles'] acceptable_extensions = ["exe","dll","sys","com","hlp"] -import os.path, os, sys +import os +import os.path +import sys partdir : typing.Dict[str, str] = dict() def sanitize_part(part): diff --git a/dfxml/bin/dfxinfo.py b/dfxml/bin/dfxinfo.py index d609c87..2c6d64c 100644 --- a/dfxml/bin/dfxinfo.py +++ b/dfxml/bin/dfxinfo.py @@ -4,9 +4,9 @@ Generates a report about what's up with a DFXML file. """ -import platform import os import os.path +import platform import sys import time import xml diff --git a/dfxml/bin/dfxml_tool.py b/dfxml/bin/dfxml_tool.py index 6d068d7..2889536 100644 --- a/dfxml/bin/dfxml_tool.py +++ b/dfxml/bin/dfxml_tool.py @@ -22,9 +22,9 @@ __version__ = '1.0.0' -import sys -import os.path import hashlib +import os.path +import sys from xml.sax.saxutils import escape xmloutputversion = '0.3' @@ -293,7 +293,7 @@ def end_element(name): if(__name__=='__main__'): - from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser global args parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) diff --git a/dfxml/bin/exp_slack.py b/dfxml/bin/exp_slack.py index 4b1d34b..bfb1cd3 100644 --- a/dfxml/bin/exp_slack.py +++ b/dfxml/bin/exp_slack.py @@ -6,12 +6,13 @@ # (c) Martin Mulazzani, 2012 # Additions by Simson Garfinkel -import re import os +import re import sys import dfxml.fiwalk as fiwalk + def proc(fi): # Skip the virtual files? if fi.filename()[0:1] in ['$']: diff --git a/dfxml/bin/filesdb.py b/dfxml/bin/filesdb.py index a7e77e2..30c31d2 100644 --- a/dfxml/bin/filesdb.py +++ b/dfxml/bin/filesdb.py @@ -3,9 +3,12 @@ # a module that holds a database of DFXML files # -import dfxml -from collections import defaultdict import sys +from collections import defaultdict + +import dfxml + + class filesdb: def __init__(self,fname=None): self.sha1db = defaultdict(list) # fi's by hashdb diff --git a/dfxml/bin/hash_sectors.py b/dfxml/bin/hash_sectors.py index 4dd032e..9b25775 100644 --- a/dfxml/bin/hash_sectors.py +++ b/dfxml/bin/hash_sectors.py @@ -14,12 +14,13 @@ __version__ = "0.3.0" -import dfxml.objects as Objects +import hashlib import logging import os -import hashlib import sqlite3 +import dfxml.objects as Objects + _logger = logging.getLogger(os.path.basename(__file__)) _nagged_ids = False diff --git a/dfxml/bin/iblkfind.py b/dfxml/bin/iblkfind.py index afe1a59..2cca274 100644 --- a/dfxml/bin/iblkfind.py +++ b/dfxml/bin/iblkfind.py @@ -3,8 +3,9 @@ Reports the files in which sectors s1, s2, s3... are located. """ -import dfxml,sys +import sys +import dfxml if __name__=="__main__": from optparse import OptionParser diff --git a/dfxml/bin/icarvingtruth.py b/dfxml/bin/icarvingtruth.py index 967ff4e..211d3f0 100644 --- a/dfxml/bin/icarvingtruth.py +++ b/dfxml/bin/icarvingtruth.py @@ -13,12 +13,13 @@ - For all of the sectors not used in the final version, note which sectors of the original files survive. """ -import dfxml -import dfxml.fiwalk as fiwalk import sys import xml.dom.minidom from xml.dom.minidom import parseString +import dfxml +import dfxml.fiwalk as fiwalk + # http://wiki.python.org/moin/MiniDom def make_residual(fi=None,image=None,runs=None): @@ -107,8 +108,8 @@ def sector_from_file(imagefile,sector_number,sectorsize = 512): return imagefile.read(sectorsize) if __name__=="__main__": - from optparse import OptionParser from copy import deepcopy + from optparse import OptionParser parser = OptionParser() parser.usage = '%prog [options] [mapfile1.iso mapfile2.iso ...] masterfile.iso' diff --git a/dfxml/bin/idifference.py b/dfxml/bin/idifference.py index adbdea7..fab41ce 100644 --- a/dfxml/bin/idifference.py +++ b/dfxml/bin/idifference.py @@ -39,6 +39,7 @@ import sys import time import typing + if sys.version_info < (3,1): raise RuntimeError("idifference.py now requires Python 3.1 or above") diff --git a/dfxml/bin/idifference2.py b/dfxml/bin/idifference2.py index f5dad90..dd4cc7e 100644 --- a/dfxml/bin/idifference2.py +++ b/dfxml/bin/idifference2.py @@ -19,15 +19,15 @@ __version__ = "2.0.0alpha2" -import sys import logging import os +import sys _logger = logging.getLogger(os.path.basename(__file__)) -import dfxml.objects as Objects import dfxml.bin.make_differential_dfxml import dfxml.bin.summarize_differential_dfxml +import dfxml.objects as Objects INCLUDE_DOTDIRS = False diff --git a/dfxml/bin/iextract.py b/dfxml/bin/iextract.py index bf5aec9..5a6292d 100644 --- a/dfxml/bin/iextract.py +++ b/dfxml/bin/iextract.py @@ -1,6 +1,10 @@ #!/usr/bin/env python -import zipfile,sys,os,os.path,datetime +import datetime +import os +import os.path +import sys +import zipfile import dfxml import dfxml.fiwalk as fiwalk diff --git a/dfxml/bin/igrep.py b/dfxml/bin/igrep.py index 2dfc843..66d78a6 100644 --- a/dfxml/bin/igrep.py +++ b/dfxml/bin/igrep.py @@ -3,11 +3,12 @@ Reports the files in which files have the string. """ -import fiwalk,dfxml +import fiwalk + +import dfxml if __name__=="__main__": import sys - from optparse import OptionParser parser = OptionParser() parser.usage = '%prog [options] image.iso s1' diff --git a/dfxml/bin/ihistogram.py b/dfxml/bin/ihistogram.py index 620802e..9596959 100644 --- a/dfxml/bin/ihistogram.py +++ b/dfxml/bin/ihistogram.py @@ -2,16 +2,19 @@ """Draw a quick histogram of the timestamps on the hard drive""" import matplotlib + matplotlib.use('agg.pdf') -import dfxml.fiwalk as fiwalk import datetime -from matplotlib.dates import MonthLocator, WeekdayLocator, DateFormatter -from matplotlib.dates import MONDAY,SATURDAY import time + +from matplotlib.dates import (MONDAY, SATURDAY, DateFormatter, MonthLocator, + WeekdayLocator) from pylab import * +import dfxml.fiwalk as fiwalk + def get_dates_and_counts(times): from datetime import date @@ -36,7 +39,8 @@ def version1(times): def version2(times): # see http://mail.python.org/pipermail/python-list/2003-November/236559.html # http://www.gossamer-threads.com/lists/python/python/665014 - from matplotlib.pylab import plot, show, title, xlabel, ylabel, gca, bar, savefig, plot_date + from matplotlib.pylab import (bar, gca, plot, plot_date, savefig, show, + title, xlabel, ylabel) dates_and_counts = get_dates_and_counts(times) dates, counts = zip(*dates_and_counts) @@ -48,11 +52,12 @@ def version2(times): def version3(times): import datetime - import numpy as np + import matplotlib - import matplotlib.pyplot as pyplot import matplotlib.dates as mdates import matplotlib.mlab as mlab + import matplotlib.pyplot as pyplot + import numpy as np dates_and_counts = get_dates_and_counts(times) dates, counts = zip(*dates_and_counts) diff --git a/dfxml/bin/imap.py b/dfxml/bin/imap.py index 32cc473..0732f45 100644 --- a/dfxml/bin/imap.py +++ b/dfxml/bin/imap.py @@ -10,9 +10,8 @@ ################################################################ if __name__=="__main__": import sys - from sys import stdout - from optparse import OptionParser + from sys import stdout parser = OptionParser() parser.usage = '%prog [options] image.iso ' parser.add_option("-d","--debug",help="debug",action="store_true") diff --git a/dfxml/bin/imicrosoft_redact.py b/dfxml/bin/imicrosoft_redact.py index 0acd331..a5bb9b7 100644 --- a/dfxml/bin/imicrosoft_redact.py +++ b/dfxml/bin/imicrosoft_redact.py @@ -9,8 +9,9 @@ hex FF """ -import os.path,sys -from subprocess import Popen,call,PIPE +import os.path +import sys +from subprocess import PIPE, Popen, call sys.path.append(os.getenv("DOMEX_HOME") + "/src/lib/") # add the library sys.path.append(os.getenv("DOMEX_HOME") + "/src/fiwalk/python/") # add the library @@ -96,9 +97,10 @@ def redact_function(ch): if __name__=="__main__": - import sys,time + import sys + import time from optparse import OptionParser - from subprocess import Popen,PIPE + from subprocess import PIPE, Popen global options,xml_out from glob import glob diff --git a/dfxml/bin/iredact.py b/dfxml/bin/iredact.py index 1679aa6..2ddc9ec 100644 --- a/dfxml/bin/iredact.py +++ b/dfxml/bin/iredact.py @@ -64,11 +64,13 @@ """ -import xml.parsers.expat import hashlib import os.path -import dfxml.fiwalk as fiwalk import re +import xml.parsers.expat + +import dfxml.fiwalk as fiwalk + ################################################################ def convert_fileglob_to_re(fileglob): @@ -374,9 +376,10 @@ def close_files(self): self.xmlfile.close() if __name__=="__main__": - import sys,time + import sys + import time from optparse import OptionParser - from subprocess import Popen,PIPE + from subprocess import PIPE, Popen global options parser = OptionParser() diff --git a/dfxml/bin/ireport.py b/dfxml/bin/ireport.py index 6626693..d90bf84 100644 --- a/dfxml/bin/ireport.py +++ b/dfxml/bin/ireport.py @@ -2,16 +2,23 @@ # # Print the stats from a DFXML file -import sys,os,shelve +import os +import shelve +import sys sys.path.append(os.getenv("HOME")+"/slg/src/python") sys.path.append(os.getenv("DOMEX_HOME")+"/src/lib") -from histogram2d import histogram2d +import re + +import fiwalk from histogram import histogram +from histogram2d import histogram2d from statbag import statbag from ttable import ttable -import re,dfxml,fiwalk + +import dfxml + def process_files(fn): drive_files = {} # index of drives @@ -172,8 +179,8 @@ def isfstype(x): return x.fstype==fstype def main(): - from optparse import OptionParser from copy import deepcopy + from optparse import OptionParser global options parser = OptionParser() diff --git a/dfxml/bin/iverify.py b/dfxml/bin/iverify.py index 9ea3dd1..adca52e 100644 --- a/dfxml/bin/iverify.py +++ b/dfxml/bin/iverify.py @@ -1,7 +1,10 @@ #!/usr/bin/python """Reads an fiwalk XML file and reports how many of the files are still in the image...""" -import hashlib, os.path, sys +import hashlib +import os.path +import sys + from dfxml import fiwalk present = [] @@ -24,7 +27,7 @@ def process_fi(fi): def main(): import sys from optparse import OptionParser - from subprocess import Popen,PIPE + from subprocess import PIPE, Popen global options parser = OptionParser() diff --git a/dfxml/bin/mem_info.py b/dfxml/bin/mem_info.py index 4e04368..5be1d1e 100644 --- a/dfxml/bin/mem_info.py +++ b/dfxml/bin/mem_info.py @@ -4,8 +4,9 @@ """ -import xml.etree.ElementTree as ET import sys +import xml.etree.ElementTree as ET + def fmt(n): if args.h: @@ -31,7 +32,7 @@ def process_dfxml(dfxml): if __name__=="__main__": - from argparse import ArgumentParser,ArgumentDefaultsHelpFormatter + from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser parser = ArgumentParser( formatter_class = ArgumentDefaultsHelpFormatter, description="report memory utilization from DFXML file" ) parser.add_argument("--h", help="human format", action='store_true') diff --git a/dfxml/bin/nsrl_rds.py b/dfxml/bin/nsrl_rds.py index 43f0f97..4f6b952 100644 --- a/dfxml/bin/nsrl_rds.py +++ b/dfxml/bin/nsrl_rds.py @@ -7,7 +7,6 @@ import xmlrpclib - if __name__=="__main__": print("Demonstration of NSRL RDS service at %s\n" % RDS_SERVER) print("") diff --git a/dfxml/bin/rdifference.py b/dfxml/bin/rdifference.py index 29d42a8..407ecef 100644 --- a/dfxml/bin/rdifference.py +++ b/dfxml/bin/rdifference.py @@ -30,7 +30,9 @@ #AJN This script does not call out duplicate paths, but they are reported. -import sys,time +import sys +import time + if sys.version_info < (3,1): raise RuntimeError("rdifference.py requires Python 3.1 or above") @@ -244,7 +246,11 @@ def report(self): def output_archive(self,tarname=None,zipname=None): """Write the changed and/or new files to a tarfile or a ZIP file. """ - import zipfile, tarfile, StringIO, datetime + import datetime + import tarfile + import zipfile + + import StringIO tfile = None zfile = None diff --git a/dfxml/bin/report_silent_changes.py b/dfxml/bin/report_silent_changes.py index 4894ce6..d0fe869 100644 --- a/dfxml/bin/report_silent_changes.py +++ b/dfxml/bin/report_silent_changes.py @@ -19,15 +19,17 @@ __version__ = "0.2.2" -import os import logging +import os import sys _logger = logging.getLogger(os.path.basename(__file__)) -import dfxml.objects as Objects import make_differential_dfxml +import dfxml.objects as Objects + + def main(): d = Objects.DFXMLObject("1.2.0") d.program = sys.argv[0] diff --git a/dfxml/bin/summarize_differential_dfxml.py b/dfxml/bin/summarize_differential_dfxml.py index dc64cc6..2da13e3 100644 --- a/dfxml/bin/summarize_differential_dfxml.py +++ b/dfxml/bin/summarize_differential_dfxml.py @@ -20,9 +20,8 @@ import logging import os -from dfxml import objects as Objects - import dfxml.bin.idifference as idifference +from dfxml import objects as Objects _logger = logging.getLogger(os.path.basename(__file__)) diff --git a/dfxml/bin/tcpdiff.py b/dfxml/bin/tcpdiff.py index 162e3b4..ab0c15b 100644 --- a/dfxml/bin/tcpdiff.py +++ b/dfxml/bin/tcpdiff.py @@ -23,13 +23,16 @@ """ -import sys,time +import sys +import time + if sys.version_info < (3,1): raise RuntimeError("rdifference.py requires Python 3.1 or above") import dfxml -import dfxml.fiwalk as fiwalk import dfxml.dfxml_html as dfxml_html +import dfxml.fiwalk as fiwalk + def ptime(t): """Print the time in the requested format. T is a dfxml time value""" @@ -72,8 +75,8 @@ def report(self): dfxml_html.table(['Total Connections',str(len(self.connections))]) if __name__=="__main__": - from optparse import OptionParser from copy import deepcopy + from optparse import OptionParser global options parser = OptionParser() diff --git a/dfxml/bin/validate_dfxml.py b/dfxml/bin/validate_dfxml.py index 47e9855..461f309 100644 --- a/dfxml/bin/validate_dfxml.py +++ b/dfxml/bin/validate_dfxml.py @@ -1,9 +1,11 @@ -import sys,os.path +import os.path +import sys from optparse import OptionParser from sys import stdout - + import dfxml.fiwalk as fiwalk + def demo_dfxml_time_bug(filename): parser = OptionParser() parser.usage = '%prog% [options] xmlfile ' diff --git a/dfxml/bin/xdiff.py b/dfxml/bin/xdiff.py index 31f8d2e..41d403e 100644 --- a/dfxml/bin/xdiff.py +++ b/dfxml/bin/xdiff.py @@ -1,9 +1,12 @@ # # Report the difference between two dfxml files # +import sys + from filesdb import filesdb + import dfxml -import sys + # # test program. Reads a database and dumps it. # diff --git a/dfxml/bin/xmirror.py b/dfxml/bin/xmirror.py index 3dfbc58..7ed6dce 100644 --- a/dfxml/bin/xmirror.py +++ b/dfxml/bin/xmirror.py @@ -1,9 +1,12 @@ # # Using two XML files make the current system look like the master # +import sys + from filesdb import filesdb + import dfxml -import sys + # # test program. Reads a database and dumps it. # diff --git a/dfxml/bin/xml2body.py b/dfxml/bin/xml2body.py index 1dc6846..187a97f 100644 --- a/dfxml/bin/xml2body.py +++ b/dfxml/bin/xml2body.py @@ -8,21 +8,25 @@ Dave Dittrich """ -import sys,time -import dfxml -import dfxml.fiwalk as fiwalk +import sys +import time import idifference - # We are re-using code from idifference.py and over-riding # the process_fi method in the DiskState class. from idifference import DiskState +import dfxml +import dfxml.fiwalk as fiwalk + + def dprint(x): global options if options.debug: print(x) import stat + + def is_suid(mode): return(mode & stat.S_ISUID == stat.S_ISUID) def is_sgid(mode): return(mode & stat.S_ISGID == stat.S_ISGID) def is_svtx(mode): return(mode & stat.S_ISVTX == stat.S_ISVTX) @@ -127,8 +131,8 @@ def process_fi(self,fi): DiskState.process_fi = process_fi if __name__=="__main__": - from optparse import OptionParser from copy import deepcopy + from optparse import OptionParser global options parser = OptionParser() diff --git a/dfxml/fiwalk.py b/dfxml/fiwalk.py index 5b92f8c..32e8d3d 100644 --- a/dfxml/fiwalk.py +++ b/dfxml/fiwalk.py @@ -29,9 +29,11 @@ sys.path.append( os.path.join(os.path.dirname(__file__), "..")) -import dfxml +from subprocess import PIPE, Popen from sys import stderr -from subprocess import Popen,PIPE + +import dfxml + ALLOC_ONLY = 1 fiwalk_cached_installed_version = None @@ -40,8 +42,8 @@ def fiwalk_installed_version(fiwalk='fiwalk'): global fiwalk_cached_installed_version if fiwalk_cached_installed_version: return fiwalk_cached_installed_version - from subprocess import Popen,PIPE import re + from subprocess import PIPE, Popen for line in Popen([fiwalk,'-V'],stdout=PIPE).stdout.read().decode('utf-8').split("\n"): g = re.search("^FIWalk Version:\s+(.*)$",line) if g: @@ -133,7 +135,8 @@ def fiwalk_xml_stream( ) -> typing.BinaryIO: """ Returns an fiwalk XML stream given a disk image by running fiwalk.""" if flags & ALLOC_ONLY: fiwalk_args += "-O" - from subprocess import call,Popen,PIPE + from subprocess import PIPE, Popen, call + # Make sure we have a valid fiwalk try: res = Popen([fiwalk,'-V'],stdout=PIPE).communicate()[0] diff --git a/dfxml/histogram.py b/dfxml/histogram.py index 0adf76c..7c03ac5 100644 --- a/dfxml/histogram.py +++ b/dfxml/histogram.py @@ -112,7 +112,6 @@ def make_graph(self, figureTitle='Bargraph', binTitle='', # called, or every 'graph' will simply be drawn on top of the # previous one. # make the figure tall and skinny - #w,h = plot.figaspect(1.75) #fig = plot.figure(figsize=(w,h)) fig = plot.figure() @@ -221,8 +220,8 @@ def f2(a,b): j.print_info(1000) print("Histogram test routine...") - from datetime import date import time + from datetime import date j = histogram() j.add(date.fromtimestamp(time.time()),4) j.add(date(2005,3,1)) diff --git a/dfxml/objects.py b/dfxml/objects.py index b78f311..3c0d157 100644 --- a/dfxml/objects.py +++ b/dfxml/objects.py @@ -35,24 +35,23 @@ # * Compatibility with the DFXML schema, version >=2.0.0. import abc +import copy import logging +import os +import platform import re -import copy -import xml.etree.ElementTree as ET +import struct import subprocess -import os import sys -import struct -import platform import typing import warnings +import xml.etree.ElementTree as ET # The following allows us to import the dfxml module as dfxml # There may be a cleaner way to do this. sys.path.append( os.path.dirname(__file__) + "/..") import dfxml # type: ignore - _logger = logging.getLogger(os.path.basename(__file__)) # Contains: (namespace, local name, class) qualified XML element name pairs, with a reference to the class that had the problem. diff --git a/dfxml/writer.py b/dfxml/writer.py index 1d29e1b..cab0541 100644 --- a/dfxml/writer.py +++ b/dfxml/writer.py @@ -2,20 +2,19 @@ # # dfxml_gen.py: Generate DFXML # -import sys -import os -import time +import atexit +import datetime +import logging import os import pwd -import sys -import datetime import subprocess +import sys +import time import xml.etree.ElementTree as ET import xml.parsers.expat + import __main__ -import atexit import psutil -import logging __version__="0.1" @@ -165,7 +164,7 @@ def add_DFXML_execution_environment(self,e): entities[chr(ch)] = "\\%03o" % ch - from xml.sax.saxutils import quoteattr,escape + from xml.sax.saxutils import escape, quoteattr for (name,value) in os.environ.items(): ET.SubElement(env, 'var', {'name':escape(name,entities), 'value':escape(value,entities)}) @@ -277,10 +276,10 @@ def prettyprint(self): def add_spark(self,node): """Connect to SPARK on local host and dump information. Uses requests. Note: disables HTTPS certificate warnings.""" - import os import json - from urllib.request import urlopen + import os import ssl + from urllib.request import urlopen if "SPARK_ENV_LOADED" not in os.environ: return # no Spark @@ -350,7 +349,7 @@ def done(self): self.timestamp("done") if __name__=="__main__": - from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser arg_parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter, description="""Demo program. Run DFXML for this program and print the results. If you run it on a system with SPARK, you get the spark DFXML too!""") diff --git a/setup.py b/setup.py index e28486f..e67cd41 100644 --- a/setup.py +++ b/setup.py @@ -15,4 +15,5 @@ # We would appreciate acknowledgement if the software is used. import setuptools + setuptools.setup() diff --git a/tests/make_differential_dfxml/test_differential_dfxml.py b/tests/make_differential_dfxml/test_differential_dfxml.py index 13e9b44..6ae1fca 100644 --- a/tests/make_differential_dfxml/test_differential_dfxml.py +++ b/tests/make_differential_dfxml/test_differential_dfxml.py @@ -33,8 +33,8 @@ __version__ = "0.2.2" import argparse -import os import logging +import os import sys import tempfile import typing diff --git a/tests/misc_bin_tests/dfxml_test.py b/tests/misc_bin_tests/dfxml_test.py index 9264b1d..50c4a7f 100644 --- a/tests/misc_bin_tests/dfxml_test.py +++ b/tests/misc_bin_tests/dfxml_test.py @@ -1,5 +1,6 @@ from dfxml import * + def check_equal(a,b,want=None): da = dftime(a) db = dftime(b) diff --git a/tests/misc_bin_tests/iexport_test.py b/tests/misc_bin_tests/iexport_test.py index 1094e49..c9d6ba2 100644 --- a/tests/misc_bin_tests/iexport_test.py +++ b/tests/misc_bin_tests/iexport_test.py @@ -1,8 +1,9 @@ -import sys import os +import sys from dfxml.bin.iexport import * + def test_iexport(): r1 = Run(0,1000) r2 = Run(50,60) diff --git a/tests/misc_bin_tests/test_idifference.py b/tests/misc_bin_tests/test_idifference.py index f7c8b74..5ab8179 100644 --- a/tests/misc_bin_tests/test_idifference.py +++ b/tests/misc_bin_tests/test_idifference.py @@ -3,7 +3,9 @@ Test script. Evaluates idifference.py on a sequence of disk images. """ -import sys, os, subprocess +import os +import subprocess +import sys if __name__ == "__main__": from optparse import OptionParser diff --git a/tests/misc_object_tests/ByteRun_test.py b/tests/misc_object_tests/ByteRun_test.py index 73dd74f..bff88a7 100644 --- a/tests/misc_object_tests/ByteRun_test.py +++ b/tests/misc_object_tests/ByteRun_test.py @@ -15,16 +15,16 @@ __version__ = "0.2.0" -import os -import sys import copy -import logging import hashlib - -import dfxml.objects as Objects +import logging +import os +import sys import libtest +import dfxml.objects as Objects + _logger = logging.getLogger(os.path.basename(__file__)) TEST_BYTE_STRING = b"test" diff --git a/tests/misc_object_tests/ByteRuns_test.py b/tests/misc_object_tests/ByteRuns_test.py index 94087a4..0e7159e 100644 --- a/tests/misc_object_tests/ByteRuns_test.py +++ b/tests/misc_object_tests/ByteRuns_test.py @@ -14,12 +14,13 @@ __version__ = "0.1.1" +import copy import os import sys -import copy import dfxml.objects as Objects + def test_all(): br0 = Objects.ByteRun() br0.img_offset = 0 diff --git a/tests/misc_object_tests/CellObject_test.py b/tests/misc_object_tests/CellObject_test.py index 382c02e..fdf6714 100644 --- a/tests/misc_object_tests/CellObject_test.py +++ b/tests/misc_object_tests/CellObject_test.py @@ -14,13 +14,12 @@ __version__ = "0.1.1" +import logging import os import sys import dfxml.objects as Objects -import logging -import os def test_all(): logging.basicConfig(level=logging.DEBUG) diff --git a/tests/misc_object_tests/DFXMLObject_program_test.py b/tests/misc_object_tests/DFXMLObject_program_test.py index fb29382..14532ef 100644 --- a/tests/misc_object_tests/DFXMLObject_program_test.py +++ b/tests/misc_object_tests/DFXMLObject_program_test.py @@ -20,6 +20,7 @@ import dfxml.objects as Objects + def main(): dobj = Objects.parse(args.in_dfxml) assert dobj.program == args.expected_program diff --git a/tests/misc_object_tests/DiskImageObject_test.py b/tests/misc_object_tests/DiskImageObject_test.py index af3675b..71d438a 100644 --- a/tests/misc_object_tests/DiskImageObject_test.py +++ b/tests/misc_object_tests/DiskImageObject_test.py @@ -15,14 +15,14 @@ __version__ = "0.3.1" +import logging import os import sys -import logging - -import dfxml.objects as Objects import libtest +import dfxml.objects as Objects + _logger = logging.getLogger(os.path.basename(__file__)) ERROR_1 = "Error 1" diff --git a/tests/misc_object_tests/FileObject_allocation_test.py b/tests/misc_object_tests/FileObject_allocation_test.py index 77643c5..37ed8e5 100644 --- a/tests/misc_object_tests/FileObject_allocation_test.py +++ b/tests/misc_object_tests/FileObject_allocation_test.py @@ -19,6 +19,7 @@ import dfxml.objects as Objects + def test_all(): fa1 = Objects.FileObject() fa1.alloc = True diff --git a/tests/misc_object_tests/FileObject_byte_run_facets_test.py b/tests/misc_object_tests/FileObject_byte_run_facets_test.py index 10c2899..0b5aa24 100644 --- a/tests/misc_object_tests/FileObject_byte_run_facets_test.py +++ b/tests/misc_object_tests/FileObject_byte_run_facets_test.py @@ -13,9 +13,9 @@ __version__ = "0.1.1" -import sys import logging import os +import sys import xml.etree.ElementTree as ET import dfxml.objects as Objects diff --git a/tests/misc_object_tests/FileObject_externals_test.py b/tests/misc_object_tests/FileObject_externals_test.py index 4d60870..bb43dc7 100644 --- a/tests/misc_object_tests/FileObject_externals_test.py +++ b/tests/misc_object_tests/FileObject_externals_test.py @@ -14,13 +14,14 @@ __version__ = "0.1.1" -import sys import logging import os +import sys import xml.etree.ElementTree as ET import dfxml.objects as Objects + def test_all(): diff --git a/tests/misc_object_tests/FileObject_from_stat_test.py b/tests/misc_object_tests/FileObject_from_stat_test.py index e66ee1b..54f05b8 100644 --- a/tests/misc_object_tests/FileObject_from_stat_test.py +++ b/tests/misc_object_tests/FileObject_from_stat_test.py @@ -14,12 +14,13 @@ __version__ = "0.1.1" -import sys import logging import os +import sys import dfxml.objects as Objects + def test_all(): logging.basicConfig(level=logging.DEBUG) diff --git a/tests/misc_object_tests/FileObject_test.py b/tests/misc_object_tests/FileObject_test.py index 18cb7ff..3d63409 100644 --- a/tests/misc_object_tests/FileObject_test.py +++ b/tests/misc_object_tests/FileObject_test.py @@ -15,14 +15,14 @@ __version__ = "0.1.1" +import logging import os import sys -import logging - -import dfxml.objects as Objects import libtest +import dfxml.objects as Objects + _logger = logging.getLogger(os.path.basename(__file__)) diff --git a/tests/misc_object_tests/LibraryObject_read_test.py b/tests/misc_object_tests/LibraryObject_read_test.py index 883b6ca..69367ac 100644 --- a/tests/misc_object_tests/LibraryObject_read_test.py +++ b/tests/misc_object_tests/LibraryObject_read_test.py @@ -16,9 +16,9 @@ __version__ = "0.1.1" -import sys import logging import os +import sys import dfxml import dfxml.objects as Objects diff --git a/tests/misc_object_tests/LibraryObject_write_test.py b/tests/misc_object_tests/LibraryObject_write_test.py index 551a8b2..13e7a49 100644 --- a/tests/misc_object_tests/LibraryObject_write_test.py +++ b/tests/misc_object_tests/LibraryObject_write_test.py @@ -12,9 +12,9 @@ __version__ = "0.1.1" -import sys import logging import os +import sys import dfxml import dfxml.objects as Objects diff --git a/tests/misc_object_tests/Makefile_test.py b/tests/misc_object_tests/Makefile_test.py index de50268..3e0e314 100644 --- a/tests/misc_object_tests/Makefile_test.py +++ b/tests/misc_object_tests/Makefile_test.py @@ -18,10 +18,11 @@ # TODO Some of the tests in the Makefile are currently known to be redundantly called when using py.test. -import subprocess import os +import subprocess import sys + def test_make_all(): if sys.platform=='win32': return # don't run on win32 diff --git a/tests/misc_object_tests/PartitionObject_test.py b/tests/misc_object_tests/PartitionObject_test.py index b997de6..b137d34 100644 --- a/tests/misc_object_tests/PartitionObject_test.py +++ b/tests/misc_object_tests/PartitionObject_test.py @@ -15,14 +15,14 @@ __version__ = "0.1.1" +import logging import os import sys -import logging - -import dfxml.objects as Objects import libtest +import dfxml.objects as Objects + _logger = logging.getLogger(os.path.basename(__file__)) def test_empty_object(): diff --git a/tests/misc_object_tests/PartitionSystemObject_test.py b/tests/misc_object_tests/PartitionSystemObject_test.py index 195e63b..11664b0 100644 --- a/tests/misc_object_tests/PartitionSystemObject_test.py +++ b/tests/misc_object_tests/PartitionSystemObject_test.py @@ -15,14 +15,14 @@ __version__ = "0.1.1" +import logging import os import sys -import logging - -import dfxml.objects as Objects import libtest +import dfxml.objects as Objects + _logger = logging.getLogger(os.path.basename(__file__)) def test_empty_object(): diff --git a/tests/misc_object_tests/RegXMLObject_test.py b/tests/misc_object_tests/RegXMLObject_test.py index e0206eb..9205bf7 100644 --- a/tests/misc_object_tests/RegXMLObject_test.py +++ b/tests/misc_object_tests/RegXMLObject_test.py @@ -17,11 +17,12 @@ import os import sys -import dfxml.objects as Objects - import diffing_CellObject_test import diffing_HiveObject_test +import dfxml.objects as Objects + + def test_all(): ro = Objects.RegXMLObject(version="0.2") ho = Objects.HiveObject() diff --git a/tests/misc_object_tests/VolumeObject_externals_test.py b/tests/misc_object_tests/VolumeObject_externals_test.py index 1033ba1..0280adf 100644 --- a/tests/misc_object_tests/VolumeObject_externals_test.py +++ b/tests/misc_object_tests/VolumeObject_externals_test.py @@ -17,13 +17,13 @@ import logging import os -import xml.etree.ElementTree as ET import sys - -import dfxml.objects as Objects +import xml.etree.ElementTree as ET import libtest +import dfxml.objects as Objects + # Only register one of these namespaces in ET. XMLNS_TEST_CLAMSCAN = "file:///opt/local/bin/clamscan" XMLNS_TEST_UNREGGED = "file:///dev/random" diff --git a/tests/misc_object_tests/VolumeObject_hash_test.py b/tests/misc_object_tests/VolumeObject_hash_test.py index 94bd8fa..71333f6 100644 --- a/tests/misc_object_tests/VolumeObject_hash_test.py +++ b/tests/misc_object_tests/VolumeObject_hash_test.py @@ -15,12 +15,13 @@ __version__ = "0.1.1" -import os import logging +import os import sys import dfxml.objects as Objects + def test_all(): logging.basicConfig(level=logging.DEBUG) _logger = logging.getLogger(os.path.basename(__file__)) diff --git a/tests/misc_object_tests/VolumeObject_test.py b/tests/misc_object_tests/VolumeObject_test.py index 6b5d512..52ca955 100644 --- a/tests/misc_object_tests/VolumeObject_test.py +++ b/tests/misc_object_tests/VolumeObject_test.py @@ -15,14 +15,14 @@ __version__ = "0.1.1" +import logging import os import sys -import logging - -import dfxml.objects as Objects import libtest +import dfxml.objects as Objects + _logger = logging.getLogger(os.path.basename(__file__)) def test_empty_object(): diff --git a/tests/misc_object_tests/diff_file_ignore_sample_dfxml_test.py b/tests/misc_object_tests/diff_file_ignore_sample_dfxml_test.py index a5096d6..2628213 100644 --- a/tests/misc_object_tests/diff_file_ignore_sample_dfxml_test.py +++ b/tests/misc_object_tests/diff_file_ignore_sample_dfxml_test.py @@ -19,6 +19,7 @@ import dfxml.objects as Objects + def main(): dobj = Objects.DFXMLObject() dobj.diff_file_ignores.add("atime") diff --git a/tests/misc_object_tests/diff_file_ignore_test.py b/tests/misc_object_tests/diff_file_ignore_test.py index a842ef5..0dbcc9a 100644 --- a/tests/misc_object_tests/diff_file_ignore_test.py +++ b/tests/misc_object_tests/diff_file_ignore_test.py @@ -13,9 +13,9 @@ __version__ = "0.1.1" -import sys import logging import os +import sys import dfxml.objects as Objects diff --git a/tests/misc_object_tests/diffing_ByteRuns_test.py b/tests/misc_object_tests/diffing_ByteRuns_test.py index b6c5a06..7aa3206 100644 --- a/tests/misc_object_tests/diffing_ByteRuns_test.py +++ b/tests/misc_object_tests/diffing_ByteRuns_test.py @@ -14,10 +14,10 @@ __version__ = "0.1.1" -import sys import copy import logging import os +import sys import dfxml.objects as Objects diff --git a/tests/misc_object_tests/diffing_CellObject_test.py b/tests/misc_object_tests/diffing_CellObject_test.py index 4ba6186..582509d 100644 --- a/tests/misc_object_tests/diffing_CellObject_test.py +++ b/tests/misc_object_tests/diffing_CellObject_test.py @@ -14,13 +14,14 @@ __version__ = "0.1.1" -import sys import logging import os +import sys + +import diffing_ByteRuns_test import dfxml.objects as Objects -import diffing_ByteRuns_test def get_co(): _logger = logging.getLogger(os.path.basename(__file__)) diff --git a/tests/misc_object_tests/diffing_FileObject_test.py b/tests/misc_object_tests/diffing_FileObject_test.py index b4a6db5..0e6b38a 100644 --- a/tests/misc_object_tests/diffing_FileObject_test.py +++ b/tests/misc_object_tests/diffing_FileObject_test.py @@ -13,12 +13,13 @@ __version__ = "0.1.1" -import sys import logging import os +import sys import dfxml.objects as Objects + def test_all(): logging.basicConfig(level=logging.DEBUG) _logger = logging.getLogger(os.path.basename(__file__)) diff --git a/tests/misc_object_tests/diffing_HiveObject_test.py b/tests/misc_object_tests/diffing_HiveObject_test.py index 1cd4ac2..8f0c53c 100644 --- a/tests/misc_object_tests/diffing_HiveObject_test.py +++ b/tests/misc_object_tests/diffing_HiveObject_test.py @@ -14,13 +14,14 @@ __version__ = "0.1.1" -import sys import copy import logging import os +import sys import dfxml.objects as Objects + def get_ho(): ho = Objects.HiveObject() ho.mtime = "2010-01-02T03:45:00Z" diff --git a/tests/misc_object_tests/diffing_TimestampObject_test.py b/tests/misc_object_tests/diffing_TimestampObject_test.py index 4e31c91..997fad4 100644 --- a/tests/misc_object_tests/diffing_TimestampObject_test.py +++ b/tests/misc_object_tests/diffing_TimestampObject_test.py @@ -14,13 +14,14 @@ __version__ = "0.1.1" -import sys +import copy import logging import os -import copy +import sys import dfxml.objects as Objects + def test_all(): t0 = Objects.TimestampObject() t0.name = "mtime" diff --git a/tests/misc_object_tests/diffing_VolumeObject_test.py b/tests/misc_object_tests/diffing_VolumeObject_test.py index 2c22d46..4574e7b 100644 --- a/tests/misc_object_tests/diffing_VolumeObject_test.py +++ b/tests/misc_object_tests/diffing_VolumeObject_test.py @@ -14,13 +14,14 @@ __version__ = "0.1.0" -import sys import logging import os +import sys import dfxml import dfxml.objects as Objects + def test_all(): logging.basicConfig(level=logging.DEBUG) _logger = logging.getLogger(os.path.basename(__file__)) diff --git a/tests/misc_object_tests/error_test.py b/tests/misc_object_tests/error_test.py index 05d8854..d07bc93 100644 --- a/tests/misc_object_tests/error_test.py +++ b/tests/misc_object_tests/error_test.py @@ -13,15 +13,15 @@ __version__ = "0.1.1" -import os import logging +import os import sys import xml.etree.ElementTree as ET -import dfxml.objects as Objects - import libtest +import dfxml.objects as Objects + ERROR_STRING_V = "Volume test error" ERROR_STRING_F = "File test error" diff --git a/tests/misc_object_tests/libtest.py b/tests/misc_object_tests/libtest.py index be5ce10..1cc34a3 100644 --- a/tests/misc_object_tests/libtest.py +++ b/tests/misc_object_tests/libtest.py @@ -19,10 +19,10 @@ __version__ = "0.1.1" -import os -import sys import logging +import os import subprocess +import sys import tempfile import typing import warnings diff --git a/tests/misc_object_tests/objects_test.py b/tests/misc_object_tests/objects_test.py index 57abcfb..4a4a4e1 100644 --- a/tests/misc_object_tests/objects_test.py +++ b/tests/misc_object_tests/objects_test.py @@ -3,11 +3,12 @@ __version__ = "0.1.1" -import sys import os +import sys from dfxml.objects import * -from dfxml.objects import _intcast, _qsplit, _logger +from dfxml.objects import _intcast, _logger, _qsplit + def test_all(): assert _intcast(-1) == -1 diff --git a/tests/misc_object_tests/storage_layers_test.py b/tests/misc_object_tests/storage_layers_test.py index 64837c3..96f335e 100644 --- a/tests/misc_object_tests/storage_layers_test.py +++ b/tests/misc_object_tests/storage_layers_test.py @@ -15,16 +15,16 @@ __version__ = "0.3.1" -import os -import sys import hashlib import logging +import os +import sys import typing -import dfxml.objects as Objects - import libtest +import dfxml.objects as Objects + _logger = logging.getLogger(os.path.basename(__file__)) TEST_BYTE_STRING_1 = b"Test string 1" diff --git a/tests/misc_object_tests/test_TCPFlowObjects.py b/tests/misc_object_tests/test_TCPFlowObjects.py index e38ad81..f762cf1 100644 --- a/tests/misc_object_tests/test_TCPFlowObjects.py +++ b/tests/misc_object_tests/test_TCPFlowObjects.py @@ -18,9 +18,10 @@ import pytest -import dfxml.objects as Objects #TODO - It seems TCPFlowObjects might be better served from /dfxml instead of /dfxml/bin. import dfxml.bin.TCPFlowObjects +import dfxml.objects as Objects + @pytest.fixture def top_srcdir() -> pathlib.Path: diff --git a/tests/test_objects.py b/tests/test_objects.py index 46eebde..40a4dc2 100644 --- a/tests/test_objects.py +++ b/tests/test_objects.py @@ -21,6 +21,7 @@ import dfxml.objects as Objects + def test_AbstractHierarchyObject_append() -> None: """ This test confirms expected append() behaviors, in lieu of static type checking enforcement. diff --git a/tests/test_reads.py b/tests/test_reads.py index 51d86f5..2a2fa02 100644 --- a/tests/test_reads.py +++ b/tests/test_reads.py @@ -18,6 +18,7 @@ import dfxml import dfxml.objects + def nop(x : object) -> None: pass diff --git a/tests/test_version.py b/tests/test_version.py index c9da788..83672c3 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -13,5 +13,6 @@ import dfxml + def test_version() -> None: assert not dfxml.__version__ is None From f76557f288944a6256ed8fbdd04888c570b27a75 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Fri, 21 Jun 2024 17:09:21 -0400 Subject: [PATCH 2/3] Start Python formatting review with pre-commit and isort This patch draws on Cyber Domain Ontology deployments from the noted PRs. References: * https://github.com/casework/CASE-Utilities-Python/pull/37 * https://github.com/Cyber-Domain-Ontology/CDO-Shapes-Example/pull/1 Signed-off-by: Alex Nelson --- .github/workflows/continuous-integration.yml | 5 ++ .gitignore | 1 + .pre-commit-config.yaml | 6 ++ Makefile | 71 +++++++++++++++++++- tests/Makefile | 2 +- 5 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index e829b5b..5945ebc 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -37,6 +37,11 @@ jobs: sudo apt update sudo apt install --yes libxml2-utils + - name: Pre-commit Checks + run: | + pip -q install pre-commit + pre-commit run --all-files + - name: Make check run: make check diff --git a/.gitignore b/.gitignore index b0089ec..7adfa8f 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ python/demo.dfxml .pytest_cache *.egg-info *.log +.venv-pre-commit diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..2281f01 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,6 @@ +repos: + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + name: isort (python) diff --git a/Makefile b/Makefile index 71f13af..8923c02 100644 --- a/Makefile +++ b/Makefile @@ -18,11 +18,18 @@ ifeq ($(shell basename $(SHELL)),sh) SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash) endif -all: +PYTHON3 ?= python3 +ifeq ($(PYTHON3),) +$(error python3 not found) +endif + +all: \ + .venv-pre-commit/var/.pre-commit-built.log .PHONY: \ check-mypy \ - check-supply-chain + check-supply-chain \ + check-supply-chain-pre-commit .git_submodule_init.done.log: .gitmodules # Confirm dfxml_schema has been checked out at least once. @@ -31,6 +38,29 @@ all: test -r dependencies/dfxml_schema/dfxml.xsd touch $@ +# This virtual environment is meant to be built once and then persist, even through 'make clean'. +# If a recipe is written to remove this flag file, it should first run `pre-commit uninstall`. +.venv-pre-commit/var/.pre-commit-built.log: + rm -rf .venv-pre-commit + test -r .pre-commit-config.yaml \ + || (echo "ERROR:Makefile:pre-commit is expected to install for this repository, but .pre-commit-config.yaml does not seem to exist." >&2 ; exit 1) + $(PYTHON3) -m venv \ + .venv-pre-commit + source .venv-pre-commit/bin/activate \ + && pip install \ + --upgrade \ + pip \ + setuptools \ + wheel + source .venv-pre-commit/bin/activate \ + && pip install \ + pre-commit + source .venv-pre-commit/bin/activate \ + && pre-commit install + mkdir -p \ + .venv-pre-commit/var + touch $@ + clean: find . -name '*~' -exec rm {} \; $(MAKE) \ @@ -38,8 +68,10 @@ clean: clean check: \ - check-mypy + .git_submodule_init.done.log \ + .venv-pre-commit/var/.pre-commit-built.log $(MAKE) \ + PYTHON3=$(PYTHON3) \ SHELL=$(SHELL) \ --directory tests \ check @@ -47,12 +79,45 @@ check: \ check-mypy: \ .git_submodule_init.done.log $(MAKE) \ + PYTHON3=$(PYTHON3) \ SHELL=$(SHELL) \ --directory tests \ check-mypy check-supply-chain: \ + check-supply-chain-pre-commit \ check-mypy +# Update pre-commit configuration and use the updated config file to +# review code. Only have Make exit if 'pre-commit run' modifies files. +check-supply-chain-pre-commit: \ + .venv-pre-commit/var/.pre-commit-built.log + source .venv-pre-commit/bin/activate \ + && pre-commit autoupdate + git diff \ + --exit-code \ + .pre-commit-config.yaml \ + || ( \ + source .venv-pre-commit/bin/activate \ + && pre-commit run \ + --all-files \ + --config .pre-commit-config.yaml \ + ) \ + || git diff \ + --stat \ + --exit-code \ + || ( \ + echo \ + "WARNING:Makefile:pre-commit configuration can be updated. It appears the updated would change file formatting." \ + >&2 \ + ; exit 1 \ + ) + @git diff \ + --exit-code \ + .pre-commit-config.yaml \ + || echo \ + "INFO:Makefile:pre-commit configuration can be updated. It appears the update would not change file formatting." \ + >&2 + check-tools: (cd tests/misc_object_tests;make check) diff --git a/tests/Makefile b/tests/Makefile index dfe92c4..91f324d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -18,7 +18,7 @@ endif top_srcdir := $(shell cd .. ; pwd) -PYTHON3 ?= $(shell which python3.9 2>/dev/null || which python3.8 2>/dev/null || which python3.7 2>/dev/null || which python3.6 2>/dev/null || which python3) +PYTHON3 ?= python3 ifeq ($(PYTHON3),) $(error python3 not found) endif From a9b4b8f3231c385f8dcac4ee48d75b575d87cd46 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Fri, 21 Jun 2024 17:24:35 -0400 Subject: [PATCH 3/3] Document `pre-commit` hook Text adapted from contributions to `case-utils` by @kchason. Disclaimer: Participation by NIST in the creation of the documentation of mentioned software is not intended to imply a recommendation or endorsement by the National Institute of Standards and Technology, nor is it intended to imply that any specific software is necessarily the best available for the purpose. References: * https://github.com/casework/CASE-Utilities-Python/pull/37 Signed-off-by: Alex Nelson --- CONTRIBUTE.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/CONTRIBUTE.md b/CONTRIBUTE.md index 10d78e0..ec8a2c5 100644 --- a/CONTRIBUTE.md +++ b/CONTRIBUTE.md @@ -1,6 +1,25 @@ # Contributing to DFXML's Python code base +## Pre-commit + +This project uses [the `pre-commit` tool](https://pre-commit.com/) for linting. + +`pre-commit` hooks into Git's commit machinery to run a set of linters and static analyzers over each change. To install `pre-commit` into Git's hooks, run one (not both) of the following sets of commands: + +```bash +pip install pre-commit +pre-commit --version +pre-commit install +``` + +Or: + +```bash +make +``` + + ## Installable tools versus in-place scripts The [`dfxml/bin/`](dfxml/bin/) directory contains scripts for interacting with DFXML. Some of the tools are installed in the command-line `$PATH` when the `dfxml` package is installed.