cytomining · gwaybio · Jan 6, 2021 · Jan 6, 2021 · Jan 6, 2021 · Jan 6, 2021
diff --git a/.travis.yml b/.travis.yml
@@ -1,4 +1,10 @@
 language: python
+python:
+  - "3.7"
+  - "3.8"
+  - "3.9"
+install:
+- pip install -r requirements.txt
 script:
   - pip install pytest-cov codecov
   - py.test -v --cov-report=xml --cov=pycytominer pycytominer/tests/

diff --git a/pycytominer/aggregate.py b/pycytominer/aggregate.py
@@ -18,7 +18,7 @@ def aggregate(
     operation="median",
     output_file="none",
     subset_data_df="none",
-    compression=None,
+    compression_options=None,
     float_format=None,
 ):
     """
@@ -79,7 +79,7 @@ def aggregate(
         output(
             df=population_df,
             output_filename=output_file,
-            compression=compression,
+            compression_options=compression_options,
             float_format=float_format,
         )
     else:

diff --git a/pycytominer/annotate.py b/pycytominer/annotate.py
@@ -21,7 +21,7 @@ def annotate(
     external_metadata="none",
     external_join_left="none",
     external_join_right="none",
-    compression=None,
+    compression_options=None,
     float_format=None,
 ):
     """
@@ -48,7 +48,7 @@ def annotate(
                         metadata information
     external_join_left - [default: "none"] the merge column in the profile metadata
     external_join_right - [default: "none"] the merge column in the external metadata
-    compression - the mechanism to compress [default: None] See cyto_utils/output.py for options.
+    compression_options - compression settings forwarded to output() [default: None]. See cyto_utils/output.py for options.
     float_format - decimal precision to use in writing output file [default: None]
                        For example, use "%.3g" for 3 decimal precision.
 
@@ -180,7 +180,7 @@ def annotate(
         output(
             df=annotated,
             output_filename=output_file,
-            compression=compression,
+            compression_options=compression_options,
             float_format=float_format,
         )
     else:

diff --git a/pycytominer/consensus.py b/pycytominer/consensus.py
@@ -20,7 +20,7 @@ def consensus(
     operation="median",
     features="infer",
     output_file="none",
-    compression=None,
+    compression_options=None,
     float_format=None,
     modz_args={"method": "spearman"},
 ):
@@ -38,8 +38,8 @@ def consensus(
     :type output_file: str
     :param modz_args: Additional custom arguments passed as kwargs if operation="modz". See pycytominer.cyto_utils.modz for more details.
     :type modz_args: dict
-    :param compression: the method to compress output data, defaults to None. See pycytominer.cyto_utils.output.py for options
-    :type compression: str
+    :param compression_options: the compression settings used when writing output data, defaults to None. See pycytominer.cyto_utils.output.py for options
+    :type compression_options: str, dict
     :param float_format: decimal precision to use in writing output file, defaults to None. For example, use "%.3g" for 3 decimal precision.
 
     :Example:
@@ -102,7 +102,7 @@ def consensus(
         output(
             df=consensus_df,
             output_filename=output_file,
-            compression=compression,
+            compression_options=compression_options,
             float_format=float_format,
         )
     else:

diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py
@@ -319,7 +319,7 @@ def aggregate_compartment(
     def merge_single_cells(
         self,
         sc_output_file="none",
-        compression=None,
+        compression_options=None,
         float_format=None,
         single_cell_normalize=False,
         normalize_args=None,
@@ -429,7 +429,7 @@ def merge_single_cells(
             output(
                 df=sc_df,
                 output_filename=sc_output_file,
-                compression=compression,
+                compression_options=compression_options,
                 float_format=float_format,
             )
         else:
@@ -439,7 +439,7 @@ def aggregate_profiles(
         self,
         compute_subsample=False,
         output_file="none",
-        compression=None,
+        compression_options=None,
         float_format=None,
         aggregate_args=None,
     ):
@@ -492,7 +492,7 @@ def aggregate_profiles(
             output(
                 df=aggregated,
                 output_filename=self.output_file,
-                compression=compression,
+                compression_options=compression_options,
                 float_format=float_format,
             )
         else:

diff --git a/pycytominer/cyto_utils/output.py b/pycytominer/cyto_utils/output.py
@@ -6,73 +6,99 @@
 import warnings
 import pandas as pd
 
-compress_options = {"gzip": ".gz", "bz2": ".bz2", "zip": ".zip", "xz": ".xz", None: ""}
+compress_options = ["gzip", None]
 
 
-def output(df, output_filename, compression="gzip", float_format=None):
-    """
-    Given an output file and compression options, write file to disk
+def output(
+    df,
+    output_filename,
+    sep=",",
+    float_format=None,
+    compression_options={"method": "gzip", "mtime": 1},
+):
+    """Given an output file and compression options, write file to disk
 
-    Arguments:
-    df - a pandas dataframe that will be written to file
-    output_filename - a string or path object that stores location of file
-    compression - the mechanism to compress [default: "gzip"]
-    float_format - decimal precision to use in writing output file [default: None]
-                   For example, use "%.3g" for 3 decimal precision.
+    :param df: a pandas dataframe that will be written to file
+    :type df: pandas.DataFrame
+    :param output_filename: a string or path object that stores location of file
+    :type output_filename: str
+    :param sep: file delimiter
+    :type sep: str
+    :param float_format: decimal precision to use in writing output file [default: None]
+    :type float_format: str
+    :param compression_options: compression arguments as input to pandas.to_csv() [default: {"method": "gzip", "mtime": 1}]
+    :type compression_options: str, dict
 
-    Return:
-    Nothing, write df to file
-    """
+    :Example:
 
-    # Extract suffixes from the provided output file name
-    filename, output_file_extension = os.path.splitext(output_filename)
-    basefilename, non_compression_suffix = os.path.splitext(filename)
-
-    # if no additional suffix was provided, make it a csv
-    if len(non_compression_suffix) == 0 and output_file_extension not in [
-        ".csv",
-        ".tsv",
-    ]:
-        output_filename = "{}.csv".format(output_filename)
-
-    # Set the delimiter
-    delim = ","
-    if non_compression_suffix == ".tsv":
-        delim = "\t"
-
-    # Determine the compression suffix
-    compression_suffix = infer_compression_suffix(compression=compression)
-    if output_file_extension in compress_options.values():
-        if output_file_extension != compression_suffix:
-            warnings.warn(
-                "The output file has a compression file extension ('{}') that is different than what is specified in 'compression' ('{}'). Defaulting to output filename suffix.".format(
-                    output_file_extension, compression_suffix
-                )
-            )
-        compression = "infer"
-    else:
-        output_filename = "{}{}".format(output_filename, compression_suffix)
+    import pandas as pd
+    from pycytominer.cyto_utils import output
+
+    data_df = pd.concat(
+        [
+            pd.DataFrame(
+                {
+                    "Metadata_Plate": "X",
+                    "Metadata_Well": "a",
+                    "Cells_x": [0.1, 0.3, 0.8],
+                    "Nuclei_y": [0.5, 0.3, 0.1],
+                }
+            ),
+            pd.DataFrame(
+                {
+                    "Metadata_Plate": "X",
+                    "Metadata_Well": "b",
+                    "Cells_x": [0.4, 0.2, -0.5],
+                    "Nuclei_y": [-0.8, 1.2, -0.5],
+                }
+            ),
+        ]
+    ).reset_index(drop=True)
+
+    output_file = "test.csv.gz"
+    output(
+        df=data_df,
+        output_filename=output_file,
+        sep=",",
+        compression_options={"method": "gzip", "mtime": 1},
+        float_format=None,
+    )
+    """
+    # Make sure the compression method is supported
+    compression_options = set_compression_method(compression=compression_options)
 
     df.to_csv(
         path_or_buf=output_filename,
-        sep=delim,
+        sep=sep,
         index=False,
         float_format=float_format,
-        compression=compression,
+        compression=compression_options,
     )
 
 
-def infer_compression_suffix(compression="gzip"):
+def set_compression_method(compression):
+    """Set the compression options
+
+    :param compression: compression options; None, a method name, or a dict containing a "method" key
+    :type compression: str, dict
     """
-    Determine the compression suffix
 
-    Arguments:
-    compression - the mechanism to compress [default: "gzip"]
+    if compression is None:
+        compression = {"method": None}
+
+    if isinstance(compression, str):
+        compression = {"method": compression}
+
+    check_compression_method(compression["method"])
+    return compression
+
+
+def check_compression_method(compression):
+    """Ensure compression options are set properly
+
+    :param compression: the compression used to output data
+    :type compression: str
     """
     assert (
         compression in compress_options
-    ), "{} is not supported, select one of {}".format(
-        compression, list(compress_options.keys())
-    )
-
-    return compress_options[compression]
+    ), "{} is not supported, select one of {}".format(compression, compress_options)
diff --git a/pycytominer/feature_select.py b/pycytominer/feature_select.py
@@ -30,7 +30,7 @@ def feature_select(
     corr_method="pearson",
     freq_cut=0.05,
     unique_cut=0.1,
-    compression=None,
+    compression_options=None,
     float_format=None,
     blocklist_file=None,
     outlier_cutoff=15,
@@ -144,7 +144,7 @@ def feature_select(
         output(
             df=selected_df,
             output_filename=output_file,
-            compression=compression,
+            compression_options=compression_options,
             float_format=float_format,
         )
     else:

diff --git a/pycytominer/normalize.py b/pycytominer/normalize.py
@@ -20,7 +20,7 @@ def normalize(
     samples="all",
     method="standardize",
     output_file="none",
-    compression=None,
+    compression_options=None,
     float_format=None,
     spherize_center=True,
     spherize_method="ZCA-cor",
@@ -104,7 +104,7 @@ def normalize(
         output(
             df=normalized,
             output_filename=output_file,
-            compression=compression,
+            compression_options=compression_options,
             float_format=float_format,
         )
     else:

diff --git a/pycytominer/tests/test_annotate.py b/pycytominer/tests/test_annotate.py
@@ -75,7 +75,7 @@ def test_annotate_compress():
         join_on=["Metadata_well_position", "Metadata_Well"],
         add_metadata_id_to_platemap=False,
         output_file=compress_file,
-        compression="gzip",
+        compression_options={"method": "gzip"},
     )
 
     result = annotate(

diff --git a/pycytominer/tests/test_cyto_utils/test_cells.py b/pycytominer/tests/test_cyto_utils/test_cells.py
@@ -389,7 +389,9 @@ def test_aggregate_subsampling_profile():
 def test_aggregate_subsampling_profile_compress():
     compress_file = os.path.join(tmpdir, "test_aggregate_compress.csv.gz")
 
-    _ = ap_subsample.aggregate_profiles(output_file=compress_file, compression="gzip")
+    _ = ap_subsample.aggregate_profiles(
+        output_file=compress_file, compression_options={"method": "gzip"}
+    )
     result = pd.read_csv(compress_file)
 
     expected_result = pd.DataFrame(