Proteobench · RobbinBouwmeester · Jul 16, 2024 · Jul 16, 2024 · Jul 16, 2024 · Jul 16, 2024
diff --git a/proteobench/io/parsing/parse_settings_ion.py b/proteobench/io/parsing/parse_settings_ion.py
@@ -79,43 +79,52 @@ def convert_to_standard_format(self, df: pd.DataFrame) -> tuple[pd.DataFrame, Di
             replicate_to_raw[v].append(k)
 
         if "Reverse" in self.mapper:
-            df = df[df["Reverse"] != self.decoy_flag]
+            df_filtered = df[df["Reverse"] != self.decoy_flag].copy()
+        else:
+            df_filtered = df.copy()
 
-        df["contaminant"] = df["Proteins"].str.contains(self.contaminant_flag)
+        df_filtered["contaminant"] = df_filtered["Proteins"].str.contains(self.contaminant_flag)
         for flag, species in self._species_dict.items():
-            df[species] = df["Proteins"].str.contains(flag)
-        df["MULTI_SPEC"] = df[list(self._species_dict.values())].sum(axis=1) > self.min_count_multispec
+            df_filtered[species] = df_filtered["Proteins"].str.contains(flag)
+        df_filtered["MULTI_SPEC"] = (
+            df_filtered[list(self._species_dict.values())].sum(axis=1) > self.min_count_multispec
+        )
 
-        df = df[df["MULTI_SPEC"] == False]
+        df_filtered = df_filtered[df_filtered["MULTI_SPEC"] == False]
 
         # If there is "Raw file" then it is a long format, otherwise short format
         if "Raw file" not in self.mapper.values():
             melt_vars = self.condition_mapper.keys()
             # Should be handled more elegant
             try:
-                df = df.melt(
-                    id_vars=list(set(df.columns).difference(set(melt_vars))),
+                df_filtered_melted = df_filtered.melt(
+                    id_vars=list(set(df_filtered.columns).difference(set(melt_vars))),
                     value_vars=melt_vars,
                     var_name="Raw file",
                     value_name="Intensity",
                 )
             except KeyError:
-                df.columns = [c.replace(".mzML", ".mzML.gz") for c in df.columns]
-                df = df.melt(
-                    id_vars=list(set(df.columns).difference(set(melt_vars))),
+                # Build the renamed labels from df_filtered itself: columns were assigned on it above, not on df.
+                df_filtered.columns = [c.replace(".mzML", ".mzML.gz") for c in df_filtered.columns]
+                df_filtered_melted = df_filtered.melt(
+                    id_vars=list(set(df_filtered.columns).difference(set(melt_vars))),
                     value_vars=melt_vars,
                     var_name="Raw file",
                     value_name="Intensity",
                 )
+        else:
+            df_filtered_melted = df_filtered.copy()
 
-        df["replicate"] = df["Raw file"].map(self.condition_mapper)
-        df = pd.concat([df, pd.get_dummies(df["Raw file"])], axis=1)
+        df_filtered_melted.loc[:, "replicate"] = df_filtered_melted["Raw file"].map(self.condition_mapper)
+        df_filtered_melted = pd.concat([df_filtered_melted, pd.get_dummies(df_filtered_melted["Raw file"])], axis=1)
 
-        if "proforma" in df.columns and "Charge" in df.columns:
-            df["precursor ion"] = df["proforma"] + "|Z=" + df["Charge"].astype(str)
+        if "proforma" in df_filtered_melted.columns and "Charge" in df_filtered_melted.columns:
+            df_filtered_melted["precursor ion"] = (
+                df_filtered_melted["proforma"] + "|Z=" + df_filtered_melted["Charge"].astype(str)
+            )
         else:
             print("Not all columns required for making the ion are available.")
-        return df, replicate_to_raw
+        return df_filtered_melted, replicate_to_raw
 
 
 class ParseModificationSettings:

diff --git a/proteobench/score/quant/quantscores.py b/proteobench/score/quant/quantscores.py
@@ -116,14 +116,14 @@ def compute_epsilon(withspecies, species_expected_ratio):
         withspecies["unique"] = withspecies[species_expected_ratio.keys()].sum(axis=1)
 
         # now remove all rows with withspecies["unique"] > 1
-        withspecies = withspecies[withspecies["unique"] == 1]
+        withspecies_unique = withspecies[withspecies["unique"] == 1].copy()  # explicit copy: the writes below go to this frame
 
         # for species in parse_settings.species_dict.values(), set all values in new column "species" to species if withe species is True
         for species in species_expected_ratio.keys():
-            withspecies.loc[withspecies[species] == True, "species"] = species
-            withspecies.loc[withspecies[species] == True, "log2_expectedRatio"] = np.log2(
+            withspecies_unique.loc[withspecies_unique[species] == True, "species"] = species
+            withspecies_unique.loc[withspecies_unique[species] == True, "log2_expectedRatio"] = np.log2(
                 species_expected_ratio[species]["A_vs_B"]
             )
 
-        withspecies["epsilon"] = withspecies["log2_A_vs_B"] - withspecies["log2_expectedRatio"]
-        return withspecies
+        withspecies_unique["epsilon"] = withspecies_unique["log2_A_vs_B"] - withspecies_unique["log2_expectedRatio"]
+        return withspecies_unique  # the filtered copy with species, log2_expectedRatio and epsilon columns set above