Merge pull request #1 from jlab/dev

Dev
jlab · Aug 28, 2024 · c772a7c · c772a7c
2 parents 5643f4f + 0d0db46
commit c772a7c
Show file tree

Hide file tree

Showing 6 changed files with 18 additions and 18 deletions.
diff --git a/.github/workflows/github_tests.yml b/.github/workflows/github_tests.yml
@@ -23,11 +23,11 @@ jobs:
 
     - name: Lint with flake8
       run: |
-        $CONDA/bin/flake8 src/meta_tran_sim/
+        $CONDA/bin/flake8 src/marbel/
 
     - name: Run tests with pytest
       run: |
-        $CONDA/bin/pytest tests --doctest-modules --cov=src/meta_tran_sim --cov-report=xml
+        $CONDA/bin/pytest tests --doctest-modules --cov=src/marbel --cov-report=xml
 
     - name: Convert coverage to lcov format
       run: |

diff --git a/README.md b/README.md
@@ -17,9 +17,9 @@ For this you need to have conda-build installed (`conda install conda-build`)
 Create new environment and install package:
 
 ```
-conda create -n meta_tran_sim
-conda activate meta_tran_sim
-conda install --use-local meta_tran_sim
+conda create -n marbel
+conda activate marbel
+conda install --use-local marbel
 ```
 
 ### Install by hand (for development purposes)

diff --git a/environment.yml b/environment.yml
@@ -1,4 +1,4 @@
-name: meta_tran_sim
+name: marbel
 channels:
   - bioconda
   - conda-forge

diff --git a/meta.yaml b/meta.yaml
@@ -1,5 +1,5 @@
 package:
-  name: meta_tran_sim
+  name: marbel
   version: 0.0.1
 
 source:
@@ -22,7 +22,7 @@ requirements:
     - pandas
 
 about:
-  home: https://github.com/jlab/meta_tran_sim_dev
+  home: https://github.com/jlab/marbel
   license: Apache-2.0
   summary: "A CLI for creating a simulated metatranscriptome dataset"
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -22,8 +22,8 @@ classifiers = [
 dynamic = ["dependencies"]
 
 [project.urls]
-"Homepage" = "https://github.com/jlab/meta_tran_sim_dev" 
-"Bug Tracker" = "https://github.com/jlab/meta_tran_sim_dev/issues" 
+"Homepage" = "https://github.com/jlab/marbel" 
+"Bug Tracker" = "https://github.com/jlab/marbel/issues" 
 
 [project.scripts]
 marbel= "marbel.meta_tran_sim:app"

diff --git a/src/marbel/data_generations.py b/src/marbel/data_generations.py
@@ -29,7 +29,7 @@ def draw_random_species(number_of_species):
 def create_ortholgous_group_rates(number_of_orthogous_groups, max_species_per_group, seed=None):
     """
     Creates a list of group sizes for orthogroups, such that the maximum group size is less than or equal to
-    the specified maximum species per group and the total number of orthogroups matches the specified number 
+    the specified maximum species per group and the total number of orthogroups matches the specified number
     of orthogroups.
 
     Parameters:
@@ -264,7 +264,7 @@ def aggregate_gene_data(species, species_abundances, selected_ortho_groups, read
 
     for sp in species:
         species_genes_list = selected_ortho_groups[selected_ortho_groups[sp] != "-"][sp].to_list()
-        origin_orthogroup += selected_ortho_groups[selected_ortho_groups[sp] != "-"].index.to_list()
+        origin_orthogroup += [f"og{ortho_group}" for ortho_group in selected_ortho_groups[selected_ortho_groups[sp] != "-"].index.to_list()]
         scaled_read_mean_counts += [species_weights[i] * c for c in read_mean_counts[current_read_index:(current_read_index + len(species_genes_list))]]
         current_read_index += len(species_genes_list)
         all_species_genes += species_genes_list
@@ -285,11 +285,11 @@ def aggregate_gene_data(species, species_abundances, selected_ortho_groups, read
 def convert_fasta_dir_to_fastq_dir(fasta_dir, gzipped=True):
     """
     Converts a directory containing .fasta files to a directory containing .fastq files. If gzipped is True, the output files will be gzipped.
-    
+
     Parameters:
     - fasta_dir (str): The path to the directory containing the .fasta files.
     - gzipped (bool): Whether the output files should be gzipped.
-    
+
     Note that the input .fasta files will be removed after the conversion is done.
     """
     fasta_dir = Path(fasta_dir)
@@ -312,7 +312,7 @@ def write_as_fastq_gz(fa_path, fq_path):
     """
     Converts a .fasta file to a .fastq.gz file. The function reads the .fasta file,
     adds phred quality scores to each sequence and writes the output to a .fastq.gz file.
-    
+
     Args:
         fa_path (str): Path to the input .fasta file.
         fq_path (str): Path to the output .fastq.gz file.
@@ -327,7 +327,7 @@ def write_as_fastq(fa_path, fq_path):
     """
     Converts a .fasta file to a .fastq file. The function reads the .fasta file,
     adds phred quality scores to each sequence and writes the output to a .fastq file.
-    
+
     Args:
         fa_path (str): Path to the input .fasta file.
         fq_path (str): Path to the output .fastq file.
@@ -342,7 +342,7 @@ def summarize_parameters(number_of_orthogous_groups, number_of_species, number_o
                          outdir, max_phylo_distance, min_identity, deg_ratio, seed, output_format, read_length, result_file):
     """
     Writes the simulation parameters to the result_file.
-    
+
     Args:
         number_of_orthogous_groups (int): The number of orthologous groups.
         number_of_species (int): The number of species.
@@ -387,7 +387,7 @@ def generate_report(number_of_orthogous_groups, number_of_species, number_of_sam
         read_length (int): The read length.
     """
     summary_dir = f"{outdir}/summary"
-    with open(f"{summary_dir}/meta_tran_sim_params.txt", "w") as f:
+    with open(f"{summary_dir}/marbel_params.txt", "w") as f:
         summarize_parameters(number_of_orthogous_groups, number_of_species, number_of_sample, outdir,
                              max_phylo_distance, min_identity, deg_ratio, seed, output_format, read_length, f)
     gene_summary.to_csv(f"{summary_dir}/gene_summary.csv", index=False)