From 601e86c3a292aacae0bebb06481518a1298a8706 Mon Sep 17 00:00:00 2001
From: Andrey Tovchigrechko <tovchigrechkoa@medimmune.com>
Date: Mon, 29 Jan 2018 15:18:35 -0500
Subject: [PATCH 1/3] Option to run subprocess w/o shell

---
 ariba/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ariba/common.py b/ariba/common.py
index 0410334f..a84528df 100644
--- a/ariba/common.py
+++ b/ariba/common.py
@@ -9,11 +9,11 @@
 class Error (Exception): pass
 
 
-def syscall(cmd, allow_fail=False, verbose=False, verbose_filehandle=sys.stdout, print_errors=True):
+def syscall(cmd, allow_fail=False, verbose=False, verbose_filehandle=sys.stdout, print_errors=True, shell=True):
     if verbose:
         print('syscall:', cmd, flush=True, file=verbose_filehandle)
     try:
-        subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
+        subprocess.check_output(cmd, shell=shell, stderr=subprocess.STDOUT)
     except subprocess.CalledProcessError as error:
         errors = error.output.decode()
         if print_errors:

From 4fb0392ce99bb6f36c8b4a24e18d2b2bb52b7902 Mon Sep 17 00:00:00 2001
From: Andrey Tovchigrechko <tovchigrechkoa@medimmune.com>
Date: Wed, 31 Jan 2018 17:32:06 -0500
Subject: [PATCH 2/3] Remove reference to spades options in tests that are not
 using spades. Cleanup tmp dirs before copytree in case the test harness is
 repeated in a directory after previous run has failed with unhandled
 exceptions.

---
 ariba/tests/cluster_test.py | 72 +++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 23 deletions(-)

diff --git a/ariba/tests/cluster_test.py b/ariba/tests/cluster_test.py
index 754c7a54..ea7c078a 100644
--- a/ariba/tests/cluster_test.py
+++ b/ariba/tests/cluster_test.py
@@ -43,6 +43,7 @@ def test_init_fail_files_missing(self):
         dirs = [os.path.join(data_dir, d) for d in dirs]
         for d in dirs:
             tmpdir = 'tmp.cluster_test_init_fail_files_missing'
+            shutil.rmtree(tmpdir,ignore_errors=True)
             shutil.copytree(d, tmpdir)
             with self.assertRaises(cluster.Error):
                 cluster.Cluster(tmpdir, 'name', refdata=refdata, total_reads=42, total_reads_bases=4242)
@@ -100,6 +101,7 @@ def test_full_run_no_reads_after_filtering(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_no_reads_after_filtering.in.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.test_full_run_no_reads_after_filtering'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_no_reads_after_filtering'), tmpdir)
 
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=0, total_reads_bases=0)
@@ -118,9 +120,10 @@ def test_full_run_choose_ref_fail(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_choose_ref_fail.in.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.test_full_run_choose_ref_fail'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_choose_ref_fail'), tmpdir)
 
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=2, total_reads_bases=108, spades_other_options='--only-assembler')
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=2, total_reads_bases=108)
         c.run()
 
         expected = '\t'.join(['.', '.', '.', '.', '1024', '2', 'cluster_name'] + ['.'] * 24)
@@ -137,9 +140,10 @@ def test_full_run_ref_not_in_cluster(self):
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.test_full_run_ref_not_in_cluster'
         all_refs_fa =  os.path.join(data_dir, 'cluster_test_full_run_ref_not_in_cluster.all_refs.fa')
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ref_not_in_cluster'), tmpdir)
 
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=72, total_reads_bases=3600, all_ref_seqs_fasta=all_refs_fa)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=72, total_reads_bases=3600, all_ref_seqs_fasta=all_refs_fa)
         c.run()
 
         expected = '\t'.join(['.', '.', '.', '.', '1024', '72', 'cluster_name'] + ['.'] * 24)
@@ -155,6 +159,7 @@ def test_full_run_assembly_fail(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_assembly_fail.in.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.test_full_run_assembly_fail'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_assembly_fail'), tmpdir)
 
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=4, total_reads_bases=304)
@@ -173,9 +178,10 @@ def test_full_run_ok_non_coding(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_ok_non_coding.metadata.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.test_full_run_ok_non_coding'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ok_non_coding'), tmpdir)
 
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=72, total_reads_bases=3600)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=72, total_reads_bases=3600)
         c.run()
 
         self.maxDiff=None
@@ -198,9 +204,10 @@ def test_full_run_ok_presence_absence(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_ok_presence_absence.metadata.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_ok_presence_absence'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ok_presence_absence'), tmpdir)
 
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=64, total_reads_bases=3200)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=64, total_reads_bases=3200)
         c.run()
 
         expected = [
@@ -220,9 +227,10 @@ def test_full_run_ok_variants_only_variant_not_present(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_ok_variants_only.not_present.metadata.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_ok_variants_only.not_present'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ok_variants_only'), tmpdir)
 
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=66, total_reads_bases=3300)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=66, total_reads_bases=3300)
         c.run()
         expected = [
             'variants_only1\tvariants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tCGC\t65\t67\tCGC\t18;18;19\tC;G;C\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type, so do not report\tGeneric description of variants_only1'
@@ -237,9 +245,10 @@ def test_full_run_ok_variants_only_variant_not_present_always_report(self):
         tsv_in = os.path.join(data_dir, 'cluster_full_run_varonly.not_present.always_report.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_full_run_varonly.not_present.always_report'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ok_variants_only'), tmpdir)
 
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=66, total_reads_bases=3300)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=66, total_reads_bases=3300)
         c.run()
         expected = [
             'variants_only1\tvariants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tCGC\t65\t67\tCGC\t18;18;19\tC;G;C\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type, but always report anyway\tGeneric description of variants_only1'
@@ -254,9 +263,10 @@ def test_full_run_ok_variants_only_variant_is_present(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_ok_variants_only.present.metadata.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_ok_variants_only.present'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ok_variants_only'), tmpdir)
 
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=66, total_reads_bases=3300)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=66, total_reads_bases=3300)
         c.run()
 
         expected = [
@@ -273,8 +283,9 @@ def test_full_run_ok_gene_start_mismatch(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_ok_gene_start_mismatch.metadata.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_ok_gene_start_mismatch'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ok_gene_start_mismatch'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=112, total_reads_bases=1080)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=112, total_reads_bases=1080)
         c.run()
         expected = [
             'gene\tgene\t1\t0\t27\t112\tcluster_name\t96\t96\t100.0\tcluster_name.l6.c30.ctg.1\t362\t27.8\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of gene'
@@ -289,8 +300,9 @@ def test_full_run_smtls_snp_presabs_gene(self):
         tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_presabs_gene.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_ok_samtools_snp_pres_abs_gene'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_presabs_gene'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
         c.run()
         expected = [
             'ref_gene\tref_gene\t1\t0\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_gene'
@@ -307,8 +319,9 @@ def test_full_run_smtls_snp_varonly_gene_2(self):
         tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_gene_2.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_full_run_smtls_snp_varonly_gene_2'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_gene_2'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
         c.run()
         expected = [
             'ref_gene\tref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_gene'
@@ -323,8 +336,9 @@ def test_full_run_known_smtls_snp_presabs_gene(self):
         tsv_in = os.path.join(data_dir, 'cluster_full_run_known_smtls_snp_presabs_gene.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_ok_samtools_snp_known_position_pres_abs_gene'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_full_run_known_smtls_snp_presabs_gene'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
         c.run()
 
         # We shouldn't get an extra 'HET' line because we already know about the snp, so
@@ -342,8 +356,9 @@ def test_full_run_smtls_snp_varonly_gene_no_snp(self):
         tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_gene_no_snp.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_smtls_snp_varonly_gene_no_snp'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_gene_no_snp'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
         c.run()
 
         # We shouldn't get an extra 'HET' line because we already know about the snp, so
@@ -361,8 +376,9 @@ def test_full_run_smtls_snp_varonly_gene(self):
         tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_gene.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_ok_samtools_snp_known_position_var_only_gene_does_have_var'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_gene'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
         c.run()
 
         # We shouldn't get an extra 'HET' line because we already know about the snp, so
@@ -380,8 +396,9 @@ def test_full_run_smtls_snp_presabs_nonc(self):
         tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_presabs_nonc.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_smtls_snp_presabs_nonc'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_presabs_nonc'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
         c.run()
         expected = [
             'ref_seq\tref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_seq'
@@ -396,8 +413,9 @@ def test_full_run_smtls_known_snp_presabs_nonc(self):
         tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_known_snp_presabs_nonc.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_smtls_known_snp_presabs_nonc'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_known_snp_presabs_nonc'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
         c.run()
         expected = [
             'ref_seq\tref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\tref_seq:0:0:G18A:.:Description of G18A\tGeneric description of ref_seq'
@@ -412,8 +430,9 @@ def test_full_run_smtls_snp_varonly_nonc(self):
         tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_nonc.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_full_run_smtls_snp_varonly_nonc'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_nonc'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
         c.run()
         expected = [
             'ref_seq\tref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_seq'
@@ -428,8 +447,9 @@ def test_full_run_known_smtls_snp_presabs_nonc(self):
         tsv_in = os.path.join(data_dir, 'cluster_full_run_known_smtls_snp_presabs_nonc.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_ok_samtools_snp_known_position_pres_abs_noncoding'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_full_run_known_smtls_snp_presabs_nonc'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
         c.run()
 
         # We shouldn't get an extra 'HET' line because we already know about the snp, so
@@ -447,8 +467,9 @@ def test_full_run_smtls_snp_varonly_nonc_no_snp(self):
         tsv_in = os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_nonc_no_snp.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_ok_samtools_snp_known_position_var_only_noncoding'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_full_run_smtls_snp_varonly_nonc_no_snp'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
         c.run()
 
         # We shouldn't get an extra 'HET' line because we already know about the snp, so
@@ -466,8 +487,9 @@ def test_full_run_cluster_test_full_run_smtls_snp_varonly_nonc(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_smtls_snp_varonly_nonc.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_ok_samtools_snp_known_position_var_only_noncoding'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_smtls_snp_varonly_nonc'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=148, total_reads_bases=13320)
         c.run()
 
         # We shouldn't get an extra 'HET' line because we already know about the snp, so
@@ -485,8 +507,9 @@ def test_full_run_partial_assembly(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_partial_asmbly.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_partial_assembly'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_partial_asmbly'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=278, total_reads_bases=15020)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=278, total_reads_bases=15020)
         c.run()
 
         expected = [
@@ -502,8 +525,9 @@ def test_full_run_multiple_vars_in_codon(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_multiple_vars.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_run_multiple_vars'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_multiple_vars'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=292, total_reads_bases=20900)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=292, total_reads_bases=20900)
         c.run()
 
         expected = [
@@ -520,8 +544,9 @@ def test_full_run_delete_codon(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_delete_codon.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_delete_codon'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_delete_codon'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=292, total_reads_bases=20900)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=292, total_reads_bases=20900)
         c.run()
 
         expected = [
@@ -537,8 +562,9 @@ def test_full_run_insert_codon(self):
         tsv_in = os.path.join(data_dir, 'cluster_test_full_run_insert_codon.tsv')
         refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
         tmpdir = 'tmp.cluster_test_full_insert_codon'
+        shutil.rmtree(tmpdir, ignore_errors=True)
         shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_insert_codon'), tmpdir)
-        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=292, total_reads_bases=20900)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=292, total_reads_bases=20900)
         c.run()
 
         expected = [

From e86c9c122fb4d736a7d139f025afd06f0e804669 Mon Sep 17 00:00:00 2001
From: Andrey Tovchigrechko <tovchigrechkoa@medimmune.com>
Date: Wed, 31 Jan 2018 18:47:25 -0500
Subject: [PATCH 3/3] Reintroduce Spades as another assembly option.
 Multithreading for Spades and Bowtie2. Spades improves upon default
 fermilight on challenging datasets with highly uneven coverage (viral,
 amplicon, single-cell). Multithreading for Spades and Bowtie2 subprocesses is
 adaptive - kicks in at the end of a multiprocessing map run when idle threads
 were appearing otherwise (or in cases of overall fewer clusters than total
 threads).

---
 ariba/assembly.py                             | 114 ++++++++-
 ariba/cluster.py                              |  95 ++++---
 ariba/clusters.py                             | 199 +++++++++------
 ariba/common.py                               |   6 +-
 ariba/external_progs.py                       |  16 +-
 ariba/tasks/run.py                            |   4 +-
 ariba/tests/assembly_test.py                  |  48 +++-
 ...test_assemble_with_spades_fails_reads_1.fq |   4 +
 ...test_assemble_with_spades_fails_reads_2.fq |   5 +
 ...embly_test_assemble_with_spades_reads_1.fq | 232 ++++++++++++++++++
 ...embly_test_assemble_with_spades_reads_2.fq | 232 ++++++++++++++++++
 ...ssembly_test_check_spades_log_file.log.bad |   5 +
 ...sembly_test_check_spades_log_file.log.good |   5 +
 scripts/ariba                                 |   6 +
 14 files changed, 863 insertions(+), 108 deletions(-)
 create mode 100644 ariba/tests/data/assembly_test_assemble_with_spades_fails_reads_1.fq
 create mode 100644 ariba/tests/data/assembly_test_assemble_with_spades_fails_reads_2.fq
 create mode 100644 ariba/tests/data/assembly_test_assemble_with_spades_reads_1.fq
 create mode 100644 ariba/tests/data/assembly_test_assemble_with_spades_reads_2.fq
 create mode 100644 ariba/tests/data/assembly_test_check_spades_log_file.log.bad
 create mode 100644 ariba/tests/data/assembly_test_check_spades_log_file.log.good

diff --git a/ariba/assembly.py b/ariba/assembly.py
index f028a632..0d64882c 100644
--- a/ariba/assembly.py
+++ b/ariba/assembly.py
@@ -5,6 +5,7 @@
 import pymummer
 import fermilite_ariba
 from ariba import common, faidx, mapping, bam_parse, external_progs, ref_seq_chooser
+import shlex
 
 class Error (Exception): pass
 
@@ -29,6 +30,9 @@ def __init__(self,
       nucmer_breaklen=200,
       extern_progs=None,
       clean=True,
+      spades_mode="wgs",
+      spades_options=None,
+      threads=1
     ):
         self.reads1 = os.path.abspath(reads1)
         self.reads2 = os.path.abspath(reads2)
@@ -50,6 +54,9 @@ def __init__(self,
         self.nucmer_min_len = nucmer_min_len
         self.nucmer_breaklen = nucmer_breaklen
         self.clean = clean
+        self.spades_mode = spades_mode
+        self.spades_options = spades_options
+        self.threads = threads
 
         if extern_progs is None:
             self.extern_progs = external_progs.ExternalProgs()
@@ -94,6 +101,106 @@ def _assemble_with_fermilite(self):
         self.assembled_ok = (got_from_fermilite == 0)
         os.chdir(cwd)
 
+    @staticmethod
+    def _check_spades_log_file(logfile):
+        '''SPAdes can fail with a strange error. Stop everything if this happens'''
+        f = pyfastaq.utils.open_file_read(logfile)
+
+        for line in f:
+            if line.startswith('== Error ==  system call for:') and line.rstrip().endswith('finished abnormally, err code: -7'):
+                pyfastaq.utils.close(f)
+                print('Error running SPAdes. Cannot continue. This is the error from the log file', logfile, '...', file=sys.stderr)
+                print(line, file=sys.stderr)
+                raise Error('Fatal error ("err code: -7") running spades. Cannot continue')
+
+        pyfastaq.utils.close(f)
+        return True
+
+    def _assemble_with_spades(self):
+        cwd = os.getcwd()
+        self.assembled_ok = False
+        try:
+            try:
+                os.chdir(self.working_dir)
+            except:
+                raise Error('Error chdir ' + self.working_dir)
+            spades_exe = self.extern_progs.exe('spades')
+            if not spades_exe:
+                raise Error("Spades executable has not been found")
+            spades_options = self.spades_options
+            if spades_options is not None:
+                spades_options = shlex.split(self.spades_options)
+            if self.spades_mode == "rna":
+                spades_options = ["--rna"] + (["-k","127"] if spades_options is None else spades_options)
+                spades_out_seq_base = "transcripts.fasta"
+            elif self.spades_mode == "sc":
+                spades_options = ["--sc"] + (["-k", "33,55,77,99,127","--careful"] if spades_options is None else spades_options)
+                spades_out_seq_base = "contigs.fasta"
+            elif self.spades_mode == "wgs":
+                spades_options = ["-k", "33,55,77,99,127","--careful"] if spades_options is None else spades_options
+                spades_out_seq_base = "contigs.fasta"
+            else:
+                raise ValueError("Unknown spades_mode value: {}".format(self.spades_mode))
+            asm_cmd = [spades_exe, "-t", str(self.threads), "--pe1-1", self.reads1, "--pe1-2", self.reads2, "-o", self.assembler_dir] + \
+                spades_options
+            asm_ok,err = common.syscall(asm_cmd, verbose=True, verbose_filehandle=self.log_fh, shell=False, allow_fail=True)
+            if not asm_ok:
+                print('Assembly finished with errors. These are the errors:', file=self.log_fh)
+                print(err, file=self.log_fh)
+                print('\nEnd of spades errors\n', file=self.log_fh)
+            else:
+
+                spades_log = os.path.join(self.assembler_dir, 'spades.log')
+                if os.path.exists(spades_log):
+                    self._check_spades_log_file(spades_log)
+
+                    with open(spades_log) as f:
+                        print('\n______________ SPAdes log ___________________\n', file=self.log_fh)
+                        for line in f:
+                            print(line.rstrip(), file=self.log_fh)
+                        print('\n______________ End of SPAdes log _________________\n', file=self.log_fh)
+
+                spades_warnings = os.path.join(self.assembler_dir, 'warnings.log')
+                if os.path.exists(spades_warnings):
+                    with open(spades_warnings) as f:
+                        print('\n______________ SPAdes warnings ___________________\n', file=self.log_fh)
+                        for line in f:
+                            print(line.rstrip(), file=self.log_fh)
+                        print('\n______________ End of SPAdes warnings _________________\n', file=self.log_fh)
+
+                ## fermilight module generates contig names that look like `cluster_1.l15.c17.ctg.1` where 'cluster_1'==self.contig_name_prefix
+                ## the whole structure of the contig name is expected in several places downstream where it is parsed into individual components.
+                ## For example, it is parsed into to l and c parts in ref_seq_chooser (although the parts are not actually used).
+                ## This is the code from fermilight module that generates the contig ID string:
+                ## ofs << ">" << namePrefix << ".l" << overlap << ".c" << minCount << ".ctg." << i + 1 << '\n'
+                ##
+                ## We generate the same contig name structure here using dummy values for overlap and minCount, in order
+                ## to avoid distrupting the downstream code.
+                ## Note that the fermilight module generates multiple versions of the assembly on a grid of l and c values,
+                ## and ref_seq_chooser then picks a single "best" (l,c) version based on coverage/identity of the nucmer
+                ## alignment to the reference. Spades generates a single version of the assembly, so ref_seq_chooser
+                ## can only pick that one version.
+
+                spades_out_seq = os.path.join(self.assembler_dir,spades_out_seq_base)
+                ## No need really to use general-purpose pyfastaq.sequences.file_reader here and pay performance cost for
+                ## its multi-format line tests since we are only replacing the IDs in a pre-defined format
+                if os.path.exists(spades_out_seq):
+                    with open(spades_out_seq,"r") as inp, open(self.all_assembly_contigs_fa,"w") as out:
+                        pref = self.contig_name_prefix
+                        i_cont = 0
+                        for line in inp:
+                            if line.startswith(">"):
+                                i_cont += 1
+                                line = ">{}.l15.c17.ctg.{}\n".format(pref,i_cont)
+                            out.write(line)
+                        if i_cont > 0:
+                            self.assembled_ok = True
+            if self.clean:
+                print('Deleting assembly directory', self.assembler_dir, file=self.log_fh)
+                shutil.rmtree(self.assembler_dir,ignore_errors=True)
+        finally:
+            os.chdir(cwd)
+
 
     @staticmethod
     def _fix_contig_orientation(contigs_fa, ref_fa, outfile, min_id=90, min_length=20, breaklen=200):
@@ -148,7 +255,10 @@ def _parse_bam(sequences, bam, min_scaff_depth, max_insert):
 
 
     def run(self):
-        self._assemble_with_fermilite()
+        if self.assembler == 'fermilite':
+            self._assemble_with_fermilite()
+        elif self.assembler == "spades":
+            self._assemble_with_spades()
         print('Finished running assemblies', flush=True, file=self.log_fh)
         self.sequences = {}
 
@@ -208,7 +318,7 @@ def run(self):
                 self.reads2,
                 self.final_assembly_fa,
                 self.final_assembly_bam[:-4],
-                threads=1,
+                threads=self.threads,
                 sort=True,
                 bowtie2=self.extern_progs.exe('bowtie2'),
                 bowtie2_version=self.extern_progs.version('bowtie2'),
diff --git a/ariba/cluster.py b/ariba/cluster.py
index 75d244b8..9d733f8d 100644
--- a/ariba/cluster.py
+++ b/ariba/cluster.py
@@ -43,10 +43,12 @@ def __init__(self,
       max_allele_freq=0.90,
       unique_threshold=0.03,
       max_gene_nt_extend=30,
-      spades_other_options=None,
+      spades_mode="rna", #["rna","wgs"]
+      spades_options=None,
       clean=True,
       extern_progs=None,
       random_seed=42,
+      threads_total=1
     ):
         self.root_dir = os.path.abspath(root_dir)
         self.read_store = read_store
@@ -71,7 +73,8 @@ def __init__(self,
         self.sspace_k = sspace_k
         self.sspace_sd = sspace_sd
         self.reads_insert = reads_insert
-        self.spades_other_options = spades_other_options
+        self.spades_mode = spades_mode
+        self.spades_options = spades_options
 
         self.reads_for_assembly1 = os.path.join(self.root_dir, 'reads_for_assembly_1.fq')
         self.reads_for_assembly2 = os.path.join(self.root_dir, 'reads_for_assembly_2.fq')
@@ -96,6 +99,9 @@ def __init__(self,
         self.status_flag = flag.Flag()
         self.clean = clean
 
+        self.threads_total = threads_total
+        self.remaining_clusters = None
+
         self.assembly_dir = os.path.join(self.root_dir, 'Assembly')
         self.final_assembly_fa = os.path.join(self.root_dir, 'assembly.fa')
         self.final_assembly_bam = os.path.join(self.root_dir, 'assembly.reads_mapped.bam')
@@ -138,13 +144,31 @@ def __init__(self,
         for s in wanted_signals:
             signal.signal(s, self._receive_signal)
 
+    def _update_threads(self):
+        """Update available thread count post-construction.
+        To be called any number of times from run() method"""
+        if self.remaining_clusters is not None:
+            self.threads = max(1,self.threads_total//self.remaining_clusters.value)
+        #otherwise just keep the current (initial) value
+        print("{} detected {} threads available to it".format(self.name,self.threads), file = self.log_fh)
+
+    def _report_completion(self):
+        """Update shared counters to signal that we are done with this cluster.
+        Call just before exiting run() method (in a finally clause)"""
+        rem_clust = self.remaining_clusters
+        if rem_clust is not None:
+            # -= is non-atomic, need to acquire a lock
+            with self.remaining_clusters_lock:
+                rem_clust.value -= 1
+            # we do not need this object anymore
+            self.remaining_clusters = None
+            print("{} reported completion".format(self.name), file=self.log_fh)
 
     def _atexit(self):
         if self.log_fh is not None:
             pyfastaq.utils.close(self.log_fh)
             self.log_fh = None
 
-
     def _receive_signal(self, signum, stack):
         print('Signal', signum, 'received in cluster', self.name + '... Stopping!', file=sys.stderr, flush=True)
         if self.log_fh is not None:
@@ -190,7 +214,10 @@ def _set_up_input_files(self):
     def _clean_file(self, filename):
         if self.clean:
             print('Deleting file', filename, file=self.log_fh)
-            os.unlink(filename)
+            try: #protect against OSError: [Errno 16] Device or resource busy: '.nfs0000000010f0f04f000003c9' and such
+                os.unlink(filename)
+            except:
+                pass
 
 
     def _clean(self):
@@ -269,33 +296,39 @@ def _make_reads_for_assembly(number_of_wanted_reads, total_reads, reads_in1, rea
             return total_reads
 
 
-    def run(self):
-        self._set_up_input_files()
+    def run(self,remaining_clusters=None,remaining_clusters_lock=None):
+        try:
+            self.remaining_clusters = remaining_clusters
+            self.remaining_clusters_lock = remaining_clusters_lock
+            self._update_threads()
+            self._set_up_input_files()
 
-        for fname in [self.all_reads1, self.all_reads2, self.references_fa]:
-            if not os.path.exists(fname):
-                raise Error('File ' + fname + ' not found. Cannot continue')
+            for fname in [self.all_reads1, self.all_reads2, self.references_fa]:
+                if not os.path.exists(fname):
+                    raise Error('File ' + fname + ' not found. Cannot continue')
 
-        original_dir = os.getcwd()
-        os.chdir(self.root_dir)
+            original_dir = os.getcwd()
+            os.chdir(self.root_dir)
+
+            try:
+                self._run()
+            except Error as err:
+                os.chdir(original_dir)
+                print('Error running cluster! Error was:', err, sep='\n', file=self.log_fh)
+                pyfastaq.utils.close(self.log_fh)
+                self.log_fh = None
+                raise Error('Error running cluster ' + self.name + '!')
 
-        try:
-            self._run()
-        except Error as err:
             os.chdir(original_dir)
-            print('Error running cluster! Error was:', err, sep='\n', file=self.log_fh)
+            print('Finished', file=self.log_fh, flush=True)
+            print('{:_^79}'.format(' LOG FILE END ' + self.name + ' '), file=self.log_fh, flush=True)
+
+            # This stops multiprocessing complaining with the error:
+            # multiprocessing.pool.MaybeEncodingError: Error sending result: '[<ariba.cluster.Cluster object at 0x7ffa50f8bcd0>]'. Reason: 'TypeError("cannot serialize '_io.TextIOWrapper' object",)'
             pyfastaq.utils.close(self.log_fh)
             self.log_fh = None
-            raise Error('Error running cluster ' + self.name + '!')
-
-        os.chdir(original_dir)
-        print('Finished', file=self.log_fh, flush=True)
-        print('{:_^79}'.format(' LOG FILE END ' + self.name + ' '), file=self.log_fh, flush=True)
-
-        # This stops multiprocessing complaining with the error:
-        # multiprocessing.pool.MaybeEncodingError: Error sending result: '[<ariba.cluster.Cluster object at 0x7ffa50f8bcd0>]'. Reason: 'TypeError("cannot serialize '_io.TextIOWrapper' object",)'
-        pyfastaq.utils.close(self.log_fh)
-        self.log_fh = None
+        finally:
+            self._report_completion()
 
 
     def _run(self):
@@ -310,6 +343,7 @@ def _run(self):
             print('\nUsing', made_reads, 'from a total of', self.total_reads, 'for assembly.', file=self.log_fh, flush=True)
             print('Assembling reads:', file=self.log_fh, flush=True)
 
+            self._update_threads()
             self.assembly = assembly.Assembly(
               self.reads_for_assembly1,
               self.reads_for_assembly2,
@@ -323,7 +357,10 @@ def _run(self):
               contig_name_prefix=self.name,
               assembler=self.assembler,
               extern_progs=self.extern_progs,
-              clean=self.clean
+              clean=self.clean,
+              spades_mode=self.spades_mode,
+              spades_options=self.spades_options,
+              threads=self.threads
             )
 
             self.assembly.run()
@@ -332,7 +369,7 @@ def _run(self):
             self._clean_file(self.reads_for_assembly2)
             if self.clean:
                 print('Deleting Assembly directory', self.assembly_dir, file=self.log_fh, flush=True)
-                shutil.rmtree(self.assembly_dir)
+                shutil.rmtree(self.assembly_dir,ignore_errors=True)
 
 
         if self.assembled_ok and self.assembly.ref_seq_name is not None:
@@ -342,13 +379,13 @@ def _run(self):
             self.is_variant_only = '1' if is_variant_only else '0'
 
             print('\nAssembly was successful\n\nMapping reads to assembly:', file=self.log_fh, flush=True)
-
+            self._update_threads()
             mapping.run_bowtie2(
                 self.all_reads1,
                 self.all_reads2,
                 self.final_assembly_fa,
                 self.final_assembly_bam[:-4],
-                threads=1,
+                threads=self.threads,
                 sort=True,
                 bowtie2=self.extern_progs.exe('bowtie2'),
                 bowtie2_preset='very-sensitive-local',
diff --git a/ariba/clusters.py b/ariba/clusters.py
index ae50cd56..60cdd714 100644
--- a/ariba/clusters.py
+++ b/ariba/clusters.py
@@ -14,8 +14,11 @@
 
 class Error (Exception): pass
 
-
-def _run_cluster(obj, verbose, clean, fails_dir):
+# passing shared objects (remaining_clusters) through here and thus making them
+# explicit arguments to Pool.startmap when running this function. That seems to be
+# a recommended safe transfer mechanism as opposed making them attributes of a
+# pre-constructed 'obj' variable (although the docs are a bit hazy on that)
+def _run_cluster(obj, verbose, clean, fails_dir, remaining_clusters, remaining_clusters_lock):
     failed_clusters = os.listdir(fails_dir)
 
     if len(failed_clusters) > 0:
@@ -25,7 +28,7 @@ def _run_cluster(obj, verbose, clean, fails_dir):
     if verbose:
         print('Start running cluster', obj.name, 'in directory', obj.root_dir, flush=True)
     try:
-        obj.run()
+        obj.run(remaining_clusters=remaining_clusters,remaining_clusters_lock=remaining_clusters_lock)
     except:
         print('Failed cluster:', obj.name, file=sys.stderr)
         with open(os.path.join(fails_dir, obj.name), 'w'):
@@ -38,7 +41,10 @@ def _run_cluster(obj, verbose, clean, fails_dir):
         if verbose:
             print('Deleting cluster dir', obj.root_dir, flush=True)
         if os.path.exists(obj.root_dir):
-            shutil.rmtree(obj.root_dir)
+            try:
+                shutil.rmtree(obj.root_dir)
+            except:
+                pass
 
     return obj
 
@@ -56,7 +62,8 @@ def __init__(self,
       threads=1,
       verbose=False,
       assembler='fermilite',
-      spades_other=None,
+      spades_mode='rna',
+      spades_options=None,
       max_insert=1000,
       min_scaff_depth=10,
       nucmer_min_id=90,
@@ -86,10 +93,10 @@ def __init__(self,
         self.logs_dir = os.path.join(self.outdir, 'Logs')
 
         self.assembler = assembler
-        assert self.assembler in ['fermilite']
         self.assembly_kmer = assembly_kmer
         self.assembly_coverage = assembly_coverage
-        self.spades_other = spades_other
+        self.spades_mode = spades_mode
+        self.spades_options = spades_options
 
         self.cdhit_files_prefix = os.path.join(self.refdata_dir, 'cdhit')
         self.cdhit_cluster_representatives_fa = self.cdhit_files_prefix + '.cluster_representatives.fa'
@@ -135,7 +142,6 @@ def __init__(self,
                 os.mkdir(d)
             except:
                 raise Error('Error mkdir ' + d)
-
         if tmp_dir is None:
             if 'ARIBA_TMPDIR' in os.environ:
                 tmp_dir = os.path.abspath(os.environ['ARIBA_TMPDIR'])
@@ -373,6 +379,28 @@ def _init_and_run_clusters(self):
         cluster_list = []
         self.log_files = []
 
+        # How the thread count withing each Cluster.run is managed:
+        # We want to handle those cases where there are more total threads allocated to the application than there are clusters
+        # remaining to run (for example,
+        # there are only two references, and eight threads). If we keep the default thread value of 1 in cluster. Cluster,
+        # then we will be wasting the allocated threads. The most simple approach would be to divide all threads equally between clusters
+        # before calling Pool.map. Multithreaded external programs like Spades and Bowtie2 are then called with multiple threads. That should
+        # never be slower than keeping just one thread in cluster.Cluster, except maybe in the extreme cases when (if)
+        # a multi-threaded run of the external program takes longer wall-clock time than a single-threaded one.
+        # However, this solution would always keep
+        # Cluster.threads=1 if the initial number of clusters > number of total threads. This can result in inefficiency at the
+        # tail of the Pool.map execution flow - when the clusters are getting finished overall, we are waiting for the completion of
+        # fewer and fewer remaining
+        # single-threaded cluster tasks while more and more total threads are staying idle. We mitigate this through the following approach:
+        # - Create a shared Value object that holds the number of remaining clusters (remaining_clusters).
+        # - Each Cluster.run decrements the remaining_clusters when it completes
+        # - Cluster.run sets its own thread count to max(1,threads_total//remaining_clusters). This can be done as many times
+        #   as needed at various points within Cluster.run (e.g. once before Spades is called, and again before Bowtie2 is called),
+        #   in order to catch more idle threads.
+        # This is a simple and conservative approach to adaptively use all threads at the tail of the map flow. It
+        # never over-subscribes the threads, and it does not require any extra blocking within Cluster.run in order to
+        # wait for threads becoming available.
+
         for cluster_name in sorted(self.cluster_to_dir):
             counter += 1
 
@@ -406,25 +434,56 @@ def _init_and_run_clusters(self):
                 reads_insert=self.insert_size,
                 sspace_k=self.min_scaff_depth,
                 sspace_sd=self.insert_sspace_sd,
-                threads=1, # clusters now run in parallel, so this should always be 1!
+                threads=1, # initially set to 1, then will adaptively self-modify while running
                 assembled_threshold=self.assembled_threshold,
                 unique_threshold=self.unique_threshold,
                 max_gene_nt_extend=self.max_gene_nt_extend,
-                spades_other_options=self.spades_other,
+                spades_mode=self.spades_mode,
+                spades_options=self.spades_options,
                 clean=self.clean,
                 extern_progs=self.extern_progs,
+                threads_total=self.threads
             ))
-
+        # Here is why we use proxy objects from a Manager process below
+        # instead of simple shared multiprocessing.Value counter:
+        # Shared memory objects in multiprocessing use tempfile module to
+        # create temporary directory, then create temporary file inside it,
+        # memmap the file and unlink it. If TMPDIR envar points to a NFS
+        # mount, the final cleanup handler from multiprocessing will often
+        # return an exception due to a stale NFS file (.nfsxxxx) from a shutil.rmtree
+        # call. See help on tempfile.gettempdir() for how the default location of
+        # temporary files is selected. The exception is caught in except clause
+        # inside multiprocessing cleanup, and only a harmless traceback is printed,
+        # but it looks very spooky to the user and causes confusion. We use
+        # instead shared proxies from the Manager. Those do not rely on shared
+        # memory, and thus bypass the NFS issues. The counter is accesses infrequently
+        # relative to computations, so the performance does not suffer.
+        # default authkey in the manager will be some generated random-looking string
+        manager = multiprocessing.Manager()
+        remaining_clusters = manager.Value('l',len(cluster_list))
+        # manager.Value does not provide access to the internal RLock that we need for
+        # implementing atomic -=, so we need to carry around a separate RLock object.
+        remaining_clusters_lock = manager.RLock()
         try:
             if self.threads > 1:
                 self.pool = multiprocessing.Pool(self.threads)
-                cluster_list = self.pool.starmap(_run_cluster, zip(cluster_list, itertools.repeat(self.verbose), itertools.repeat(self.clean), itertools.repeat(self.fails_dir)))
+                cluster_list = self.pool.starmap(_run_cluster, zip(cluster_list, itertools.repeat(self.verbose), itertools.repeat(self.clean), itertools.repeat(self.fails_dir),
+                                                                   itertools.repeat(remaining_clusters),itertools.repeat(remaining_clusters_lock)))
+                # harvest the pool as soon as we no longer need it
+                self.pool.close()
+                self.pool.join()
             else:
                 for c in cluster_list:
-                    _run_cluster(c, self.verbose, self.clean, self.fails_dir)
+                    _run_cluster(c, self.verbose, self.clean, self.fails_dir, remaining_clusters, remaining_clusters_lock)
         except:
             self.clusters_all_ran_ok = False
 
+        if self.verbose:
+            print('Final value of remaining_clusters counter:', remaining_clusters)
+        remaining_clusters = None
+        remaining_clusters_lock = None
+        manager.shutdown()
+
         if len(os.listdir(self.fails_dir)) > 0:
             self.clusters_all_ran_ok = False
 
@@ -498,7 +557,7 @@ def _write_catted_genes_matching_refs_fasta(self, outfile):
 
     def _clean(self):
         if self.clean:
-            shutil.rmtree(self.fails_dir)
+            shutil.rmtree(self.fails_dir,ignore_errors=True)
 
             try:
                 self.tmp_dir_obj.cleanup()
@@ -507,10 +566,7 @@ def _clean(self):
 
             if self.verbose:
                 print('Deleting Logs directory', self.logs_dir)
-            try:
-                shutil.rmtree(self.logs_dir)
-            except:
-                pass
+            shutil.rmtree(self.logs_dir,ignore_errors=True)
 
             try:
                 if self.verbose:
@@ -551,67 +607,68 @@ def run(self):
 
     def _run(self):
         cwd = os.getcwd()
-        os.chdir(self.outdir)
-        self.write_versions_file(cwd)
-        self._map_and_cluster_reads()
-        self.log_files = None
-
-        if len(self.cluster_to_dir) > 0:
-            got_insert_data_ok = self._set_insert_size_data()
-            if not got_insert_data_ok:
-                print('WARNING: not enough proper read pairs (found ' + str(self.proper_pairs) + ') to determine insert size.', file=sys.stderr)
-                print('This probably means that very few reads were mapped at all. No local assemblies will be run', file=sys.stderr)
-                if self.verbose:
-                    print('Not enough proper read pairs mapped to determine insert size. Skipping all assemblies.', flush=True)
+        try:
+            os.chdir(self.outdir)
+            self.write_versions_file(cwd)
+            self._map_and_cluster_reads()
+            self.log_files = None
+
+            if len(self.cluster_to_dir) > 0:
+                got_insert_data_ok = self._set_insert_size_data()
+                if not got_insert_data_ok:
+                    print('WARNING: not enough proper read pairs (found ' + str(self.proper_pairs) + ') to determine insert size.', file=sys.stderr)
+                    print('This probably means that very few reads were mapped at all. No local assemblies will be run', file=sys.stderr)
+                    if self.verbose:
+                        print('Not enough proper read pairs mapped to determine insert size. Skipping all assemblies.', flush=True)
+                else:
+                    if self.verbose:
+                        print('{:_^79}'.format(' Assembling each cluster '))
+                        print('Will run', self.threads, 'cluster(s) in parallel', flush=True)
+                    self._init_and_run_clusters()
+                    if self.verbose:
+                        print('Finished assembling clusters\n')
             else:
                 if self.verbose:
-                    print('{:_^79}'.format(' Assembling each cluster '))
-                    print('Will run', self.threads, 'cluster(s) in parallel', flush=True)
-                self._init_and_run_clusters()
-                if self.verbose:
-                    print('Finished assembling clusters\n')
-        else:
-            if self.verbose:
-                print('No reads mapped. Skipping all assemblies', flush=True)
-            print('WARNING: no reads mapped to reference genes. Therefore no local assemblies will be run', file=sys.stderr)
+                    print('No reads mapped. Skipping all assemblies', flush=True)
+                print('WARNING: no reads mapped to reference genes. Therefore no local assemblies will be run', file=sys.stderr)
 
-        if not self.clusters_all_ran_ok:
-            raise Error('At least one cluster failed! Stopping...')
+            if not self.clusters_all_ran_ok:
+                raise Error('At least one cluster failed! Stopping...')
 
-        if self.verbose:
-            print('{:_^79}'.format(' Writing reports '), flush=True)
-            print('Making', self.report_file_all_tsv)
-        self._write_report(self.clusters, self.report_file_all_tsv)
+            if self.verbose:
+                print('{:_^79}'.format(' Writing reports '), flush=True)
+                print('Making', self.report_file_all_tsv)
+            self._write_report(self.clusters, self.report_file_all_tsv)
 
-        if self.verbose:
-            print('Making', self.report_file_filtered)
-        rf = report_filter.ReportFilter(infile=self.report_file_all_tsv)
-        rf.run(self.report_file_filtered)
+            if self.verbose:
+                print('Making', self.report_file_filtered)
+            rf = report_filter.ReportFilter(infile=self.report_file_all_tsv)
+            rf.run(self.report_file_filtered)
 
-        if self.verbose:
-            print()
-            print('{:_^79}'.format(' Writing fasta of assembled sequences '), flush=True)
-            print(self.catted_assembled_seqs_fasta, 'and', self.catted_genes_matching_refs_fasta, flush=True)
-        self._write_catted_assembled_seqs_fasta(self.catted_assembled_seqs_fasta)
-        self._write_catted_genes_matching_refs_fasta(self.catted_genes_matching_refs_fasta)
-        self._write_catted_assemblies_fasta(self.catted_assemblies_fasta)
-
-        if self.log_files is not None:
-            clusters_log_file = os.path.join(self.outdir, 'log.clusters.gz')
             if self.verbose:
                 print()
-                print('{:_^79}'.format(' Catting cluster log files '), flush=True)
-                print('Writing file', clusters_log_file, flush=True)
-            common.cat_files(self.log_files, clusters_log_file)
-
-        if self.verbose:
-            print()
-            print('{:_^79}'.format(' Cleaning files '), flush=True)
-        self._clean()
+                print('{:_^79}'.format(' Writing fasta of assembled sequences '), flush=True)
+                print(self.catted_assembled_seqs_fasta, 'and', self.catted_genes_matching_refs_fasta, flush=True)
+            self._write_catted_assembled_seqs_fasta(self.catted_assembled_seqs_fasta)
+            self._write_catted_genes_matching_refs_fasta(self.catted_genes_matching_refs_fasta)
+            self._write_catted_assemblies_fasta(self.catted_assemblies_fasta)
+
+            if self.log_files is not None:
+                clusters_log_file = os.path.join(self.outdir, 'log.clusters.gz')
+                if self.verbose:
+                    print()
+                    print('{:_^79}'.format(' Catting cluster log files '), flush=True)
+                    print('Writing file', clusters_log_file, flush=True)
+                common.cat_files(self.log_files, clusters_log_file)
 
-        Clusters._write_mlst_reports(self.mlst_profile_file, self.report_file_filtered, self.mlst_reports_prefix, verbose=self.verbose)
+            if self.verbose:
+                print()
+                print('{:_^79}'.format(' Cleaning files '), flush=True)
+            self._clean()
 
-        if self.clusters_all_ran_ok and self.verbose:
-            print('\nAll done!\n')
+            Clusters._write_mlst_reports(self.mlst_profile_file, self.report_file_filtered, self.mlst_reports_prefix, verbose=self.verbose)
 
-        os.chdir(cwd)
+            if self.clusters_all_ran_ok and self.verbose:
+                print('\nAll done!\n')
+        finally:
+            os.chdir(cwd)
diff --git a/ariba/common.py b/ariba/common.py
index a84528df..92ab3c92 100644
--- a/ariba/common.py
+++ b/ariba/common.py
@@ -12,6 +12,8 @@ class Error (Exception): pass
 def syscall(cmd, allow_fail=False, verbose=False, verbose_filehandle=sys.stdout, print_errors=True, shell=True):
     if verbose:
         print('syscall:', cmd, flush=True, file=verbose_filehandle)
+        if not shell:
+            print('syscall string:', " ".join('"{}"'.format(_) for _ in cmd), flush=True, file=verbose_filehandle)
     try:
         subprocess.check_output(cmd, shell=shell, stderr=subprocess.STDOUT)
     except subprocess.CalledProcessError as error:
@@ -26,7 +28,9 @@ def syscall(cmd, allow_fail=False, verbose=False, verbose_filehandle=sys.stdout,
             return False, errors
         else:
             sys.exit(1)
-
+    except Exception as msg:
+        print("Unexpected exception: ", msg, file=sys.stderr)
+        raise
     return True, None
 
 
diff --git a/ariba/external_progs.py b/ariba/external_progs.py
index dbbe0775..dfcd4e64 100644
--- a/ariba/external_progs.py
+++ b/ariba/external_progs.py
@@ -13,6 +13,7 @@ class Error (Exception): pass
     'bowtie2': 'bowtie2',
     'cdhit': 'cd-hit-est',
     'nucmer' : 'nucmer',
+    'spades' : 'spades.py'
 }
 
 
@@ -23,6 +24,7 @@ class Error (Exception): pass
     'bowtie2': ('--version', re.compile('.*bowtie2.*version (.*)$')),
     'cdhit': ('', re.compile('CD-HIT version ([0-9\.]+) \(')),
     'nucmer': ('--version', re.compile('^NUCmer \(NUCleotide MUMmer\) version ([0-9\.]+)')),
+    'spades': ('--version', re.compile('SPAdes\s+v([0-9\.]+)'))
 }
 
 
@@ -30,8 +32,12 @@ class Error (Exception): pass
     'bowtie2': '2.1.0',
     'cdhit': '4.6',
     'nucmer': '3.1',
+    'spades': '3.11.0'
 }
 
+prog_optional = set([
+    'spades'
+])
 
 class ExternalProgs:
     def __init__(self, verbose=False, fail_on_error=True):
@@ -47,11 +53,15 @@ def __init__(self, verbose=False, fail_on_error=True):
         warnings = []
 
         for prog in sorted(prog_to_default):
+            msg_sink = errors
+            if prog in prog_optional:
+                msg_sink = warnings
+
             prog_exe = self._get_exe(prog)
             self.progs[prog] = shutil.which(prog_exe)
 
             if self.progs[prog] is None:
-                errors.append(prog + ' not found in path. Looked for ' + prog_exe)
+                msg_sink.append(prog + ' not found in path. Looked for ' + prog_exe)
 
                 self.version_report.append('\t'.join([prog, 'NA', 'NOT_FOUND']))
                 if verbose:
@@ -63,10 +73,10 @@ def __init__(self, verbose=False, fail_on_error=True):
             if got_version:
                 self.versions[prog] = version
                 if prog in min_versions and LooseVersion(version) < LooseVersion(min_versions[prog]):
-                    errors.append(' '.join(['Found version', version, 'of', prog, 'which is too low! Please update to at least', min_versions[prog] + '. Found it here:', prog_exe]))
+                    msg_sink.append(' '.join(['Found version', version, 'of', prog, 'which is too low! Please update to at least', min_versions[prog] + '. Found it here:', prog_exe]))
             else:
                 self.versions[prog] = None
-                errors.append(version)
+                msg_sink.append(version)
                 version = 'ERROR'
 
             self.version_report.append('\t'.join([prog, version, self.progs[prog]]))
diff --git a/ariba/tasks/run.py b/ariba/tasks/run.py
index db3bd324..75f484fd 100644
--- a/ariba/tasks/run.py
+++ b/ariba/tasks/run.py
@@ -47,7 +47,7 @@ def run(options):
           extern_progs,
           version_report_lines=version_report_lines,
           assembly_coverage=options.assembly_cov,
-          assembler='fermilite',
+          assembler=options.assembler,
           threads=options.threads,
           verbose=options.verbose,
           min_scaff_depth=options.min_scaff_depth,
@@ -59,6 +59,8 @@ def run(options):
           max_gene_nt_extend=options.gene_nt_extend,
           clean=(not options.noclean),
           tmp_dir=options.tmp_dir,
+          spades_mode=options.spades_mode,
+          spades_options=options.spades_options
         )
     c.run()
 
diff --git a/ariba/tests/assembly_test.py b/ariba/tests/assembly_test.py
index f33f2c52..06368493 100644
--- a/ariba/tests/assembly_test.py
+++ b/ariba/tests/assembly_test.py
@@ -5,10 +5,11 @@
 import filecmp
 import pyfastaq
 from ariba import assembly
+from ariba import external_progs
 
 modules_dir = os.path.dirname(os.path.abspath(assembly.__file__))
 data_dir = os.path.join(modules_dir, 'tests', 'data')
-
+extern_progs = external_progs.ExternalProgs()
 
 class TestAssembly(unittest.TestCase):
     def test_run_fermilite(self):
@@ -102,3 +103,48 @@ def test_parse_bam(self):
         os.unlink(bam + '.unmapped_mates')
         os.unlink(bam + '.scaff')
 
+    def test_check_spades_log_file(self):
+        '''test _check_spades_log_file'''
+        good_file = os.path.join(data_dir, 'assembly_test_check_spades_log_file.log.good')
+        bad_file = os.path.join(data_dir, 'assembly_test_check_spades_log_file.log.bad')
+        self.assertTrue(assembly.Assembly._check_spades_log_file(good_file))
+        with self.assertRaises(assembly.Error):
+            self.assertTrue(assembly.Assembly._check_spades_log_file(bad_file))
+
+    @unittest.skipUnless(extern_progs.exe('spades'), "Spades assembler is optional and is not configured")
+    def test_assemble_with_spades(self):
+        '''test _assemble_with_spades'''
+        reads1 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_reads_1.fq')
+        reads2 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_reads_2.fq')
+        tmp_dir = 'tmp.test_assemble_with_spades'
+        tmp_log = 'tmp.test_assemble_with_spades.log'
+        with open(tmp_log, 'w') as tmp_log_fh:
+            print('First line', file=tmp_log_fh)
+            shutil.rmtree(tmp_dir, ignore_errors=True)
+            #using spades_options=" --only-assembler" because error correction cannot determine quality offset on this
+            #artificial dataset
+            a = assembly.Assembly(reads1, reads2, 'not needed', 'not needed', tmp_dir, 'not_needed_for_this_test.fa',
+                                  'not_needed_for_this_test.bam', tmp_log_fh, 'not needed',
+                                  assembler="spades", spades_options=" --only-assembler")
+            a._assemble_with_spades()
+        self.assertTrue(a.assembled_ok)
+        shutil.rmtree(tmp_dir,ignore_errors=True)
+        os.unlink(tmp_log)
+
+    @unittest.skipUnless(extern_progs.exe('spades'), "Spades assembler is optional and is not configured")
+    def test_assemble_with_spades_fail(self):
+        '''test _assemble_with_spades handles spades fail'''
+        reads1 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_fails_reads_1.fq')
+        reads2 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_fails_reads_2.fq')
+        tmp_dir = 'tmp.test_assemble_with_spades_fail'
+        tmp_log = 'tmp.test_assemble_with_spades_fail.log'
+        with open(tmp_log, 'w') as tmp_log_fh:
+            print('First line', file=tmp_log_fh)
+            shutil.rmtree(tmp_dir, ignore_errors=True)
+            a = assembly.Assembly(reads1, reads2, 'not needed', 'not needed', tmp_dir, 'not_needed_for_this_test.fa',
+                                  'not_needed_for_this_test.bam', tmp_log_fh, 'not needed',
+                                  assembler="spades", spades_options=" --only-assembler")
+            a._assemble_with_spades()
+        self.assertFalse(a.assembled_ok)
+        shutil.rmtree(tmp_dir,ignore_errors=True)
+        os.unlink(tmp_log)
diff --git a/ariba/tests/data/assembly_test_assemble_with_spades_fails_reads_1.fq b/ariba/tests/data/assembly_test_assemble_with_spades_fails_reads_1.fq
new file mode 100644
index 00000000..15574dde
--- /dev/null
+++ b/ariba/tests/data/assembly_test_assemble_with_spades_fails_reads_1.fq
@@ -0,0 +1,4 @@
+@read1/1
+CACGTTCGTCGTGATGACTGACGTCACGAGCTCTGCGTACGTCATCTAGCGTATCGTACTGACTGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/assembly_test_assemble_with_spades_fails_reads_2.fq b/ariba/tests/data/assembly_test_assemble_with_spades_fails_reads_2.fq
new file mode 100644
index 00000000..4274d1d6
--- /dev/null
+++ b/ariba/tests/data/assembly_test_assemble_with_spades_fails_reads_2.fq
@@ -0,0 +1,5 @@
+@read1/2
+CACGTTCGTCGTGATGACTGACGTCACGAGCTCTGCGTACGTCATCTAGCGTATCGTACTGACTGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+
diff --git a/ariba/tests/data/assembly_test_assemble_with_spades_reads_1.fq b/ariba/tests/data/assembly_test_assemble_with_spades_reads_1.fq
new file mode 100644
index 00000000..c367e21f
--- /dev/null
+++ b/ariba/tests/data/assembly_test_assemble_with_spades_reads_1.fq
@@ -0,0 +1,232 @@
+@1:1:82:186/1
+GCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:2:6:109/1
+GGCTTTAGCCTGGCCCAATGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCAGAGGGCTAAAGTTTGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:3:1:106/1
+CTCGCGGCTTTAGCCTGGCCCAATGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCAGAGGGCTAAAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:4:33:136/1
+TGGCCCTCCCTTGACTAACTCTGACGCGATCAGAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:5:196:299/1
+CCTTCTACTCCCATTGTCTTTGACGCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTTTAGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:6:63:168/1
+CAGAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:7:10:111/1
+TTAGCCTGGCCCAATGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCAGAGGGCTAAAGTTTGTAGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:8:74:178/1
+AGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:9:84:186/1
+TCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:1:41:144/1
+CCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:2:144:247/1
+ATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGACGATTATATGTTGACCTTCTACTCCCATTGTCTTTGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:3:225:329/1
+CTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCGGTGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:4:237:340/1
+GCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCGGTGCGTATGCTTGAGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:5:45:151/1
+GACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:6:284:386/1
+CGTCCTCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAACTTACGTGGTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:7:305:407/1
+CTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAACTTACGTGGTTACGCTTACCCAGAGAAATATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:8:213:317/1
+CTTTGACGCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:9:183:287/1
+GATTATATGTTGACCTTCTACTCCCATTGTCTTTGACGCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:10:289:393/1
+TCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAACTTACGTGGTTACGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:11:296:399/1
+GTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAACTTACGTGGTTACGCTTACCCAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:12:270:373/1
+GTGTTCCGGATACCCGTCCTCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:13:167:271/1
+CTCATCGTGCTCTGACGATTATATGTTGACCTTCTACTCCCATTGTCTTTGACGCTTTCTGATGTCAGTCGCCGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:14:43:147/1
+TTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:15:323:424/1
+TCCGGACTCATCCCTACTCTTACAACTTACGTGGTTACGCTTACCCAGAGAAATATGTGCGCTACCTGCTTAGCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:16:105:207/1
+AGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:17:237:341/1
+GCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCGGTGCGTATGCTTGAGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:18:3:107/1
+CGCGGCTTTAGCCTGGCCCAATGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:19:272:374/1
+GTTCCGGATACCCGTCCTCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:20:251:354/1
+GTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:21:95:199/1
+CTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:22:96:199/1
+TCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:23:94:197/1
+ACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:24:185:289/1
+TTATATGTTGACCTTCTACTCCCATTGTCTTTGACGCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:25:152:256/1
+CGGAGGGACTAGAGTCTCATCGTGCTCTGACGATTATATGTTGACCTTCTACTCCCATTGTCTTTGACGCTTTCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:26:285:389/1
+GTCCTCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAACTTACGTGGTTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:27:137:241/1
+TGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGACGATTATATGTTGACCTTCTACTCCCATTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:28:261:365/1
+GGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCGGTGCGTATGCTTGAGTCGGTAATATCGTCCGGACTCATCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:29:12:116/1
+AGCCTGGCCCAATGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:30:107:210/1
+CTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:31:162:266/1
+AGAGTCTCATCGTGCTCTGACGATTATATGTTGACCTTCTACTCCCATTGTCTTTGACGCTTTCTGATGTCAGTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:32:213:317.dup.2/1
+CTTTGACGCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:33:24:127/1
+TGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:34:84:189/1
+TCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:35:40:145/1
+CCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:36:120:223/1
+TGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGACGATTATATGTTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:37:106:211/1
+GCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:38:98:202/1
+TGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:39:72:177/1
+AAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCCAACAAGACCTGTTAACATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:40:16:120/1
+TGGCCCAATGCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:41:308:410/1
+GAGTCGGTAATATCGTCCGGACTCATCCCTACTCTTACAACTTACGTGGTTACGCTTACCCAGAGAAATATGTGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:42:26:129/1
+CCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:43:130:234/1
+CAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGACGATTATATGTTGACCTTCTACTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:44:52:157/1
+TCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGGCACCAGCTACAACTCTAATTGATATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:45:220:323/1
+GCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:46:125:228/1
+TCCAACAAGACCTGTTAACATACGATGCGGAGGGACTAGAGTCTCATCGTGCTCTGACGATTATATGTTGACCTTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:47:210:314/1
+TGTCTTTGACGCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:48:220:324/1
+GCTTTCTGATGTCAGTCGCCGGAGACCAGCTGTCTCCCTAGGGCGTATAGGTGTTCCGGATACCCGTCCTCAGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:49:25:128/1
+GCCCTGAGTGGCCCTCCCTTGACTAACTCTGACGCGATCATAGGGCTAAAGTTTGTAGCTCTAAGTCCAACTCTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/assembly_test_assemble_with_spades_reads_2.fq b/ariba/tests/data/assembly_test_assemble_with_spades_reads_2.fq
new file mode 100644
index 00000000..14747f67
--- /dev/null
+++ b/ariba/tests/data/assembly_test_assemble_with_spades_reads_2.fq
@@ -0,0 +1,232 @@
+@1:1:82:186/2
+CCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACCTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:2:6:109/2
+TCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTGGATATCAATTAGAGTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:3:1:106/2
+TCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTGGATATCAATTAGAGTTGTAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:4:33:136/2
+CAATGGGAGTAGAAGGTCAACCTATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:5:196:299/2
+TTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGACGATATTACCGACTCAAGCATACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:6:63:168/2
+CTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACCTATAATCGTCAGAGCACGATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:7:10:111/2
+AATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTGGATATCAATTAGAGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:8:74:178/2
+GACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACCTATAATCGTCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@1:9:84:186/2
+CCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACCTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:1:41:144/2
+GTCAAAGACAATGGGAGTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:2:144:247/2
+CGATATTACCGACTCAAGCATACGCACCGCCTGAGGACGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:3:225:329/2
+TGTCGTAGGCTAAGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:4:237:340/2
+GGCTCCTGCCGTGTCGTAGGCTAAGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:5:45:151/2
+AGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:6:284:386/2
+GGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTAAGCAGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:7:305:407/2
+CTTGAACCTCAGCGCATGGTTGGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:8:213:317/2
+AGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGACGATAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:9:183:287/2
+GCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGACGATATTACCGACTCAAGCATACGCACCGCCTGAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:10:289:393/2
+CATGGTTGGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:11:296:399/2
+TCAGCGCATGGTTGGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:12:270:373/2
+CCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTAAGCAGGTAGCGCACATATTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:13:167:271/2
+TGTAAGAGTAGGGATGAGTCCGGACGATATTACCGACTCAAGCATACGCACCGCCTGAGGACGGGTATCCGGAACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:14:43:147/2
+AGCGTCAAAGACAATGGGAGTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:15:323:424/2
+CATCTAGGTTGGACAGCCTTGAACCTCAGCGCATGGTTGGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:16:105:207/2
+GTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:17:237:341/2
+AGGCTCCTGCCGTGTCGTAGGCTAAGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:18:3:107/2
+GTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTGGATATCAATTAGAGTTGTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:19:272:374/2
+GCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTAAGCAGGTAGCGCACATATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:20:251:354/2
+TTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTAAGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:21:95:199/2
+AACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:22:96:199/2
+AACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:23:94:197/2
+CACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:24:185:289/2
+AAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGACGATATTACCGACTCAAGCATACGCACCGCCTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:25:152:256/2
+GAGTCCGGACGATATTACCGACTCAAGCATACGCACCGCCTGAGGACGGGTATCCGGAACACCTATACGCCCTAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:26:285:389/2
+GTTGGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTAAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:27:137:241/2
+TACCGACTCAAGCATACGCACCGCCTGAGGACGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:28:261:365/2
+GCTGACACTTATTCAGGGCCTAGCAGGCTCCTGCCGTGTCGTAGGCTAAGCAGGTAGCGCACATATTTCTCTGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:29:12:116/2
+CATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTGGATATCAATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:30:107:210/2
+CGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:31:162:266/2
+GAGTAGGGATGAGTCCGGACGATATTACCGACTCAAGCATACGCACCGCCTGAGGACGGGTATCCGGAACACCTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:32:213:317.dup.2/2
+AGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGACGATAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:33:24:127/2
+TAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:34:84:189/2
+CGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:35:40:145/2
+CGTCAAAGACAATGGGAGTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:36:120:223/2
+CACCGCCTGAGGACGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:37:106:211/2
+ACGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:38:98:202/2
+CGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:39:72:177/2
+ACAGCTGGTCTCCGGCGACTGACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACATATAATCGTCAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:40:16:120/2
+TCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTTGGATATCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:41:308:410/2
+AGCCTTGAACCTCAGCGCATGGTTGGTACTTCGCTAGCCGCATCAGCTGACACTTATTCAGGGCCTAGCAGGCTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:42:26:129/2
+AGTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:43:130:234/2
+TCAAGCATACGCACCGCCTGAGGACGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:44:52:157/2
+GACATCAGAAAGCGTCAAAGACAATGGGAGTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:45:220:323/2
+AGGCTAAGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:46:125:228/2
+ATACGCACCGCCTGAGGACGGGTATCCGGAACACCTATACGCCCTAGGGAGACAGCTGGTCTCCGGCGACTGACAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:47:210:314/2
+AGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGGACGATATTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:48:220:324/2
+TAGGCTAAGCAGGTAGCGCACATATTTCTCTGGGTAAGCGTAACCACGTAAGTTGTAAGAGTAGGGATGAGTCCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ref2:49:25:128/2
+GTAGAAGGTCAACATATAATCGTCAGAGCACGATGAGACTCTAGTCCCTCCGCATCGTATGTTAACAGGTCTTGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/assembly_test_check_spades_log_file.log.bad b/ariba/tests/data/assembly_test_check_spades_log_file.log.bad
new file mode 100644
index 00000000..df881624
--- /dev/null
+++ b/ariba/tests/data/assembly_test_check_spades_log_file.log.bad
@@ -0,0 +1,5 @@
+line 1
+line 2
+
+== Error ==  system call for: "['/foo/bar/SPAdes-3.6.0-Linux/bin/spades', '/spam/eggs/K21/configs/config.info']" finished abnormally, err code: -7
+
diff --git a/ariba/tests/data/assembly_test_check_spades_log_file.log.good b/ariba/tests/data/assembly_test_check_spades_log_file.log.good
new file mode 100644
index 00000000..f0f06879
--- /dev/null
+++ b/ariba/tests/data/assembly_test_check_spades_log_file.log.good
@@ -0,0 +1,5 @@
+This is a dummy spades log file.
+
+It doesn't look like a real spades log file.
+
+But it doesn't have lines matching the bad stuff that will mean ariba should stop NOW.
diff --git a/scripts/ariba b/scripts/ariba
index 498dddd0..8cf4a51c 100755
--- a/scripts/ariba
+++ b/scripts/ariba
@@ -222,8 +222,14 @@ nucmer_group.add_argument('--nucmer_min_len', type=int, help='Minimum alignment
 nucmer_group.add_argument('--nucmer_breaklen', type=int, help='Value to use for -breaklen when running nucmer [%(default)s]', default=200, metavar='INT')
 
 assembly_group = subparser_run.add_argument_group('Assembly options')
+assembly_group.add_argument('--assembler', help='Assembler to use', choices=['fermilite','spades'], default='fermilite')
 assembly_group.add_argument('--assembly_cov', type=int, help='Target read coverage when sampling reads for assembly [%(default)s]', default=50, metavar='INT')
 assembly_group.add_argument('--min_scaff_depth', type=int, help='Minimum number of read pairs needed as evidence for scaffold link between two contigs [%(default)s]', default=10, metavar='INT')
+assembly_group.add_argument('--spades_mode', help='If using Spades assembler, either use default WGS mode, Single Cell mode (`spades.py --sc`) or RNA mode (`spades.py --rna`). '
+                                                  'Use SC or RNA mode if your input is from a viral sequencing with very uneven and deep coverage. '
+                                                  'Set `--assembly_cov` to some high value if using SC or RNA mode', choices=['wgs','sc','rna'], default='wgs')
+assembly_group.add_argument('--spades_options', help='Extra options to pass to Spades assembler. Sensible default options will be picked based on `--spades_mode` argument. '
+                                                     'Anything set here will replace the defaults completely')
 
 other_run_group = subparser_run.add_argument_group('Other options')
 other_run_group.add_argument('--threads', type=int, help='Experimental. Number of threads. Will run clusters in parallel, but not minimap (yet) [%(default)s]', default=1, metavar='INT')