Copy old assign files into the rc directory and deduplicate rc ASVs against that copy

CALeDNA · Dec 4, 2023 · 78a7eaf · 78a7eaf
1 parent e8b605d
commit 78a7eaf
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 9 deletions.
diff --git a/tronko/assign/assign.sh b/tronko/assign/assign.sh
@@ -181,6 +181,8 @@ then
 
     # download old assign files
     aws s3 sync s3://$BUCKET/projects/$PROJECTID/assign/$PRIMER/paired $PROJECTID-$PRIMER/old --no-progress --endpoint-url https://js2.jetstream-cloud.org:8001/
+    # copy the downloaded old assign files into the rc directory
+    cp -r "$PROJECTID-$PRIMER/old" "$PROJECTID-$PRIMER-rc/old"
 
 
     # download QC sample paired files
@@ -202,12 +204,17 @@ then
     # create rc ASV files
     python3 /mnt/asv.py --dir $PROJECTID-$PRIMER/paired --out $PROJECTID-$PRIMER-rc/$PROJECTID-$PRIMER-paired_F.asv --primer $PRIMER --paired --rc
 
-
     # remove duplicate sequences
     if [ -f "$PROJECTID-$PRIMER/old/$PROJECTID-$PRIMER-paired.txt" ]; then
         python3 /mnt/deduplicate_asv.py --dir $PROJECTID-$PRIMER/ --old $PROJECTID-$PRIMER/old --projectid $PROJECTID --primer $PRIMER --paired
     fi
 
+    # remove duplicate sequences from the rc files, using the rc copy of the old assign files
+    if [ -f "$PROJECTID-$PRIMER-rc/old/$PROJECTID-$PRIMER-paired.txt" ]; then
+        cp $PROJECTID-$PRIMER/$PROJECTID-$PRIMER-paired*.fasta $PROJECTID-$PRIMER-rc
+        python3 /mnt/deduplicate_asv.py --dir $PROJECTID-$PRIMER-rc/ --old $PROJECTID-$PRIMER-rc/old --projectid $PROJECTID --primer $PRIMER --paired
+    fi
+
     # run tronko assign paired v1
     time tronko-assign -r -f $PROJECTID-$PRIMER/tronkodb/reference_tree.txt.gz -a $PROJECTID-$PRIMER/tronkodb/$PRIMER.fasta -p -z -w -1 $PROJECTID-$PRIMER/$PROJECTID-$PRIMER-paired_F.fasta -2 $PROJECTID-$PRIMER/$PROJECTID-$PRIMER-paired_R.fasta -6 -C 1 -c 5 -o $PROJECTID-$PRIMER/$PROJECTID-$PRIMER-paired.txt
 
@@ -220,12 +227,6 @@ then
         count_1=0
     fi
 
-    # remove duplicate sequences
-    if [ -f "$PROJECTID-$PRIMER/old/$PROJECTID-$PRIMER-paired.txt" ]; then
-        cp $PROJECTID-$PRIMER/$PROJECTID-$PRIMER-paired*.fasta $PROJECTID-$PRIMER-rc
-        python3 /mnt/deduplicate_asv.py --dir $PROJECTID-$PRIMER-rc/ --old $PROJECTID-$PRIMER/old --projectid $PROJECTID --primer $PRIMER --paired
-    fi
-
     # run tronko assign paired v2 (rc)
     time tronko-assign -r -f $PROJECTID-$PRIMER/tronkodb/reference_tree.txt.gz -a $PROJECTID-$PRIMER/tronkodb/$PRIMER.fasta -p -z -w -1 $PROJECTID-$PRIMER-rc/$PROJECTID-$PRIMER-paired_F.fasta -2 $PROJECTID-$PRIMER-rc/$PROJECTID-$PRIMER-paired_R.fasta -6 -C 1 -c 5 -o $PROJECTID-$PRIMER-rc/$PROJECTID-$PRIMER-paired.txt
 

diff --git a/tronko/assign/deduplicate_asv.py b/tronko/assign/deduplicate_asv.py
@@ -146,7 +146,7 @@ def create_dict(dir, old_dir, projectid, primer, suffix="paired_F", isPaired=Fal
         with open(asv, "r") as asv:
             for line in asv:
                 if("sequence" in line):
-                    newheaderfiles="\t".join(line.split("\t")[2:])
+                    newheaderfiles="\t".join(line.strip().split("\t")[2:])
                 elif nooccur == "":
                     # count number of new samples
                     nooccur="\t".join(line.strip().split("\t")[2:])
@@ -276,7 +276,7 @@ def rewrite_files(last_id, oldColumnCount, seq_dict, dir, projectid, primer, suf
                         # replace with new ID
                         parts = id.split('_')
                         parts[-1] = str(counter)  # Make sure new_id_number is a string
-                        id='_'.join(parts)
+                        new_id='_'.join(parts)
                         # add empty file columns
                         nline = line.strip().split("\t")[:2] + ['0'] * oldColumnCount + line.strip().split("\t")[2:]
                         nline[0]=new_id