Skip to content

Commit

Permalink
copy files for rc
Browse files Browse the repository at this point in the history
  • Loading branch information
hector-baez committed Dec 4, 2023
1 parent e8b605d commit 78a7eaf
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 9 deletions.
15 changes: 8 additions & 7 deletions tronko/assign/assign.sh
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ then

# download old assign files
aws s3 sync s3://$BUCKET/projects/$PROJECTID/assign/$PRIMER/paired $PROJECTID-$PRIMER/old --no-progress --endpoint-url https://js2.jetstream-cloud.org:8001/
# copy the downloaded old assign files into the rc directory
cp -r "$PROJECTID-$PRIMER/old" "$PROJECTID-$PRIMER-rc/old"


# download QC sample paired files
Expand All @@ -202,12 +204,17 @@ then
# create rc ASV files
python3 /mnt/asv.py --dir $PROJECTID-$PRIMER/paired --out $PROJECTID-$PRIMER-rc/$PROJECTID-$PRIMER-paired_F.asv --primer $PRIMER --paired --rc


# remove duplicate sequences
if [ -f "$PROJECTID-$PRIMER/old/$PROJECTID-$PRIMER-paired.txt" ]; then
python3 /mnt/deduplicate_asv.py --dir $PROJECTID-$PRIMER/ --old $PROJECTID-$PRIMER/old --projectid $PROJECTID --primer $PRIMER --paired
fi

# remove rc duplicate sequences
if [ -f "$PROJECTID-$PRIMER-rc/old/$PROJECTID-$PRIMER-paired.txt" ]; then
cp $PROJECTID-$PRIMER/$PROJECTID-$PRIMER-paired*.fasta $PROJECTID-$PRIMER-rc
python3 /mnt/deduplicate_asv.py --dir $PROJECTID-$PRIMER-rc/ --old $PROJECTID-$PRIMER-rc/old --projectid $PROJECTID --primer $PRIMER --paired
fi

# run tronko assign paired v1
time tronko-assign -r -f $PROJECTID-$PRIMER/tronkodb/reference_tree.txt.gz -a $PROJECTID-$PRIMER/tronkodb/$PRIMER.fasta -p -z -w -1 $PROJECTID-$PRIMER/$PROJECTID-$PRIMER-paired_F.fasta -2 $PROJECTID-$PRIMER/$PROJECTID-$PRIMER-paired_R.fasta -6 -C 1 -c 5 -o $PROJECTID-$PRIMER/$PROJECTID-$PRIMER-paired.txt

Expand All @@ -220,12 +227,6 @@ then
count_1=0
fi

# remove duplicate sequences
if [ -f "$PROJECTID-$PRIMER/old/$PROJECTID-$PRIMER-paired.txt" ]; then
cp $PROJECTID-$PRIMER/$PROJECTID-$PRIMER-paired*.fasta $PROJECTID-$PRIMER-rc
python3 /mnt/deduplicate_asv.py --dir $PROJECTID-$PRIMER-rc/ --old $PROJECTID-$PRIMER/old --projectid $PROJECTID --primer $PRIMER --paired
fi

# run tronko assign paired v2 (rc)
time tronko-assign -r -f $PROJECTID-$PRIMER/tronkodb/reference_tree.txt.gz -a $PROJECTID-$PRIMER/tronkodb/$PRIMER.fasta -p -z -w -1 $PROJECTID-$PRIMER-rc/$PROJECTID-$PRIMER-paired_F.fasta -2 $PROJECTID-$PRIMER-rc/$PROJECTID-$PRIMER-paired_R.fasta -6 -C 1 -c 5 -o $PROJECTID-$PRIMER-rc/$PROJECTID-$PRIMER-paired.txt

Expand Down
4 changes: 2 additions & 2 deletions tronko/assign/deduplicate_asv.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def create_dict(dir, old_dir, projectid, primer, suffix="paired_F", isPaired=Fal
with open(asv, "r") as asv:
for line in asv:
if("sequence" in line):
newheaderfiles="\t".join(line.split("\t")[2:])
newheaderfiles="\t".join(line.strip().split("\t")[2:])
elif nooccur == "":
# count number of new samples
nooccur="\t".join(line.strip().split("\t")[2:])
Expand Down Expand Up @@ -276,7 +276,7 @@ def rewrite_files(last_id, oldColumnCount, seq_dict, dir, projectid, primer, suf
# replace with new ID
parts = id.split('_')
parts[-1] = str(counter) # Make sure new_id_number is a string
id='_'.join(parts)
new_id='_'.join(parts)
# add empty file columns
nline = line.strip().split("\t")[:2] + ['0'] * oldColumnCount + line.strip().split("\t")[2:]
nline[0]=new_id
Expand Down

0 comments on commit 78a7eaf

Please sign in to comment.