Skip to content

Commit

Permalink
feat(call): output motif and ref copy num in VCF
Browse files: browse the repository at this point in the history
  • Loading branch information
davidlougheed committed Feb 5, 2024
1 parent 345e387 commit ffb9fc4
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions strkit/call/output/vcf.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -51,6 +51,10 @@ def build_vcf_header(sample_id: str, reference_file: str) -> pysam.VariantHeader
vh.formats.add("MC", ".", "Integer", "Motif copy number for each allele")
vh.formats.add("PS", 1, "Integer", "Phase set")

# Set up VCF info fields
vh.info.add("MOTIF", 1, "String", "Motif string")
vh.info.add("REFMC", 1, "Integer", "Motif copy number in the reference genome")

# Add INFO records for tandem repeat copies - these are new to VCF4.4! TODO
# for iv in VCF_TR_INFO_RECORDS:
# vh.info.add(*iv)
Expand Down Expand Up @@ -133,6 +137,9 @@ def _write_contig_vrs():
alleles=seq_alleles,
)

vr.info["MOTIF"] = result["motif"]
vr.info["REFMC"] = result["ref_cn"]

vr.samples[sample_id]["GT"] = tuple(map(seq_alleles_raw.index, seqs)) if seqs else _blank_entry(n_alleles)

if call is not None:
Expand Down

0 comments on commit ffb9fc4

Please sign in to comment.