From c85ec6b008909d39e588de26f11d4ad01adf2cef Mon Sep 17 00:00:00 2001 From: MattWellie Date: Thu, 27 Jun 2024 18:45:52 +1000 Subject: [PATCH] re-add pathogenic VCF stubs --- .bumpversion.cfg | 2 ++ Dockerfile | 2 +- data/pathogenic_annotated.vcf.bgz | Bin 0 -> 3265 bytes data/pathogenic_annotated.vcf.bgz.tbi | Bin 0 -> 166 bytes example_script.sh | 8 ++++---- 5 files changed, 7 insertions(+), 5 deletions(-) create mode 100644 data/pathogenic_annotated.vcf.bgz create mode 100644 data/pathogenic_annotated.vcf.bgz.tbi diff --git a/.bumpversion.cfg b/.bumpversion.cfg index e11909d..6b53ecc 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -4,3 +4,5 @@ commit = True tag = False [bumpversion:file:setup.py] + +[bumpversion:file:Dockerfile] diff --git a/Dockerfile b/Dockerfile index fb08987..00afd29 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,4 +15,4 @@ RUN apt update && apt install -y \ rm -r /var/lib/apt/lists/* && \ rm -r /var/cache/apt/* -RUN pip install --no-cache-dir git+https://github.com/populationgenomics/automated-interpretation-pipeline.git@${RELEASE} +RUN pip install --no-cache-dir git+https://github.com/populationgenomics/ClinvArbitration.git@${RELEASE} diff --git a/data/pathogenic_annotated.vcf.bgz b/data/pathogenic_annotated.vcf.bgz new file mode 100644 index 0000000000000000000000000000000000000000..20030d8a365e719bd6abcdafdcbbbb5b01234bbe GIT binary patch literal 3265 zcmV;y3_kN8iwFb&00000{{{d;LjnN*1(ldvbE7sC$DgyGf*}umXut@>EiIjKz>d9* zF9}YXZKuP?ATYb!7O*$T_`~;rV<6pagY%F`2mK_SqrZ;MWh{rbePPkU&BLq=)5KthQN#dM(!5|YQWVrw*Dyr?H7QA8u^gx)RG zrzlBY^k&^ODZ`+q-A#@zBANu{#t^Zl%L*37i^_?39X%V@cjiXYzGuXi5s@y*qW(T3 z9T}u+qAtsF4+gi4AR-LV`;6Q$8j>YV?6Z#2Fsh&lvaa@E)P|9D2?Ha&&u9&UWh7~e zq`c4Q4I^oij#Z?-&mf_NBvn^?9V$XCBnz6N^n%3BN}?)DLXXg)RtpGA3YJ8z2h@r| zPz5Zhs1MYtK|s2Wkk)5=jU04Af}rCbszZ$&G)a_Ij8PA!k%J~kDg>d=I*lA;O+l)H zdMHS2)SzJaK}rt=iH#Vrq6)gM^-#CiXaVGdG_3SEpvU$WDFPNC1HI3DYL-z&QN^eS zGHaGmWaWnvSi63ircvSSLi+9DHE$yxDL-$@A_~`B)&BQ5GmT@Cct+J`xWx;@rOY5q z2WwKgn}I@IPw@u!c3sfYf!DqmJsaHBBUk=!Y^uzdb}q<>*7Qc`2J*)kuu2_veEpk0c>C6*ac5HW22VYD>Z9YO-j zlwnK_wz6}A!o7~gmx9DHE^#;TZQy1B=Yof6!o?&^208UG5lAlQm2|=T z{_3V00Y(9{Od|{Suz5a1VD;Aa9KwcVXu}$A>OpX>M{{#wjTn3|()BisylNr@@r3mP z*vrs@fjPYVJUSWz2)_gdk!7+pdd@cB9xFn10Nc%J@Hz|P;ed+JFAY|yDhIav;G6dC z@ulYvFTGvnVG_K3=*<)Y^>tgF7DJLGE$h%1jGw15Gdcv30Xgu@xGr`snPFXn>^tCo zI$I9S)!zOzyIS1ruffgzn38gvQ>TQ5kDdM1bg}xh9PLMsi}`*!U){l<$@PQn*1xm8 zYt4sFmKI^PhrMapZD*OMB@L6-FpR?_b%+~!1?$PkGGjgPm>TQOf9A$n^|eZk1vj*B zg4y7(zs`QjO-_kdRQ|^d)XCq^@hWfXRQ?}QY<1FekIc(A&iyYbwq!U~=ZpTN)RGo? zisSwyZb|*-CEN8U<(6D|SL#)NQfbNC^-?nXlWI$Dw^&O0lUhp(U$gbJKdHCm=xM(4 z`;+hjH7I(Sn868XM@4oR*ysebBcsw&pKeb;J35kvmiO@lv?C-_^MkJ^pdBUobo2QH z^`efH2wnbh3fj?96#x0zJpt{ANm`DQ6D}(1sOiPNe>o*PbkwB(9t7D5Xh%&iU&_hk z1hk{3<)emY{n5uxf=HjO`~Kvoj`%GSZu*n6&W#(USK}`E~1tZ8{VwWZtsUTXNP~joK*$ThkpQPmiyPB5dZ)miwFb&00000{{{d;LjnM# z2F;mWZ`w!{hOe_fWG`0Pdd`{et$Kk7B~cUFChXo?2)tgT1S0T$N%P}-#)ib;!5Hjx zmrA~f3p_sao_F3eO#fKHg%#XddGYR|e=(-(yNk)=WAd29^VtWB))?_VT|Um@bv9Y8 zKfwF`!`BaEH(UO_SZAwb@)W1ZB8mT+tmczN=I%aU-F&)=wZnG3yiT^ zx$$bUSWj2;=gi+4=eIa&CWL!lEwg04h^Nche381>7c%312IsDaqkew*1%(9dKj2I8 z0idnL%duN$%jY_)GPyiyD%Lej0g<1(Kj9VpH{?ze&k*$`Ck^li14;8O- z4rNf+a@g;}p@mRT$3bsDm!r8Y2(A6C!x#uG=nI{f!)V+Wb=<#th!@K&o+ZEMtx~Fv zLHrKYFj{?Quuq&c5$|2X7itr4q$Ol5o!SMMEam*1Z*I&UA&7N5D6_HpNB3+5T-vmaH|Wp??)l77Cr4hVNlThEj5H}u(WGHYoAzwFx>LWV9nG+~NOf-7Ax)xen|2hU zWYrqJYSKb+Q5??KByjV?G>^iH%n0)++}n@B^dF5aM`4yn;huY%uh+?9z2l>U``&oq zKHXm57D4spK`rg9!C1$Qv{g~V%Ys;15XuL%AmDn@z@_~(N_%NVEP|2dO0kYx&MtfI z&*xgl|8hI{bUoT#6KQxDmGoNmiIT@#%6Gd^5{)Q9DWgHZ0aQiMr$X88p~#eKp!9Jl z^vid2%RWJBLUANj)kO#D#(=7bgU8RS`BM`AI=7uQuQbw{^dkSb^{rsr7`z`9ZbWBFm_p;r~ZN?Ag zWv4MpTn>+yxKpn|y4%2O-U6%6Z5mp-ThIoBAL_a|vI7~BAO82;!-j8~vYHM2@dpN%zub&_1(1$|5i0!DDwCZ;8CR|Bv5PSXRCWD=@UR6%&v0iH$2sCesGfC@MKpNUN|b&&7BQ=5=PMZpbFpFp)EH>8&g?wKeAKyb@EoCkWt z`_ax}G)R3Y{&HQ3zZmg+YD9V_MCG({!|1^c(`nR8r~8k^sV?O_?2p=dPFBa>*G_26 zr;y32Sc%UTd@8hq>;U<5UZ%HY<`Q*yf<&4wg;CWy59@crmZkR3B1>IoWZwV!UuKQAN`x5>61QAiXx2{hQl%@SBOMA`s{_ohA zQky_m96e6xurCFQAVJrCskeQJG-+u=eW{@F(ro&&6;-e=)wVA=M`sLmfECTDioR?| z73xd1?aTA1^1l2AXpwYx=_ddHABzYC000000RIL6LPG)o8vp|U0000000000TtQar literal 0 HcmV?d00001 diff --git a/data/pathogenic_annotated.vcf.bgz.tbi b/data/pathogenic_annotated.vcf.bgz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..98eb0f714c6adc77a17f0da19579b3355c4a4760 GIT binary patch literal 166 zcmb2|=3rp}f&Xj_PR>jWoebQCpHfm%5)u-ak|cPUP6f;o?U-!b#Upv6u}4j@LrEcl zS@6ysfnAbXe2=GpbbcYW!lFlCnQP(g1&<8s8hBdr+Pe<_xSgt4!X6@XEIlFO6tmJ^ zwS%%fO6QI?aLEK*>A7^c(ak@5!Ad?!b3kYF)+xZxkQ?Q8SF3+0RUQ= BG`;`; literal 0 HcmV?d00001 diff --git a/example_script.sh b/example_script.sh index 73bfe94..b6575f7 100644 --- a/example_script.sh +++ b/example_script.sh @@ -3,7 +3,7 @@ set -ex # create a docker image from this repository -docker build --platform linux/arm64/v8 -t hail_clinvar:example --platform linux/amd64 . +docker build --platform linux/arm64/v8 -t clinvarbitration:example --platform linux/amd64 . # make local copies of the NCBI data files required as input using wget # create a directory called data, if one doesn't already exist @@ -14,7 +14,7 @@ fi #wget -O data/submission_summary.txt.gz https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/submission_summary.txt.gz # run the docker image to generate the summarised output -docker run -v "$(pwd)/data":/data hail_clinvar:example \ +docker run -v "$(pwd)/data":/data clinvarbitration:example \ resummary -v "/data/variant_summary.txt.gz" -s "/data/submission_summary.txt.gz" -o "/data/clinvar_summary" --minimal # upon completion, this will have generated files in the data directory: @@ -26,8 +26,8 @@ docker run -v "$(pwd)/data":/data hail_clinvar:example \ ## This is where you should run VEP on data/clinvar_summary.vcf.bgz, with protein consequence annotation per transcript ## Let's imagine you did that, and the result is in data/pathogenic_annotated.vcf.bgz ## I've enclosed a 10-variant example of this, as annotated by https://www.ensembl.org/Homo_sapiens/Tools/VEP -#docker run --platform linux/amd64 -v "$(pwd)/data":/data hail_clinvar:example \ -# /bin/bash -c "python3 /clinvarbitration/clinvar_by_codon_from_vcf.py -i /data/pathogenic_annotated.vcf.bgz -o /data/pm5" +#docker run --platform linux/amd64 -v "$(pwd)/data":/data clinvarbitration:example \ +# /bin/bash -c "python3 /clinvarbitration/clinvar_by_codon.py -i /data/pathogenic_annotated.vcf.bgz -o /data/pm5" # upon completion, this will generate files in the data directory: # - data/pm5.json - a JSON file containing the PM5 results, one JSON object per line