diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 7646069..3d79891 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,6 +1,8 @@ [bumpversion] -current_version = 1.0.0 +current_version = 1.2.0 commit = True tag = False [bumpversion:file:setup.py] + +[bumpversion:file:Dockerfile] diff --git a/Dockerfile b/Dockerfile index c3b96a5..5134ee2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,18 @@ -FROM hailgenetics/hail:0.2.127-py3.11 +FROM python:3.10-bullseye -COPY scripts /scripts -COPY requirements.txt /scripts/ +# take as a command line argument, or +ARG RELEASE=${RELEASE:-1.2.0} -RUN pip install --no-cache-dir -r /scripts/requirements.txt +RUN apt update && apt install -y \ + apt-transport-https \ + bzip2 \ + ca-certificates \ + git \ + gnupg \ + openjdk-11-jdk-headless \ + wget \ + zip && \ + rm -r /var/lib/apt/lists/* && \ + rm -r /var/cache/apt/* -WORKDIR /scripts +RUN pip install --no-cache-dir git+https://github.com/populationgenomics/ClinvArbitration.git@${RELEASE} diff --git a/clinvarbitration/clinvar_by_codon.py b/clinvarbitration/clinvar_by_codon.py new file mode 100644 index 0000000..a1b4053 --- /dev/null +++ b/clinvarbitration/clinvar_by_codon.py @@ -0,0 +1,122 @@ +""" +Method file for re-sorting clinvar annotations by codon + +Takes a VCF of annotated Pathogenic Clinvar Variants +re-indexes the data to be queryable on Transcript and Codon +writes the resulting Hail Table to the specified path + +Data as input for this script should be a VCF, annotated by VEP 110 +Compatibility with other versions of VEP is not guaranteed + +This makes the assumption that the annotated data here +has been generated by summarise_clinvar_entries.py: + +- SNV only +- Clinvar Pathogenic only +- ClinVar decision/alleles/gold stars are in INFO +""" + +import json +import logging +from argparse import ArgumentParser +from collections import defaultdict + +import hail as hl +from cyvcf2 import VCF, Variant + + +def pull_vep_from_header(vcf: VCF) -> list[str]: + 
""" + yank the CSQ line out of the VCF header + """ + for element in vcf.header_iter(): + if element['HeaderType'] == 'INFO' and element['ID'] == 'CSQ': + return list(entry.lower() for entry in element['Description'].split('Format: ')[-1].rstrip('"').split('|')) + raise IndexError('CSQ element not found in header') + + +def variant_consequences(variant: Variant, csq_header: list[str]) -> list[dict[str, str]]: + """ + extracts the consequences for each transcript in this variant + + Args: + variant (Variant): + csq_header (): + + Returns: + a list of all CSQ entries, cast as a dict + """ + + consequences: list[dict[str, str]] = [] + for csq in variant.INFO['CSQ'].split(','): + csq_dict = dict(zip(csq_header, csq.split('|'), strict=True)) + if 'missense_variant' in csq_dict['consequence']: + consequences.append(csq_dict) + return consequences + + +def cli_main(): + """ + alternative access point with CLI arguments + """ + logging.basicConfig(level=logging.INFO) + parser = ArgumentParser() + parser.add_argument('-i', help='Path to the annotated VCF') + parser.add_argument('-o', help='Root to export PM5 table and JSON to') + args = parser.parse_args() + main(input_vcf=args.i, output_root=args.o) + + +def main(input_vcf: str, output_root: str): + """ + + Args: + input_vcf (str): path to an input vcf + output_root (): + """ + + # crack open a cold VCF, and have a sip + vcf_reader = VCF(input_vcf) + + # find the header encoding all the VEP fields + header_csq = pull_vep_from_header(vcf_reader) + + clinvar_dict = defaultdict(set) + + # iterate over the variants + for variant in vcf_reader: + # extract the clinvar details (added in previous script) + clinvar_allele = variant.INFO['allele_id'] + clinvar_stars = variant.INFO['gold_stars'] + clinvar_key = f'{clinvar_allele}:{clinvar_stars}' + + # iterate over all missense consequences + for csq_dict in variant_consequences(variant, header_csq): + # add this clinvar entry in relation to the protein consequence + protein_key = 
f"{csq_dict['ensp']}:{csq_dict['protein_position']}" + clinvar_dict[protein_key].add(clinvar_key) + + # save the dictionary locally + json_out_path = f'{output_root}.json' + with open(json_out_path, 'w') as f: + for key, value in clinvar_dict.items(): + new_dict = {'newkey': key, 'clinvar_alleles': '+'.join(value)} + f.write(f'{json.dumps(new_dict)}\n') + + logging.info(f'JSON written to {json_out_path}') + + # now set a schema to read that into a table... if you want hail + schema = hl.dtype('struct{newkey:str,clinvar_alleles:str}') + + # import the table, and transmute to top-level attributes + ht = hl.import_table(json_out_path, no_header=True, types={'f0': schema}) + ht = ht.transmute(**ht.f0) + ht = ht.key_by(ht.newkey) + + # write out + ht.write(f'{output_root}.ht', overwrite=True) + logging.info(f'Hail Table written to {output_root}.ht') + + +if __name__ == '__main__': + cli_main() diff --git a/clinvarbitration/clinvar_by_codon_from_mt.py b/clinvarbitration/clinvar_by_codon_from_mt.py index 64e921e..171b0b4 100644 --- a/clinvarbitration/clinvar_by_codon_from_mt.py +++ b/clinvarbitration/clinvar_by_codon_from_mt.py @@ -1,15 +1,14 @@ """ -Method file for re-sorting clinvar annotations by codon +Method file for re-sorting clinvar annotations by codon (taking annotated MatrixTable as input) -This makes the assumption that the annotated data here -has been generated by summarise_clinvar_entries.py: +This makes the assumption that the annotated data here has been generated by summarise_clinvar_entries.py: - SNV only - Clinvar Pathogenic only - ClinVar decision/alleles/gold stars are in INFO -In almost all use-cases the alternative form based on annotated VCFs -will be used in place of this, but it's retained here just in case. +In almost all use-cases the alternative form based on annotated VCFs will be used in place of this, +but it's retained here just in case. 
""" from argparse import ArgumentParser diff --git a/clinvarbitration/clinvar_by_codon_from_vcf.py b/clinvarbitration/clinvar_by_codon_from_vcf.py deleted file mode 100644 index d5e639e..0000000 --- a/clinvarbitration/clinvar_by_codon_from_vcf.py +++ /dev/null @@ -1,116 +0,0 @@ -""" -Method file for re-sorting clinvar annotations by codon - -Data as input for this script should be a VCF, annotated by VEP 110 -Compatibility with other versions of VEP is not guaranteed - -This makes the assumption that the annotated data here -has been generated by summarise_clinvar_entries.py: - -- SNV only -- Clinvar Pathogenic only -- ClinVar decision/alleles/gold stars are in INFO -""" - -import json -from argparse import ArgumentParser -from collections import defaultdict - -import hail as hl -from cyvcf2 import VCF, Variant - -""" -Takes a VCF of annotated Pathogenic Clinvar Variants -re-indexes the data to be queryable on Transcript and Codon -writes the resulting Hail Table to the specified path -""" - -# update these values to reflect the VEP version in use -# the first should be the ENSP ID -# the second should be the residue in the protein -PROTEIN_ID = 'ensp' -PROTEIN_POSITION = 'protein_position' - - -def pull_vep_from_header(vcf: VCF) -> list[str]: - """ - yank the CSQ line out of the VCF header - """ - for element in vcf.header_iter(): - if element['HeaderType'] == 'INFO' and element['ID'] == 'CSQ': - return list( - map( - str.lower, - element['Description'].split('Format: ')[-1].rstrip('"').split('|'), - ), - ) - raise IndexError('CSQ element not found in header') - - -def variant_consequences( - variant: Variant, - csq_header: list[str], -) -> list[dict[str, str]]: - """ - extracts the consequences for each transcript in this variant - - Args: - variant (Variant): - csq_header (): - - Returns: - a list of all CSQ entries, cast as a dict - """ - - consequences: list[dict[str, str]] = [] - for csq in variant.INFO['CSQ'].split(','): - csq_dict = dict(zip(csq_header, 
csq.split('|'), strict=True)) - if 'missense_variant' in csq_dict['consequence']: - consequences.append(csq_dict) - return consequences - - -parser = ArgumentParser() -parser.add_argument('-i', help='Path to the annotated VCF') -parser.add_argument('-o', help='Root to export PM5 table and JSON to') -args = parser.parse_args() - -# crack open a cold VCF, and have a sip -vcf_reader = VCF(args.i) -# find the header encoding all the VEP fields -header_csq = pull_vep_from_header(vcf_reader) - -clinvar_dict = defaultdict(set) - -# iterate over the variants -for variant in vcf_reader: - # extract the clinvar details (added in previous script) - clinvar_allele = variant.INFO['allele_id'] - clinvar_stars = variant.INFO['gold_stars'] - clinvar_key = f'{clinvar_allele}:{clinvar_stars}' - - # iterate over all missense consequences - for csq_dict in variant_consequences(variant, header_csq): - # add this clinvar entry in relation to the protein consequence - protein_key = f"{csq_dict[PROTEIN_ID]}:{csq_dict[PROTEIN_POSITION]}" - clinvar_dict[protein_key].add(clinvar_key) - -# save the dictionary locally -json_out_path = f'{args.o}.json' -with open(json_out_path, 'w') as f: - for key, value in clinvar_dict.items(): - new_dict = {'newkey': key, 'clinvar_alleles': '+'.join(value)} - f.write(f'{json.dumps(new_dict)}\n') -print(f'JSON written to {json_out_path}') - -# now set a schema to read that into a table... 
if you want hail -schema = hl.dtype('struct{newkey:str,clinvar_alleles:str}') - -# import the table, and transmute to top-level attributes -ht = hl.import_table(json_out_path, no_header=True, types={'f0': schema}) -ht = ht.transmute(**ht.f0) -ht = ht.key_by(ht.newkey) - -# write out -ht.write(f'{args.o}.ht', overwrite=True) -print(f'Hail Table written to {args.o}.ht') diff --git a/clinvarbitration/resummarise.py b/clinvarbitration/resummarise_clinvar.py similarity index 83% rename from clinvarbitration/resummarise.py rename to clinvarbitration/resummarise_clinvar.py index d9b8ff2..a610275 100644 --- a/clinvarbitration/resummarise.py +++ b/clinvarbitration/resummarise_clinvar.py @@ -52,16 +52,19 @@ # I really want the linter to just tolerate naive datetimes, but it won't TIMEZONE = zoneinfo.ZoneInfo('Australia/Brisbane') + # published Nov 2015, available pre-print since March 2015 # assumed to be influential since 2016 - ACMG_THRESHOLD = datetime(year=2016, month=1, day=1, tzinfo=TIMEZONE) + +# a default date assigned to un-dated entries VERY_OLD = datetime(year=1970, month=1, day=1, tzinfo=TIMEZONE) + LARGEST_COMPLEX_INDELS = 40 BASES = re.compile(r'[ACGTN]+') # add the exact name of any submitters whose evidence is not trusted -BLACKLIST: list[str] = [] +BLACKLIST: set[str] = set() class Consequence(Enum): @@ -130,14 +133,8 @@ def get_allele_locus_map(summary_file: str) -> dict: if chromosome not in ORDERED_ALLELES: continue - # skip chromosomal deletions and insertions, mito, or massive indels - if ( - ref == 'na' - or alt == 'na' - or ref == alt - or 'm' in chromosome.lower() - or (len(ref) + len(alt)) > LARGEST_COMPLEX_INDELS - ): + # skip chromosomal deletions and insertions, or massive indels + if ref == 'na' or alt == 'na' or ref == alt or (len(ref) + len(alt)) > LARGEST_COMPLEX_INDELS: continue # don't include any of the trash bases in ClinVar @@ -187,12 +184,7 @@ def consequence_decision(subs: list[Submission]) -> Consequence: decision = 
Consequence.UNCERTAIN # establish counts for this allele - counts = { - Consequence.BENIGN: 0, - Consequence.PATHOGENIC: 0, - Consequence.UNCERTAIN: 0, - 'total': 0, - } + counts = {Consequence.BENIGN: 0, Consequence.PATHOGENIC: 0, Consequence.UNCERTAIN: 0, 'total': 0} for each_sub in subs: # for 3/4-star ratings, don't look any further @@ -200,11 +192,7 @@ def consequence_decision(subs: list[Submission]) -> Consequence: return each_sub.classification counts['total'] += 1 - if each_sub.classification in [ - Consequence.PATHOGENIC, - Consequence.BENIGN, - Consequence.UNCERTAIN, - ]: + if each_sub.classification in [Consequence.PATHOGENIC, Consequence.BENIGN, Consequence.UNCERTAIN]: counts[each_sub.classification] += 1 if counts[Consequence.PATHOGENIC] and counts[Consequence.BENIGN]: @@ -263,7 +251,7 @@ def check_stars(subs: list[Submission]) -> int: return minimum -def process_line(data: list[str]) -> tuple[int, Submission]: +def process_submission_line(data: list[str]) -> tuple[int, Submission]: """ takes a line, strips out useful content as a 'Submission' @@ -273,7 +261,7 @@ def process_line(data: list[str]) -> tuple[int, Submission]: Returns: the allele ID and corresponding Submission details """ - allele_id = int(data[0]) + var_id = int(data[0]) if data[1] in PATH_SIGS: classification = Consequence.PATHOGENIC elif data[1] in BENIGN_SIGS: @@ -286,7 +274,7 @@ def process_line(data: list[str]) -> tuple[int, Submission]: sub = data[9].lower() rev_status = data[6].lower() - return allele_id, Submission(date, sub, classification, rev_status) + return var_id, Submission(date, sub, classification, rev_status) def dict_list_to_ht(list_of_dicts: list) -> hl.Table: @@ -307,7 +295,7 @@ def dict_list_to_ht(list_of_dicts: list) -> hl.Table: return hl.Table.from_pandas(pdf, key=['locus', 'alleles']) -def get_all_decisions(submission_file: str, allele_ids: set[int]) -> dict[int, list[Submission]]: +def get_all_decisions(submission_file: str, var_ids: set[int]) -> dict[int, 
list[Submission]]: """ obtains all submissions per-allele which pass basic criteria - not a blacklisted submitter @@ -315,23 +303,22 @@ def get_all_decisions(submission_file: str, allele_ids: set[int]) -> dict[int, l Args: submission_file (): file containing submission-per-line - allele_ids (): only process alleleIDs we have pos data for + var_ids (): only process Var IDs we have pos data for Returns: - dictionary of alleles and their corresponding submissions + dictionary of var IDs and their corresponding submissions """ submission_dict = defaultdict(list) for line in lines_from_gzip(submission_file): - a_id, line_sub = process_line(line) + var_id, line_sub = process_submission_line(line) # skip rows where the variantID isn't in this mapping # this saves a little effort on haplotypes, CNVs, and SVs if ( - (a_id not in allele_ids) + (var_id not in var_ids) or (line_sub.submitter in BLACKLIST) - or (line_sub.review_status in USELESS_RATINGS) or (line_sub.classification == Consequence.UNKNOWN) ): continue @@ -341,7 +328,7 @@ def get_all_decisions(submission_file: str, allele_ids: set[int]) -> dict[int, l if line_sub.classification == consequence and line_sub.submitter in submitters: continue - submission_dict[a_id].append(line_sub) + submission_dict[var_id].append(line_sub) return submission_dict @@ -402,9 +389,7 @@ def parse_into_table(json_path: str, out_path: str) -> hl.Table: # start a hail runtime hl.init(default_reference='GRCh38') - - # # may need this as a subsequent line, depending on the Hail version being used - # hl.default_reference(hl.get_reference('GRCh38')) # noqa: ERA001 + # hl.context.init_local(default_reference='GRCh38') # define the schema for each written line schema = hl.dtype( @@ -428,7 +413,9 @@ def parse_into_table(json_path: str, out_path: str) -> hl.Table: # write out to the specified location ht.write(f'{out_path}.ht', overwrite=True) - return ht + + # read the localised version + return hl.read_table(f'{out_path}.ht') def 
write_vep_vcf(clinvar_table: hl.Table, output_root: str): @@ -486,7 +473,45 @@ def snv_missense_filter(clinvar_table: hl.Table, output_root: str): logging.info(f'Wrote SNV VCF to {vcf_path}') -def main(subs: str, variants: str, output_root: str): +def cli_main(): + logging.basicConfig(level=logging.INFO) + parser = ArgumentParser() + parser.add_argument('-s', help='submission_summary.txt.gz from NCBI', required=True) + parser.add_argument('-v', help='variant_summary.txt.gz from NCBI', required=True) + parser.add_argument('-o', help='output root, for table, json, and path-only VCF', required=True) + parser.add_argument('--minimal', help='only keep path. and 1+ star benign', action='store_true') + parser.add_argument('-b', help='submitters to blacklist', nargs='+', default=[]) + args = parser.parse_args() + + # if submitters are blacklisted on the CLI, update the global BLACKLIST value + # temporary solution while we continue to validate Talos + if args.b: + BLACKLIST.update(args.b) + + main(subs=args.s, variants=args.v, output_root=args.o, minimal=args.minimal) + + +def only_keep_talos_relevant_entries(results: list[dict]) -> list[dict]: + """ + filters the results to only those used in Talos: + - all Pathogenic ratings + - all Benign with >= 1 Star + + Args: + results (list[dict]): all results + + Returns: + the same results, but reduced + """ + return [ + result + for result in results + if (result['clinical_significance'] == Consequence.PATHOGENIC.value) + or ((result['clinical_significance'] == Consequence.BENIGN.value) and (result['gold_stars'] > 0)) + ] + + +def main(subs: str, variants: str, output_root: str, minimal: bool): + """ + Redefines what it is to be a clinvar summary + @@ -494,21 +519,23 @@ def main(subs: str, variants: str, output_root: str): subs (str): file path to all submissions (gzipped) variants (str): file path to variant summary (gzipped) output_root (str): path to write JSON out to + minimal (bool): only keep the talos-relevant entries """ 
logging.info('Getting alleleID-VariantID-Loci from variant summary') allele_map = get_allele_locus_map(variants) - logging.info('Getting all decisions, indexed on clinvar AlleleID') + logging.info('Getting all decisions, indexed on clinvar Var ID') + # the raw IDs - some have ambiguous X/Y mappings all_uniq_ids = {x['var_id'] for x in allele_map.values()} - decision_dict = get_all_decisions(submission_file=subs, allele_ids=all_uniq_ids) + decision_dict = get_all_decisions(submission_file=subs, var_ids=all_uniq_ids) # placeholder to fill wth per-allele decisions all_decisions = {} # now filter each set of decisions per allele - for allele_id, submissions in decision_dict.items(): + for var_id, submissions in decision_dict.items(): # filter against ACMG date, if appropriate filtered_submissions = acmg_filter_submissions(submissions) @@ -525,11 +552,12 @@ def main(subs: str, variants: str, output_root: str): if rating in [Consequence.UNCERTAIN, Consequence.UNKNOWN]: continue - all_decisions[allele_id] = (rating, stars) + all_decisions[var_id] = (rating, stars) # now match those up with the variant coordinates + logging.info('Matching decisions to variant coordinates') complete_decisions = [] - for var_details in allele_map.values(): + for uniq_var_id, var_details in allele_map.items(): var_id = var_details['var_id'] # we may have found no relevant submissions for this variant @@ -544,10 +572,17 @@ def main(subs: str, variants: str, output_root: str): 'position': var_details['pos'], 'clinical_significance': all_decisions[var_id][0].value, 'gold_stars': all_decisions[var_details['var_id']][1], - 'allele_id': var_id, + 'allele_id': allele_map[uniq_var_id]['allele'], }, ) + # optionally, filter to just minimal useful entries + if minimal: + logging.info('Producing the reduced output set - Pathogenic and Strong Benign') + complete_decisions = only_keep_talos_relevant_entries(complete_decisions) + + logging.info(f'{len(complete_decisions)} ClinVar entries remain') + # sort 
all collected decisions, trying to reduce overhead in HT later complete_decisions_sorted = sort_decisions(complete_decisions) @@ -561,6 +596,7 @@ def main(subs: str, variants: str, output_root: str): for each_dict in complete_decisions_sorted: handle.write(f'{json.dumps(each_dict)}\n') + logging.info('JSON written to file, parsing into a Hail Table') ht = parse_into_table(json_path=json_output, out_path=output_root) # export this table of decisions as a tabix-indexed VCF @@ -572,12 +608,4 @@ def main(subs: str, variants: str, output_root: str): if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - - parser = ArgumentParser() - parser.add_argument('-s', help='submission_summary.txt.gz from NCBI', required=True) - parser.add_argument('-v', help='variant_summary.txt.gz from NCBI', required=True) - parser.add_argument('-o', help='output root, for table, json, and pathogenic-variants-only VCF', required=True) - args = parser.parse_args() - - main(subs=args.s, variants=args.v, output_root=args.o) + cli_main() diff --git a/data/pathogenic_annotated.vcf.bgz b/data/pathogenic_annotated.vcf.bgz index a233858..20030d8 100644 Binary files a/data/pathogenic_annotated.vcf.bgz and b/data/pathogenic_annotated.vcf.bgz differ diff --git a/data/pathogenic_annotated.vcf.bgz.tbi b/data/pathogenic_annotated.vcf.bgz.tbi index f0beb00..98eb0f7 100644 Binary files a/data/pathogenic_annotated.vcf.bgz.tbi and b/data/pathogenic_annotated.vcf.bgz.tbi differ diff --git a/example_script.sh b/example_script.sh index 85294c5..b6575f7 100644 --- a/example_script.sh +++ b/example_script.sh @@ -3,19 +3,19 @@ set -ex # create a docker image from this repository -docker build -t hail_clinvar:example --platform linux/amd64 . +docker build -t clinvarbitration:example --platform linux/amd64 . # make local copies of the NCBI data files required as input using wget # create a directory called data, if one doesn't already exist if [ ! 
-d data ]; then mkdir data fi -wget -O data/variant_summary.txt.gz https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz -wget -O data/submission_summary.txt.gz https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/submission_summary.txt.gz +wget -O data/variant_summary.txt.gz https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz +wget -O data/submission_summary.txt.gz https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/submission_summary.txt.gz # run the docker image to generate the summarised output -docker run --platform linux/amd64 -v "$(pwd)/data":/data hail_clinvar:example \ - /bin/bash -c "python3 /scripts/resummarise.py -v /data/variant_summary.txt.gz -s /data/submission_summary.txt.gz -o /data/clinvar_summary" +docker run -v "$(pwd)/data":/data clinvarbitration:example \ + resummary -v "/data/variant_summary.txt.gz" -s "/data/submission_summary.txt.gz" -o "/data/clinvar_summary" --minimal # upon completion, this will have generated files in the data directory: # - data/clinvar_summary.json - a JSON file containing the summarised data entries, one json object per line
data/clinvar_summary.vcf.bgz, with protein consequence annotation per transcript +## Let's imagine you did that, and the result is in data/pathogenic_annotated.vcf.bgz +## I've enclosed a 10-variant example of this, as annotated by https://www.ensembl.org/Homo_sapiens/Tools/VEP +#docker run --platform linux/amd64 -v "$(pwd)/data":/data clinvarbitration:example \ +# /bin/bash -c "python3 /clinvarbitration/clinvar_by_codon.py -i /data/pathogenic_annotated.vcf.bgz -o /data/pm5" # upon completion, this will generate files in the data directory: # - data/pm5.json - a JSON file containing the PM5 results, one JSON object per line diff --git a/pull_request_template.md b/pull_request_template.md new file mode 100644 index 0000000..2dd1fb9 --- /dev/null +++ b/pull_request_template.md @@ -0,0 +1,14 @@ +# Purpose + + - < The reason for this PR > + +## Proposed Changes + + - + - + +## Checklist + +- [ ] Related GitHub Issue created +- [ ] Tests covering new change +- [ ] Linting checks pass diff --git a/requirements.txt b/requirements.txt index 3990e25..f6460ad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ cyvcf2>=0.30.28 -hail==0.2.127 +hail>=0.2.128 pandas>=2.0.3 +pyspark>=3.3.3 diff --git a/setup.py b/setup.py index 5da99d9..83eb3ce 100644 --- a/setup.py +++ b/setup.py @@ -23,12 +23,12 @@ def read_reqs(filename: str) -> list[str]: setup( - name='ClinvArbitration', + name='clinvarbitration', description='CPG ClinVar Re-interpretation', long_description=readme, - version='1.0.0', + version='1.2.0', author='Matthew Welland, CPG', - author_email=('matthew.welland@populationgenomics.org.au, ' 'cas.simons@populationgenomics.org.au'), + author_email='matthew.welland@populationgenomics.org.au, cas.simons@populationgenomics.org.au', url='https://github.com/populationgenomics/ClinvArbitration', license='MIT', classifiers=[ @@ -46,7 +46,13 @@ def read_reqs(filename: str) -> list[str]: packages=find_packages(), include_package_data=True, 
install_requires=read_reqs('requirements.txt'), - extras_require={ - 'test': read_reqs('requirements-dev.txt'), + extras_require={'test': read_reqs('requirements-dev.txt')}, + entry_points={ + 'console_scripts': [ + # Step 1; re-summarise ClinVar using altered conflict resolution + 'resummary = clinvarbitration.resummarise_clinvar:cli_main', + # Step 2, post-annotation; obtain PM5 annotations from VEP annotated clinvar + 'pm5_table = clinvarbitration.clinvar_by_codon:cli_main', + ], }, ) diff --git a/test/test_resummarise.py b/test/test_resummarise.py index c485ef7..11dc5a5 100644 --- a/test/test_resummarise.py +++ b/test/test_resummarise.py @@ -4,7 +4,7 @@ import pytest import zoneinfo -from clinvarbitration.resummarise import Consequence, Submission, consequence_decision +from clinvarbitration.resummarise_clinvar import Consequence, Submission, consequence_decision TIMEZONE = zoneinfo.ZoneInfo('Australia/Brisbane') BASIC_SUB = Submission(datetime.now(tz=TIMEZONE), 'foo', Consequence.UNKNOWN, 'review')