diff --git a/app/Makefile b/app/Makefile
index 9ccbda19..8704d5cd 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -250,13 +250,13 @@ endif

 scrape-ca-public-charge:
-	$(PY_RUN_CMD) scrape-ca-public-charge
+	$(PY_RUN_CMD) scrapy-runner ca_public_charge

 ingest-ca-public-charge: check-ingest-arguments
 	$(PY_RUN_CMD) ingest-ca-public-charge "$(DATASET_ID)" "$(BENEFIT_PROGRAM)" "$(BENEFIT_REGION)" "$(FILEPATH)" $(INGEST_ARGS)

 scrape-edd-web:
-	$(PY_RUN_CMD) scrape-edd-web
+	$(PY_RUN_CMD) scrapy-runner edd

 ingest-edd-web: check-ingest-arguments
 	$(PY_RUN_CMD) ingest-edd-web "$(DATASET_ID)" "$(BENEFIT_PROGRAM)" "$(BENEFIT_REGION)" "$(FILEPATH)" $(INGEST_ARGS)

@@ -274,16 +274,18 @@ scrape-la-county-policy:
 	cd src/ingestion/la_policy/scrape; uv run --no-project scrape_la_policy_nav_bar.py

 	# Now that we have the expanded nav bar, scrape all the links in the nav bar
-	# Either should work:
-	# DEBUG_SCRAPINGS=true uv run --no-project scrape_la_policy.py &> out.log
-	$(PY_RUN_CMD) scrape-la-policy 2>&1 | tee out.log
+	$(PY_RUN_CMD) scrapy-runner la_policy 2>&1 | tee out.log

 ingest-la-county-policy: check-ingest-arguments
 	$(PY_RUN_CMD) ingest-la-policy "$(DATASET_ID)" "$(BENEFIT_PROGRAM)" "$(BENEFIT_REGION)" "$(FILEPATH)" $(INGEST_ARGS)

 scrape-irs-web:
-	$(PY_RUN_CMD) scrape-irs-web
+	$(PY_RUN_CMD) scrapy-runner irs

 ingest-irs-web: check-ingest-arguments
 	$(PY_RUN_CMD) ingest-irs-web "$(DATASET_ID)" "$(BENEFIT_PROGRAM)" "$(BENEFIT_REGION)" "$(FILEPATH)" $(INGEST_ARGS)

+
+
+scrape-ca-ftb:
+	$(PY_RUN_CMD) scrapy-runner ca_ftb
diff --git a/app/pyproject.toml b/app/pyproject.toml
index 55d151c3..e9223337 100644
--- a/app/pyproject.toml
+++ b/app/pyproject.toml
@@ -69,15 +69,15 @@
 db-migrate = "src.db.migrations.run:up"
 db-migrate-down = "src.db.migrations.run:down"
 db-migrate-down-all = "src.db.migrations.run:downall"
 ingest-ca-public-charge = "src.ingest_ca_public_charge:main"
-scrape-ca-public-charge = "src.ingestion.scrape_ca_public_charge:main"
 ingest-edd-web = "src.ingest_edd_web:main"
 scrape-edd-web = "src.ingestion.scrape_edd_web:main"
 ingest-imagine-la = "src.ingestion.imagine_la.ingest:main"
 scrape-la-policy = "src.ingestion.scrape_la_policy:main"
 ingest-la-policy = "src.ingest_la_county_policy:main"
-scrape-irs-web = "src.ingestion.scrape_irs_web:main"
 ingest-irs-web = "src.ingest_irs_web:main"
+scrapy-runner = "src.ingestion.scrapy_runner:main"
+

 [tool.black]
 line-length = 100
diff --git a/app/src/ingestion/scrape_ca_public_charge.py b/app/src/ingestion/scrape_ca_public_charge.py
deleted file mode 100644
index 9bd8918d..00000000
--- a/app/src/ingestion/scrape_ca_public_charge.py
+++ /dev/null
@@ -1,16 +0,0 @@
-SPIDER_NAME = "ca_public_charge_spider"
-OUTPUT_JSON = "ca_public_charge_scrapings.json"
-
-
-def main() -> None:
-    import os
-
-    from .scrapy_runner import run
-
-    run(SPIDER_NAME, OUTPUT_JSON, debug=bool(os.environ.get("DEBUG_SCRAPINGS", False)))
-
-
-if __name__ == "__main__":
-    from scrapy_runner import run
-
-    run(SPIDER_NAME, OUTPUT_JSON, debug=True)
diff --git a/app/src/ingestion/scrape_edd_web.py b/app/src/ingestion/scrape_edd_web.py
deleted file mode 100644
index 13799757..00000000
--- a/app/src/ingestion/scrape_edd_web.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import json
-
-
-def save_user_friendly_markdown(filename: str) -> None:
-    with open(filename, "r", encoding="utf-8") as raw_json:
-        data = json.load(raw_json)
-    with open(f"{filename}.md", "w", encoding="utf-8") as md_file:
-        for item in data:
-            item_md = ["\n\n=============================="]
-            item_md.append(f"{item['title']}, {item['url']}")
-            if "main_content" in item:
-                item_md.append("\n------- @MAIN_CONTENT:\n")
-                item_md.append(item["main_content"])
-            if "main_primary" in item:
-                item_md.append("\n------- @MAIN_PRIMARY:\n")
-                item_md.append(item["main_primary"])
-            if "nonaccordion" in item:
-                item_md.append("\n------- @NONACCORDION:")
-                item_md.append(item["nonaccordion"])
-            if "accordions" in item:
-                item_md.append("\n------- @ACCORDIONS:")
-                for heading, paras in item["accordions"].items():
-                    item_md.append(f"\n---- ## {heading}:\n")
-                    for para in paras:
-                        item_md.append(para)
-            md_file.write("\n".join(item_md))
-    print("User-friendly markdown of JSON saved to %s.md", filename)
-
-
-OUTPUT_JSON = "edd_scrapings.json"
-SPIDER_NAME = "edd_spider"
-
-
-def main() -> None:
-    import os
-
-    from .scrapy_runner import run
-
-    debug = bool(os.environ.get("DEBUG_SCRAPINGS", False))
-    run(SPIDER_NAME, OUTPUT_JSON, debug)
-
-    if debug:
-        save_user_friendly_markdown(OUTPUT_JSON)
-
-
-if __name__ == "__main__":
-    from scrapy_runner import run
-
-    run(SPIDER_NAME, OUTPUT_JSON, debug=True)
diff --git a/app/src/ingestion/scrape_irs_web.py b/app/src/ingestion/scrape_irs_web.py
deleted file mode 100644
index e51ccebd..00000000
--- a/app/src/ingestion/scrape_irs_web.py
+++ /dev/null
@@ -1,16 +0,0 @@
-SPIDER_NAME = "irs_web_spider"
-OUTPUT_JSON = "irs_web_scrapings.json"
-
-
-def main() -> None:
-    import os
-
-    from .scrapy_runner import run
-
-    run(SPIDER_NAME, OUTPUT_JSON, debug=bool(os.environ.get("DEBUG_SCRAPINGS", False)))
-
-
-if __name__ == "__main__":
-    from scrapy_runner import run
-
-    run(SPIDER_NAME, OUTPUT_JSON, debug=True)
diff --git a/app/src/ingestion/scrape_la_policy.py b/app/src/ingestion/scrape_la_policy.py
deleted file mode 100644
index 4cd6f808..00000000
--- a/app/src/ingestion/scrape_la_policy.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# /// script
-# dependencies = [
-#   "install-playwright",
-#   "playwright",
-#   "scrapy",
-#   "markdownify",
-#   "nltk",
-#   "langchain_text_splitters",
-#   "html2text",
-#   "mistletoe",
-#   "nutree",
-# ]
-# ///
-# (This comment enables `uv run` to automatically create a virtual environment)
-
-SPIDER_NAME = "la_policy_spider"
-OUTPUT_JSON = "la_policy_scrapings.json"
-
-
-def main() -> None:
-    import os
-
-    from .scrapy_runner import run
-
-    run(SPIDER_NAME, OUTPUT_JSON, debug=bool(os.environ.get("DEBUG_SCRAPINGS", False)))
-
-
-if __name__ == "__main__":
-    from scrapy_runner import run
-
-    run(SPIDER_NAME, OUTPUT_JSON, debug=True)
diff --git a/app/src/ingestion/scrapy_dst/spiders/ca_ftb_spider.py b/app/src/ingestion/scrapy_dst/spiders/ca_ftb_spider.py
new file mode 100644
index 00000000..27ec6bf7
--- /dev/null
+++ b/app/src/ingestion/scrapy_dst/spiders/ca_ftb_spider.py
@@ -0,0 +1,74 @@
+import re
+from typing import Iterator, Optional
+
+import html2text
+import scrapy
+from scrapy.http import HtmlResponse
+
+
+class CaFranchiseTaxBoardSpider(scrapy.Spider):
+    # This name is used on the commandline: scrapy crawl ca_ftb_spider
+    name = "ca_ftb_spider"
+    allowed_domains = ["www.ftb.ca.gov"]
+    start_urls = ["https://www.ftb.ca.gov/file/personal/credits/index.html"]
+
+    # This is used to substitute the base URL in the cache storage
+    common_url_prefix = "https://www.ftb.ca.gov/file/"
+
+    def parse(self, response: HtmlResponse) -> Iterator[scrapy.Request | dict[str, str]]:
+        self.logger.info("Parsing %s", response.url)
+
+        nav_links = response.css("nav.local-nav a")
+        for link in nav_links:
+            if "class" in link.attrib and link.attrib["class"] == "uplevel":
+                # Skip the uplevel/back link that goes to the parent page
+                continue
+
+            assert link.attrib["href"]
+            self.logger.info("Found nav link: %s", link)
+            yield response.follow(link, callback=self.parse_childpage)
+
+        yield self.parse_childpage(response)
+
+    def parse_childpage(self, response: HtmlResponse) -> dict[str, str]:
+        self.logger.info("Parsing %s", response.url)
+
+        if (h1_count := len(response.css("h1").getall())) > 1:
+            self.logger.warning("Found %i h1 elements for %r", h1_count, response.url)
+            raise ValueError("Multiple h1 elements found")
+
+        title = to_markdown(response.css("h1").get().strip()).removeprefix("# ")
+        assert title
+
+        body = response.css("div#body-content")
+        # Drop the navigation sidebar so that we only get the main content
+        body.css("aside").drop()
+
+        markdown = to_markdown(body.get(), response.url)
+        extractions = {
+            "url": response.url,
+            "markdown": markdown,
+        }
+        return extractions
+
+
+def to_markdown(html: str, base_url: Optional[str] = None) -> str:
+    h2t = html2text.HTML2Text()
+
+    # Refer to https://github.com/Alir3z4/html2text/blob/master/docs/usage.md and html2text.config
+    # for options:
+    # 0 for no wrapping
+    h2t.body_width = 0
+    h2t.wrap_links = False
+
+    if base_url:
+        h2t.baseurl = base_url
+
+    # Exclude the <sup> and <sub> tags
+    h2t.include_sup_sub = False
+
+    markdown = h2t.handle(html)
+
+    # Consolidate newlines
+    markdown = re.sub(r"\n\n+", "\n\n", markdown)
+    return markdown.strip()
diff --git a/app/src/ingestion/scrapy_dst/spiders/irs_spider.py b/app/src/ingestion/scrapy_dst/spiders/irs_spider.py
index 6fa08bdc..28e89d0b 100644
--- a/app/src/ingestion/scrapy_dst/spiders/irs_spider.py
+++ b/app/src/ingestion/scrapy_dst/spiders/irs_spider.py
@@ -6,8 +6,6 @@
 from scrapy.linkextractors import LinkExtractor
 from scrapy.spiders.crawl import CrawlSpider, Rule

-AccordionSections = dict[str, list[str]]
-

 class IrsSpider(CrawlSpider):
     # This name is used on the commandline: scrapy crawl edd_spider
@@ -43,7 +41,7 @@ class IrsSpider(CrawlSpider):
         ),
     )

-    def parse_page(self, response: HtmlResponse) -> dict[str, str | AccordionSections]:
+    def parse_page(self, response: HtmlResponse) -> dict[str, str]:
         self.logger.info("Parsing %s", response.url)

         extractions = {"url": response.url}
diff --git a/app/src/ingestion/scrapy_runner.py b/app/src/ingestion/scrapy_runner.py
index 419faa6c..7b297a29 100644
--- a/app/src/ingestion/scrapy_runner.py
+++ b/app/src/ingestion/scrapy_runner.py
@@ -1,6 +1,8 @@
+import argparse
 import json
 import logging
 import os
+import sys
 from pprint import pprint

 from scrapy.crawler import CrawlerProcess
@@ -55,3 +57,26 @@ def run(spider_name: str, output_json_filename: str, debug: bool = False) -> Non
     run_spider(spider_name, output_json_filename)
     if debug:
         postprocess_json(output_json_filename)
+
+
+DATASETS = {
+    "edd": {},
+    "la_policy": {},
+    "irs": {
+        "spider": "irs_web_spider",
+    },
+    "ca_public_charge": {},
+    "ca_ftb": {},
+}
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("dataset")
+    parser.add_argument("--debug", action="store_true")
+
+    args = parser.parse_args(sys.argv[1:])
+    ds = DATASETS[args.dataset]
+    spider_id = ds.get("spider", f"{args.dataset}_spider")
+    json_output = ds.get("output", f"{spider_id.removesuffix('spider')}scrapings.json")
+    run(spider_id, json_output, debug=args.debug)
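
As a quick illustration (a sketch, not part of the patch): with the consolidated runner above, a dataset name is resolved to a spider and an output file using the DATASETS defaults; the `resolve` helper below is hypothetical and only mirrors the logic in `scrapy_runner.main()`.

    # Sketch: mirrors the spider/output resolution in scrapy_runner.main()
    def resolve(dataset: str) -> tuple[str, str]:
        ds = DATASETS[dataset]
        spider_id = ds.get("spider", f"{dataset}_spider")
        json_output = ds.get("output", f"{spider_id.removesuffix('spider')}scrapings.json")
        return spider_id, json_output

    # e.g. resolve("irs")    == ("irs_web_spider", "irs_web_scrapings.json")
    #      resolve("ca_ftb") == ("ca_ftb_spider", "ca_ftb_scrapings.json")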