diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 3d3b1051242..9b7f493baa6 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -34,10 +34,13 @@ jobs:
       uses: actions/setup-python@v5
       with:
         python-version: "3.10"
-    - name: Convert Tutorials
+    - name: Run Tutorials
       run: |
         pip install -e ".[tutorial]"
-        python3 scripts/convert_ipynb_to_mdx.py
+        python scripts/run_tutorials.py -w "$(pwd)"
+    - name: Convert Tutorials
+      run: |
+        python3 scripts/convert_ipynb_to_mdx.py --clean
     - name: Create new version in Docusaurus
       id: create-version
       run: |
diff --git a/.github/workflows/reusable_tutorials.yml b/.github/workflows/reusable_tutorials.yml
index 4586309b257..e1d3e30553f 100644
--- a/.github/workflows/reusable_tutorials.yml
+++ b/.github/workflows/reusable_tutorials.yml
@@ -46,10 +46,13 @@ jobs:
         pip install -e ".[tutorial]"
 
     - if: ${{ inputs.smoke_test }}
-      name: Build tutorials with smoke test
+      name: Run tutorials with smoke test
       run: |
-        python scripts/make_tutorials.py -w $(pwd) -e -s
+        python scripts/run_tutorials.py -w "$(pwd)" -s
     - if: ${{ !inputs.smoke_test }}
-      name: Build tutorials without smoke test
+      name: Run tutorials without smoke test
       run: |
-        python scripts/make_tutorials.py -w $(pwd) -e
+        python scripts/run_tutorials.py -w "$(pwd)"
+    - name: Build tutorials
+      run: |
+        python scripts/convert_ipynb_to_mdx.py --clean
diff --git a/scripts/make_tutorials.py b/scripts/make_tutorials.py
deleted file mode 100644
index 005c2bbb342..00000000000
--- a/scripts/make_tutorials.py
+++ /dev/null
@@ -1,277 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
- -import argparse -import json -import os -import tarfile -import time -from pathlib import Path - -import nbformat -import papermill -from bs4 import BeautifulSoup -from nbclient.exceptions import CellTimeoutError -from nbconvert import HTMLExporter, ScriptExporter - -TUTORIALS_TO_SKIP = [ - "raytune_pytorch_cnn", # TODO: Times out CI but passes locally. Investigate. -] - - -TEMPLATE = """const CWD = process.cwd(); - -const React = require('react'); -const Tutorial = require(`${{CWD}}/core/Tutorial.js`); - -class TutorialPage extends React.Component {{ - render() {{ - const {{config: siteConfig}} = this.props; - const {{baseUrl}} = siteConfig; - return ( - - ); - }} -}} - -module.exports = TutorialPage; - -""" - -# we already load Plotly within html head on the site (just using -""" - - -def _get_paths(repo_dir: str, t_dir: str | None, tid: str) -> dict[str, str]: - if t_dir is not None: - tutorial_dir = os.path.join(repo_dir, "tutorials", t_dir) - html_dir = os.path.join(repo_dir, "website", "_tutorials", t_dir) - js_dir = os.path.join(repo_dir, "website", "pages", "tutorials", t_dir) - py_dir = os.path.join(repo_dir, "website", "static", "files", t_dir) - - for d in [tutorial_dir, html_dir, js_dir, py_dir]: - os.makedirs(d, exist_ok=True) - - tutorial_path = os.path.join(tutorial_dir, f"{tid}.ipynb") - html_path = os.path.join(html_dir, f"{tid}.html") - js_path = os.path.join(js_dir, f"{tid}.js") - ipynb_path = os.path.join(py_dir, f"{tid}.ipynb") - py_path = os.path.join(py_dir, f"{tid}.py") - else: - tutorial_dir = os.path.join(repo_dir, "tutorials") - tutorial_path = os.path.join(repo_dir, "tutorials", f"{tid}.ipynb") - html_path = os.path.join(repo_dir, "website", "_tutorials", f"{tid}.html") - js_path = os.path.join(repo_dir, "website", "pages", "tutorials", f"{tid}.js") - ipynb_path = os.path.join( - repo_dir, "website", "static", "files", f"{tid}.ipynb" - ) - py_path = os.path.join(repo_dir, "website", "static", "files", f"{tid}.py") - - paths = { - 
"tutorial_dir": tutorial_dir, - "tutorial_path": tutorial_path, - "html_path": html_path, - "js_path": js_path, - "ipynb_path": ipynb_path, - "py_path": py_path, - } - if t_dir is not None: - paths["tar_path"] = os.path.join(py_dir, f"{tid}.tar.gz") - return paths - - -def run_script( - tutorial: Path, timeout_minutes: int, env: dict[str, str] | None = None -) -> None: - if env is not None: - os.environ.update(env) - papermill.execute_notebook( - tutorial, - tutorial, - # This timeout is on cell-execution time, not on total runtime. - execution_timeout=timeout_minutes * 60, - ) - - -def gen_tutorials( - repo_dir: str, - exec_tutorials: bool, - name: str | None = None, - smoke_test: bool = False, -) -> None: - """Generate HTML tutorials for Docusaurus Ax site from Jupyter notebooks. - - Also create ipynb and py versions of tutorial in Docusaurus site for - download. - """ - has_errors = False - - with open(os.path.join(repo_dir, "website", "tutorials.json")) as infile: - tutorial_config = json.loads(infile.read()) - # flatten config dict - tutorial_configs = [ - config for category in tutorial_config.values() for config in category - ] - # Running only the tutorial described by "name" - if name is not None: - tutorial_configs = [d for d in tutorial_configs if d["id"] == name] - if len(tutorial_configs) == 0: - raise RuntimeError(f"No tutorial found with name {name}.") - # prepare paths for converted tutorials & files - os.makedirs(os.path.join(repo_dir, "website", "_tutorials"), exist_ok=True) - os.makedirs(os.path.join(repo_dir, "website", "static", "files"), exist_ok=True) - env = {"SMOKE_TEST": "True"} if smoke_test else None - - for config in tutorial_configs: - tid = config["id"] - t_dir = config.get("dir") - exec_on_build = config.get("exec_on_build", True) - print(f"Generating {tid} tutorial") - paths = _get_paths(repo_dir=repo_dir, t_dir=t_dir, tid=tid) - - total_time = None - - if tid in TUTORIALS_TO_SKIP: - print(f"Skipping execution of {tid}") - continue 
- elif exec_tutorials and exec_on_build: - tutorial_path = Path(paths["tutorial_path"]) - print(f"Executing tutorial {tid}") - start_time = time.monotonic() - - # Try / catch failures for now. We will re-raise at the end. - timeout_minutes = 15 if smoke_test else 150 - try: - # Execute notebook. - run_script( - tutorial=tutorial_path, - timeout_minutes=timeout_minutes, - env=env, - ) - total_time = time.monotonic() - start_time - print( - f"Finished executing tutorial {tid} in {total_time:.2f} seconds. " - ) - except CellTimeoutError: - has_errors = True - print( - f"Tutorial {tid} exceeded the maximum runtime of " - f"{timeout_minutes} minutes." - ) - except Exception as e: - has_errors = True - print(f"Encountered error running tutorial {tid}: \n {e}") - - # load notebook - with open(paths["tutorial_path"]) as infile: - nb_str = infile.read() - nb = nbformat.reads(nb_str, nbformat.NO_CONVERT) - # convert notebook to HTML - exporter = HTMLExporter(template_name="classic") - html, _ = exporter.from_notebook_node(nb) - - # pull out html div for notebook - soup = BeautifulSoup(html, "html.parser") - nb_meat = soup.find("div", {"id": "notebook-container"}) - del nb_meat.attrs["id"] - nb_meat.attrs["class"] = ["notebook"] - - # when output html, iframe it (useful for Ax reports) - for html_div in nb_meat.findAll("div", {"class": "output_html"}): - if html_div.html is not None: - iframe = soup.new_tag("iframe") - iframe.attrs["src"] = "data:text/html;charset=utf-8," + str( - html_div.html - ) - # replace `#` in CSS - iframe.attrs["src"] = iframe.attrs["src"].replace("#", "%23") - html_div.contents = [iframe] - - html_out = MOCK_JS_REQUIRES + str(nb_meat) - - # generate HTML file - with open(paths["html_path"], "w") as html_outfile: - html_outfile.write(html_out) - - # generate JS file - t_dir_js = t_dir if t_dir else "" - script = TEMPLATE.format( - t_dir=t_dir_js, - tid=tid, - total_time=total_time if total_time is not None else "null", - ) - with 
open(paths["js_path"], "w") as js_outfile: - js_outfile.write(script) - - # output tutorial in both ipynb & py form - nbformat.write(nb, paths["ipynb_path"]) - exporter = ScriptExporter() - script, _ = exporter.from_notebook_node(nb) - with open(paths["py_path"], "w") as py_outfile: - py_outfile.write(script) - - # create .tar archive (if necessary) - if t_dir is not None: - with tarfile.open(paths["tar_path"], "w:gz") as tar: - tar.add( - paths["tutorial_dir"], - arcname=os.path.basename(paths["tutorial_dir"]), - ) - - if has_errors: - raise Exception("There are errors in tutorials, will not continue to publish") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Generate JS, HTML, ipynb, and py files for tutorials." - ) - parser.add_argument( - "-w", "--repo_dir", metavar="path", required=True, help="Ax repo directory." - ) - parser.add_argument( - "-s", "--smoke", action="store_true", help="Run in smoke test mode." - ) - parser.add_argument( - "-e", - "--exec_tutorials", - action="store_true", - default=False, - help="Execute tutorials (instead of just converting).", - ) - parser.add_argument( - "-n", - "--name", - help="Run a specific tutorial by name. The name should not include the " - ".ipynb extension.", - ) - args = parser.parse_args() - gen_tutorials( - args.repo_dir, - args.exec_tutorials, - smoke_test=args.smoke, - name=args.name, - ) diff --git a/scripts/run_tutorials.py b/scripts/run_tutorials.py new file mode 100644 index 00000000000..3c8f1cfeb8c --- /dev/null +++ b/scripts/run_tutorials.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+
+import argparse
+import json
+import os
+import time
+from pathlib import Path
+
+import papermill
+from nbclient.exceptions import CellTimeoutError
+
+TUTORIALS_TO_SKIP = [
+    "raytune_pytorch_cnn",  # TODO: Times out CI but passes locally. Investigate.
+]
+
+
+def run_script(
+    tutorial: Path, timeout_minutes: int, env: dict[str, str] | None = None
+) -> None:
+    """Execute a notebook in place with papermill.
+
+    ``env`` entries, if given, are merged into ``os.environ`` of this process
+    first and stay set afterwards. ``timeout_minutes`` bounds each cell.
+    """
+    if env is not None:
+        os.environ.update(env)
+    papermill.execute_notebook(
+        tutorial,
+        tutorial,
+        # This timeout is on cell-execution time, not on total runtime.
+        execution_timeout=timeout_minutes * 60,
+    )
+
+
+def run_tutorials(
+    repo_dir: str,
+    name: str | None = None,
+    smoke_test: bool = False,
+) -> None:
+    """Run the tutorial notebooks listed in ``website/tutorials.json``.
+
+    Tutorial notebooks are checked in un-run; CI runs them as integration tests.
+
+    Args:
+        repo_dir: Ax repo directory.
+        name: If given, only the tutorial with this id is run.
+        smoke_test: Set ``SMOKE_TEST=True`` and use the 15-minute cell timeout.
+
+    Raises:
+        RuntimeError: If ``name`` matches no tutorial, or any tutorial failed.
+    """
+    with open(os.path.join(repo_dir, "website", "tutorials.json")) as infile:
+        tutorial_config = json.load(infile)
+    # flatten config dict
+    tutorial_configs = [
+        config for category in tutorial_config.values() for config in category
+    ]
+    # Running only the tutorial described by "name"
+    if name is not None:
+        tutorial_configs = [d for d in tutorial_configs if d["id"] == name]
+        if not tutorial_configs:
+            raise RuntimeError(f"No tutorial found with name {name}.")
+    env = {"SMOKE_TEST": "True"} if smoke_test else None
+    timeout_minutes = 15 if smoke_test else 150
+    failed: list[str] = []
+
+    for config in tutorial_configs:
+        tid = config["id"]
+        if tid in TUTORIALS_TO_SKIP:
+            print(f"Skipping execution of {tid}")
+            continue
+
+        tutorial_path = Path(repo_dir) / "tutorials" / tid / f"{tid}.ipynb"
+        print(f"Executing tutorial {tid}")
+        start_time = time.monotonic()
+        # Record failures and keep going; we raise once every tutorial has run.
+        try:
+            run_script(tutorial=tutorial_path, timeout_minutes=timeout_minutes, env=env)
+        except CellTimeoutError:
+            failed.append(tid)
+            print(
+                f"Tutorial {tid} exceeded the maximum runtime of "
+                f"{timeout_minutes} minutes."
+            )
+        except Exception as e:
+            failed.append(tid)
+            print(f"Encountered error running tutorial {tid}: \n {e}")
+        else:
+            total_time = time.monotonic() - start_time
+            print(f"Finished executing tutorial {tid} in {total_time:.2f} seconds. ")
+
+    if failed:
+        raise RuntimeError(
+            "There are errors in tutorials, will not continue to publish "
+            f"(failed: {', '.join(failed)})"
+        )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Run the tutorial notebooks listed in website/tutorials.json."
+    )
+    parser.add_argument(
+        "-w", "--repo_dir", metavar="path", required=True, help="Ax repo directory."
+    )
+    parser.add_argument(
+        "-s", "--smoke", action="store_true", help="Run in smoke test mode."
+    )
+    parser.add_argument(
+        "-n",
+        "--name",
+        help="Run a specific tutorial by name. The name should not include the "
+        ".ipynb extension.",
+    )
+    args = parser.parse_args()
+    run_tutorials(args.repo_dir, smoke_test=args.smoke, name=args.name)
diff --git a/website/README.md b/website/README.md
index 297e66ca531..67f87ce2059 100644
--- a/website/README.md
+++ b/website/README.md
@@ -6,7 +6,7 @@ The Ax website is built using [Docusaurus](https://docusaurus.io/), a modern sta
 We convert tutorial notebooks to MDX for embedding as docs. This needs to be done before serving the website and can be done by running this script from the project root:
 
 ```bash
-python3 scripts/convert_ipynb_to_mdx.py
+python3 scripts/convert_ipynb_to_mdx.py --clean
 ```
 
 If the script fails ensure you have the necessary dependencies (ideally to your virtual env):