diff --git a/.github/workflows/tests_ci.yaml b/.github/workflows/tests.yaml
similarity index 90%
rename from .github/workflows/tests_ci.yaml
rename to .github/workflows/tests.yaml
index 60f46015..25593ea6 100644
--- a/.github/workflows/tests_ci.yaml
+++ b/.github/workflows/tests.yaml
@@ -1,4 +1,4 @@
-name: Unit and Integration Tests
+name: Static, Unit, Integration, and End-to-End Tests
 
 on:
   push: {}
@@ -6,7 +6,7 @@ on:
     branches: [main]
 
 jobs:
-  ci:
+  tests:
     # The code for the self-hosted runners is at https://github.com/wangpatrick57/dbgym-runners.
     runs-on: self-hosted
 
@@ -50,11 +50,11 @@ jobs:
     - name: Run integration tests
       # Integration tests do require external systems to be running (most commonly a database instance).
       # Unlike end-to-end tests though, they test a specific module in a detailed manner, much like a unit test does.
-      #
-      # We set `INTENDED_DBDATA_HARDWARE` so that it's seen when `integtest_pg_conn.py` executes `./tune/env/set_up_env_integtests.sh`.
+      env:
+        # We set `INTENDED_DBDATA_HARDWARE` so that it's seen when `integtest_pg_conn.py` executes `./tune/env/set_up_env_integtests.sh`.
+        INTENDED_DBDATA_HARDWARE: ssd
       run: |
         . "$HOME/.cargo/env"
-        export INTENDED_DBDATA_HARDWARE=ssd
         ./scripts/run_integration_tests.sh
 
     - name: Run end-to-end tests
diff --git a/dependencies/requirements.txt b/dependencies/requirements.txt
index 216c159a..6c0cb4b7 100644
--- a/dependencies/requirements.txt
+++ b/dependencies/requirements.txt
@@ -134,3 +134,4 @@ virtualenv==20.25.0
 Werkzeug==3.0.1
 wrapt==1.14.1
 zipp==3.17.0
+streamlit==1.39.0
diff --git a/experiments/load_per_machine_envvars.sh b/experiments/load_per_machine_envvars.sh
index b9772d3c..22b220c8 100644
--- a/experiments/load_per_machine_envvars.sh
+++ b/experiments/load_per_machine_envvars.sh
@@ -3,8 +3,13 @@ host=$(hostname)
 
 if [ "$host" == "dev4" ]; then
     export DBDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/
+    export INTENDED_DBDATA_HARDWARE=ssd
 elif [ "$host" == "dev6" ]; then
     export DBDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/
+    export INTENDED_DBDATA_HARDWARE=ssd
+elif [ "$host" == "patnuc" ]; then
+    export DBDATA_PARENT_DPATH=../dbgym_workspace/tmp/
+    export INTENDED_DBDATA_HARDWARE=hdd
 else
     echo "Did not recognize host \"$host\""
     exit 1
diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh
index fc2ae203..f858c76d 100755
--- a/scripts/pat_test.sh
+++ b/scripts/pat_test.sh
@@ -3,11 +3,11 @@
 set -euxo pipefail
 
 SCALE_FACTOR=0.01
-INTENDED_DBDATA_HARDWARE=ssd
 . ./experiments/load_per_machine_envvars.sh
 
 # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
-python3 task.py tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2
+python3 task.py dbms postgres build
+python3 task.py dbms postgres dbdata tpch --scale-factor $SCALE_FACTOR --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH
 exit 0
 
 # benchmark
diff --git a/scripts/run_demo.sh b/scripts/run_demo.sh
new file mode 100755
index 00000000..1e434a7d
--- /dev/null
+++ b/scripts/run_demo.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+python -m streamlit run tune/demo/main.py
\ No newline at end of file
diff --git a/task.py b/task.py
index 46adb114..ff30f9c3 100644
--- a/task.py
+++ b/task.py
@@ -17,7 +17,7 @@
 from dbms.cli import dbms_group
 from manage.cli import manage_group
 from tune.cli import tune_group
-from util.workspace import DBGymConfig
+from util.workspace import make_standard_dbgym_cfg
 
 # TODO(phw2): Save commit, git diff, and run command.
 # TODO(phw2): Remove write permissions on old run_*/ dirs to enforce that they are immutable.
@@ -28,8 +28,7 @@
 @click.pass_context
 def task(ctx: click.Context) -> None:
     """🛢️ CMU-DB Database Gym: github.com/cmu-db/dbgym 🏋️"""
-    dbgym_config_path = Path(os.getenv("DBGYM_CONFIG_PATH", "dbgym_config.yaml"))
-    dbgym_cfg = DBGymConfig(dbgym_config_path)
+    dbgym_cfg = make_standard_dbgym_cfg()
     ctx.obj = dbgym_cfg
 
     log_dpath = dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True)
diff --git a/tune/demo/__init__.py b/tune/demo/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tune/demo/main.py b/tune/demo/main.py
new file mode 100644
index 00000000..9520e611
--- /dev/null
+++ b/tune/demo/main.py
@@ -0,0 +1,64 @@
+import streamlit as st
+
+from tune.env.pg_conn import PostgresConn
+from util.pg import DEFAULT_POSTGRES_PORT, get_is_postgres_running
+from util.workspace import (
+    DEFAULT_BOOT_CONFIG_FPATH,
+    DBGymConfig,
+    default_dbdata_parent_dpath,
+    default_pgbin_path,
+    default_pristine_dbdata_snapshot_path,
+    make_standard_dbgym_cfg,
+)
+
+
+# This ensures that DBGymConfig is only created once. Check DBGymConfig.__init__() for why we must do this.
+@st.cache_resource
+def make_dbgym_cfg() -> DBGymConfig:
+    return make_standard_dbgym_cfg()
+
+
+class Demo:
+    BENCHMARK = "tpch"
+    SCALE_FACTOR = 0.01
+
+    def __init__(self) -> None:
+        self.dbgym_cfg = make_dbgym_cfg()
+        self.pristine_dbdata_snapshot_path = default_pristine_dbdata_snapshot_path(
+            self.dbgym_cfg.dbgym_workspace_path, Demo.BENCHMARK, Demo.SCALE_FACTOR
+        )
+        self.dbdata_parent_dpath = default_dbdata_parent_dpath(
+            self.dbgym_cfg.dbgym_workspace_path
+        )
+        self.pgbin_dpath = default_pgbin_path(self.dbgym_cfg.dbgym_workspace_path)
+        self.pg_conn = PostgresConn(
+            self.dbgym_cfg,
+            DEFAULT_POSTGRES_PORT,
+            self.pristine_dbdata_snapshot_path,
+            self.dbdata_parent_dpath,
+            self.pgbin_dpath,
+            False,
+            DEFAULT_BOOT_CONFIG_FPATH,
+        )
+
+    def main(self) -> None:
+        is_postgres_running = get_is_postgres_running()
+
+        if is_postgres_running:
+            st.write("Postgres is running")
+
+            if st.button("Stop Postgres"):
+                self.pg_conn.shutdown_postgres()
+                st.rerun()
+        else:
+            st.write("Postgres is not running")
+
+            if st.button("Start Postgres"):
+                self.pg_conn.restore_pristine_snapshot()
+                self.pg_conn.start_with_changes()
+                st.rerun()
+
+
+if __name__ == "__main__":
+    demo = Demo()
+    demo.main()
diff --git a/tune/env/integtest_pg_conn.py b/tune/env/integtest_pg_conn.py
index e4f356af..a2470571 100644
--- a/tune/env/integtest_pg_conn.py
+++ b/tune/env/integtest_pg_conn.py
@@ -5,7 +5,11 @@
 import yaml
 
 from tune.env.pg_conn import PostgresConn
-from util.pg import get_is_postgres_running, get_running_postgres_ports
+from util.pg import (
+    DEFAULT_POSTGRES_PORT,
+    get_is_postgres_running,
+    get_running_postgres_ports,
+)
 from util.workspace import (
     DEFAULT_BOOT_CONFIG_FPATH,
     DBGymConfig,
@@ -17,7 +21,6 @@
 ENV_INTEGTESTS_DBGYM_CONFIG_FPATH = Path("tune/env/env_integtests_dbgym_config.yaml")
 BENCHMARK = "tpch"
 SCALE_FACTOR = 0.01
-BASE_PGPORT = 5432
 
 
 def get_unittest_workspace_path() -> Path:
@@ -54,7 +57,7 @@ def setUp(self) -> None:
     def tearDown(self) -> None:
         self.assertFalse(get_is_postgres_running())
 
-    def create_pg_conn(self, pgport: int = BASE_PGPORT) -> PostgresConn:
+    def create_pg_conn(self, pgport: int = DEFAULT_POSTGRES_PORT) -> PostgresConn:
         return PostgresConn(
             PostgresConnTests.dbgym_cfg,
             pgport,
@@ -79,12 +82,13 @@ def test_start_on_multiple_ports(self) -> None:
         pg_conn0 = self.create_pg_conn()
         pg_conn0.restore_pristine_snapshot()
         pg_conn0.start_with_changes()
-        self.assertEqual(set(get_running_postgres_ports()), {BASE_PGPORT})
-        pg_conn1 = self.create_pg_conn(BASE_PGPORT + 1)
+        self.assertEqual(set(get_running_postgres_ports()), {DEFAULT_POSTGRES_PORT})
+        pg_conn1 = self.create_pg_conn(DEFAULT_POSTGRES_PORT + 1)
         pg_conn1.restore_pristine_snapshot()
         pg_conn1.start_with_changes()
         self.assertEqual(
-            set(get_running_postgres_ports()), {BASE_PGPORT, BASE_PGPORT + 1}
+            set(get_running_postgres_ports()),
+            {DEFAULT_POSTGRES_PORT, DEFAULT_POSTGRES_PORT + 1},
         )
 
         # Clean up
diff --git a/util/workspace.py b/util/workspace.py
index 7ee7c91b..f94d70ef 100644
--- a/util/workspace.py
+++ b/util/workspace.py
@@ -342,6 +342,16 @@ def cur_task_runs_artifacts_path(self, *dirs: str, mkdir: bool = False) -> Path:
         return self.cur_task_runs_path("artifacts", *dirs, mkdir=mkdir)
 
 
+def make_standard_dbgym_cfg() -> DBGymConfig:
+    """
+    The "standard" way to make a DBGymConfig: use the DBGYM_CONFIG_PATH envvar if it is set,
+    falling back to the default path of dbgym_config.yaml.
+ """ + dbgym_config_path = Path(os.getenv("DBGYM_CONFIG_PATH", "dbgym_config.yaml")) + dbgym_cfg = DBGymConfig(dbgym_config_path) + return dbgym_cfg + + def conv_inputpath_to_realabspath( dbgym_cfg: DBGymConfig, inputpath: os.PathLike[str] ) -> Path: