v0.0.6 Query metrics from history #32

Merged · 26 commits · merged Mar 5, 2024

Commits
db0218b  git commit -m "Fix setSchema and query execution from directory issue… (anhhchu, Jan 15, 2024)
9ccf81e  Rebuild v0.0.4 wheel file (anhhchu, Jan 16, 2024)
f8937cd  Feature: Enable pro and classic warehouse creation (anhhchu, Jan 19, 2024)
b676f01  Feature: Enable pro and classic warehouse creation (anhhchu, Jan 19, 2024)
d5396de  Fix method call in SQLWarehouseUtils (anhhchu, Jan 19, 2024)
6f590c8  Replace warehouse status check with WorkspaceClient().warehouses.star… (anhhchu, Jan 19, 2024)
8ac21b2  Merge branch 'goodwillpunning:main' into main (anhhchu, Jan 19, 2024)
80cb6e9  Replace beaker v0.0.5 (anhhchu, Jan 19, 2024)
96a4a99  Add option to specify warehouse type in new SQL warehouse provisioning (anhhchu, Jan 19, 2024)
ac120de  Merge branch 'main' into feature-warehouse_type (anhhchu, Jan 19, 2024)
9ccba33  Merge pull request #1 from anhhchu/feature-warehouse_type (anhhchu, Jan 22, 2024)
9b02b08  Add auth option to workspace client (anhhchu, Jan 22, 2024)
fbf138d  Merge branch 'feature-warehouse_type' (anhhchu, Jan 22, 2024)
d4ddee2  Pull query metrics from history using /api/2.0/sql/history/queries (anhhchu, Jan 24, 2024)
211d9a8  Add query history, update examples to reflect the changes (anhhchu, Jan 27, 2024)
89c0b2a  Resolve conflict (anhhchu, Jan 27, 2024)
ef4de94  Merge pull request #2 from anhhchu/feature_query_metrics_history (anhhchu, Jan 27, 2024)
741026b  Add warehouse_type option (anhhchu, Jan 27, 2024)
adf0319  Remove warehouse_type in metrics (anhhchu, Jan 27, 2024)
edce81d  Update v0.0.6 (anhhchu, Feb 3, 2024)
d3c098e  Add query metrics history extraction (anhhchu, Feb 26, 2024)
6184a6f  Fix orig file format parsing (anhhchu, Feb 26, 2024)
181887f  Update benchmark (anhhchu, Feb 27, 2024)
81f3da2  Add stop warehouse function (anhhchu, Feb 28, 2024)
37aa481  Update format of metrics pandas dataframe (anhhchu, Mar 1, 2024)
2bd7ebe  Remove commented out function (anhhchu, Mar 4, 2024)
Files changed
6 changes: 6 additions & 0 deletions .gitignore
@@ -11,3 +11,9 @@
 # Build helpers
 *beaker.egg-info*
 build/*
+
+myvenv/
+
+*/.env
+.env
+
2 changes: 1 addition & 1 deletion README.md
@@ -84,7 +84,7 @@ metrics = benchmark.execute()
 print(metrics)
 ```
 
-`metrics` is a list of dict. Each dict is the result of a single query execution.
+`metrics` is a pandas DataFrame; each row is the result of a single query execution.
 
 If you want to examine the results as a spark DataFrame and your environment has the capability of creating a spark session, you can use spark_fixture.
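
Read together with the example changes below, the documented flow is roughly this (a minimal sketch assembled from names that appear elsewhere in this PR; connection setup is elided, and a Spark-capable environment is assumed):

```python
from beaker import benchmark, spark_fixture

bm = benchmark.Benchmark()
# ...set name, hostname, token, warehouse http_path, and query as in the examples below...

# execute() now returns a pandas DataFrame with one row per query execution
metrics_pdf = bm.execute()

# Convert the metrics to a Spark DataFrame and register a temp view
metrics_df = spark_fixture.metrics_to_df_view(metrics_pdf, "metrics_view")
metrics_df.display()
```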
Binary file modified (not shown): dist/beaker-0.0.5-py3-none-any.whl
Binary file modified (not shown): dist/beaker-0.0.5.tar.gz
Binary file added (not shown): dist/beaker-0.0.6-py3-none-any.whl
Binary file added (not shown): dist/beaker-0.0.6.tar.gz
Binary file removed (not shown): examples/beaker_getting_started.dbc
55 changes: 38 additions & 17 deletions examples/beaker_getting_started.py
@@ -1,5 +1,7 @@
 # Databricks notebook source
-# MAGIC %pip install databricks-sql-connector
+# MAGIC %pip install databricks-sql-connector -q
+# MAGIC %pip install databricks-sdk -q
+# MAGIC dbutils.library.restartPython()
 
 # COMMAND ----------
 
@@ -31,6 +33,10 @@
 
 # COMMAND ----------
 
+importlib.reload(spark_fixture)
+
+# COMMAND ----------
+
 # MAGIC %md
 # MAGIC ## Create a new Benchmark Test
 
@@ -47,21 +53,24 @@
 # COMMAND ----------
 
 # Change hostname and http_path to your dbsql warehouse
-hostname = "your-dbsql-hostname"
-http_path = "your-dbsql-http-path"
+hostname = spark.conf.get('spark.databricks.workspaceUrl')
+# Extract token from dbutils
+pat = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
+# OR Add the appropriate scope and key for your token if configured in databricks secrets
+# pat = dbutils.secrets.get(scope="your-scope", key="your-token")
 
-# Add the appropriate scope and key for your token
-pat = dbutils.secrets.get(scope="your-scope", key="your-token")
+# warehouse http path example, replace with your own
+http_path = "/sql/1.0/warehouses/475b94ddc7cd5211"
 
 # COMMAND ----------
 
-# Define connection parameters
+# Use the builder pattern to add parameters for connecting to the warehouse
 bm.setName(name="simple_test")
 bm.setHostname(hostname=hostname)
-bm.setWarehouseToken(token=pat)
 bm.setWarehouse(http_path=http_path)
 bm.setConcurrency(concurrency=1)
+bm.setWarehouseToken(token=pat)
 
 # Define the query to execute and target Catalog
 query_str = """
@@ -75,11 +84,7 @@
 # COMMAND ----------
 
 # Run the benchmark!
-metrics = bm.execute()
-
-# COMMAND ----------
-
-metrics
+metrics_pdf = bm.execute()
 
 # COMMAND ----------
 
@@ -88,7 +93,7 @@
 
 # COMMAND ----------
 
-df_simple_test = spark_fixture.metrics_to_df_view(metrics, "simple_test_vw")
+df_simple_test = spark_fixture.metrics_to_df_view(metrics_pdf, "simple_test_vw")
 df_simple_test.display()
 
 # COMMAND ----------
 
@@ -109,10 +114,19 @@
 
 # COMMAND ----------
 
+hostname = spark.conf.get('spark.databricks.workspaceUrl')
+pat = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
+query_str = """
+SELECT count(*)
+FROM delta.`/databricks-datasets/nyctaxi/tables/nyctaxi_yellow`
+WHERE passenger_count > 2
+"""
+
 new_warehouse_config = {
     "type": "warehouse",
     "runtime": "latest",
-    "size": "Large",
+    "size": "2X-Small",
+    "warehouse": "serverless",
     "min_num_clusters": 1,
     "max_num_clusters": 3,
     "enable_photon": True,
@@ -132,8 +146,8 @@
 # benchmark.preWarmTables(tables=["table_a", "table_b", "table_c"])
 
 # Run the benchmark!
-metrics = bm.execute()
-print(metrics)
+metrics_pdf = bm.execute()
+display(metrics_pdf)
 
 # COMMAND ----------
 
@@ -189,7 +203,14 @@
 
 # COMMAND ----------
 
-metrics = bm.execute()
-print(metrics)
+metrics_pdf = bm.execute()
+# Create a spark dataframe of the returned metrics pandas dataframe
+metrics_df = spark_fixture.metrics_to_df_view(metrics_pdf, view_name="metrics_view")
+
+# COMMAND ----------
+
+# MAGIC %sql select * from metrics_view
 
 # COMMAND ----------

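This notebook exercises only the "serverless" path, but the PR also enables pro and classic warehouse creation (commits f8937cd and 96a4a99). A hedged guess at the equivalent config, assuming the "warehouse" key accepts the other two types; the call that consumes the dict is collapsed in the diff above:

```python
# Assumed variant of the config shown above; "pro" / "classic" as accepted
# values is an inference from the warehouse-type commits, not from this diff.
new_warehouse_config = {
    "type": "warehouse",
    "runtime": "latest",
    "size": "2X-Small",
    "warehouse": "pro",  # or "classic"
    "min_num_clusters": 1,
    "max_num_clusters": 3,
    "enable_photon": True,
}
```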
47 changes: 33 additions & 14 deletions examples/beaker_standalone.py
100755 → 100644
@@ -4,44 +4,63 @@
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
 
-sys.path.append("../src")
+from dotenv import load_dotenv
+load_dotenv()
 
-from beaker import benchmark
+sys.path.append("../src")
 
-bm = benchmark.Benchmark()
+from beaker import benchmark, sqlwarehouseutils
 
 hostname = os.getenv("DATABRICKS_HOST")
 http_path = os.getenv("DATABRICKS_HTTP_PATH")
 # Don't put tokens in plaintext in code
 access_token = os.getenv("DATABRICKS_ACCESS_TOKEN")
+catalog_name = os.getenv("CATALOG")
+schema_name = os.getenv("SCHEMA")
 
 
+bm = benchmark.Benchmark()
 bm.setName(name="simple_test")
 bm.setHostname(hostname=hostname)
-bm.setWarehouse(http_path=http_path)
-bm.setConcurrency(concurrency=2)
 bm.setWarehouseToken(token=access_token)
+bm.setWarehouse(http_path=http_path)
+bm.setConcurrency(concurrency=1)
 
 print("---- Specify query in code ------")
 query_str = """
 SELECT count(*)
 FROM delta.`/databricks-datasets/nyctaxi/tables/nyctaxi_yellow`
-WHERE passenger_count > 2
+WHERE passenger_count > 2;
 """
 bm.setQuery(query=query_str)
 bm.setCatalog(catalog="hive_metastore")
+bm.setSchema(schema="default")
 
-metrics = bm.execute()
-print(metrics)
+metrics_pdf = bm.execute()
+print(metrics_pdf)
 
 
 print("---- Specify a single query file ------")
+bm.query_file_format = "semicolon-delimited"
 bm.setQueryFile("queries/q1.sql")
-metrics = bm.execute()
-print(metrics)
+metrics_pdf = bm.execute()
+print(metrics_pdf)
 
 
-print("---- Specify a query directory ------")
+print("---- Specify a query directory semicolon format------")
+bm.query_file_format = "semicolon-delimited"
 bm.setQueryFileDir("queries")
-metrics = bm.execute()
-print(metrics)
+metrics_pdf = bm.execute()
+print(metrics_pdf)
 
 
+print("---- Specify a query directory original format------")
+bm.query_file_format = "original"
+bm.setQueryFileDir("queries_orig")
+metrics_pdf = bm.execute()
+print(metrics_pdf)
 
 
 print("---- Close connection ------")
 bm.sql_warehouse.close_connection()
+# res = bm.stop_warehouse("c0688d9c9c6d1091")
+# print(res)
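
The commented-out tail of this file exercises the new stop-warehouse call from commit 81f3da2. Uncommented, and assuming the signature shown in the comment (a warehouse ID string), the teardown would look like:

```python
# Stop the warehouse once the benchmark is done (ID is the example placeholder).
res = bm.stop_warehouse("c0688d9c9c6d1091")
print(res)
```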
40 changes: 40 additions & 0 deletions examples/beaker_standalone_tpch.py
@@ -0,0 +1,40 @@
+import os, sys
+import logging
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+from dotenv import load_dotenv
+load_dotenv()
+
+sys.path.append("../src")
+
+from beaker import benchmark, sqlwarehouseutils
+
+hostname = os.getenv("DATABRICKS_HOST")
+http_path = os.getenv("DATABRICKS_HTTP_PATH")
+# Don't put tokens in plaintext in code
+access_token = os.getenv("DATABRICKS_ACCESS_TOKEN")
+catalog_name = "samples"
+schema_name = "tpch"
+
+bm = benchmark.Benchmark()
+bm.setName(name="simple_test")
+bm.setHostname(hostname=hostname)
+bm.setWarehouseToken(token=access_token)
+bm.setWarehouse(http_path=http_path)
+bm.setConcurrency(concurrency=1)
+
+print("---- Test prewarm table ------")
+bm.setCatalog(catalog_name)
+bm.setSchema(schema_name)
+tables = ["customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier"]
+# bm.preWarmTables(tables=tables)
+
+bm.query_file_format = "original"
+bm.setQueryFileDir("tpch")
+metrics_pdf = bm.execute()
+print(metrics_pdf)
+
+print("---- Close connection ------")
+bm.sql_warehouse.close_connection()