Merge pull request #10 from goodwillpunning/addQuickstartExamples

Add getting started examples.
goodwillpunning · Mar 4, 2023 · 8bbbdd3 · 8bbbdd3
2 parents fdc4700 + ad5de2c
commit 8bbbdd3
Show file tree

Hide file tree

Showing 3 changed files with 113 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -44,7 +44,30 @@ Finally, calling the `.execute()` function runs the benchmark test.
 metrics = benchmark.execute()
 metrics.display()
 ```
+## Authentication
+`beaker` connects to the SQL warehouse or cluster using the Databricks REST API 2.0. As a result, connection information is needed.
 
+It's recommended that you do not hard-code authentication secrets. Instead, consider using environment variables.
+
+Example usage:
+
+```shell
+export DATABRICKS_HOST=<workspace-hostname>.databricks.com
+export DATABRICKS_HTTP_PATH=/sql/1.0/endpoints/<warehouse-id>
+export DATABRICKS_ACCESS_TOKEN=dapi01234567890
+```
+
+```python
+import os
+from beaker import Benchmark
+
+hostname = os.getenv("DATABRICKS_HOST")
+http_path = os.getenv("DATABRICKS_HTTP_PATH")
+token = os.getenv("DATABRICKS_ACCESS_TOKEN")
+
+benchmark = Benchmark(hostname=hostname, http_path=http_path, token=token)
+```                
+                
 ## Setting the benchmark queries to execute
 Beaker can execute benchmark queries in several formats:
 1. Execute a single query

diff --git a/examples/beaker_getting_started.dbc b/examples/beaker_getting_started.dbc
diff --git a/examples/beaker_getting_started.py b/examples/beaker_getting_started.py
@@ -0,0 +1,90 @@
+# Databricks notebook source
+from beaker import Benchmark
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC ## Create a new Benchmark Test
+
+# COMMAND ----------
+
+# Create a new benchmark test (connection details are supplied in the next cell)
+benchmark = Benchmark()
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC ## Run a Benchmark Test on an Existing SQL Warehouse/Cluster
+
+# COMMAND ----------
+
+import os
+
+# Note: Don't hard-code authentication secrets.
+# Instead, read them from environment variables (os.getenv returns None when a variable is unset).
+hostname = os.getenv("DATABRICKS_HOST")
+http_path = os.getenv("DATABRICKS_HTTP_PATH")
+access_token = os.getenv("DATABRICKS_ACCESS_TOKEN")
+
+# Define connection parameters
+# Use the builder pattern to add parameters for connecting to the warehouse
+benchmark.setHostname(hostname=hostname)
+benchmark.setWarehouse(http_path=http_path)
+benchmark.setConcurrency(concurrency=1)
+benchmark.setWarehouseToken(token=access_token)
+
+# Define the query to execute and target Catalog
+query_str = """
+SELECT count(*)
+  FROM delta.`/databricks-datasets/nyctaxi/tables/nyctaxi_yellow`
+ WHERE passenger_count > 2
+"""
+benchmark.setQuery(query=query_str)
+benchmark.setCatalog(catalog="hive_metastore")
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC ## Generate a query metrics report
+
+# COMMAND ----------
+
+# Run the benchmark!
+metrics = benchmark.execute()
+metrics.display()
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC ## Run Benchmark Test on a new SQL Warehouse
+
+# COMMAND ----------
+
+# Launch a new SQL warehouse to execute benchmark queries on
+new_warehouse_config = {
+  "type": "warehouse",
+  "runtime": "latest",
+  "size": "Large",
+  "min_num_clusters": 1,
+  "max_num_clusters": 3,
+  "enable_photon": True
+}
+
+# Create a new Benchmark Test object, reusing the hostname, token, and query defined in the cells above
+benchmark = Benchmark()
+benchmark.setHostname(hostname=hostname)
+benchmark.setWarehouseToken(token=access_token)
+benchmark.setQuery(query=query_str)
+benchmark.setCatalog(catalog="hive_metastore")
+benchmark.setWarehouseConfig(new_warehouse_config)
+
+# (Optional) "pre-warm" tables in the Delta Cache (runs a SELECT * to perform a full-scan)
+# benchmark.preWarmTables(tables=["table_a", "table_b", "table_c"])
+
+# Run the benchmark!
+metrics = benchmark.execute()
+metrics.display()
+
+# COMMAND ----------
+
+