diff --git a/README.md b/README.md index 23c9af7..0b72532 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,30 @@ Finally, calling the `.execute()` function runs the benchmark test. metrics = benchmark.execute() metrics.display() ``` +## Authentication +`beaker` connects to the SQL warehouse or cluster using the Databricks REST API 2.0. As a result, connection information is needed. +It's recommended that you do not hard-code authentication secrets. Instead consider using environment variables. + +Example usage: + +```shell +export DATABRICKS_HOST=<workspace-hostname>.databricks.com +export DATABRICKS_HTTP_PATH=/sql/1.0/endpoints/<warehouse-id> +export DATABRICKS_ACCESS_TOKEN=dapi01234567890 +``` + +```python +import os +from beaker import Benchmark + +hostname = os.getenv("DATABRICKS_HOST") +http_path = os.getenv("DATABRICKS_HTTP_PATH") +token = os.getenv("DATABRICKS_ACCESS_TOKEN") + +benchmark = Benchmark(hostname=hostname, http_path=http_path, token=token) +``` + ## Setting the benchmark queries to execute Beaker can execute benchmark queries is several formats: 1. Execute a single query diff --git a/examples/beaker_getting_started.dbc b/examples/beaker_getting_started.dbc new file mode 100644 index 0000000..1ade083 Binary files /dev/null and b/examples/beaker_getting_started.dbc differ diff --git a/examples/beaker_getting_started.py b/examples/beaker_getting_started.py new file mode 100644 index 0000000..d907c32 --- /dev/null +++ b/examples/beaker_getting_started.py @@ -0,0 +1,90 @@ +# Databricks notebook source +from beaker import Benchmark + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Create a new Benchmark Test + +# COMMAND ---------- + +# Create a new benchmark test +benchmark = Benchmark() + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Run a Benchmark Test on an Existing SQL Warehouse/Cluster + +# COMMAND ---------- + +import os + +# Note: Don't hard-code authentication secrets. +# Instead, consider using environment variables.
+hostname = os.getenv("DATABRICKS_HOST")
+http_path = os.getenv("DATABRICKS_HTTP_PATH")
+access_token = os.getenv("DATABRICKS_ACCESS_TOKEN")
+
+# Define connection parameters from the environment variables read above
+# Use the builder pattern to add parameters for connecting to the warehouse
+benchmark.setHostname(hostname=hostname)
+benchmark.setWarehouse(http_path=http_path)
+benchmark.setConcurrency(concurrency=1)
+benchmark.setWarehouseToken(token=access_token)
+
+# Define the query to execute and target Catalog
+query_str = """
+SELECT count(*)
+ FROM delta.`/databricks-datasets/nyctaxi/tables/nyctaxi_yellow`
+ WHERE passenger_count > 2
+"""
+benchmark.setQuery(query=query_str)
+benchmark.setCatalog(catalog="hive_metastore")
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC ## Generate a query metrics report
+
+# COMMAND ----------
+
+# Run the benchmark!
+metrics = benchmark.execute()
+metrics.display()
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC ## Run Benchmark Test on a new SQL Warehouse
+
+# COMMAND ----------
+
+# Launch a new SQL warehouse to execute benchmark queries on
+new_warehouse_config = {
+    "type": "warehouse",
+    "runtime": "latest",
+    "size": "Large",
+    "min_num_clusters": 1,
+    "max_num_clusters": 3,
+    "enable_photon": True,
+}
+
+# Create a new Benchmark Test object, reusing the hostname, token and query from above
+benchmark = Benchmark()
+benchmark.setHostname(hostname=hostname)
+benchmark.setWarehouseToken(token=access_token)
+benchmark.setQuery(query=query_str)
+benchmark.setCatalog(catalog="hive_metastore")
+benchmark.setWarehouseConfig(new_warehouse_config)
+
+# (Optional) "pre-warm" tables in the Delta Cache (runs a SELECT * to perform a full-scan)
+# benchmark.preWarmTables(tables=["table_a", "table_b", "table_c"])
+
+# Run the benchmark!
+metrics = benchmark.execute()
+metrics.display()
+
+# COMMAND ----------
+
+