Merge branch 'main' into add-readme-check
Showing 13 changed files with 369 additions and 9 deletions.
81 changes: 81 additions & 0 deletions
.github/workflows/sdk-featurestore_sample-automation-test-test_featurestore_vnet_samples.yml
@@ -0,0 +1,81 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: sdk-featurestore_sample-automation-test-test_featurestore_vnet_samples
# This file is created by sdk/python/readme.py.
# Please do not edit directly.
on:
  workflow_dispatch:
  schedule:
    - cron: "3 8/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - sdk/python/featurestore_sample/automation-test/**
      - .github/workflows/sdk-featurestore_sample-automation-test-test_featurestore_vnet_samples.yml
      - sdk/python/dev-requirements.txt
      - infra/bootstrapping/**
      - sdk/python/setup.sh
      - sdk/python/featurestore_sample
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
    - name: check out repo
      uses: actions/checkout@v2
    - name: setup python
      uses: actions/setup-python@v2
      with:
        python-version: "3.8"
    - name: pip install notebook reqs
      run: pip install -r sdk/python/dev-requirements.txt
    - name: azure login
      uses: azure/login@v1
      with:
        creds: ${{secrets.AZUREML_CREDENTIALS}}
    - name: bootstrap resources
      run: |
        echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
        bash bootstrap.sh
      working-directory: infra/bootstrapping
      continue-on-error: false
    - name: setup SDK
      run: |
        source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
        source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
        bash setup.sh
      working-directory: sdk/python
      continue-on-error: true
    - name: setup-cli
      run: |
        source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
        source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
        bash setup.sh
      working-directory: cli
      continue-on-error: true
    - name: setup feature-store resources
      run: |
        bash -x automation-test/setup-resources-vnet.sh automation-test/test_featurestore_vnet_samples.ipynb
      working-directory: sdk/python/featurestore_sample
      continue-on-error: true
    - name: run featurestore_sample/automation-test/test_featurestore_vnet_samples.ipynb
      run: |
        source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
        source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
        bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
        bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" replace_template_values "test_featurestore_vnet_samples.ipynb";
        [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
        papermill -k python test_featurestore_vnet_samples.ipynb test_featurestore_vnet_samples.output.ipynb
      working-directory: sdk/python/featurestore_sample/automation-test
    - name: upload notebook's working folder as an artifact
      if: ${{ always() }}
      uses: actions/upload-artifact@v2
      with:
        name: test_featurestore_vnet_samples
        path: sdk/python/featurestore_sample/automation-test
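The header comments say this workflow is produced by readme.py rather than edited by hand. A minimal regeneration sketch, assuming the script is run from sdk/python as those comments indicate:

cd sdk/python
python3 readme.py   # expected to rewrite the generated workflow files under .github/workflows/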
16 changes: 16 additions & 0 deletions
sdk/python/featurestore_sample/automation-test/feature_store_managed_vnet_config.yaml
@@ -0,0 +1,16 @@
managed_network:
  isolation_mode: allow_internet_outbound
  outbound_rules:
  - destination:
      service_resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourcegroups/<RESOURCE_GROUP>/providers/Microsoft.Storage/storageAccounts/<STORAGE_ACCOUNT_NAME>
      spark_enabled: 'true'
      subresource_target: dfs
    name: sourcerulefs
    type: private_endpoint
  - destination:
      service_resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourcegroups/<RESOURCE_GROUP>/providers/Microsoft.Keyvault/vaults/<KEY_VAULT_NAME>
      spark_enabled: 'true'
      subresource_target: vault
    name: defaultkeyvault
    type: private_endpoint
public_network_access: disabled
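This config adds two private-endpoint outbound rules (to the feature store's ADLS Gen2 account and key vault) and disables public network access. As a usage sketch, the setup script later in this commit applies it with the following commands; the resource names here are placeholders:

az ml feature-store update --file automation-test/feature_store_managed_vnet_config.yaml --name <FEATURESTORE_NAME> --resource-group <RESOURCE_GROUP>
az ml feature-store provision-network --name <FEATURESTORE_NAME> --resource-group <RESOURCE_GROUP> --include-spark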
63 changes: 63 additions & 0 deletions
sdk/python/featurestore_sample/automation-test/featurestore_vnet_job.py
@@ -0,0 +1,63 @@
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("AccessData").getOrCreate()

import os

for path, subdirs, files in os.walk("./"):
    for name in files:
        print(os.path.join(path, name))

print("======Test Vnet scenario======")
from azure.ai.ml import MLClient
from azure.ai.ml.identity import AzureMLOnBehalfOfCredential

# The <...> placeholders are substituted with real values by
# automation-test/setup-resources-vnet.sh before the job is submitted.
subscription_id = "<SUBSCRIPTION_ID>"
resource_group_name = "<RESOURCE_GROUP>"
featurestore_name = "<FEATURESTORE_NAME>"
project_ws_vnet = "<PROJECT_WORKSPACE_NAME_VNET>"

ml_client = MLClient(
    AzureMLOnBehalfOfCredential(),
    subscription_id,
    resource_group_name,
    featurestore_name,
)
feature_store = ml_client.workspaces.get()
fs_outbound_rules_len = len(feature_store.managed_network.outbound_rules)
print(f"Feature store {featurestore_name} has {fs_outbound_rules_len} outbound rules")
assert fs_outbound_rules_len == 5

ml_client = MLClient(
    AzureMLOnBehalfOfCredential(), subscription_id, resource_group_name, project_ws_vnet
)
project_ws = ml_client.workspaces.get()
ps_outbound_rules_len = len(project_ws.managed_network.outbound_rules)
print(f"Project workspace {project_ws_vnet} has {ps_outbound_rules_len} outbound rules")
assert ps_outbound_rules_len == 3

print("=======Clean up==========")
try:
    print("----Delete feature store----------")
    ml_client = MLClient(
        AzureMLOnBehalfOfCredential(),
        subscription_id=subscription_id,
        resource_group_name=resource_group_name,
    )

    result = ml_client.feature_stores.begin_delete(
        name=featurestore_name,
        permanently_delete=True,
        delete_dependent_resources=False,
    ).result()
    print(result)

    print("----Delete project workspace----------")
    result = ml_client.workspaces.begin_delete(
        name=project_ws_vnet,
        permanently_delete=True,
        delete_dependent_resources=False,
    ).result()
    print(result)
except Exception:
    # Best-effort cleanup; ignore failures so the test job itself does not fail.
    pass
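The assertions above expect five outbound rules on the feature store and three on the project workspace once provisioning completes. A hedged way to spot-check the same counts from the CLI, assuming the az ml extension's outbound-rule subcommands are available (names are placeholders):

az ml workspace outbound-rule list --workspace-name <FEATURESTORE_NAME> --resource-group <RESOURCE_GROUP> -o table
az ml workspace outbound-rule list --workspace-name <PROJECT_WORKSPACE_NAME_VNET> --resource-group <RESOURCE_GROUP> -o table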
21 changes: 21 additions & 0 deletions
sdk/python/featurestore_sample/automation-test/project_ws_managed_vnet_config.yaml
@@ -0,0 +1,21 @@
managed_network:
  isolation_mode: allow_internet_outbound
  outbound_rules:
  - destination:
      service_resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourcegroups/<RESOURCE_GROUP>/providers/Microsoft.Storage/storageAccounts/<STORAGE_ACCOUNT_NAME>
      spark_enabled: 'true'
      subresource_target: dfs
    name: projectsourcerule
    type: private_endpoint
  - destination:
      service_resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourcegroups/<RESOURCE_GROUP>/providers/Microsoft.Keyvault/vaults/<KEY_VAULT_NAME>
      spark_enabled: 'true'
      subresource_target: vault
    name: defaultfskeyvaultrule
    type: private_endpoint
  - destination:
      service_resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourcegroups/<RESOURCE_GROUP>/providers/Microsoft.MachineLearningServices/workspaces/<FEATURESTORE_NAME>
      spark_enabled: 'true'
      subresource_target: amlworkspace
    name: featurestorerule
    type: private_endpoint
70 changes: 70 additions & 0 deletions
sdk/python/featurestore_sample/automation-test/setup-resources-vnet.sh
@@ -0,0 +1,70 @@
SUBSCRIPTION_ID=$(az account show --query id -o tsv)
LOCATION=$(az ml workspace show --query location -o tsv)
RESOURCE_GROUP=$(az group show --query name -o tsv)
AML_WORKSPACE_NAME=$(az configure -l --query "[?name=='workspace'].value" -o tsv)
VERSION=$(((RANDOM%1000)+1))
PROJECT_WORKSPACE_NAME_VNET="fs-proj-ws"${VERSION}

## Create a project workspace
az ml workspace create --name $PROJECT_WORKSPACE_NAME_VNET --resource-group $RESOURCE_GROUP --location $LOCATION

## One-time run: configure outbound rules for the project workspace
NETWORK_YML="notebooks/sdk_and_cli/network_isolation/network.yml"
az ml workspace update --resource-group $RESOURCE_GROUP --name $PROJECT_WORKSPACE_NAME_VNET --file $NETWORK_YML

## One-time run: provision the managed network for the project workspace
az ml workspace provision-network --resource-group $RESOURCE_GROUP --name $PROJECT_WORKSPACE_NAME_VNET --include-spark
az ml workspace show --name $PROJECT_WORKSPACE_NAME_VNET --resource-group $RESOURCE_GROUP

## Create a feature store
FEATURESTORE_NAME="my-featurestore"${VERSION}
FEATURESTORE_YML="featurestore/featurestore.yaml"
sed -i "s/<FEATURESTORE_NAME>/$FEATURESTORE_NAME/g;
    s/<LOCATION>/$LOCATION/g;" $FEATURESTORE_YML
az ml feature-store create --file $FEATURESTORE_YML --subscription $SUBSCRIPTION_ID --resource-group $RESOURCE_GROUP

#STORAGE_ACCOUNT_NAME="fsst${VERSION}"
STORAGE_ACCOUNT_RESOURCE_ID=$(az ml feature-store show --name ${FEATURESTORE_NAME} --resource-group ${RESOURCE_GROUP} --query storage_account -o tsv)
STORAGE_ACCOUNT_NAME=${STORAGE_ACCOUNT_RESOURCE_ID##*/}
KEY_VAULT_RESOURCE_ID=$(az ml feature-store show --name ${FEATURESTORE_NAME} --resource-group ${RESOURCE_GROUP} --query key_vault -o tsv)
KEY_VAULT_NAME=${KEY_VAULT_RESOURCE_ID##*/}
STORAGE_FILE_SYSTEM_NAME_OFFLINE_STORE="offline-store"
STORAGE_FILE_SYSTEM_NAME_SOURCE_DATA="source-data"
STORAGE_FILE_SYSTEM_NAME_OBSERVATION_DATA="observation-data"
#az storage account create --name $STORAGE_ACCOUNT_NAME --enable-hierarchical-namespace true --resource-group $RESOURCE_GROUP --location $LOCATION --subscription $SUBSCRIPTION_ID
az storage fs create --name $STORAGE_FILE_SYSTEM_NAME_OFFLINE_STORE --account-name $STORAGE_ACCOUNT_NAME --subscription $SUBSCRIPTION_ID
az storage fs create --name $STORAGE_FILE_SYSTEM_NAME_SOURCE_DATA --account-name $STORAGE_ACCOUNT_NAME --subscription $SUBSCRIPTION_ID
az storage fs create --name $STORAGE_FILE_SYSTEM_NAME_OBSERVATION_DATA --account-name $STORAGE_ACCOUNT_NAME --subscription $SUBSCRIPTION_ID

# Disable public network access on the feature store's default ADLS Gen2 storage account
az storage account update --name $STORAGE_ACCOUNT_NAME --resource-group $RESOURCE_GROUP --subscription $SUBSCRIPTION_ID --public-network-access disabled

FEATURE_STORE_MANAGED_VNET_YML="automation-test/feature_store_managed_vnet_config.yaml"
sed -i "s/<SUBSCRIPTION_ID>/$SUBSCRIPTION_ID/g;
    s/<RESOURCE_GROUP>/$RESOURCE_GROUP/g;
    s/<STORAGE_ACCOUNT_NAME>/$STORAGE_ACCOUNT_NAME/g;
    s/<KEY_VAULT_NAME>/$KEY_VAULT_NAME/g;" $FEATURE_STORE_MANAGED_VNET_YML
az ml feature-store update --file $FEATURE_STORE_MANAGED_VNET_YML --name $FEATURESTORE_NAME --resource-group $RESOURCE_GROUP

# Provision the managed network to create the necessary private endpoints (may take approximately 20 minutes)
az ml feature-store provision-network --name $FEATURESTORE_NAME --resource-group $RESOURCE_GROUP --include-spark

# Check that the managed virtual network is correctly enabled
az ml feature-store show --name $FEATURESTORE_NAME --resource-group $RESOURCE_GROUP

# Update the project workspace to create private endpoints for the defined outbound rules (may take approximately 15 minutes)
PROJECT_WS_NAME_VNET_YAML="automation-test/project_ws_managed_vnet_config.yaml"
sed -i "s/<SUBSCRIPTION_ID>/$SUBSCRIPTION_ID/g;
    s/<RESOURCE_GROUP>/$RESOURCE_GROUP/g;
    s/<STORAGE_ACCOUNT_NAME>/$STORAGE_ACCOUNT_NAME/g;
    s/<FEATURESTORE_NAME>/$FEATURESTORE_NAME/g;
    s/<KEY_VAULT_NAME>/$KEY_VAULT_NAME/g;" $PROJECT_WS_NAME_VNET_YAML
az ml workspace update --file $PROJECT_WS_NAME_VNET_YAML --name $PROJECT_WORKSPACE_NAME_VNET --resource-group $RESOURCE_GROUP

az ml workspace show --name $PROJECT_WORKSPACE_NAME_VNET --resource-group $RESOURCE_GROUP

SDK_PY_JOB_FILE="automation-test/featurestore_vnet_job.py"
sed -i "s/<SUBSCRIPTION_ID>/$SUBSCRIPTION_ID/g;
    s/<RESOURCE_GROUP>/$RESOURCE_GROUP/g;
    s/<FEATURESTORE_NAME>/$FEATURESTORE_NAME/g;
    s/<PROJECT_WORKSPACE_NAME_VNET>/$PROJECT_WORKSPACE_NAME_VNET/g;" $SDK_PY_JOB_FILE
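One step worth verifying after this script runs is that public network access really was disabled on the feature store's default storage account. A minimal spot-check, assuming the same shell variables are still set:

az storage account show --name $STORAGE_ACCOUNT_NAME --resource-group $RESOURCE_GROUP --query publicNetworkAccess -o tsv   # expected: Disabled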
104 changes: 104 additions & 0 deletions
sdk/python/featurestore_sample/automation-test/test_featurestore_vnet_samples.ipynb
@@ -0,0 +1,104 @@
{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Use a serverless Spark compute"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You should have an attached Synapse Spark pool available in your workspace. Please see documentation page: [Attach and manage a Synapse Spark pool in Azure Machine Learning (preview)](https://learn.microsoft.com/azure/machine-learning/how-to-manage-synapse-spark-pool) for more details.\n",
    "\n",
    "**Note** - To ensure successful execution of Spark job, the identity being used for the Spark job should be assigned **Contributor** and **Storage Blob Data Contributor** roles on the Azure storage account used for data input and output."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from azure.ai.ml import MLClient, spark, Input, Output\n",
    "from azure.identity import DefaultAzureCredential\n",
    "from azure.ai.ml.entities import Environment\n",
    "\n",
    "subscription_id = \"<SUBSCRIPTION_ID>\"\n",
    "resource_group = \"<RESOURCE_GROUP>\"\n",
    "workspace = \"<AML_WORKSPACE_NAME>\"\n",
    "ml_client = MLClient(\n",
    "    DefaultAzureCredential(), subscription_id, resource_group, workspace\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import yaml\n",
    "\n",
    "contents = \"\"\n",
    "with open(\"../project/env/online.yml\", \"r\") as stream:\n",
    "    try:\n",
    "        contents = yaml.safe_load(stream)\n",
    "    except yaml.YAMLError as exc:\n",
    "        print(exc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "spark_job = spark(\n",
    "    display_name=\"featurestore_sample_test\",\n",
    "    code=\"../\",\n",
    "    entry={\"file\": \"automation-test/featurestore_vnet_job.py\"},\n",
    "    driver_cores=1,\n",
    "    driver_memory=\"1g\",\n",
    "    executor_cores=1,\n",
    "    executor_memory=\"1g\",\n",
    "    executor_instances=1,\n",
    "    resources={\n",
    "        \"instance_type\": \"Standard_E8S_V3\",\n",
    "        \"runtime_version\": \"3.2.0\",\n",
    "    },\n",
    "    conf={\"spark.synapse.library.python.env\": contents},\n",
    ")\n",
    "\n",
    "returned_spark_job = ml_client.jobs.create_or_update(spark_job)\n",
    "\n",
    "print(returned_spark_job.id)\n",
    "# Wait until the job completes\n",
    "ml_client.jobs.stream(returned_spark_job.name)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10 - SDK V2",
   "language": "python",
   "name": "python310-sdkv2"
  },
  "language_info": {
   "name": "python",
   "version": "3.7.10"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "6aeff17a1aa7735c2f7cb3a6d691fe1b4d4c3b8d2d650f644ad0f24e1b8e3f3f"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
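The notebook's note above says the identity running the Spark job needs the Contributor and Storage Blob Data Contributor roles on the storage account used for data input and output. A hedged sketch of granting those roles with the Azure CLI; the principal ID and scope below are placeholders, not values from this commit:

az role assignment create --assignee <JOB_IDENTITY_PRINCIPAL_ID> --role "Storage Blob Data Contributor" --scope /subscriptions/<SUBSCRIPTION_ID>/resourceGroups/<RESOURCE_GROUP>/providers/Microsoft.Storage/storageAccounts/<STORAGE_ACCOUNT_NAME>
az role assignment create --assignee <JOB_IDENTITY_PRINCIPAL_ID> --role "Contributor" --scope /subscriptions/<SUBSCRIPTION_ID>/resourceGroups/<RESOURCE_GROUP>/providers/Microsoft.Storage/storageAccounts/<STORAGE_ACCOUNT_NAME>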
4 changes: 2 additions & 2 deletions
sdk/python/featurestore_sample/featurestore/featurestore.yaml
@@ -1,6 +1,6 @@
 $schema: http://azureml/sdk-2-0/FeatureStore.json

-name: my_featurestore
+name: <FEATURESTORE_NAME>
 description: feature store description
 tags: {"k1":"v1", "k2":"v2"}
-location: eastus
+location: <LOCATION>
@@ -1196,9 +1196,7 @@
     "deleting": false
    }
   },
-  "tags": [
-   "active-ipynb"
-  ]
+  "tags": []
  },
  "outputs": [],
  "source": [