Skip to content

Commit

Permalink
Merge branch 'main' into add-readme-check
Browse files Browse the repository at this point in the history
  • Loading branch information
diondrapeck authored Jan 19, 2024
2 parents 3415e9f + 7e7fd35 commit bffb3e8
Show file tree
Hide file tree
Showing 13 changed files with 369 additions and 9 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: sdk-featurestore_sample-automation-test-test_featurestore_vnet_samples
# This file is created by sdk/python/readme.py.
# Please do not edit directly.
on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  # Minute 3 of every 12th hour starting at 08:00 UTC (08:03 and 20:03).
  schedule:
    - cron: "3 8/12 * * *"
  # Run on PRs to main that touch the sample, this workflow, or shared setup.
  pull_request:
    branches:
      - main
    paths:
      - sdk/python/featurestore_sample/automation-test/**
      - .github/workflows/sdk-featurestore_sample-automation-test-test_featurestore_vnet_samples.yml
      - sdk/python/dev-requirements.txt
      - infra/bootstrapping/**
      - sdk/python/setup.sh
      # NOTE(review): no trailing /** — GitHub treats this as an exact-path
      # filter, so it matches only a file literally at this path; confirm
      # whether sdk/python/featurestore_sample/** was intended.
      - sdk/python/featurestore_sample
# Cancel an in-flight run when a newer one starts for the same PR/ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: setup python
        uses: actions/setup-python@v2
        with:
          python-version: "3.8"
      - name: pip install notebook reqs
        run: pip install -r sdk/python/dev-requirements.txt
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      # Provision shared test infrastructure; a failure here fails the job.
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      # The three setup steps below are best-effort (continue-on-error: true);
      # only the notebook-run step actually gates the workflow result.
      - name: setup SDK
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: sdk/python
        continue-on-error: true
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      # Creates the VNet-isolated feature store and project workspace used by
      # the notebook (see automation-test/setup-resources-vnet.sh).
      - name: setup feature-store resources
        run: |
          bash -x automation-test/setup-resources-vnet.sh automation-test/test_featurestore_vnet_samples.ipynb
        working-directory: sdk/python/featurestore_sample
        continue-on-error: true
      # Execute the notebook headlessly with papermill; this step's result is
      # the real pass/fail signal of the workflow.
      - name: run featurestore_sample/automation-test/test_featurestore_vnet_samples.ipynb
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
          bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" replace_template_values "test_featurestore_vnet_samples.ipynb";
          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
          papermill -k python test_featurestore_vnet_samples.ipynb test_featurestore_vnet_samples.output.ipynb
        working-directory: sdk/python/featurestore_sample/automation-test
      # Always upload the executed notebook (even on failure) for debugging.
      - name: upload notebook's working folder as an artifact
        if: ${{ always() }}
        uses: actions/upload-artifact@v2
        with:
          name: test_featurestore_vnet_samples
          path: sdk/python/featurestore_sample/automation-test
2 changes: 1 addition & 1 deletion deploy-arm-templates-az-cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ az deployment group create -g $RESOURCE_GROUP \
codeId="$resourceScope/workspaces/$WORKSPACE/codes/score-sklearn/versions/1" \
scoringScript=score.py \
environmentId="$resourceScope/workspaces/$WORKSPACE/environments/sklearn-env/versions/$ENV_VERSION" \
model="$resourceScope/workspaces/$WORKSPACE/models/score-sklearn/versions/1" \
model="$resourceScope/workspaces/$WORKSPACE/models/sklearn/versions/1" \
endpointComputeType=Managed \
skuName=Standard_F2s_v2 \
skuCapacity=1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Managed-VNet update applied to the feature store by
# automation-test/setup-resources-vnet.sh; the <...> placeholders are
# substituted with sed before `az ml feature-store update --file`.
managed_network:
  isolation_mode: allow_internet_outbound
  # Two user-defined private-endpoint rules: one to the ADLS Gen2 (dfs)
  # storage account, one to the key vault.
  outbound_rules:
    - destination:
        service_resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourcegroups/<RESOURCE_GROUP>/providers/Microsoft.Storage/storageAccounts/<STORAGE_ACCOUNT_NAME>
        # NOTE(review): quoted string 'true', not a YAML boolean — the CLI
        # appears to accept it; confirm before normalizing to `true`.
        spark_enabled: 'true'
        subresource_target: dfs
      name: sourcerulefs
      type: private_endpoint
    - destination:
        service_resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourcegroups/<RESOURCE_GROUP>/providers/Microsoft.Keyvault/vaults/<KEY_VAULT_NAME>
        spark_enabled: 'true'
        subresource_target: vault
      name: defaultkeyvault
      type: private_endpoint
# Disable public access to the feature-store workspace itself.
public_network_access: disabled
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Spark job that validates the managed-VNet setup of a feature store and a
project workspace, then best-effort deletes both.

The ``<...>`` placeholders are replaced with real values by
``automation-test/setup-resources-vnet.sh`` (sed) before the job is submitted.
"""
from pyspark.sql import SparkSession

# A Spark session is required because this script runs as an AzureML Spark job.
spark = SparkSession.builder.appName("AccessData").getOrCreate()

import os

# Debug aid: list every file shipped into the job's working directory.
for path, subdirs, files in os.walk("./"):
    for name in files:
        print(os.path.join(path, name))

print("======Test Vnet scenario======")
from azure.ai.ml import MLClient
from azure.ai.ml.identity import AzureMLOnBehalfOfCredential

subscription_id = "<SUBSCRIPTION_ID>"
resource_group_name = "<RESOURCE_GROUP>"
featurestore_name = "<FEATURESTORE_NAME>"
project_ws_vnet = "<PROJECT_WORKSPACE_NAME_VNET>"

# Verify the feature store's managed network has the expected outbound rules.
ml_client = MLClient(
    AzureMLOnBehalfOfCredential(),
    subscription_id,
    resource_group_name,
    featurestore_name,
)
feature_store = ml_client.workspaces.get()
fs_outbound_rules_len = len(feature_store.managed_network.outbound_rules)
print(f"Feature store {featurestore_name} has {fs_outbound_rules_len} outbound rules")
# Expected total after provisioning; presumably the 2 rules from
# feature_store_managed_vnet_config.yaml plus provisioning defaults —
# update this count if the config files change. TODO confirm.
assert fs_outbound_rules_len == 5

# Verify the project workspace's managed network likewise.
ml_client = MLClient(
    AzureMLOnBehalfOfCredential(), subscription_id, resource_group_name, project_ws_vnet
)
project_ws = ml_client.workspaces.get()
ps_outbound_rules_len = len(project_ws.managed_network.outbound_rules)
print(f"Project workspace {project_ws_vnet} has {ps_outbound_rules_len} outbound rules")
assert ps_outbound_rules_len == 3

print("=======Clean up==========")
try:
    print("----Delete feature store----------")
    ml_client = MLClient(
        AzureMLOnBehalfOfCredential(),
        subscription_id=subscription_id,
        resource_group_name=resource_group_name,
    )

    result = ml_client.feature_stores.begin_delete(
        name=featurestore_name,
        permanently_delete=True,
        delete_dependent_resources=False,
    ).result()
    print(result)

    print("----Delete project workspace----------")
    # BUG FIX: the original called ml_client.workspace.begin_delete, but
    # MLClient exposes the operations group as `workspaces`. The resulting
    # AttributeError was silently swallowed by a bare `except: pass`, so the
    # project workspace was never deleted.
    result = ml_client.workspaces.begin_delete(
        name=project_ws_vnet,
        permanently_delete=True,
        delete_dependent_resources=False,
    ).result()
    print(result)
except Exception as exc:
    # Cleanup is deliberately best-effort: report the failure in the job log
    # instead of failing the job (the original swallowed it silently).
    print(f"Cleanup failed (ignored): {exc!r}")
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Managed-VNet update applied to the project workspace by
# automation-test/setup-resources-vnet.sh; the <...> placeholders are
# substituted with sed before `az ml workspace update --file`.
managed_network:
  isolation_mode: allow_internet_outbound
  # Three user-defined private-endpoint rules: the feature store's ADLS Gen2
  # (dfs) storage account, its key vault, and the feature store workspace.
  outbound_rules:
    - destination:
        service_resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourcegroups/<RESOURCE_GROUP>/providers/Microsoft.Storage/storageAccounts/<STORAGE_ACCOUNT_NAME>
        # NOTE(review): quoted string 'true', not a YAML boolean — the CLI
        # appears to accept it; confirm before normalizing to `true`.
        spark_enabled: 'true'
        subresource_target: dfs
      name: projectsourcerule
      type: private_endpoint
    - destination:
        service_resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourcegroups/<RESOURCE_GROUP>/providers/Microsoft.Keyvault/vaults/<KEY_VAULT_NAME>
        spark_enabled: 'true'
        subresource_target: vault
      name: defaultfskeyvaultrule
      type: private_endpoint
    - destination:
        service_resource_id: /subscriptions/<SUBSCRIPTION_ID>/resourcegroups/<RESOURCE_GROUP>/providers/Microsoft.MachineLearningServices/workspaces/<FEATURESTORE_NAME>
        spark_enabled: 'true'
        subresource_target: amlworkspace
      name: featurestorerule
      type: private_endpoint
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Provision the Azure resources needed by test_featurestore_vnet_samples.ipynb:
# a project workspace and a feature store, both locked down behind a managed
# virtual network. Intended to run from sdk/python/featurestore_sample (see the
# CI workflow), with an az CLI context already configured by bootstrapping.

# Discover subscription / location / resource group from the current az context.
SUBSCRIPTION_ID=$(az account show --query id -o tsv)
LOCATION=$(az ml workspace show --query location -o tsv)
RESOURCE_GROUP=$(az group show --query name -o tsv)
# NOTE(review): AML_WORKSPACE_NAME is assigned but never used below — kept for
# parity with sibling setup scripts; confirm before removing.
AML_WORKSPACE_NAME=$(az configure -l --query "[?name=='workspace'].value" -o tsv)
# Random 1..1000 suffix so repeated CI runs do not collide on resource names.
VERSION=$(((RANDOM%1000)+1))
PROJECT_WORKSPACE_NAME_VNET="fs-proj-ws"${VERSION}

## Create a project workspace
az ml workspace create --name $PROJECT_WORKSPACE_NAME_VNET --resource-group $RESOURCE_GROUP --location $LOCATION

## one-time run: config outbound rules for project workspace
NETWORK_YML="notebooks/sdk_and_cli/network_isolation/network.yml"
az ml workspace update --resource-group $RESOURCE_GROUP --name $PROJECT_WORKSPACE_NAME_VNET --file $NETWORK_YML

## one-time run: provision network for project workspace
az ml workspace provision-network --resource-group $RESOURCE_GROUP --name $PROJECT_WORKSPACE_NAME_VNET --include-spark
az ml workspace show --name $PROJECT_WORKSPACE_NAME_VNET --resource-group $RESOURCE_GROUP

## Create a featurestore
FEATURESTORE_NAME="my-featurestore"${VERSION}
FEATURESTORE_YML="featurestore/featurestore.yaml"
# Fill the <FEATURESTORE_NAME>/<LOCATION> placeholders in the YAML in place.
sed -i "s/<FEATURESTORE_NAME>/$FEATURESTORE_NAME/g;
s/<LOCATION>/$LOCATION/g;" $FEATURESTORE_YML
az ml feature-store create --file $FEATURESTORE_YML --subscription $SUBSCRIPTION_ID --resource-group $RESOURCE_GROUP

#STORAGE_ACCOUNT_NAME="fsst${VERSION}"
# Resolve the storage account and key vault that were created with the feature
# store; ${var##*/} keeps the last segment of the ARM resource id (the name).
STORAGE_ACCOUNT_RESOURCE_ID=$(az ml feature-store show --name ${FEATURESTORE_NAME} --resource-group ${RESOURCE_GROUP} --query storage_account -o tsv)
STORAGE_ACCOUNT_NAME=${STORAGE_ACCOUNT_RESOURCE_ID##*/}
# FIX: renamed from KEY_VALUE_RESOURCE_ID (typo for "key vault"); variable is
# local to this script, so the rename is safe.
KEY_VAULT_RESOURCE_ID=$(az ml feature-store show --name ${FEATURESTORE_NAME} --resource-group ${RESOURCE_GROUP} --query key_vault -o tsv)
KEY_VAULT_NAME=${KEY_VAULT_RESOURCE_ID##*/}
STORAGE_FILE_SYSTEM_NAME_OFFLINE_STORE="offline-store"
STORAGE_FILE_SYSTEM_NAME_SOURCE_DATA="source-data"
STORAGE_FILE_SYSTEM_NAME_OBSERVATION_DATA="observation-data"
#az storage account create --name $STORAGE_ACCOUNT_NAME --enable-hierarchical-namespace true --resource-group $RESOURCE_GROUP --location $LOCATION --subscription $SUBSCRIPTION_ID
# Create the ADLS Gen2 filesystems the notebook expects.
az storage fs create --name $STORAGE_FILE_SYSTEM_NAME_OFFLINE_STORE --account-name $STORAGE_ACCOUNT_NAME --subscription $SUBSCRIPTION_ID
az storage fs create --name $STORAGE_FILE_SYSTEM_NAME_SOURCE_DATA --account-name $STORAGE_ACCOUNT_NAME --subscription $SUBSCRIPTION_ID
az storage fs create --name $STORAGE_FILE_SYSTEM_NAME_OBSERVATION_DATA --account-name $STORAGE_ACCOUNT_NAME --subscription $SUBSCRIPTION_ID

# Disable the public network access for the above created default ADLS Gen2 storage account for the feature store
az storage account update --name $STORAGE_ACCOUNT_NAME --resource-group $RESOURCE_GROUP --subscription $SUBSCRIPTION_ID --public-network-access disabled

# Apply the managed-VNet outbound rules to the feature store.
FEATURE_STORE_MANAGED_VNET_YML="automation-test/feature_store_managed_vnet_config.yaml"
sed -i "s/<SUBSCRIPTION_ID>/$SUBSCRIPTION_ID/g;
s/<RESOURCE_GROUP>/$RESOURCE_GROUP/g;
s/<STORAGE_ACCOUNT_NAME>/$STORAGE_ACCOUNT_NAME/g;
s/<KEY_VAULT_NAME>/$KEY_VAULT_NAME/g;" $FEATURE_STORE_MANAGED_VNET_YML
az ml feature-store update --file $FEATURE_STORE_MANAGED_VNET_YML --name $FEATURESTORE_NAME --resource-group $RESOURCE_GROUP

# Provision network to create necessary private endpoints (it may take approximately 20 minutes)
az ml feature-store provision-network --name $FEATURESTORE_NAME --resource-group $RESOURCE_GROUP --include-spark

# Check that managed virtual network is correctly enabled
az ml feature-store show --name $FEATURESTORE_NAME --resource-group $RESOURCE_GROUP

# Update project workspace to create private endpoints for the defined outbound rules (it may take approximately 15 minutes)
PROJECT_WS_NAME_VNET_YAML="automation-test/project_ws_managed_vnet_config.yaml"
sed -i "s/<SUBSCRIPTION_ID>/$SUBSCRIPTION_ID/g;
s/<RESOURCE_GROUP>/$RESOURCE_GROUP/g;
s/<STORAGE_ACCOUNT_NAME>/$STORAGE_ACCOUNT_NAME/g;
s/<FEATURESTORE_NAME>/$FEATURESTORE_NAME/g;
s/<KEY_VAULT_NAME>/$KEY_VAULT_NAME/g;" $PROJECT_WS_NAME_VNET_YAML
az ml workspace update --file $PROJECT_WS_NAME_VNET_YAML --name $PROJECT_WORKSPACE_NAME_VNET --resource-group $RESOURCE_GROUP

az ml workspace show --name $PROJECT_WORKSPACE_NAME_VNET --resource-group $RESOURCE_GROUP

# Inject the real ids/names into the Spark job script submitted by the notebook.
SDK_PY_JOB_FILE="automation-test/featurestore_vnet_job.py"
sed -i "s/<SUBSCRIPTION_ID>/$SUBSCRIPTION_ID/g;
s/<RESOURCE_GROUP>/$RESOURCE_GROUP/g;
s/<FEATURESTORE_NAME>/$FEATURESTORE_NAME/g;
s/<PROJECT_WORKSPACE_NAME_VNET>/$PROJECT_WORKSPACE_NAME_VNET/g;" $SDK_PY_JOB_FILE
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use a serverless Spark compute"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"You should have an attached Synapse Spark pool available in your workspace. Please see documentation page: [Attach and manage a Synapse Spark pool in Azure Machine Learning (preview)](https://learn.microsoft.com/azure/machine-learning/how-to-manage-synapse-spark-pool) for more details.\n",
"\n",
"**Note** - To ensure successful execution of Spark job, the identity being used for the Spark job should be assigned **Contributor** and **Storage Blob Data Contributor** roles on the Azure storage account used for data input and output."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azure.ai.ml import MLClient, spark, Input, Output\n",
"from azure.identity import DefaultAzureCredential\n",
"from azure.ai.ml.entities import Environment\n",
"\n",
"subscription_id = \"<SUBSCRIPTION_ID>\"\n",
"resource_group = \"<RESOURCE_GROUP>\"\n",
"workspace = \"<AML_WORKSPACE_NAME>\"\n",
"ml_client = MLClient(\n",
" DefaultAzureCredential(), subscription_id, resource_group, workspace\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import yaml\n",
"\n",
"contents = \"\"\n",
"with open(\"../project/env/online.yml\", \"r\") as stream:\n",
" try:\n",
" contents = yaml.safe_load(stream)\n",
" except yaml.YAMLError as exc:\n",
" print(exc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"spark_job = spark(\n",
" display_name=\"featurestore_sample_test\",\n",
" code=\"../\",\n",
" entry={\"file\": \"automation-test/featurestore_vnet_job.py\"},\n",
" driver_cores=1,\n",
" driver_memory=\"1g\",\n",
" executor_cores=1,\n",
" executor_memory=\"1g\",\n",
" executor_instances=1,\n",
" resources={\n",
" \"instance_type\": \"Standard_E8S_V3\",\n",
" \"runtime_version\": \"3.2.0\",\n",
" },\n",
" conf={\"spark.synapse.library.python.env\": contents},\n",
")\n",
"\n",
"returned_spark_job = ml_client.jobs.create_or_update(spark_job)\n",
"\n",
"print(returned_spark_job.id)\n",
"# Wait until the job completes\n",
"ml_client.jobs.stream(returned_spark_job.name)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10 - SDK V2",
"language": "python",
"name": "python310-sdkv2"
},
"language_info": {
"name": "python",
"version": "3.7.10"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "6aeff17a1aa7735c2f7cb3a6d691fe1b4d4c3b8d2d650f644ad0f24e1b8e3f3f"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
4 changes: 2 additions & 2 deletions sdk/python/featurestore_sample/featurestore/featurestore.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
$schema: http://azureml/sdk-2-0/FeatureStore.json

name: my_featurestore
name: <FEATURESTORE_NAME>
description: feature store description
tags: {"k1":"v1", "k2":"v2"}
location: eastus
location: <LOCATION>
Original file line number Diff line number Diff line change
Expand Up @@ -1702,7 +1702,7 @@
"source": [
"### Check that managed virtual network is correctly enabled\n",
"### After provisioning the network, all the outbound rules should become active\n",
"### For this tutorial, you will see 5 outbound rules\n",
"### For this tutorial, you will see 6 outbound rules\n",
"!az ml feature-store show --name $featurestore_name --resource-group $featurestore_resource_group_name"
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1196,9 +1196,7 @@
"deleting": false
}
},
"tags": [
"active-ipynb"
]
"tags": []
},
"outputs": [],
"source": [
Expand Down
Loading

0 comments on commit bffb3e8

Please sign in to comment.