diff --git a/.github/workflows/sdk-responsible-ai-tabular-responsibleaidashboard-education-student-attrition-classificaton-responsibleaidashboard-education-student-attrition-classificaton.yml b/.github/workflows/sdk-responsible-ai-tabular-responsibleaidashboard-education-student-attrition-classificaton-responsibleaidashboard-education-student-attrition-classificaton.yml new file mode 100644 index 00000000000..66bcfcb7ff2 --- /dev/null +++ b/.github/workflows/sdk-responsible-ai-tabular-responsibleaidashboard-education-student-attrition-classificaton-responsibleaidashboard-education-student-attrition-classificaton.yml @@ -0,0 +1,77 @@ +# This code is autogenerated. +# Code is generated by running custom script: python3 readme.py +# Any manual changes to this file may cause incorrect behavior. +# Any manual changes will be overwritten if the code is regenerated. + +name: sdk-responsible-ai-tabular-responsibleaidashboard-education-student-attrition-classificaton-responsibleaidashboard-education-student-attrition-classificaton +# This file is created by sdk/python/readme.py. +# Please do not edit directly. +on: + workflow_dispatch: + schedule: + - cron: "51 3/12 * * *" + pull_request: + branches: + - main + paths: + - sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/** + - .github/workflows/sdk-responsible-ai-tabular-responsibleaidashboard-education-student-attrition-classificaton-responsibleaidashboard-education-student-attrition-classificaton.yml + - sdk/python/dev-requirements.txt + - infra/bootstrapping/** + - sdk/python/setup.sh +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: check out repo + uses: actions/checkout@v2 + - name: setup python + uses: actions/setup-python@v2 + with: + python-version: "3.8" + - name: pip install notebook reqs + run: pip install -r sdk/python/dev-requirements.txt + - name: pip install mlflow reqs + run: pip install -r sdk/python/mlflow-requirements.txt + - name: azure login + uses: azure/login@v1 + with: + creds: ${{secrets.AZUREML_CREDENTIALS}} + - name: bootstrap resources + run: | + echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; + bash bootstrap.sh + working-directory: infra/bootstrapping + continue-on-error: false + - name: setup SDK + run: | + source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh"; + source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh"; + bash setup.sh + working-directory: sdk/python + continue-on-error: true + - name: setup-cli + run: | + source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh"; + source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh"; + bash setup.sh + working-directory: cli + continue-on-error: true + - name: run responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/responsibleaidashboard-education-student-attrition-classificaton.ipynb + run: | + source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh"; + source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh"; + bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; + bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" replace_template_values "responsibleaidashboard-education-student-attrition-classificaton.ipynb"; + [ -f "../../.azureml/config" 
] && cat "../../.azureml/config"; + papermill -k python responsibleaidashboard-education-student-attrition-classificaton.ipynb responsibleaidashboard-education-student-attrition-classificaton.output.ipynb + working-directory: sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton + - name: upload notebook's working folder as an artifact + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: responsibleaidashboard-education-student-attrition-classificaton + path: sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton diff --git a/sdk/python/notebooks_config.ini b/sdk/python/notebooks_config.ini index 08481724c9c..4a53ee99005 100644 --- a/sdk/python/notebooks_config.ini +++ b/sdk/python/notebooks_config.ini @@ -53,3 +53,7 @@ COMPUTE_NAMES = "raitextcluster" [responsibleaidashboard-healthcare-covid-classification] USE_FORECAST_REQUIREMENTS = 0 COMPUTE_NAMES = "raitextcluster" + +[responsibleaidashboard-education-student-attrition-classificaton] +USE_FORECAST_REQUIREMENTS = 0 +COMPUTE_NAMES = "raitextcluster" diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/counterfactual_selectIndex0.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/counterfactual_selectIndex0.png new file mode 100644 index 00000000000..b83b4af173f Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/counterfactual_selectIndex0.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/counterfactual_start.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/counterfactual_start.png new file mode 100644 index 00000000000..73ad8a83390 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/counterfactual_start.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/counterfactual_update0.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/counterfactual_update0.png new file mode 100644 index 00000000000..bab2af73e8e Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/counterfactual_update0.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_chartViewAllData.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_chartViewAllData.png new file mode 100644 index 00000000000..fcc4715a391 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_chartViewAllData.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_errorCohortChart.png 
b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_errorCohortChart.png new file mode 100644 index 00000000000..b9b201c1103 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_errorCohortChart.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_noErrorCohortChart.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_noErrorCohortChart.png new file mode 100644 index 00000000000..d69f9020c5b Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_noErrorCohortChart.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_selectChartView.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_selectChartView.png new file mode 100644 index 00000000000..aa346509d33 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_selectChartView.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_selectTrueY.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_selectTrueY.png new file mode 100644 index 00000000000..a3d5ec4566c Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/dataAnalysis_selectTrueY.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_createCohort.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_createCohort.png new file mode 100644 index 00000000000..e734ba72ce0 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_createCohort.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_credithrs_higherrorcoverage.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_credithrs_higherrorcoverage.png new file mode 100644 index 00000000000..d2225cef2ea Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_credithrs_higherrorcoverage.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_credithrs_lowerrorcoverage.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_credithrs_lowerrorcoverage.png new file mode 100644 index 00000000000..39f1ce0d0bb Binary files 
/dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_credithrs_lowerrorcoverage.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_featureList.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_featureList.png new file mode 100644 index 00000000000..a6d91565cbf Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_featureList.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_featureListTab.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_featureListTab.png new file mode 100644 index 00000000000..ab8ebe5785c Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_featureListTab.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_heatMap_binSelect.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_heatMap_binSelect.png new file mode 100644 index 00000000000..f673d520beb Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_heatMap_binSelect.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_heatMap_errorCoverage.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_heatMap_errorCoverage.png new file mode 100644 index 00000000000..4b29560b043 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_heatMap_errorCoverage.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_lowTenure_highUtilization.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_lowTenure_highUtilization.png new file mode 100644 index 00000000000..1cd320df68d Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_lowTenure_highUtilization.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_lowTenure_lowUtilization.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_lowTenure_lowUtilization.png new file mode 100644 index 00000000000..4c985750ee2 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_lowTenure_lowUtilization.png differ diff 
--git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_saveCohort.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_saveCohort.png new file mode 100644 index 00000000000..17b89a9f836 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_saveCohort.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_saveCohort_noError.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_saveCohort_noError.png new file mode 100644 index 00000000000..71e4654d024 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_saveCohort_noError.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_wholeTree.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_wholeTree.png new file mode 100644 index 00000000000..c49b9e59216 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorAnalysis_wholeTree.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorTree_selectNewCohort.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorTree_selectNewCohort.png new file mode 100644 index 00000000000..159b7fb83af Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/errorTree_selectNewCohort.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/featureImportance_gender_classImportance.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/featureImportance_gender_classImportance.png new file mode 100644 index 00000000000..5fe6d155b9a Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/featureImportance_gender_classImportance.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/featureImportance_gender_viewDependence.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/featureImportance_gender_viewDependence.png new file mode 100644 index 00000000000..203b8b947e4 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/featureImportance_gender_viewDependence.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/featureImportance_global_allCohortsLegend.png 
b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/featureImportance_global_allCohortsLegend.png new file mode 100644 index 00000000000..09c4ac824fc Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/featureImportance_global_allCohortsLegend.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/featureImportance_global_onlyAllData.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/featureImportance_global_onlyAllData.png new file mode 100644 index 00000000000..2afc457c373 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/featureImportance_global_onlyAllData.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/heatMap_binThresholdSelect.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/heatMap_binThresholdSelect.png new file mode 100644 index 00000000000..90596c198ad Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/heatMap_binThresholdSelect.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/heatMap_heatMapSelect.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/heatMap_heatMapSelect.png new file mode 100644 index 00000000000..910b0741d15 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/heatMap_heatMapSelect.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_confusionMatrix.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_confusionMatrix.png new file mode 100644 index 00000000000..d2d666c983b Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_confusionMatrix.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_featureCohorts.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_featureCohorts.png new file mode 100644 index 00000000000..1cead0c25a9 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_featureCohorts.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_newCohort.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_newCohort.png new file mode 100644 index 00000000000..9ff4e5b07fb Binary files /dev/null and 
b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_newCohort.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_newGEDCohort.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_newGEDCohort.png new file mode 100644 index 00000000000..aee4216f68b Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_newGEDCohort.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_overview.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_overview.png new file mode 100644 index 00000000000..a34bff686bf Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_overview.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_probDistribution.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_probDistribution.png new file mode 100644 index 00000000000..3ca2da4907a Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/modelPerf_probDistribution.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/scorecard_location.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/scorecard_location.png new file mode 100644 index 00000000000..559bdba1bb6 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/scorecard_location.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/scorecard_summary.png b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/scorecard_summary.png new file mode 100644 index 00000000000..ed29e5e6889 Binary files /dev/null and b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/media/education/scorecard_summary.png differ diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/readme.md b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/readme.md new file mode 100644 index 00000000000..47c4a4017b4 --- /dev/null +++ b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/readme.md @@ -0,0 +1,358 @@ +# Higher Education: Student Attrition +This scenario demonstrates how the Azure Machine Learning Responsible AI dashboard (RAI dashboard) +can be used to understand student attrition at a higher education institution. Student attrition refers to students +leaving or dropping out of an educational institution before completing their program. 
+Student attrition rates are a key performance indicator for any higher education provider and have a significant
+impact on students. Understanding what causes student attrition (e.g., financial status, academic performance, or student wellbeing)
+is a complex challenge that is often specific to individual student circumstances.
+
+The RAI dashboard can help higher education decision makers better understand student attrition while informing
+the creation of interventions to improve student support services and foster student engagement and belonging.
+The RAI dashboard also provides tools that can be used to debug the machine learning model in detail, to
+understand the model's predictions and errors and to ensure decisions are made in a fair and equitable manner.
+
+Tutorial outline:
+
+- [Deploy and run the notebook](#deploy-and-run-the-notebook)
+- [Synthetic dataset and model descriptions](#synthetic-dataset-and-model)
+- [Debugging the classification model via RAI dashboard components](#debugging-the-classification-model)
+  - [Error analysis](#error-analysis)
+  - [Model overview and performance analysis](#model-overview-and-performance-analysis)
+  - [Fairness](#fairness)
+  - [Data analysis](#data-analysis)
+  - [Explainability and feature importance](#explainability)
+  - [Counterfactual analysis](#counterfactual-analysis)
+  - [Causal analysis](#causal-analysis)
+- [Communicate your model stats with stakeholders](#communicate-your-model-stats-with-stakeholders)
+  - [Scorecard](#scorecard)
+- [Conclusion](#conclusion)
+
+
+## Deploy and run the notebook
+This demo relies on a single, included Jupyter notebook.
+This notebook does the following:
+- Loads and transforms data
+- Trains and tests a model
+- Creates a Responsible AI dashboard
+
+Once you load this notebook into your Azure ML studio workspace, follow the steps described in the notebook to create your RAI dashboard.
+Once the RAI dashboard pipeline is complete, return to this page to explore the dashboard.
+
+
+For help finding the RAI dashboard, please review this [information](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-responsible-ai-dashboard).
+
+
+## Synthetic dataset and model
+
+**Machine learning model** -- The notebook trains a Logistic Regression model to predict whether
+a student will leave a higher education program prior to completion (student attrition) or stay enrolled (retention).
+
+**WARNING: Use of synthetic data**
+This accelerator was developed using synthetic data to emphasize the importance of data
+privacy when analyzing student data.
+For this reason, you may find some anomalies in certain metrics or dashboard components.
+These should not distract from the demonstration.
+The tools and techniques described here remain valid, despite any data shortcomings that may be present.
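+
+For orientation, here is a minimal, illustrative sketch of the kind of model the notebook builds: a scikit-learn
+preprocessing-plus-LogisticRegression pipeline trained on the synthetic attrition data. The actual training in this
+scenario runs inside an AzureML pipeline component, and the column lists below are abbreviated examples rather than
+the full feature set described in the data dictionary that follows.
+
+```python
+import pandas as pd
+from sklearn.compose import ColumnTransformer
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
+
+# Abbreviated example columns; see the data dictionary below for the full list.
+numeric_cols = ["CumulativeGPA", "CumulativeCreditHoursEarnedPerTerm", "Total_Terms"]
+categorical_cols = ["Gender", "Race", "HSGraduateorGED"]
+
+# Hypothetical local copy of the synthetic dataset used by the notebook.
+df = pd.read_csv("Fabricated_Student_Attrition_Data.csv")
+X = df[numeric_cols + categorical_cols]
+y = df["Attrition"]
+
+preprocess = ColumnTransformer(
+    [
+        ("num", StandardScaler(), numeric_cols),
+        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
+    ]
+)
+model = Pipeline([("prep", preprocess), ("clf", LogisticRegression(max_iter=1000))])
+
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.25, random_state=0, stratify=y
+)
+model.fit(X_train, y_train)
+print("test accuracy:", model.score(X_test, y_test))
+```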
+
+
+**Data dictionary for synthetic dataset:**
+
+| Type | Feature Name | Feature Description |
+|--------|-------------------------------------|-------------------------------------------------------------|
+| Feature | FirstGenerationinCollegeFlag | Institution classification of student as a first-generation college student |
+| Feature | Gender | Gender of the student |
+| Feature | Race | Student race/ethnicity, per IPEDS definition |
+| Feature | HSGraduateorGED | Indicator if student received a HS diploma, GED, or other |
+| Feature | Age_Term_Min | Student age at start of the first enrollment term |
+| Feature | Age_Term_Max | Student age at start of the most recent enrollment term |
+| Feature | Total_Terms | Total number of terms enrolled |
+| Feature | Entry_Type_DualEnrollment | Student entry into university classification |
+| Feature | Entry_Type_EarlyAdmission | Student entry into university classification |
+| Feature | Entry_Type_FirstTimeinCollege | Student entry into university classification |
+| Feature | Entry_Type_Re-Entry | Student entry into university classification |
+| Feature | Entry_Type_Transfer | Student entry into university classification |
+| Feature | AcademicProbation | Normalized level of academic status |
+| Feature | AcademicSuspension | Normalized level of academic status |
+| Feature | AcademicWarning | Normalized level of academic status |
+| Feature | GoodAcademicStanding | Normalized level of academic status |
+| Feature | ProbationAfterSuspen/Dismiss | Normalized level of academic status |
+| Feature | TransferedToNonBusiness | Student changed program to a non-business program |
+| Feature | CumulativeGPA | Cumulative university GPA |
+| Feature | CumulativeCreditHoursEarnedPerTerm | Cumulative university credit hours earned |
+| Feature | Blended | Percent of credit hours with specified delivery mode |
+| Feature | FullyOnline | Percent of credit hours with specified delivery mode |
+| Feature | RemoteLearning | Percent of credit hours with specified delivery mode |
+| Feature | RemoteLearningBlended | Percent of credit hours with specified delivery mode |
+| Feature | Traditional | Percent of credit hours with specified delivery mode |
+| Feature | Adjunct | Percent of credit hours with specified instructor type |
+| Feature | Faculty | Percent of credit hours with specified instructor type |
+| Feature | Unknown_IntructorType | Percent of credit hours with specified instructor type |
+| Feature | PELL_Eligible | Indicates if a student is PELL grant eligible (1=yes, 0=no) |
+| Feature | Dorm_Resident | Indicates if a student lives in the campus dormitory (1=yes, 0=no) |
+| Target Variable | Attrition | Model target variable. Indicates student attrition (1=yes, 0=no) |
+
+Follow this link to find a more detailed [data dictionary](./data_dictionary/Data_dictionary_Education.xlsx) for this scenario.
+
+
+## Debugging the classification model
+In data science and software development, the word debugging usually refers to finding and removing errors in a piece of code.
+With the Responsible AI (RAI) dashboard, we can debug a machine learning model and improve its overall performance and the responsible AI aspects of its predictions.
+
+The RAI dashboard can be configured to include the following components, each of which is illustrated below for this scenario.
+
+- [Error analysis](https://learn.microsoft.com/en-us/azure/machine-learning/concept-error-analysis?view=azureml-api-2), to view and understand how errors are distributed in your dataset.
+- [Model overview and fairness assessment](https://learn.microsoft.com/en-us/azure/machine-learning/concept-fairness-ml?view=azureml-api-2), to evaluate the performance of your model and evaluate your model's group fairness issues (how your model's predictions affect diverse groups of people).
+- [Data analysis](https://learn.microsoft.com/en-us/azure/machine-learning/concept-data-analysis?view=azureml-api-2), to understand and explore your dataset distributions and statistics.
+- [Model interpretability](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-machine-learning-interpretability?view=azureml-api-2) (importance values for aggregate and individual features), to understand your model's predictions and how those overall and individual predictions are made.
+- [Counterfactual what-if](https://learn.microsoft.com/en-us/azure/machine-learning/concept-counterfactual-analysis?view=azureml-api-2), to observe how feature perturbations would affect your model predictions while providing the closest data points with opposing or different model predictions.
+- [Causal analysis](https://learn.microsoft.com/en-us/azure/machine-learning/concept-causal-inference?view=azureml-api-2), to use historical data to view the causal effects of treatment features on real-world outcomes.
+
+For more information, see the [Microsoft Learn Documentation](https://learn.microsoft.com/en-us/azure/machine-learning/concept-responsible-ai-dashboard).
+
+### Error analysis
+At the top of the RAI dashboard you will find the error analysis tree map.
+This chart simplifies the process of discovering and highlighting common failure patterns.
+Look for the nodes with the darkest red color (i.e. high error rate) and a high fill line (i.e. high error coverage).
+Error rate is the percentage of the selected node's datapoints that receive erroneous predictions.
+Error coverage is the percentage of all errors that are concentrated in the given node.
+(A short code sketch showing how these two metrics are computed appears below.)
+
+The Error Tree visualization helps you uncover your model's "blind spots".
+In this context, a blind spot is a group of datapoints, or a cohort, for which the model is less accurate and performant.
+This could be any group of observations that, when grouped by a common characteristic, shows significantly worse model performance.
+
+Looking at the root node of the tree (representing errors on all data), we see the error rate for all predictions is
+497 out of 2500, or 19.88%. The root node has 100% error coverage because all 2500 students are contained within the root node.
+
+![Error-Analysis-00](./media/education/errorAnalysis_wholeTree.png)
+
+**Explore nodes** – Looking at the right branch shown below, with CumulativeCreditHoursEarnedPerTerm > 23.93
+and CumulativeCreditHoursEarnedPerTerm <= 60.63, we see an error rate of over 32%, higher than the base error rate.
+This branch accounts for 74.25% of the total error coverage.
+
+![Error-Analysis-01](./media/education/errorAnalysis_credithrs_higherrorcoverage.png)
+
+This contrasts sharply with students who are dorm residents but have CumulativeCreditHoursEarnedPerTerm <= 23.93, a group with an error rate of 5.24%.
+
+![Error-Analysis-01.1](./media/education/errorAnalysis_credithrs_lowerrorcoverage.png)
+
+These two cohorts are good candidates to save for further exploration. Click on the erroneous node again and choose the "Save as new cohort" button at the upper-right corner of the tree map.
+
+![Error-Analysis-02](./media/education/errorAnalysis_createCohort.png)
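+
+As an aside, the error rate and error coverage shown on every tree-map node are easy to reproduce outside the dashboard.
+The sketch below uses small toy arrays; in practice `y_true`, `y_pred`, and the node-membership mask would come from your
+test labels, your model's predictions, and the node's filter conditions.
+
+```python
+import numpy as np
+
+# Toy stand-ins for the quantities the dashboard computes per node.
+y_true = np.array([1, 0, 1, 1, 0, 0, 1, 0])   # true Attrition labels
+y_pred = np.array([1, 0, 0, 1, 1, 0, 1, 0])   # model predictions
+in_node = np.array([True, True, True, True, False, False, False, False])  # membership in one tree-map node
+
+errors = y_true != y_pred
+error_rate = errors[in_node].mean()                     # share of the node's datapoints that are misclassified
+error_coverage = errors[in_node].sum() / errors.sum()   # share of all errors concentrated in this node
+print(f"error rate: {error_rate:.0%}, error coverage: {error_coverage:.0%}")
+```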
+
+Then pick a useful name for the cohort.
+It can be helpful to copy the node filters directly into the cohort name.
+We called this cohort "23 < CreditHourse < 61".
+
+![Error-Analysis-03](./media/education/errorAnalysis_saveCohort.png)
+
+Similarly, click on the previously selected left node and save that as a new cohort too!
+We named it "Dorm Resident and Credit Hours < 24".
+
+![Error-Analysis-03.1](./media/education/errorAnalysis_saveCohort_noError.png)
+
+**Top features leading to errors** – Click on the feature list icon at the top of the error analysis section.
+This will surface a list of features, ranked by their correlations to the model's errors.
+
+![Error-Analysis-04](./media/education/errorAnalysis_featureListTab.png)
+
+![Error-Analysis-05](./media/education/errorAnalysis_featureList.png)
+
+The heat map component can also be quite useful.
+Start by selecting the heat map view.
+
+![Heat-Map-00](./media/education/heatMap_heatMapSelect.png)
+
+The heat map is useful for taking a closer look at certain groups, to explore different "slices" of the data.
+Select up to two features to see how errors are distributed across these groups.
+Find the heat map by selecting the "Heat map" tab, next to the "Feature list" tab shown above.
+Each cell of the heat map represents a slice of the dataset,
+and shows the percentage of errors out of the total number of data points in that slice.
+Like the error tree nodes, you can "save a new cohort" based on your selected cells on the heat map.
+
+As an example, select "CumulativeCreditHoursEarnedPerTerm" as Feature 1 and Dorm_Resident as Feature 2.
+Use the default binning threshold.
+
+
+![Heat-Map-01](./media/education/heatMap_binThresholdSelect.png)
+
+Next, hover over each of the squares.
+You will see Error Rate and Error Coverage.
+Overall, the model has the lowest predictive error for students who have high credit hours and are not dorm residents.
+The error is higher when credit hours are lower and students are dorm residents.
+
+![Heat-Map-02](./media/education/errorAnalysis_heatMap_errorCoverage.png)
+
+**Custom cohorts** -- Outside the error analysis experience, you can also create custom cohorts.
+Simply click on the "+ New cohort" button near the top of the chart.
+You will be presented with a menu to identify the features and filters that can be used to form a cohort.
+
+![Custom-Cohorts-00](./media/education/errorTree_selectNewCohort.png)
+
+### Model overview and performance analysis
+Here we can look at all the data and compare individual cohorts.
+Each of the custom cohorts defined above is included in this view.
+You will see the differences in accuracy metrics across our cohorts.
+
+![Model-Perf-00](./media/education/modelPerf_overview.png)
+
+Similar investigations can be performed using the visualization options presented below the metrics table.
+
+![Model-Perf-00.1](./media/education/modelPerf_confusionMatrix.png)
+
+These present a variety of options, including several bar chart views and a confusion matrix. They can be customized by different dimensions, such as cohort or metric.
+
+![Model-Perf-00.2](./media/education/modelPerf_probDistribution.png)
+
+### Fairness
+Does this model consistently predict student attrition when considering specific student population segments?
+The RAI dashboard can help identify whether different groups within your overall population are being impacted differently by model results.
+These can be cohorts you previously defined or newly defined groups.
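+
+The feature-cohort analysis described next disaggregates model metrics by a chosen feature. Outside the dashboard, the
+same kind of view can be reproduced in a few lines with [Fairlearn](https://fairlearn.org/). The sketch below uses small
+toy arrays; in practice the labels, predictions, and grouping column (for example `HSGraduateorGED` from the test set)
+would come from your own data and model.
+
+```python
+import pandas as pd
+from sklearn.metrics import accuracy_score
+from fairlearn.metrics import MetricFrame, false_positive_rate
+
+# Toy stand-ins for the test labels, model predictions, and grouping feature.
+y_true = pd.Series([1, 0, 1, 0, 1, 0, 1, 0])
+y_pred = pd.Series([1, 0, 0, 0, 1, 1, 1, 1])
+groups = pd.Series(["HS", "HS", "HS", "HS", "GED", "GED", "GED", "GED"])
+
+mf = MetricFrame(
+    metrics={"accuracy": accuracy_score, "false_positive_rate": false_positive_rate},
+    y_true=y_true,
+    y_pred=y_pred,
+    sensitive_features=groups,
+)
+print(mf.by_group)      # accuracy and false positive rate broken out per group
+print(mf.difference())  # largest between-group gap for each metric
+```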
+
+**Configure the Feature Analysis tool** -- At the top of the Model overview section, select the Feature cohort option.
+This component allows you to look more closely at how the model performs with respect to certain features.
+Use the Feature(s) drop-down to select HSGraduateorGED.
+
+![Model-Perf-01](./media/education/modelPerf_featureCohorts.png)
+
+Here you will see that there are significant differences in model performance between high school graduates and GED recipients.
+Overall, the model accuracy is higher for high school graduates than for GED recipients.
+Further, the false positive rate is much higher for GED recipients, indicating that this subgroup is more likely to be incorrectly predicted by the model to attrit.
+
+Let's create new cohorts, using the HSGraduateorGED feature, for only GED recipients.
+Click on the "+ New cohort" button and apply the filters.
+After defining the cohort criteria, click "Add filter" and then "Save."
+
+![Model-Perf-02](./media/education/modelPerf_newGEDCohort.png)
+
+Do this for both GED recipients and high school graduates.
+These cohorts will be very useful when applying the next dashboard component.
+
+### Data analysis
+Next, we come to the data analysis section.
+This tool allows you to look at the data behind the cohorts
+and can often give you clues as to why some groups are more error-prone than others.
+This information allows you to not just identify where the blind spots are,
+but also understand why.
+For example, your training data may have only a handful of observations for the error-prone cohort.
+
+Start by selecting "Chart view," then:
+- Select your cohort
+- Select the y-axis and change it to "Count"
+- Select the x-axis and change it to "True Y" data
+
+This will allow us to explore the "ground truth" data from the cohort.
+
+![Data-Analysis-00](./media/education/dataAnalysis_selectChartView.png)
+
+Let's now look at the two cohorts from the error tree.
+Keep the axis labels the same and toggle to the error-prone cohort (23 < CreditHourse < 61).
+Here we see the data is fairly balanced between student retention and attrition.
+
+![Data-Analysis-02](./media/education/dataAnalysis_errorCohortChart.png)
+
+When you switch the cohort to the group without any errors (Dorm Resident and Credit Hours < 24),
+we see that this population is more heavily weighted towards attrition.
+
+![Data-Analysis-03](./media/education/dataAnalysis_noErrorCohortChart.png)
+
+To improve performance, a next step might be to gather more data for each group and each class
+and retrain the model.
+
+### Explainability
+What factors are causing the model to mark certain students as "Attrition"?
+The explainability component shows feature importance values at the global and individual level.
+It consists of two views: Aggregate Feature Importance and Individual Feature Importance.
+
+
+1.) **Global explainability (Aggregate feature importance)** -- This dashboard component allows you to see which model features are most influential in determining model outcomes across all datapoints.
+
+Select the "Aggregate feature importance" option, at the top of the section,
+to analyze the top overall feature importance values.
+Use the slider directly above the chart to select the number of features that are displayed.
+You can activate or deactivate your created cohorts by toggling them on or off,
+using the legend below the chart.
+
+![Feature-Importance-00](./media/education/featureImportance_global_allCohortsLegend.png)
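+
+The dashboard computes these aggregate importances with its built-in explainer. As a rough, model-agnostic cross-check
+outside the dashboard, permutation importance gives a similar global ranking. This sketch assumes the fitted `model` and
+the `X_test` / `y_test` split from the earlier training sketch; it is not how the dashboard itself derives its values.
+
+```python
+from sklearn.inspection import permutation_importance
+
+# Assumes `model`, `X_test`, and `y_test` from the training sketch above.
+result = permutation_importance(model, X_test, y_test, n_repeats=10, random_state=0)
+
+# Rank features by how much shuffling each one degrades accuracy.
+for name, score in sorted(zip(X_test.columns, result.importances_mean), key=lambda t: -t[1]):
+    print(f"{name:>40s}  {score:.4f}")
+```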
+
+Click on the legend to remove certain cohorts from the chart.
+For this analysis, use the slider to see the top four most important features,
+and remove all cohorts except the "All Data" cohort.
+Observing the overall feature importance values on the entire test set, we see that "CumulativeCreditHoursEarnedPerTerm", "Dorm_Resident", "PELL_Eligible", and "CumulativeGPA" are the dominant features across all datapoints.
+The model uses them as strong indicators for predicting student attrition or retention.
+
+![Feature-Importance-00.1](./media/education/featureImportance_global_onlyAllData.png)
+
+Let's take a closer look at the importance of the Gender feature.
+Using the "Class importance weights" dropdown on the right of the chart,
+select "Class: Attrition" to see how the model is using Gender values when predicting student attrition.
+
+![Feature-Importance-01](./media/education/featureImportance_gender_classImportance.png)
+
+We see a very different model prediction pattern between Females, Males, and Unreported.
+The Gender value of "Male" positively impacts the prediction of attrition while "Female" is a negative indicator.
+This indicates a significant imbalance in how the model predicts student attrition for Females and Males.
+One action item is to explore and better balance the number of Females in the training set across the "Attrition" and "Retain" classes and retrain the model,
+or use any of the unfairness mitigation algorithms covered in [Fairlearn](https://fairlearn.org/)
+to mitigate the observed fairness issues.
+
+![Feature-Importance-02](./media/education/featureImportance_gender_viewDependence.png)
+
+### Counterfactual analysis
+Now let's look at a male student that the model predicted to be retained and see what changes to the underlying features of that datapoint would yield a different predicted outcome.
+
+Begin by updating the y-axis value to "Predicted Y,"
+and the x-axis to the probability of the predicted class, if not already selected.
+
+![Counterfactual-00](./media/education/counterfactual_start.png)
+
+For example, use the "Selected datapoint" drop down to select a datapoint of interest, such as Index 0.
+Then click on "Create what-if counterfactual."
+
+![Counterfactual-01](./media/education/counterfactual_selectIndex0.png)
+
+This will bring you to a chart where you can see the current scenario at the top,
+where the predicted class is "Retain,"
+and the next 10 datapoints showcase several other scenarios that would likely result in the opposite prediction.
+Comparing these rows with the original datapoint shows which feature changes are most likely to flip the model's prediction for this student.
+
+Now, go to the bottom of the chart to perturb any feature values of the selected datapoint and see how the prediction changes.
+For instance, change one of the influential features identified earlier (such as CumulativeCreditHoursEarnedPerTerm or CumulativeGPA) and observe whether the predicted value flips to "Attrition."
+This kind of insight can help advisors understand which changes in a student's circumstances are associated with a different predicted outcome, and can inform the design of targeted interventions.
+
+To see this alternate outcome on the original chart,
+click on the "Save as a new datapoint" button,
+at the bottom of the screen.
+
+![Counterfactual-03](./media/education/counterfactual_update0.png)
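+
+In this tutorial the dashboard (including the causal analysis component listed in the outline) is generated by the
+AzureML RAI pipeline components defined in the notebook. For completeness, the sketch below shows how equivalent
+insights can be computed locally with the open-source `responsibleai` package; it assumes the trained pipeline `model`
+and the `data_train` / `data_test` DataFrames (still containing the "Attrition" column) from the notebook, and the
+treatment feature passed to the causal component is only an illustrative choice.
+
+```python
+from responsibleai import RAIInsights
+from raiwidgets import ResponsibleAIDashboard
+
+# Assumes: `model` is the trained classifier and `data_train` / `data_test`
+# are pandas DataFrames that include the "Attrition" target column.
+rai_insights = RAIInsights(
+    model=model,
+    train=data_train,
+    test=data_test,
+    target_column="Attrition",
+    task_type="classification",
+)
+
+rai_insights.explainer.add()            # aggregate and individual feature importance
+rai_insights.error_analysis.add()       # error tree and heat map
+rai_insights.counterfactual.add(total_CFs=10, desired_class="opposite")  # what-if examples
+rai_insights.causal.add(treatment_features=["CumulativeCreditHoursEarnedPerTerm"])  # illustrative treatment feature
+
+rai_insights.compute()
+ResponsibleAIDashboard(rai_insights)    # serves the dashboard locally
+```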
+
+
+## Communicate your model stats with stakeholders
+
+### Scorecard
+The RAI dashboard is accompanied by a scorecard that allows you to easily share information about your model and predictions with people who cannot easily access the Azure ML studio.
+
+In this example, the scorecard is generated as a named output.
+The easiest way to find it is by navigating to the folder in your Azure ML studio workspace where you saved the Jupyter notebook.
+
+![Scorecard-00](./media/education/scorecard_location.png)
+
+
+Here you will find a PDF document that contains valuable information about the model itself and its performance (see example below).
+This is only the top portion of the scorecard.
+The complete version includes more performance metrics, cohort-specific performance, fairness insights, and model explanations.
+You can easily share it with others for auditing purposes or for helping other stakeholders build trust with your AI systems.
+
+
+![Scorecard-01](./media/education/scorecard_summary.png)
+
+
+## Conclusion
+The Responsible AI dashboard provides valuable tools to help you debug model performance and improve the student experience.
+In this example, we saw how RAI dashboard components provided valuable insight into how the model predicted different outcomes for Females and Males.
+Besides the debugging experience, the counterfactual analysis allows you to identify specific scenarios that will result in a different model outcome.
+By incorporating these tools into your model development process, you can help ensure your models are being developed fairly,
+are performant for all groups of people, and at the same time deliver positive outcomes for your students and your institution.
diff --git a/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/responsibleaidashboard-education-student-attrition-classificaton.ipynb b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/responsibleaidashboard-education-student-attrition-classificaton.ipynb
new file mode 100644
index 00000000000..76b43b7952c
--- /dev/null
+++ b/sdk/python/responsible-ai/tabular/responsibleaidashboard-education-student-attrition-classificaton/responsibleaidashboard-education-student-attrition-classificaton.ipynb
@@ -0,0 +1,1933 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a671f6c6-c6fb-442d-aef6-fb7c282e0221",
+ "metadata": {
+ "nteract": {
+ "transient": {
+ "deleting": false
+ }
+ }
+ },
+ "source": [
+ "# Student Attrition Classification RAI dashboard\n",
+ "This notebook demonstrates the use of the `responsibleai` API to assess a classification model trained on a Fabricated Student Attrition classification dataset. 
The model predicts **if a university student will be retained for the next year at the university or prematurely leave the university (known as student attrition)** based on the independent features:\n",
+ "\n",
+ "- FirstGenerationinCollegeFlag\n",
+ "- Gender\n",
+ "- Race\n",
+ "- HSGraduateorGED\n",
+ "- Age_Term_Min\n",
+ "- Age_Term_Max\n",
+ "- Total_Terms\n",
+ "- Entry_Type_DualEnrollment\n",
+ "- Entry_Type_EarlyAdmission\n",
+ "- Entry_Type_FirstTimeinCollege\n",
+ "- Entry_Type_Other\n",
+ "- Entry_Type_Re-Entry\n",
+ "- Entry_Type_Transfer\n",
+ "- AcademicProbation\n",
+ "- AcademicSuspension\n",
+ "- AcademicSuspensionFor1Year\n",
+ "- AcademicWarning\n",
+ "- ExtendProbationForLowGpa\n",
+ "- GoodAcademicStanding\n",
+ "- ProbationAfterSuspen/Dismiss\n",
+ "- TransferedToNonBusiness\n",
+ "- CumulativeGPA\n",
+ "- CumulativeCreditHoursEarnedPerTerm\n",
+ "- Blended\n",
+ "- FullyOnline\n",
+ "- RemoteLearning\n",
+ "- RemoteLearningBlended\n",
+ "- Traditional\n",
+ "- Adjunct\n",
+ "- Faculty\n",
+ "- Unknown_IntructorType\n",
+ "- PELL_Eligible\n",
+ "\n",
+ "\n",
+ "The Data Dictionary can be accessed through the following link: [Data_dictionary_Education](link-URL)\n",
+ "\n",
+ "The Notebook walks through the API calls necessary to create a widget with model analysis insights, then guides a visual analysis of the model."
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "1dd5ed12",
+ "metadata": {},
+ "source": [
+ "## **Installation** \n",
+ "\n",
+ "If you are **running the notebook for the first time**, you need to follow a few steps for smooth execution of the notebook:\n",
+ "\n",
+ "1. Un-comment the below cell.\n",
+ "2. Run the cell.\n",
+ "3. After execution of this cell, comment the cell.\n",
+ "4. Re-start the kernel.\n",
+ "5. Continue with running all cells.\n",
+ "\n",
+ "\n",
+ "**Reminder** -- Be sure to set your kernel to \"Python 3.8 - AzureML,\" via the drop-down menu at the right end of the taskbar. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "913d7fa9",
+ "metadata": {
+ "gather": {
+ "logged": 1687360313965
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%pip install azure-ai-ml\n",
+ "%pip install scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "37287826-d891-49f4-b473-7eb2625d270d",
+ "metadata": {
+ "nteract": {
+ "transient": {
+ "deleting": false
+ }
+ }
+ },
+ "source": [
+ "## **User Configuration** \n",
+ "Confirm the compute name listed here is the same as the one created using the included ARM template. If not, change this name so they match. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "763128bc-b820-4324-b37a-2f2005abd0ae", + "metadata": { + "gather": { + "logged": 1687360314263 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "# Pass the name of your compute instance (See step 6 below for it's use)\n", + "compute_name = \"raitextcluster\"" + ] + }, + { + "cell_type": "markdown", + "id": "7bf896d2-32e0-4df0-a69a-ccea3bd2e4c0", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "## **After changing the above cell click on Run All.**\n", + "**The notebook will follow the below steps and complete execution in 15-30 minutes depending upon compute configurations**" + ] + }, + { + "cell_type": "markdown", + "id": "63bc551a-5c3a-4a5d-8a81-6f94d13b1428", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "## Automated Notebook steps:\n", + "\n", + "**Step 1:** Loading the Data.\n", + "\n", + "**Step 2:** Pre-processing.\n", + "\n", + "**Step 3:** Splitting into Train Test datasets.\n", + "\n", + "**Step 4:** Registering the datasets as data assets in AML.\n", + "\n", + "**Step 5:** Define training and registering scripts for use in Training Pipeline.\n", + "\n", + "**Step 6:** Create compute instance (if compute instance name not passed).\n", + "\n", + "**Step 7:** Executing Model Training pipeline.\n", + "\n", + "**Step 8:** Define components for Responsible AI Dashboard Generation Pipeline (The components are explained in later parts).\n", + "\n", + "**Step 9:** Execute Dashboard Generation Pipeline (generate scorecard and save in directory).\n", + "\n", + "**Step 10:** Click on the link at the end of the notebook to access the dashboard generated." + ] + }, + { + "cell_type": "markdown", + "id": "2f135ebb-9123-4721-9b55-1fc753a03b44", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "## Loading required modules" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb59e9ef-ea11-4107-a2cd-82c20e5fe85a", + "metadata": { + "gather": { + "logged": 1687360314779 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "\n", + "import sklearn\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", + "from sklearn.compose import ColumnTransformer\n", + "\n", + "import zipfile\n", + "from io import BytesIO\n", + "import requests" + ] + }, + { + "cell_type": "markdown", + "id": "db7edfd5-f5a5-42ff-a718-d21f34ce3a17", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "## Accessing the Data\n", + "\n", + "The following section examines the code necessary to create datasets and a model using components in AzureML." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29bb4891-c33b-4413-a84b-353cce46f1ce", + "metadata": { + "gather": { + "logged": 1687360315037 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "def get_data(data_location, independent_features, target_feature, drop_col=None):\n", + " \"\"\"\n", + " Function to read data in Pandas dataframe\n", + " [TODO: Add any preprocessing steps within this function]\n", + "\n", + " Parameters\n", + " ----------\n", + " data_location: string\n", + " Path of the Dataset\n", + " independent_features: list\n", + " List of names of the independent features\n", + " target_feature: string\n", + " Name of the target/dependent features\n", + " drop_col: list\n", + " List of column names to drop\n", + "\n", + " Returns\n", + " -------\n", + " df: Pandas DataFrame\n", + " Pandas dataframe containing the dataset with the names passed\n", + " \"\"\"\n", + " column_names = independent_features + [target_feature]\n", + "\n", + " # Download the blob data from the provided URL\n", + " response = requests.get(data_location)\n", + " blob_content = response.content\n", + "\n", + " with zipfile.ZipFile(BytesIO(blob_content), \"r\") as zip_ref:\n", + " file_list = zip_ref.namelist()\n", + " if len(file_list) > 0:\n", + " # Assume the first file in the zip contains the data\n", + " inner_blob_name = file_list[0]\n", + " inner_blob_content = zip_ref.read(inner_blob_name)\n", + " df = pd.read_csv(BytesIO(inner_blob_content))\n", + "\n", + " # df = pd.read_csv(data_location)\n", + " l = list(df.columns)\n", + " l.remove(target_feature)\n", + " df = df[l + [target_feature]]\n", + " df.columns = column_names\n", + " if drop_col is not None:\n", + " df.drop(drop_col, axis=1, inplace=True)\n", + " df = df.dropna()\n", + " return df" + ] + }, + { + "cell_type": "markdown", + "id": "49ebcbcc-b1ef-4425-b8d3-a09d6e7a3dca", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "### Reading & Encoding the dataset" + ] + }, + { + "cell_type": "markdown", + "id": "9c191e6f-1b7d-4eb2-a81d-604d95c1b1ce", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "We load the data from github Repo directly and do basic pre-processing steps." 
+ ] + }, + { + "cell_type": "markdown", + "id": "f4ad76ec", + "metadata": {}, + "source": [ + "**Categorical Codes for \"LoanStatus\":**\n", + "\n", + " **Approved: The customer was approved for the Loan**\n", + " \n", + " **Rejected: The customer was not approved for the Loan**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f92f0dd0-c79b-4438-88dd-00d15d9a5b55", + "metadata": { + "gather": { + "logged": 1687360315438 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "data_df = get_data(\n", + " # data_location=\"./Fabricated_Student_Attrition_Data.csv\",\n", + " data_location=\"https://publictestdatasets.blob.core.windows.net/data/RAI_fabricated_student_attrition_data.zip\",\n", + " target_feature=\"Attrition\",\n", + " independent_features=[\n", + " \"FirstGenerationinCollegeFlag\",\n", + " \"Gender\",\n", + " \"Race\",\n", + " \"HSGraduateorGED\",\n", + " \"Age_Term_Min\",\n", + " \"Age_Term_Max\",\n", + " \"Total_Terms\",\n", + " \"Entry_Type_DualEnrollment\",\n", + " \"Entry_Type_EarlyAdmission\",\n", + " \"Entry_Type_FirstTimeinCollege\",\n", + " \"Entry_Type_Other\",\n", + " \"Entry_Type_Re-Entry\",\n", + " \"Entry_Type_Transfer\",\n", + " \"AcademicProbation\",\n", + " \"AcademicSuspension\",\n", + " \"AcademicSuspensionFor1Year\",\n", + " \"AcademicWarning\",\n", + " \"ExtendProbationForLowGpa\",\n", + " \"GoodAcademicStanding\",\n", + " \"ProbationAfterSuspen/Dismiss\",\n", + " \"TransferedToNonBusiness\",\n", + " \"CumulativeGPA\",\n", + " \"CumulativeCreditHoursEarnedPerTerm\",\n", + " \"Blended\",\n", + " \"FullyOnline\",\n", + " \"RemoteLearning\",\n", + " \"RemoteLearningBlended\",\n", + " \"Traditional\",\n", + " \"Adjunct\",\n", + " \"Faculty\",\n", + " \"Unknown_IntructorType\",\n", + " \"PELL_Eligible\",\n", + " ],\n", + ")\n", + "\n", + "data_encoded = data_df.copy()\n", + "\n", + "attrition_encoding = {\n", + " 1: \"Attrition\",\n", + " 0: \"Retain\",\n", + "}\n", + "\n", + "data_encoded.replace({\"Attrition\": attrition_encoding}, inplace=True)\n", + "data_encoded" + ] + }, + { + "cell_type": "markdown", + "id": "a0d0324a-7062-467b-bc9d-5f5ea6459654", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "### Splitting the Data into training and test datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3d9b848-7a43-4ef8-b27f-a069e1000b55", + "metadata": { + "gather": { + "logged": 1687360315668 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "data_train, data_test = train_test_split(\n", + " data_encoded, test_size=0.25, random_state=31415, stratify=data_encoded[\"Attrition\"]\n", + ")\n", + "\n", + "if len(data_test) <= 5000:\n", + " print(\"Proceed with the analysis\")\n", + "else:\n", + " print(\"Reduce your test data size\")" + ] + }, + { + "cell_type": "markdown", + "id": "67603e63-940d-4e86-8d5d-25817da8b9a3", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "### Get the Data to AzureML\n", + "\n", + "With the data now split into 'train' and 'test' DataFrames, we save them out to files in preparation for upload into AzureML:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9fdf2a40-6525-4d89-b8da-30d3fee832cf", + "metadata": { + "gather": { + 
"logged": 1687360316010 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "train_data_path = \"./data_student_attrition_classification/train/\"\n", + "test_data_path = \"./data_student_attrition_classification/test/\"\n", + "\n", + "os.makedirs(train_data_path, exist_ok=True)\n", + "os.makedirs(test_data_path, exist_ok=True)\n", + "\n", + "train_filename = train_data_path + \"student_attrition_classification_train.parquet\"\n", + "test_filename = test_data_path + \"student_attrition_classification_test.parquet\"\n", + "\n", + "data_train.to_parquet(train_filename, index=False)\n", + "data_test.to_parquet(test_filename, index=False)" + ] + }, + { + "cell_type": "markdown", + "id": "3d3ad3c1-7c34-45d7-8bd8-c4a85a135185", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "We are going to create two Datasets in AzureML, one for the train and one for the test datasets. The first step is to create an `MLClient` to perform the upload. The method we use assumes that there is a `config.json` file (downloadable from the Azure or AzureML portals) present in the same directory as this notebook file:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Enter details of your AML workspace\n", + "subscription_id = \"\"\n", + "resource_group = \"\"\n", + "workspace = \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b6e1ca6-9ef9-43c2-95f8-a98a3b93dbb4", + "metadata": { + "gather": { + "logged": 1687360317800 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "from azure.ai.ml import MLClient\n", + "from azure.identity import DefaultAzureCredential\n", + "\n", + "credential = DefaultAzureCredential()\n", + "ml_client = MLClient(\n", + " credential=credential,\n", + " subscription_id=subscription_id,\n", + " resource_group_name=resource_group,\n", + " workspace_name=workspace,\n", + ")\n", + "print(ml_client)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3635442a-1b78-4814-b082-b8ddb7c231fa", + "metadata": { + "gather": { + "logged": 1687360318099 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "# Define Version string (optional)\n", + "rai_student_attrition_classification_example_version_string = \"1\"" + ] + }, + { + "cell_type": "markdown", + "id": "c2c758e6-a5cc-47fa-9714-16f22c8bf031", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "### Create an asset MLtable (or URI file) to register the Data into workspace\n", + "This is essential, as the dashboard recognizes only registered assets. 
\n", + "\n", + "Reference:\n", + "https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-data-assets?tabs=Python-SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6629e7f2-fe9f-44f5-8244-d9d04c670d04", + "metadata": { + "gather": { + "logged": 1687360318480 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import Data\n", + "from azure.ai.ml.constants import AssetTypes" + ] + }, + { + "cell_type": "markdown", + "id": "9d106eef-33ae-44db-8d29-3f161ff28e5f", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "#### Change the asset name of the below file if the train/test data has changed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2b4360e-d977-43c5-a543-0a634532c5f4", + "metadata": { + "gather": { + "logged": 1687360320065 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "input_train_data = \"train_student_attrition_classification\"\n", + "\n", + "try:\n", + " # Try getting data already registered in workspace\n", + " train_data = ml_client.data.get(\n", + " name=input_train_data,\n", + " version=rai_student_attrition_classification_example_version_string,\n", + " )\n", + "\n", + "except Exception as e:\n", + " train_data = Data(\n", + " path=train_filename,\n", + " type=AssetTypes.URI_FILE,\n", + " description=\"RAI student attrition classification example training data\",\n", + " name=input_train_data,\n", + " version=rai_student_attrition_classification_example_version_string,\n", + " )\n", + " ml_client.data.create_or_update(train_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6e9317e-d755-4128-9700-733e16a9bec3", + "metadata": { + "gather": { + "logged": 1687360320796 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "input_test_data = \"test_student_attrition_classification\"\n", + "\n", + "try:\n", + " # Try getting data already registered in workspace\n", + " test_data = ml_client.data.get(\n", + " name=input_test_data,\n", + " version=rai_student_attrition_classification_example_version_string,\n", + " )\n", + "\n", + "except Exception as e:\n", + " test_data = Data(\n", + " path=test_filename,\n", + " type=AssetTypes.URI_FILE,\n", + " description=\"RAI student attrition classification example test data\",\n", + " name=input_test_data,\n", + " version=rai_student_attrition_classification_example_version_string,\n", + " )\n", + " ml_client.data.create_or_update(test_data)" + ] + }, + { + "cell_type": "markdown", + "id": "3f66aeab-a4bd-4ca8-9e96-fc141f87d2ca", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "## A model training pipeline\n", + "\n", + "To simplify the model creation process, we're going to use a pipeline. This will have two stages:\n", + "\n", + "1. The actual training component\n", + "2. A model registration component\n", + "\n", + "We have to register the model in AzureML in order for our RAI insights components to use it.\n", + "\n", + "### The Training Component\n", + "\n", + "The training component is for this particular model. 
In this case, we are going to train a `LogisticRegression` classifier on the input data and save it using MLFlow. We need command line arguments to specify the location of the input data, the location where MLFlow should write the output model, and the name of the target column in the dataset.\n",
+ "\n",
+ "We start by creating a directory to hold the component source:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9c0f9bde",
+ "metadata": {
+ "gather": {
+ "logged": 1687360321022
+ }
+ },
+ "outputs": [],
+ "source": [
+ "os.makedirs(\"./component_src\", exist_ok=True)\n",
+ "os.makedirs(\"./register_model_src\", exist_ok=True)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "d9b95f7d",
+ "metadata": {},
+ "source": [
+ "**Create the training script**  \n",
+ "This cell creates a machine learning pipeline that trains a logistic regression classifier using labeled data and then saves the trained model to a specified output path using MLFlow. \n",
+ "- The code reads in the training data as a pandas dataframe from a specified path, extracts the target column name, and separates the target column from the feature columns. \n",
+ "- Feature columns are then preprocessed using both a standard scaler for numeric data and a one-hot encoder for categorical data. \n",
+ "- Preprocessed feature columns and target column are then fed into the logistic regression classifier. \n",
+ "- The trained model is saved to a temporary directory and then copied to the specified output path. \n",
+ "- The code takes command-line arguments for the paths of the training data, the output model, and the name of the target column. \n",
+ "- The code also uses the Azure Machine Learning (AML) Python SDK to log the model and tracking information with MLFlow. \n",
+ "- Additional comments in the code provide details on each section of the pipeline."
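+ ,"\n",
+ "\n",
+ "Before submitting the remote training job, you can optionally smoke-test an equivalent pipeline locally on the in-memory `data_train` / `data_test` frames from the cells above. This is only a rough sketch (it mirrors, but does not replace, the component script written below) and assumes the target and categorical feature names used elsewhere in this notebook:\n",
+ "\n",
+ "```python\n",
+ "from sklearn.compose import ColumnTransformer\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.pipeline import Pipeline\n",
+ "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
+ "\n",
+ "categorical = [\"FirstGenerationinCollegeFlag\", \"Gender\", \"Race\", \"HSGraduateorGED\"]\n",
+ "numeric = [c for c in data_train.columns if c not in categorical + [\"Attrition\"]]\n",
+ "\n",
+ "# Same structure as the component script: scale numerics, one-hot encode categoricals\n",
+ "local_model = Pipeline(\n",
+ "    steps=[\n",
+ "        (\n",
+ "            \"preprocessor\",\n",
+ "            ColumnTransformer(\n",
+ "                transformers=[\n",
+ "                    (\"num\", StandardScaler(), numeric),\n",
+ "                    (\"cat\", OneHotEncoder(handle_unknown=\"ignore\"), categorical),\n",
+ "                ]\n",
+ "            ),\n",
+ "        ),\n",
+ "        (\"classifier\", LogisticRegression(solver=\"lbfgs\", max_iter=1000)),\n",
+ "    ]\n",
+ ")\n",
+ "\n",
+ "local_model.fit(data_train.drop(columns=[\"Attrition\"]), data_train[\"Attrition\"])\n",
+ "print(\"Held-out accuracy:\", local_model.score(data_test.drop(columns=[\"Attrition\"]), data_test[\"Attrition\"]))\n",
+ "```"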
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bdb4e1af", + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile component_src/classification_training_script.py\n", + "\n", + "import argparse\n", + "import os\n", + "import shutil\n", + "import tempfile\n", + "\n", + "\n", + "from azureml.core import Run\n", + "\n", + "import mlflow\n", + "import mlflow.sklearn\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder\n", + "from sklearn.compose import ColumnTransformer\n", + "\n", + "import pandas as pd\n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "def parse_args():\n", + " # setup arg parser\n", + " parser = argparse.ArgumentParser()\n", + "\n", + " # add arguments\n", + " parser.add_argument(\"--training_data\", type=str, help=\"Path to training data\")\n", + " parser.add_argument(\"--target_column_name\", type=str, help=\"Name of target column\")\n", + " parser.add_argument(\"--model_output\", type=str, help=\"Path of output model\")\n", + "\n", + " # parse args\n", + " args = parser.parse_args()\n", + "\n", + " # return args\n", + " return args\n", + "\n", + "\n", + "def main(args):\n", + " current_experiment = Run.get_context().experiment\n", + " tracking_uri = current_experiment.workspace.get_mlflow_tracking_uri()\n", + " print(\"tracking_uri: {0}\".format(tracking_uri))\n", + " mlflow.set_tracking_uri(tracking_uri)\n", + " mlflow.set_experiment(current_experiment.name)\n", + "\n", + " # Read in data\n", + " print(\"Reading data\")\n", + " all_data = pd.read_parquet(args.training_data)\n", + "\n", + " print(\"Extracting X_train, y_train\")\n", + " print(\"all_data cols: {0}\".format(all_data.columns))\n", + " y_train = all_data[args.target_column_name]\n", + " X_train = all_data.drop(labels=args.target_column_name, axis=\"columns\")\n", + " print(\"X_train cols: {0}\".format(X_train.columns))\n", + "\n", + " print(\"Executing Model Training pipeline\")\n", + " # We create the preprocessing pipelines for both numeric and categorical data.\n", + " numeric_transformer = Pipeline(steps=[\n", + " ('scaler', StandardScaler())])\n", + "\n", + " categorical_transformer = Pipeline(steps=[\n", + " ('onehot', OneHotEncoder(handle_unknown='ignore'))])\n", + "\n", + " continuous_features_names = ['Age_Term_Min',\t'Age_Term_Max',\t'Total_Terms',\n", + " 'Entry_Type_DualEnrollment', 'Entry_Type_EarlyAdmission','Entry_Type_FirstTimeinCollege',\n", + " 'Entry_Type_Other', 'Entry_Type_Re-Entry','Entry_Type_Transfer','AcademicProbation','AcademicSuspension',\n", + " 'AcademicSuspensionFor1Year',\t'AcademicWarning','ExtendProbationForLowGpa','GoodAcademicStanding',\n", + " 'ProbationAfterSuspen/Dismiss', 'TransferedToNonBusiness','CumulativeGPA','CumulativeCreditHoursEarnedPerTerm',\n", + " 'Blended',\t'FullyOnline','RemoteLearning',\t\n", + " 'RemoteLearningBlended','Traditional','Adjunct','Faculty','Unknown_IntructorType','PELL_Eligible']\n", + " categorical_features_names = ['FirstGenerationinCollegeFlag','Gender', 'Race',\n", + " 'HSGraduateorGED']\n", + "\n", + " transformations = ColumnTransformer(\n", + " transformers=[\n", + " ('num', numeric_transformer, continuous_features_names),\n", + " ('cat', categorical_transformer, categorical_features_names)])\n", + "\n", + " # Append classifier to preprocessing pipeline.\n", + " # Now we have a full prediction pipeline.\n", + " # The estimator can be changed to suit\n", + " model = Pipeline(steps=[('preprocessor', 
transformations),\n", + " ('classifier', LogisticRegression(solver='lbfgs', max_iter=1000))])\n", + "\n", + " model.fit(X_train, y_train)\n", + "\n", + " # Saving model with mlflow - leave this section unchanged\n", + " with tempfile.TemporaryDirectory() as td:\n", + " print(\"Saving model with MLFlow to temporary directory\")\n", + " tmp_output_dir = os.path.join(td, \"my_model_dir\")\n", + " mlflow.sklearn.save_model(sk_model=model, path=tmp_output_dir)\n", + "\n", + " print(\"Copying MLFlow model to output path\")\n", + " for file_name in os.listdir(tmp_output_dir):\n", + " print(\" Copying: \", file_name)\n", + " # As of Python 3.8, copytree will acquire dirs_exist_ok as\n", + " # an option, removing the need for listdir\n", + " shutil.copy2(src=os.path.join(tmp_output_dir, file_name), dst=os.path.join(args.model_output, file_name))\n", + "\n", + "\n", + "# run script\n", + "if __name__ == \"__main__\":\n", + " # add space in logs\n", + " print(\"*\" * 60)\n", + " print(\"\\n\\n\")\n", + "\n", + " # parse args\n", + " args = parse_args()\n", + "\n", + " # run main function\n", + " main(args)\n", + "\n", + " # add space in logs\n", + " print(\"*\" * 60)\n", + " print(\"\\n\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "d412b0ad", + "metadata": {}, + "source": [ + "**Define the YAML file**\n", + "\n", + "This code snippet defines an Azure Machine Learning Command Component for training a classification model on a dataset. It starts by defining a YAML configuration file that specifies the inputs and outputs of the component, the command to run, and the environment to use. The YAML file is then saved to disk.\n", + "\n", + "Next, the code uses the Azure ML Python SDK to load the Command Component from the YAML file. The resulting object can be used to run the component on a dataset, passing in the input paths and output paths as arguments.\n", + "\n", + "Overall, this code provides a simple and reusable way to define and run machine learning training components in Azure ML." 
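+ ,"\n",
+ "\n",
+ "After the next cell has run, you can optionally sanity-check what `load_component` picked up from the YAML. This is only an illustrative sketch and assumes the `train_model_component` object created below:\n",
+ "\n",
+ "```python\n",
+ "# Inspect the component definition loaded from the YAML file\n",
+ "print(train_model_component.name, train_model_component.version)\n",
+ "print(\"inputs: \", list(train_model_component.inputs.keys()))\n",
+ "print(\"outputs:\", list(train_model_component.outputs.keys()))\n",
+ "```"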
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a0ce700", + "metadata": { + "gather": { + "logged": 1687360321657 + } + }, + "outputs": [], + "source": [ + "from azure.ai.ml import load_component\n", + "\n", + "yaml_contents = (\n", + " f\"\"\"\n", + "$schema: http://azureml/sdk-2-0/CommandComponent.json\n", + "name: rai_classification_training_component\n", + "display_name: Classification training component for RAI example\n", + "version: {rai_student_attrition_classification_example_version_string}\n", + "type: command\n", + "inputs:\n", + " training_data:\n", + " type: path\n", + " target_column_name:\n", + " type: string\n", + "outputs:\n", + " model_output:\n", + " type: path\n", + "code: ./component_src/\n", + "environment: azureml://registries/azureml/environments/AzureML-responsibleai-0.20-ubuntu20.04-py38-cpu/versions/4\n", + "\"\"\"\n", + " + r\"\"\"\n", + "command: >-\n", + " python classification_training_script.py\n", + " --training_data ${{{{inputs.training_data}}}}\n", + " --target_column_name ${{{{inputs.target_column_name}}}}\n", + " --model_output ${{{{outputs.model_output}}}}\n", + "\"\"\"\n", + ")\n", + "\n", + "yaml_filename = \"RAIStudentAttritionTrainingComponent.yaml\"\n", + "\n", + "with open(yaml_filename, \"w\") as f:\n", + " f.write(yaml_contents.format(yaml_contents))\n", + "\n", + "train_model_component = load_component(source=yaml_filename)" + ] + }, + { + "cell_type": "markdown", + "id": "523752b3", + "metadata": {}, + "source": [ + "This script loads a trained model, registers it via MLFlow, and saves the registered model information to a JSON file. Users need to provide the necessary arguments to register the model, including the path to the input model, path to the output model info JSON file, base name of the registered model, and an optional suffix for the registered model name.\n", + "\n", + "To use this script, the following arguments must be defined: \n", + "- model_input_path: Path to the input model \n", + "- model_info_output_path: Path to write the model info JSON \n", + "- model_base_name: Name of the registered model \n", + "- model_name_suffix: An integer value to add as a suffix to the registered model name. If this is negative, the epoch time is used as the suffix." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "280531f8", + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile register_model_src/register.py\n", + "\n", + "# ---------------------------------------------------------\n", + "# Copyright (c) Microsoft Corporation. 
All rights reserved.\n", + "# ---------------------------------------------------------\n", + "\n", + "import argparse\n", + "import json\n", + "import os\n", + "import time\n", + "\n", + "\n", + "from azureml.core import Run\n", + "\n", + "import mlflow\n", + "import mlflow.sklearn\n", + "\n", + "# Based on example:\n", + "# https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-cli\n", + "# which references\n", + "# https://github.com/Azure/azureml-examples/tree/main/cli/jobs/train/lightgbm/iris\n", + "\n", + "\n", + "def parse_args():\n", + " # setup arg parser\n", + " parser = argparse.ArgumentParser()\n", + "\n", + " # add arguments\n", + " parser.add_argument(\"--model_input_path\", type=str, help=\"Path to input model\")\n", + " parser.add_argument(\n", + " \"--model_info_output_path\", type=str, help=\"Path to write model info JSON\"\n", + " )\n", + " parser.add_argument(\n", + " \"--model_base_name\", type=str, help=\"Name of the registered model\"\n", + " )\n", + " parser.add_argument(\n", + " \"--model_name_suffix\", type=int, help=\"Set negative to use epoch_secs\"\n", + " )\n", + "\n", + " # parse args\n", + " args = parser.parse_args()\n", + "\n", + " # return args\n", + " return args\n", + "\n", + "\n", + "def main(args):\n", + " current_experiment = Run.get_context().experiment\n", + " tracking_uri = current_experiment.workspace.get_mlflow_tracking_uri()\n", + " print(\"tracking_uri: {0}\".format(tracking_uri))\n", + " mlflow.set_tracking_uri(tracking_uri)\n", + " mlflow.set_experiment(current_experiment.name)\n", + "\n", + " print(\"Loading model\")\n", + " mlflow_model = mlflow.sklearn.load_model(args.model_input_path)\n", + "\n", + " if args.model_name_suffix < 0:\n", + " suffix = int(time.time())\n", + " else:\n", + " suffix = args.model_name_suffix\n", + " registered_name = \"{0}_{1}\".format(args.model_base_name, suffix)\n", + " print(f\"Registering model as {registered_name}\")\n", + "\n", + " print(\"Registering via MLFlow\")\n", + " mlflow.sklearn.log_model(\n", + " sk_model=mlflow_model,\n", + " registered_model_name=registered_name,\n", + " artifact_path=registered_name,\n", + " )\n", + "\n", + " print(\"Writing JSON\")\n", + " dict = {\"id\": \"{0}:1\".format(registered_name)}\n", + " output_path = os.path.join(args.model_info_output_path, \"model_info.json\")\n", + " with open(output_path, \"w\") as of:\n", + " json.dump(dict, fp=of)\n", + "\n", + "\n", + "# run script\n", + "if __name__ == \"__main__\":\n", + " # add space in logs\n", + " print(\"*\" * 60)\n", + " print(\"\\n\\n\")\n", + "\n", + " # parse args\n", + " args = parse_args()\n", + "\n", + " # run main function\n", + " main(args)\n", + "\n", + " # add space in logs\n", + " print(\"*\" * 60)\n", + " print(\"\\n\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "5386fd7f", + "metadata": {}, + "source": [ + "Now that the model registration script is saved on our local drive, we create a YAML file to describe it as a component to AzureML. 
This involves defining the inputs and outputs, specifying the AzureML environment which can run the script, and telling AzureML how to invoke the model registration script:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7ad3244d-096f-4e74-9836-54d7e84c5c7a",
+ "metadata": {
+ "gather": {
+ "logged": 1687360322172
+ },
+ "jupyter": {
+ "outputs_hidden": false,
+ "source_hidden": false
+ },
+ "nteract": {
+ "transient": {
+ "deleting": false
+ }
+ }
+ },
+ "outputs": [],
+ "source": [
+ "yaml_contents = f\"\"\"\n",
+ "$schema: http://azureml/sdk-2-0/CommandComponent.json\n",
+ "name: register_model\n",
+ "display_name: Register Model\n",
+ "version: {rai_student_attrition_classification_example_version_string}\n",
+ "type: command\n",
+ "is_deterministic: False\n",
+ "inputs:\n",
+ "  model_input_path:\n",
+ "    type: path\n",
+ "  model_base_name:\n",
+ "    type: string\n",
+ "  model_name_suffix: # Set negative to use epoch_secs\n",
+ "    type: integer\n",
+ "    default: -1\n",
+ "outputs:\n",
+ "  model_info_output_path:\n",
+ "    type: path\n",
+ "code: ./register_model_src/\n",
+ "environment: azureml://registries/azureml/environments/AzureML-responsibleai-0.20-ubuntu20.04-py38-cpu/versions/4\n",
+ "command: >-\n",
+ "  python register.py\n",
+ "  --model_input_path ${{{{inputs.model_input_path}}}}\n",
+ "  --model_base_name ${{{{inputs.model_base_name}}}}\n",
+ "  --model_name_suffix ${{{{inputs.model_name_suffix}}}}\n",
+ "  --model_info_output_path ${{{{outputs.model_info_output_path}}}}\n",
+ "\n",
+ "\"\"\"\n",
+ "\n",
+ "yaml_filename = \"register.yaml\"\n",
+ "\n",
+ "with open(yaml_filename, \"w\") as f:\n",
+ "    f.write(yaml_contents)\n",
+ "\n",
+ "register_component = load_component(source=yaml_filename)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7f573806-90ff-4ed2-95e1-996520f884dc",
+ "metadata": {
+ "nteract": {
+ "transient": {
+ "deleting": false
+ }
+ }
+ },
+ "source": [
+ "We will create a new compute cluster to run the jobs if one with the name given at the beginning of the notebook does not already exist."
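+ ,"\n",
+ "\n",
+ "Once the next cell has run, you can optionally confirm that the cluster is provisioned before submitting any jobs. This is just a convenience sketch; it assumes `compute_name` and `ml_client` are already defined earlier in the notebook:\n",
+ "\n",
+ "```python\n",
+ "# Fetch the compute target and print its provisioning state and VM size\n",
+ "cluster = ml_client.compute.get(compute_name)\n",
+ "print(cluster.name, cluster.provisioning_state, cluster.size)\n",
+ "```"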
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d04d6675-9de0-4e8e-9d9d-f92a2a2d2259",
+ "metadata": {
+ "gather": {
+ "logged": 1687360322396
+ },
+ "jupyter": {
+ "outputs_hidden": false,
+ "source_hidden": false
+ },
+ "nteract": {
+ "transient": {
+ "deleting": false
+ }
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from azure.ai.ml.entities import AmlCompute\n",
+ "\n",
+ "all_compute_names = [x.name for x in ml_client.compute.list()]\n",
+ "\n",
+ "if compute_name in all_compute_names:\n",
+ "    print(f\"Found existing compute: {compute_name}\")\n",
+ "else:\n",
+ "    my_compute = AmlCompute(\n",
+ "        name=compute_name,\n",
+ "        size=\"Standard_DS4_v2\",\n",
+ "        min_instances=0,\n",
+ "        max_instances=1,\n",
+ "        idle_time_before_scale_down=3600,\n",
+ "    )\n",
+ "    ml_client.compute.begin_create_or_update(my_compute).result()\n",
+ "    print(f\"Created compute cluster: {compute_name}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "693f7706",
+ "metadata": {},
+ "source": [
+ "### Running a training pipeline\n",
+ "\n",
+ "The two YAML files (RAIStudentAttritionTrainingComponent.yaml and register.yaml) define the two components in the model training pipeline.\n",
+ "\n",
+ "We start by choosing a name and a unique suffix for the model that the pipeline will register:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0917f58e-5171-426d-80f8-a27d793b4aa9",
+ "metadata": {
+ "gather": {
+ "logged": 1687360322618
+ },
+ "jupyter": {
+ "outputs_hidden": false,
+ "source_hidden": false
+ },
+ "nteract": {
+ "transient": {
+ "deleting": false
+ }
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import time\n",
+ "\n",
+ "model_name_suffix = int(time.time())\n",
+ "model_name = \"rai_student_attrition_classification_model\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "34bf9603-d4be-4c8a-99ba-69c1b484dd03",
+ "metadata": {
+ "nteract": {
+ "transient": {
+ "deleting": false
+ }
+ }
+ },
+ "source": [
+ "Next, we define the pipeline using objects from the AzureML SDKv2. 
As mentioned above, there are two component jobs: one to train the model, and one to register it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dcfd57a-03b5-446c-bf87-af2db9aadac8", + "metadata": { + "gather": { + "logged": 1687360322882 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "from azure.ai.ml import dsl, Input\n", + "\n", + "target_feature = \"Attrition\"\n", + "categorical_features = [\n", + " \"FirstGenerationinCollegeFlag\",\n", + " \"Gender\",\n", + " \"Race\",\n", + " \"HSGraduateorGED\",\n", + "]\n", + "\n", + "loan_train_pq = Input(\n", + " type=\"uri_file\",\n", + " path=f\"azureml:{input_train_data}:{rai_student_attrition_classification_example_version_string}\",\n", + " mode=\"download\",\n", + ")\n", + "loan_test_pq = Input(\n", + " type=\"uri_file\",\n", + " path=f\"azureml:{input_test_data}:{rai_student_attrition_classification_example_version_string}\",\n", + " mode=\"download\",\n", + ")\n", + "\n", + "\n", + "@dsl.pipeline(\n", + " compute=compute_name,\n", + " description=\"Register Model for RAI Student Attrition classification example\",\n", + " experiment_name=f\"RAI_classification_Example_Model_Training_{model_name_suffix}\",\n", + ")\n", + "def my_training_pipeline(target_column_name, training_data):\n", + " trained_model = train_model_component(\n", + " target_column_name=target_column_name, training_data=training_data\n", + " )\n", + " trained_model.set_limits(timeout=1200)\n", + "\n", + " _ = register_component(\n", + " model_input_path=trained_model.outputs.model_output,\n", + " model_base_name=model_name,\n", + " model_name_suffix=model_name_suffix,\n", + " )\n", + "\n", + " return {}\n", + "\n", + "\n", + "model_registration_pipeline_job = my_training_pipeline(target_feature, loan_train_pq)" + ] + }, + { + "cell_type": "markdown", + "id": "304560ea-b575-4118-af50-51b99ba509cb", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "With the pipeline definition created, we can submit it to AzureML. 
We define a helper function to do the submission, which waits for the submitted job to complete:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19ad43eb-513d-4755-93f3-78c5662b3847", + "metadata": { + "gather": { + "logged": 1687360430758 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import PipelineJob\n", + "from IPython.core.display import HTML\n", + "from IPython.display import display\n", + "\n", + "\n", + "def submit_and_wait(ml_client, pipeline_job) -> PipelineJob:\n", + " created_job = ml_client.jobs.create_or_update(pipeline_job)\n", + " assert created_job is not None\n", + "\n", + " print(\"Pipeline job can be accessed in the following URL:\")\n", + " display(HTML('{0}'.format(created_job.studio_url)))\n", + "\n", + " while created_job.status not in [\n", + " \"Completed\",\n", + " \"Failed\",\n", + " \"Canceled\",\n", + " \"NotResponding\",\n", + " ]:\n", + " time.sleep(30)\n", + " created_job = ml_client.jobs.get(created_job.name)\n", + " print(\"Latest status : {0}\".format(created_job.status))\n", + " assert created_job.status == \"Completed\"\n", + " return created_job\n", + "\n", + "\n", + "# This is the actual submission\n", + "training_job = submit_and_wait(ml_client, model_registration_pipeline_job)" + ] + }, + { + "cell_type": "markdown", + "id": "c9889100", + "metadata": {}, + "source": [ + "## Creating the RAI Insights\n", + "\n", + "We have a registered model, and can now run a pipeline to create the RAI insights. First off, compute the name of the model we registered:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c65da4a0-81da-4d30-b8e0-65be2a2caaee", + "metadata": { + "gather": { + "logged": 1687360431060 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "expected_model_id = f\"{model_name}_{model_name_suffix}:1\"\n", + "azureml_model_id = f\"azureml:{expected_model_id}\"" + ] + }, + { + "cell_type": "markdown", + "id": "d4bad8a5", + "metadata": {}, + "source": [ + "\n", + "Now, we create the RAI pipeline itself. There are four 'component stages' in this pipeline:\n", + "\n", + "1. Construct an empty `RAIInsights` object\n", + "1. Run the RAI tool components\n", + "1. Gather the tool outputs into a single `RAIInsights` object\n", + "1. 
(Optional) Generate a score card in pdf format summarizing model performance, and key aspects from the rai tool components\n", + "\n", + "We start by loading the RAI component definitions for use in our pipeline:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f52201d1-6962-4c3a-b27a-29c3a2547a98", + "metadata": { + "gather": { + "logged": 1687360431624 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "# Get handle to azureml registry for the RAI built in components\n", + "registry_name = \"azureml\"\n", + "\n", + "ml_client_registry = MLClient(\n", + " credential=credential,\n", + " subscription_id=subscription_id,\n", + " resource_group_name=resource_group,\n", + " registry_name=registry_name,\n", + ")\n", + "print(ml_client_registry)" + ] + }, + { + "cell_type": "markdown", + "id": "32e1594d-0bba-43cc-bc20-44f761881350", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "## Add different components of ResponsibleAI dashboard to the Pipeline\n", + "\n", + "Reference:\n", + "https://learn.microsoft.com/en-us/azure/machine-learning/how-to-responsible-ai-insights-sdk-cli?tabs=python" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1fe7afc-2d6c-4a7d-9108-edd78eb019ae", + "metadata": { + "gather": { + "logged": 1687360432626 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "label = \"latest\"\n", + "\n", + "rai_constructor_component = ml_client_registry.components.get(\n", + " name=\"microsoft_azureml_rai_tabular_insight_constructor\", label=label\n", + ")\n", + "\n", + "# We get latest version and use the same version for all components\n", + "version = rai_constructor_component.version\n", + "print(\"The current version of RAI built-in components is: \" + version)\n", + "\n", + "rai_counterfactual_component = ml_client_registry.components.get(\n", + " name=\"microsoft_azureml_rai_tabular_counterfactual\", version=version\n", + ")\n", + "rai_erroranalysis_component = ml_client_registry.components.get(\n", + " name=\"microsoft_azureml_rai_tabular_erroranalysis\", version=version\n", + ")\n", + "\n", + "rai_explanation_component = ml_client_registry.components.get(\n", + " name=\"microsoft_azureml_rai_tabular_explanation\", version=version\n", + ")\n", + "\n", + "rai_gather_component = ml_client_registry.components.get(\n", + " name=\"microsoft_azureml_rai_tabular_insight_gather\", version=version\n", + ")\n", + "\n", + "rai_scorecard_component = ml_client_registry.components.get(\n", + " name=\"microsoft_azureml_rai_tabular_score_card\", version=version\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "64a22471", + "metadata": {}, + "source": [ + "## Score card generation config\n", + "For score card generation, we need some additional configuration in a separate json file. 
Here we configure the following model performance metrics for reporting:\n", + "- accuracy\n", + "- precision" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f740ae68-8379-4d04-8445-ce5111dbc9c1", + "metadata": { + "gather": { + "logged": 1687360432946 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "import json\n", + "\n", + "score_card_config_dict = {\n", + " \"Model\": {\n", + " \"ModelName\": \"Student Attrition classification\",\n", + " \"ModelType\": \"Classification\",\n", + " \"ModelSummary\": \"\",\n", + " },\n", + " \"Metrics\": {\"accuracy_score\": {\"threshold\": \">=0.5\"}, \"precision_score\": {}},\n", + "}\n", + "\n", + "score_card_config_filename = (\n", + " \"rai_student_attrition_classification_score_card_config.json\"\n", + ")\n", + "\n", + "with open(score_card_config_filename, \"w\") as f:\n", + " json.dump(score_card_config_dict, f)\n", + "\n", + "score_card_config_path = Input(\n", + " type=\"uri_file\", path=score_card_config_filename, mode=\"download\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "cecaea36", + "metadata": {}, + "source": [ + "Now the pipeline itself. This creates an empty `RAIInsights` object, adds the analyses, and then gathers everything into the final `RAIInsights` output. Where complex objects need to be passed (such as a list of treatment feature names), they must be encoded as JSON strings.\n", + "\n", + "Note that the timeout for the counterfactual generation is longer, since this is a comparatively slow process." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86692d9a-351e-404f-9ba0-1051d60be7f2", + "metadata": { + "gather": { + "logged": 1687360433171 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "import json\n", + "from azure.ai.ml import Input\n", + "from azure.ai.ml.constants import AssetTypes\n", + "\n", + "classes_in_target = json.dumps([\"Retain\", \"Attrition\"])\n", + "\n", + "\n", + "@dsl.pipeline(\n", + " compute=compute_name,\n", + " description=\"Example RAI computation on Student Attrition Classification\",\n", + " experiment_name=f\"RAI_Student_Attrition_Classification_Example_RAIInsights_Computation_{model_name_suffix}\",\n", + ")\n", + "def rai_classification_pipeline(\n", + " target_column_name,\n", + " train_data,\n", + " test_data,\n", + " score_card_config_path,\n", + "):\n", + " # Initiate the RAIInsights\n", + " create_rai_job = rai_constructor_component(\n", + " title=\"RAI Dashboard Example\",\n", + " task_type=\"classification\",\n", + " model_info=expected_model_id,\n", + " model_input=Input(type=AssetTypes.MLFLOW_MODEL, path=azureml_model_id),\n", + " train_dataset=train_data,\n", + " test_dataset=test_data,\n", + " target_column_name=target_column_name,\n", + " categorical_column_names=json.dumps(categorical_features),\n", + " classes=classes_in_target,\n", + " )\n", + " create_rai_job.set_limits(timeout=3600)\n", + "\n", + " # Add an explanation\n", + " explain_job = rai_explanation_component(\n", + " comment=\"Explanation for the classification dataset\",\n", + " rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,\n", + " )\n", + " explain_job.set_limits(timeout=3600)\n", + "\n", + " # Add counterfactual analysis\n", + " counterfactual_job = rai_counterfactual_component(\n", + " 
rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,\n", + " total_cfs=10,\n", + " desired_class=\"opposite\",\n", + " )\n", + " counterfactual_job.set_limits(timeout=3600)\n", + "\n", + " # Add error analysis\n", + " erroranalysis_job = rai_erroranalysis_component(\n", + " rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,\n", + " )\n", + " erroranalysis_job.set_limits(timeout=3600)\n", + "\n", + " # Combine everything\n", + " rai_gather_job = rai_gather_component(\n", + " constructor=create_rai_job.outputs.rai_insights_dashboard,\n", + " insight_1=explain_job.outputs.explanation,\n", + " # insight_2=causal_job.outputs.causal,\n", + " insight_3=counterfactual_job.outputs.counterfactual,\n", + " insight_4=erroranalysis_job.outputs.error_analysis,\n", + " )\n", + " rai_gather_job.set_limits(timeout=3600)\n", + "\n", + " rai_gather_job.outputs.dashboard.mode = \"upload\"\n", + " rai_gather_job.outputs.ux_json.mode = \"upload\"\n", + "\n", + " # Generate score card in pdf format for a summary report on model performance,\n", + " # and observe distrbution of error between prediction vs ground truth.\n", + " rai_scorecard_job = rai_scorecard_component(\n", + " dashboard=rai_gather_job.outputs.dashboard,\n", + " pdf_generation_config=score_card_config_path,\n", + " )\n", + "\n", + " return {\n", + " \"dashboard\": rai_gather_job.outputs.dashboard,\n", + " \"ux_json\": rai_gather_job.outputs.ux_json,\n", + " \"scorecard\": rai_scorecard_job.outputs.scorecard,\n", + " }" + ] + }, + { + "cell_type": "markdown", + "id": "1c26b9a6", + "metadata": {}, + "source": [ + "Next, we define the pipeline object itself, and ensure that the outputs will be available for download:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b877890-c3d8-45d8-b91f-906968b7b053", + "metadata": { + "gather": { + "logged": 1687360433460 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "from azure.ai.ml import Output\n", + "\n", + "# Pipeline to construct the RAI Insights\n", + "insights_pipeline_job = rai_classification_pipeline(\n", + " target_column_name=target_feature,\n", + " train_data=loan_train_pq,\n", + " test_data=loan_test_pq,\n", + " score_card_config_path=score_card_config_path,\n", + ")\n", + "\n", + "# Workaround to enable the download\n", + "timestamp = datetime.now().strftime(\"%Y%m%d_%H_%M_%S\")\n", + "path = f\"RAI_Student_Attrition_RAIInsights_{model_name_suffix}_{timestamp}\"\n", + "insights_pipeline_job.outputs.dashboard = Output(\n", + " path=f\"azureml://datastores/workspaceblobstore/paths/{path}/dashboard/\",\n", + " mode=\"upload\",\n", + " type=\"uri_folder\",\n", + ")\n", + "insights_pipeline_job.outputs.ux_json = Output(\n", + " path=f\"azureml://datastores/workspaceblobstore/paths/{path}/ux_json/\",\n", + " mode=\"upload\",\n", + " type=\"uri_folder\",\n", + ")\n", + "insights_pipeline_job.outputs.scorecard = Output(\n", + " path=f\"azureml://datastores/workspaceblobstore/paths/{path}/scorecard/\",\n", + " mode=\"upload\",\n", + " type=\"uri_folder\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "204c6fe5", + "metadata": {}, + "source": [ + "And submit the pipeline to AzureML for execution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3f713bb-6a5f-4ab7-ac59-4fe2e0c4ec7a", + "metadata": { + "gather": { + "logged": 1687362884687 + }, + 
"jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "insights_job = submit_and_wait(ml_client, insights_pipeline_job)" + ] + }, + { + "cell_type": "markdown", + "id": "65437c8f", + "metadata": {}, + "source": [ + "The dashboard should appear in the AzureML portal in the registered model view. The following cell computes the expected URI:" + ] + }, + { + "cell_type": "markdown", + "id": "69721140", + "metadata": {}, + "source": [ + "## Downloading the Scorecard PDF\n", + "\n", + "We can download the scorecard PDF from our pipeline as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65b2a0db-3efc-444c-aec1-8a2a00d70bdb", + "metadata": { + "gather": { + "logged": 1687362885755 + } + }, + "outputs": [], + "source": [ + "target_directory = \".\"\n", + "\n", + "ml_client.jobs.download(\n", + " insights_job.name, download_path=target_directory, output_name=\"scorecard\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9a5cbfc6-698a-4d94-a305-0d1ccd2a9511", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "## To Access the Dashboard follow the link below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e923ae8d-13bb-4056-8962-32b6959447bb", + "metadata": { + "gather": { + "logged": 1687362886032 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "sub_id = ml_client._operation_scope.subscription_id\n", + "rg_name = ml_client._operation_scope.resource_group_name\n", + "ws_name = ml_client.workspace_name\n", + "\n", + "expected_uri = f\"https://ml.azure.com/model/{expected_model_id}/model_analysis?wsid=/subscriptions/{sub_id}/resourcegroups/{rg_name}/workspaces/{ws_name}\"\n", + "\n", + "print(f\"Please visit {expected_uri} to see your analysis\")" + ] + }, + { + "cell_type": "markdown", + "id": "83d4b5d0-1704-49fb-a65c-8b30c58e7b0a", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "Once this is complete, we can go to the Registered Models view in the AzureML portal, and find the model we have just registered. On the 'Model Details' page, there is a \"Responsible AI dashboard\" tab where we can view the insights which we have just uploaded." + ] + } + ], + "metadata": { + "categories": [ + "SDK v2", + "sdk", + "python", + "responsible-ai" + ], + "kernel_info": { + "name": "python38-azureml" + }, + "kernelspec": { + "display_name": "Python 3.8 - AzureML", + "language": "python", + "name": "python38-azureml" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + }, + "microsoft": { + "host": { + "AzureML": { + "notebookHasBeenCompleted": true + } + }, + "ms_spell_check": { + "ms_spell_check_language": "en" + } + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "vscode": { + "interpreter": { + "hash": "8fd340b5477ca1a0b454d48a3973beff39fee032ada47a04f6f3725b469a8988" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}