Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

build: add dependencies health script #462

Merged
merged 1 commit into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions repo_health/check_python_support_releases.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@
@pytest.mark.py_dependency_health
def check_python_support_releases(repo_release_tags, all_results, repo_path):
"""
Check to see the python version releases for 3.8, 3.9, 3.10, 3.11
Check to see the python version releases for 3.8, 3.9, 3.10, 3.11, 3.12
"""
if not repo_release_tags:
all_results[MODULE_DICT_KEY] = {}
print("There is not tag found")
return
python_versions = ['3.8', '3.9', '3.10', '3.11']
python_versions = ['3.8', '3.9', '3.10', '3.11', '3.12']

Check warning on line 36 in repo_health/check_python_support_releases.py

View check run for this annotation

Codecov / codecov/patch

repo_health/check_python_support_releases.py#L36

Added line #L36 was not covered by tests
all_results[MODULE_DICT_KEY] = {}
desc_tags_list = list(reversed(repo_release_tags))
for version in python_versions:
Expand Down
2 changes: 1 addition & 1 deletion repo_health/check_setup_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def check_repo_url(setup_py, setup_cfg, all_results):
cfg_urls = re.findall(r"""(?m)^url\s*=\s*(\S+)""", setup_cfg)
urls = py_urls + cfg_urls
if urls:
assert len(urls) == 1
assert len(urls) > 0
all_results[module_dict_key]["repo_url"] = urls[0]


Expand Down
3 changes: 2 additions & 1 deletion repo_health/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@
try:
subprocess.run(['git', 'fetch', '--tags'], cwd=repo_dir, check=True)
git_tags = subprocess.check_output(['git', 'tag', '--sort=version:refname'], cwd=repo_dir, text=True)
all_tags_list = git_tags.strip().split('\n')
# Filtering out empty strings or non-trivial values
all_tags_list = [tag for tag in git_tags.strip().split('\n') if tag.strip()]

Check warning on line 130 in repo_health/utils.py

View check run for this annotation

Codecov / codecov/patch

repo_health/utils.py#L130

Added line #L130 was not covered by tests
latest_tag = get_latest_release_tag(repo_dir)

if not latest_tag and len(all_tags_list):
Expand Down
12 changes: 12 additions & 0 deletions repo_health_dashboard/dependencies_configuration.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
py_dependency_health:
check_order:
- python.3.8
- python.3.9
- python.3.10
- python.3.11
- django.has_django
- django.4.0
- django.4.1
- django.4.2
key_aliases:
django.has_django: has_django
17 changes: 13 additions & 4 deletions repo_health_dashboard/repo_health_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ def main():
Create basic dashboard
"""
parser = argparse.ArgumentParser(description="Create basic dashboard")
parser.add_argument(
"--dashboard-name",
help="name of dashboard to trigger e.g. repo_health, dependencies_health",
dest="dashboard_name",
default="repo_health",
)
parser.add_argument(
"--data-dir",
help="location of where data yaml files are located",
Expand Down Expand Up @@ -53,8 +59,9 @@ def main():
)
args = parser.parse_args()
# collect configurations if they were input
configuration_name = "py_dependency_health" if args.dashboard_name == 'py_dependency_health' else "main"
configurations = {
"main": {"check_order": [], "repo_name_order": [], "key_aliases": {}}
configuration_name: {"check_order": [], "repo_name_order": [], "key_aliases": {}}
}
if args.configuration:
with codecs.open(args.configuration, "r", "utf-8") as f:
Expand All @@ -65,7 +72,8 @@ def main():
configurations[sheet] = utils.get_sheets(parsed_file_data, sheet)

data_dir = os.path.abspath(args.data_dir)
files = glob.glob(os.path.join(data_dir, "*/*.yaml"), recursive=False)
data_files_pattern = "*/*.yaml" if args.dashboard_name == "repo_health" else "*.yaml"
files = glob.glob(os.path.join(data_dir, data_files_pattern), recursive=False)
data = {}
for file_path in files:
file_name = file_path[file_path.rfind("/") + 1:]
Expand All @@ -89,8 +97,9 @@ def main():
utils.write_squashed_metadata_to_csv(
output, args.output_csv + "_" + key, configuration, args.append
)
utils.write_squashed_metadata_to_sqlite(
output, f"dashboard_{key}", configuration, args.output_sqlite)
if args.dashboard_name == "repo_health":
utils.write_squashed_metadata_to_sqlite(
output, f"dashboard_{key}", configuration, args.output_sqlite)


if __name__ == "__main__":
Expand Down
187 changes: 187 additions & 0 deletions scripts/dependencies-health-script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#!/bin/bash
# Abort on the first failing command (-e) and echo script lines as they are read (-v).
set -ev

# Click requires a UTF-8 locale; it interfaces badly with Python 3's ASCII default.
export LC_ALL=C.UTF-8 LANG=C.UTF-8

# Directory the script was started from; later steps keep cd-ing back to it.
WORKSPACE="${PWD}"

# When REPORT_DATE is set and non-empty, normalise it to YYYY-MM-DD.
if [[ -n "${REPORT_DATE}" ]]; then
    REPORT_DATE="$(date -d "${REPORT_DATE}" '+%Y-%m-%d')"
fi

###############################################################
# Get list of dependencies repos from the dependencies_urls.csv
###############################################################

cd "${WORKSPACE}"
touch repositories.txt

# Skip the first two lines of the CSV, take the second comma-separated field
# (the source column) and keep only values that start with http:// or https://.
# Under `set -e` the script stops here when nothing matches, because grep exits non-zero.
repo_urls="$(
    tail -n +3 "${WORKSPACE}/dashboards/dependencies_urls.csv" \
        | cut -d ',' -f 2 \
        | grep -E '^(http|https)://'
)"

# Persist the filtered URLs, one per line, in repositories.txt.
printf '%s\n' "${repo_urls}" > repositories.txt

#########################################
# Run dependencies checks on repositories
#########################################

# Install checks and dashboarding script, this should also install pytest-repo-health
pip-sync -q edx-repo-health/requirements/base.txt
pip install -q -e edx-repo-health

# Metadata file passed to pytest through --repo-health-metadata.
METADATA_FILE_DIST="docs/checks_metadata.yaml"

# "org/repo" names of every repository that could not be cloned or checked.
failed_repos=()

# Suffix appended to the repository name to build its result file name.
OUTPUT_FILE_POSTFIX="_repo_health.yaml"

# Folder (relative to $WORKSPACE) that receives one result file per repository.
DEPENDENCIES_DATA_DIR="dependencies_health_data"

# Run the py_dependency_health checks against ./target-repo and write the result to
# ${DEPENDENCIES_DATA_DIR}/${OUTPUT_FILE_NAME}. Must be called from $WORKSPACE with
# OUTPUT_FILE_NAME set by the caller. Defined once here instead of on every iteration.
DEPENDENCIES_HEALTH_COMMAND() {
    pytest -m py_dependency_health --repo-health \
        --repo-health-path "edx-repo-health/repo_health" \
        --repo-path "target-repo" \
        --repo-health-metadata "${METADATA_FILE_DIST}" \
        --output-path "${DEPENDENCIES_DATA_DIR}/${OUTPUT_FILE_NAME}" \
        -o log_cli=true --exitfirst --noconftest -v -c /dev/null
}

# Git clone each dependency repo and run checks on it
input="repositories.txt"
while IFS= read -r line; do
    cd "$WORKSPACE"

    # Extract org and repo name from a GitHub SSH or HTTPS URL.
    # Plain `+` is used: POSIX ERE (what [[ =~ ]] uses) has no lazy quantifiers and
    # leaves `+?` undefined. The anchors and the explicit `/` already delimit both names.
    if [[ "${line}" =~ ^(git@github\.com:|https://github\.com/)([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+)$ ]]; then
        ORG_NAME="${BASH_REMATCH[2]}"
        # Drop a trailing .git, if any, as we need the proper name only.
        REPO_NAME="${BASH_REMATCH[3]%.git}"
        FULL_NAME="${ORG_NAME}/${REPO_NAME}"
    else
        echo "Skipping <${line}>: Could not recognize as a GitHub URL in order to extract org and repo name."
        continue
    fi

    if [[ "${REPO_NAME}" = "edx-repo-health" ]]; then
        echo "Skipping <${line}>: edx-repo-health"
        continue
    fi

    if [[ -n "${ONLY_CHECK_THIS_REPOSITORY}" && "${FULL_NAME}" != "${ONLY_CHECK_THIS_REPOSITORY}" ]]; then
        echo "Skipping <${line}>: ONLY_CHECK_THIS_REPOSITORY was set, and does not match"
        continue
    fi

    echo "Processing repo: ${FULL_NAME}"

    rm -rf target-repo
    # For https URLs, inject GITHUB_TOKEN as the user part so the clone is authenticated.
    if ! git clone -- "${line/https:\/\//https:\/\/$GITHUB_TOKEN@}" target-repo; then
        failed_repos+=("$FULL_NAME")
        continue
    fi

    echo "Cloned repo: ${FULL_NAME}"
    cd target-repo
    echo "Stepping into target-repo"
    # If the REPORT_DATE variable is set and not an empty string.
    if [[ -n $REPORT_DATE ]]; then
        # A specific date is given for the report: check out the last commit before it.
        FIRST_COMMIT=$(git log --reverse --format="format:%ci" | sed -n 1p)
        # ISO dates (YYYY-MM-DD) order correctly under plain string comparison.
        if [[ $REPORT_DATE > ${FIRST_COMMIT:0:10} ]]; then
            # HEAD of a fresh clone is the repository's default branch. Using it instead
            # of a hard-coded "master" keeps repositories whose default branch has
            # another name from aborting the whole job under `set -e`.
            git checkout "$(git rev-list -n 1 --before="${REPORT_DATE} 00:00" HEAD)"
        else
            echo "${REPO_NAME} doesn't have any commits prior to ${REPORT_DATE}"
            failed_repos+=("$FULL_NAME")
            continue
        fi
    fi

    cd "$WORKSPACE"
    # make sure destination folder exists
    mkdir -p "$DEPENDENCIES_DATA_DIR"

    OUTPUT_FILE_NAME="${REPO_NAME}${OUTPUT_FILE_POSTFIX}"

    # Give the checks a second chance; only two consecutive failures mark the repo as failed.
    if ! DEPENDENCIES_HEALTH_COMMAND && ! DEPENDENCIES_HEALTH_COMMAND; then
        failed_repos+=("$FULL_NAME")
        continue
    fi

done < "$input"

##############################
# Recalculate aggregated data.
##############################

# Comma-separated list of the failed "org/repo" names. IFS is changed only inside the
# command substitution's subshell, so the rest of the script keeps default word splitting.
failed_repo_names=$(IFS=,; echo "${failed_repos[*]}")

# Rebuild the aggregated dashboard CSV from the per-repository result files.
echo "Pushing data"
cd "${WORKSPACE}/dependencies_health_data"
repo_health_dashboard --data-dir . --configuration "${WORKSPACE}/edx-repo-health/repo_health_dashboard/dependencies_configuration.yaml" \
    --output-csv "${WORKSPACE}/dashboards/dashboard" --dashboard-name "py_dependency_health"

cd "${WORKSPACE}"
# Only commit the data if running with master and no REPORT_DATE is set.
if [[ ${EDX_REPO_HEALTH_BRANCH} == 'master' && -z ${REPORT_DATE} ]]; then
    ###########################################
    # Commit files and push to repo-health-data
    ###########################################
    echo "Commit new files and push to master..."

    commit_message="chore: Update repo health data files"

    if [[ ${#failed_repos[@]} -ne 0 ]]; then
        # $'\n' is a real newline; "\n" inside double quotes would end up in the commit
        # message as a literal backslash-n. The blank line separates subject from body.
        commit_message+=$'\n\n'"Following repos failed repo health checks"$'\n'" ${failed_repo_names}"

        for full_name in "${failed_repos[@]}"; do
            # failed_repos holds "org/repo", but result files are named after the
            # repository alone, so strip everything up to the last slash.
            OUTPUT_FILE_NAME="${full_name##*/}${OUTPUT_FILE_POSTFIX}"
            data_file="${WORKSPACE}/dependencies_health_data/${OUTPUT_FILE_NAME}"
            echo "reverting repo health data for ${OUTPUT_FILE_NAME}"
            if git ls-files --error-unmatch -- "${data_file}" > /dev/null 2>&1; then
                # Previously committed: restore the last known-good version.
                git checkout -- "${data_file}"
            else
                # Never committed: `git checkout --` would fail and abort the job under
                # `set -e`, so just drop whatever partial output the failed run left.
                rm -f -- "${data_file}"
            fi
        done
    fi

    git checkout master
    # Stage first and compare the index with HEAD: a working-tree comparison does not
    # see untracked files, so newly generated data files would never be committed.
    git add dashboards
    git add dependencies_health_data
    if git diff-index --quiet --cached HEAD; then
        # Nothing staged differs from HEAD
        echo "No changes to commit"
    else
        # Staged changes found
        git config --global user.name "Repo Health Bot"
        git config --global user.email "${GITHUB_USER_EMAIL}"
        git commit -m "${commit_message}"
        git push origin master
    fi
fi

# Fail the job, after all data handling is done, when any repository failed.
if [[ ${#failed_repos[@]} -ne 0 ]]; then
    echo
    echo
    echo "TLDR Runbook(More detailed runbook: https://openedx.atlassian.net/wiki/spaces/AT/pages/3229057351/Repo+Health+Runbook ):"
    echo "  To resolve, search the console output for 'ERRORS' (without the quotes), or search for any"
    echo "  of the failed repo names listed below."
    echo "The following repositories failed while executing pytest dependencies-health scripts causing the job to fail:"
    echo
    echo "    ${failed_repo_names}"
    echo
    echo
    exit 1
fi