This repository has been archived by the owner on Aug 4, 2023. It is now read-only.

🔄 synced file(s) with WordPress/openverse #1092

Merged 4 commits on Apr 10, 2023

76 changes: 14 additions & 62 deletions .github/workflows/new_issues.yml
@@ -8,7 +8,6 @@ on:
env:
GH_TOKEN: ${{ secrets.ACCESS_TOKEN }} # Projects need a personal access token to work.
ISSUE_ID: ${{ github.event.issue.node_id }} # The global issue ID that works in both REST and GraphQL APIs.
PROJECT_ID: "PVT_kwDOAAQ2Js4AMZdL" # The ID for the Openverse project (#75).

jobs:
add_issue:
@@ -26,65 +25,18 @@ jobs:
add_issue_to_project:
name: Add new issue to project
runs-on: ubuntu-latest
env:
PRIORITY_FIELD_ID: "PVTSSF_lADOAAQ2Js4AMZdLzgH6Kbo" # The ID for the project Priority custom field.
steps:
- name: Add issue to "Backlog"
run: |
# shellcheck disable=SC2016
gh api graphql -f query='
mutation($project:ID!, $issue:ID!) {
addProjectV2ItemById(input: {projectId: $project, contentId: $issue}) {
item {
id
content {
... on Issue {
labels(first: 10) {
nodes {
name
}
}
}
}
}
}
}' -f project="$PROJECT_ID" -f issue="$ISSUE_ID" >> issue_data.json

echo 'ITEM_ID='"$(jq '.data.addProjectV2ItemById.item.id' issue_data.json)" >> "$GITHUB_ENV"
ITEM_PRIORITY="$(jq '.data.addProjectV2ItemById.item.content.labels.nodes[] | select(.name | contains("priority")).name | split(": ")[1]' issue_data.json)" >> "$GITHUB_ENV"
# The IDs for the project's Priority custom field options.
# These IDs were manually retrieved from the GitHub API.
if [[ $ITEM_PRIORITY == "low" ]]; then
PRIORITY_VALUE_ID="279ae886"
elif [[ $ITEM_PRIORITY == "medium" ]]; then
PRIORITY_VALUE_ID="333b3c1d"
elif [[ $ITEM_PRIORITY == "high" ]]; then
PRIORITY_VALUE_ID="03fe8945"
else
PRIORITY_VALUE_ID="fb76bdbc"
fi
echo 'PRIORITY_VALUE_ID='"$PRIORITY_VALUE_ID" >> "$GITHUB_ENV"

- name: Set issue priority
run: |
# shellcheck disable=SC2016
gh api graphql -f query='
mutation (
$project: ID!
$item: ID!
$priority_field: ID!
$priority_value: String!
) {
set_priority_field: updateProjectV2ItemFieldValue(input: {
projectId: $project
itemId: $item
fieldId: $priority_field
value: {
singleSelectOptionId: $priority_value
}
}) {
projectV2Item {
id
}
}
}' -f project="$PROJECT_ID" -f item="$ITEM_ID" -f priority_field="$PRIORITY_FIELD_ID" -f priority_value="$PRIORITY_VALUE_ID"
- uses: bulatt3/[email protected]
with:
project-url: https://github.com/orgs/WordPress/projects/75
github-token: ${{ secrets.ACCESS_TOKEN }}
# Exclude the issues with the following labels
labeled: project
label-operator: NOT
label-map: |
{ "Priority": [
{ "label": "🟥 priority: critical", "fieldValue": "🟥 priority: critical" },
{ "label": "🟧 priority: high", "fieldValue": "🟧 priority: high" },
{ "label": "🟨 priority: medium", "fieldValue": "🟨 priority: medium" },
{ "label": "🟩 priority: low", "fieldValue": "🟩 priority: low" }
]}
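
The hand-rolled `gh api graphql` steps are replaced by the `bulatt3/add-to-project` action configured above: it adds each new issue to the project at `project-url`, skips issues carrying the `project` label (`label-operator: NOT`), and uses `label-map` to set the project's Priority field directly from the issue's `priority: *` label. For comparison, the deleted shell step implemented the same label-to-option mapping by hand; a minimal Python sketch of that logic, reusing the option IDs from the removed workflow (the function and variable names are illustrative, not part of the PR):

```python
# Single-select option IDs copied from the removed workflow step.
PRIORITY_OPTION_IDS = {
    "low": "279ae886",
    "medium": "333b3c1d",
    "high": "03fe8945",
}
FALLBACK_OPTION_ID = "fb76bdbc"  # used for any other priority, e.g. "critical"


def priority_option_id(labels: list[str]) -> str:
    """Map the first "priority: X" label to the Priority field's option ID."""
    for label in labels:
        if "priority" in label:
            # e.g. "🟨 priority: medium" -> "medium"
            priority = label.split(": ")[-1]
            return PRIORITY_OPTION_IDS.get(priority, FALLBACK_OPTION_ID)
    return FALLBACK_OPTION_ID


# An issue labeled "🟧 priority: high" resolves to option "03fe8945".
assert priority_option_id(["🟧 priority: high"]) == "03fe8945"
```
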
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
@@ -6,7 +6,7 @@ exclude: Pipfile\.lock|migrations|\.idea|node_modules|archive|retired

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.0.1
rev: v4.4.0
hooks:
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
@@ -40,26 +40,26 @@ repos:
- --lines-after-imports=2

- repo: https://github.com/asottile/pyupgrade
rev: v3.2.2
rev: v3.3.1
hooks:
- id: pyupgrade
args:
- --py310-plus

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: "v0.0.257"
rev: "v0.0.261"
hooks:
- id: ruff

- repo: https://github.com/ambv/black
rev: 22.3.0
rev: 23.3.0
hooks:
- id: black
args:
- --safe

- repo: https://github.com/pycqa/pydocstyle
rev: 6.2.2 # 6.2.3 is slightly broken
rev: 6.3.0
hooks:
- id: pydocstyle
args:
@@ -83,11 +83,11 @@ repos:
- [email protected]

- repo: https://github.com/koalaman/shellcheck-precommit
rev: v0.8.0
rev: v0.9.0
hooks:
- id: shellcheck

- repo: https://github.com/rhysd/actionlint
rev: main
rev: v1.6.24
hooks:
- id: actionlint-docker
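
The version bumps above (notably Black 22.3.0 to 23.3.0, Ruff 0.0.257 to 0.0.261, and pyupgrade 3.2.2 to 3.3.1) appear to drive most of the purely mechanical Python changes later in this diff: blank lines at the top of a function or class body are removed, and redundant parentheses around `for` and `with` targets are dropped. A minimal before/after sketch of the pattern, using hypothetical code rather than anything from the repository:

```python
# Before: style accepted by the older toolchain.
class OldStyleIngester:

    def iter_items(self):

        for (key, value) in {"a": 1}.items():
            yield key, value


# After: leading blank lines inside blocks and redundant parentheses are gone.
class NewStyleIngester:
    def iter_items(self):
        for key, value in {"a": 1}.items():
            yield key, value
```
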
4 changes: 2 additions & 2 deletions docker/airflow/entrypoint.sh
@@ -29,9 +29,9 @@ END
function header() {
size=${COLUMNS:-80}
# Print centered text between two dividers of length $size
printf '#%.0s' $(seq 1 $size) && echo
printf '#%.0s' $(seq 1 "$size") && echo
printf "%*s\n" $(( (${#1} + size) / 2)) "$1"
printf '#%.0s' $(seq 1 $size) && echo
printf '#%.0s' $(seq 1 "$size") && echo
}

if [ "$1" == help ] || [ "$1" == --help ]; then help_text && exit 0; fi
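
The only change to `header()` is quoting `$size` in the two `seq` calls so ShellCheck no longer flags the unquoted expansion; the output is identical. Purely for illustration, the formatting it produces is roughly equivalent to this Python sketch (the shell version right-aligns the title to a width of `(len + size) / 2` instead of padding both sides):

```python
import shutil


def header(title: str) -> None:
    # A full-width divider of '#', the title centered, then another divider.
    size = shutil.get_terminal_size(fallback=(80, 24)).columns
    print("#" * size)
    print(title.center(size).rstrip())
    print("#" * size)


header("Airflow worker")
```
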
1 change: 0 additions & 1 deletion openverse_catalog/dags/common/popularity/sql.py
@@ -73,7 +73,6 @@ def drop_media_popularity_relations(
metrics=IMAGE_POPULARITY_METRICS_TABLE_NAME,
pg_timeout: float = timedelta(minutes=10).total_seconds(),
):

if media_type == AUDIO:
db_view = AUDIO_VIEW_NAME
constants = AUDIO_POPULARITY_CONSTANTS_VIEW
1 change: 0 additions & 1 deletion openverse_catalog/dags/common/slack.py
@@ -96,7 +96,6 @@ def __init__(
unfurl_media: bool = True,
http_conn_id: str = SLACK_NOTIFICATIONS_CONN_ID,
):

self.http = HttpHook(method="POST", http_conn_id=http_conn_id)
self.blocks = []
self._context = {}
@@ -96,7 +96,7 @@ def check_configuration(github_pat: str, airflow_variable: str):
" a new issue."
)

for (dag, issue, predicate, task_id_pattern) in dags_to_reenable:
for dag, issue, predicate, task_id_pattern in dags_to_reenable:
dag_name = f"{dag} ({task_id_pattern})" if task_id_pattern else dag
message += f"\n - <{issue}|{dag_name}: '{predicate}'>"

@@ -43,7 +43,6 @@ class SuggestedSubProvider(NamedTuple):


class FlickrSubProviderAuditor:

endpoint = "https://www.flickr.com/services/rest"
retries = 2

@@ -162,7 +161,7 @@ def audit_flickr_sub_providers():
raise AirflowSkipException("No new potential sub-providers were identified.")

message = "Consider adding the following sub-providers for Flickr:"
for (name, nsid, cc_count) in potential_sub_providers:
for name, nsid, cc_count in potential_sub_providers:
message += "\n"
message += f"{name}: {nsid} _({cc_count} cc-licensed images)_"

@@ -53,7 +53,6 @@


class INaturalistDataIngester(ProviderDataIngester):

providers = {"image": provider_details.INATURALIST_DEFAULT_PROVIDER}

def get_next_query_params(self, prev_query_params=None, **kwargs):
@@ -293,9 +292,7 @@ def load_catalog_of_life_names(task: PythonOperator, remove_api_files: bool):

@staticmethod
def create_preingestion_tasks():

with TaskGroup(group_id="preingestion_tasks") as preingestion_tasks:

check_for_file_updates = PythonOperator(
task_id="check_for_file_updates",
python_callable=INaturalistDataIngester.compare_update_dates,
@@ -365,9 +362,7 @@ def create_postingestion_tasks():

@staticmethod
def create_ingestion_workflow():

with TaskGroup(group_id="ingest_data") as ingest_data:

preingestion_tasks = INaturalistDataIngester.create_preingestion_tasks()

with TaskGroup(group_id="pull_image_data") as pull_data:
@@ -380,7 +375,6 @@ def create_ingestion_workflow():
),

with TaskGroup(group_id="load_image_data") as loader_tasks:

# Using the existing set up, but the indexes on the temporary table
# probably slows down the load a bit.
create_loading_table = PythonOperator(
@@ -150,7 +150,6 @@ def get_record_data(self, data: dict) -> dict | list[dict] | None:
# Parse out the necessary info from the record data into a dictionary.
images = []
if image_list := self._get_image_list(data):

if (foreign_landing_url := self._get_foreign_landing_url(data)) is None:
return None

@@ -159,7 +159,7 @@ def _get_timestamp_pairs(self, **kwargs):
# portions.
hour_slices = self._get_timestamp_query_params_list(start_ts, end_ts, 24)

for (start_hour, end_hour) in hour_slices:
for start_hour, end_hour in hour_slices:
# Get the number of records in this hour interval
record_count = self._get_record_count(start_hour, end_hour, **kwargs)
if record_count == 0:
@@ -564,7 +564,7 @@ def extract_geo_data(media_data):
"map_datum": "GPSMapDatum",
}
geo_data = {}
for (key, value) in geo_properties.items():
for key, value in geo_properties.items():
key_value = media_data.get(value, {}).get("value")
if key_value:
geo_data[key] = key_value
4 changes: 2 additions & 2 deletions tests/dags/common/popularity/test_sql.py
@@ -345,7 +345,7 @@ def test_constants_view_adds_values_and_constants(
("my_provider", "views", 0.5, 50.0, 50.0, 50.0),
]
sorted_rows = sorted(list(postgres_with_image_table.cursor), key=lambda x: x[0])
for (expect_row, sorted_row) in zip(expect_rows, sorted_rows):
for expect_row, sorted_row in zip(expect_rows, sorted_rows):
assert expect_row == pytest.approx(sorted_row)


@@ -401,7 +401,7 @@ def test_constants_view_handles_zeros_and_missing(
("my_provider", "views", 0.8, 0.0, 1.0, 0.25),
]
sorted_rows = sorted(list(postgres_with_image_table.cursor), key=lambda x: x[0])
for (expect_row, sorted_row) in zip(expect_rows, sorted_rows):
for expect_row, sorted_row in zip(expect_rows, sorted_rows):
assert expect_row == pytest.approx(sorted_row)


@@ -96,7 +96,6 @@ def get_sample_id_list(sample_file, joined_on):


if __name__ == "__main__":

# PHOTOS
photo_ids = [
str(i)
@@ -193,7 +193,6 @@ def test_process_batch_halts_processing_after_reaching_ingestion_limit():
patch.object(image_store, "add_item"),
patch.object(ingester, "get_record_data") as get_record_data_mock,
):

# Mock `get_record_data` to return a list of 2 records
get_record_data_mock.return_value = MOCK_RECORD_DATA_LIST
record_count = ingester.process_batch(EXPECTED_BATCH_DATA)
@@ -237,7 +237,7 @@ def test_ingest_records_raises_error_if_the_total_count_has_been_exceeded():
)
# Assert that attempting to ingest records raises an exception when
# `should_raise_error` is enabled
with (pytest.raises(Exception, match=expected_error_string)):
with pytest.raises(Exception, match=expected_error_string):
ingester.ingest_records()

# get_mock should have been called 4 times, twice for each batch (once in `get_batch`