Skip to content

Commit

Permalink
[datalabeling] fix: clean up old datasets before the test
Browse files Browse the repository at this point in the history
fixes #3703
  • Loading branch information
Takashi Matsuo committed May 12, 2020
1 parent 0662e10 commit ebde329
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 0 deletions.
9 changes: 9 additions & 0 deletions datalabeling/manage_dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import backoff
from google.api_core.exceptions import DeadlineExceeded
from google.api_core.exceptions import RetryError
import pytest

import manage_dataset
Expand All @@ -40,6 +41,14 @@ def dataset():

@pytest.fixture(scope='module')
def cleaner():
# First delete old datasets.
try:
testing_lib.delete_old_datasets(PROJECT_ID)
# We see occasional RetryError while deleting old datasets.
# We can just ignore it and move on.
except RetryError as e:
print("delete_old_datasets failed: detail {}".format(e))

resource_names = []

yield resource_names
Expand Down
23 changes: 23 additions & 0 deletions datalabeling/testing_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
# limitations under the License.

import os
import time

import backoff
from google.api_core.client_options import ClientOptions
from google.api_core.exceptions import DeadlineExceeded
from google.api_core.exceptions import FailedPrecondition
from google.cloud import datalabeling_v1beta1 as datalabeling

import create_annotation_spec_set as annotation_spec_set_sample
Expand Down Expand Up @@ -48,6 +50,27 @@ def delete_dataset(name):
return dataset_sample.delete_dataset(name)


def delete_old_datasets(project_id, max_age_seconds=7200, delay_seconds=1):
    """Best-effort deletion of stale datasets in a project.

    Lists the datasets under ``project_id`` and attempts to delete every one
    whose ``create_time`` is older than ``max_age_seconds``.

    Args:
        project_id: ID of the project whose datasets are inspected.
        max_age_seconds: Datasets created more than this many seconds ago
            are deleted. Defaults to 7200 (two hours).
        delay_seconds: Pause, in seconds, after each delete attempt so the
            calls do not trip a quota error. Defaults to 1.

    A ``FailedPrecondition`` raised by an individual delete is printed and
    skipped so the remaining datasets are still processed; any other
    exception propagates to the caller.
    """
    client = create_client()
    formatted_project_name = client.project_path(project_id)

    response = client.list_datasets(formatted_project_name)
    # Anything created before this epoch timestamp is considered stale.
    cutoff_time = time.time() - max_age_seconds
    for element in response:
        if element.create_time.seconds >= cutoff_time:
            # Recent enough that it may belong to a test still running.
            continue

        print("Deleting {}".format(element.name))
        try:
            dataset_sample.delete_dataset(element.name)
        except FailedPrecondition as e:
            # We're always getting FailedPrecondition with 400
            # resource conflict. I don't know why.
            print("Deleting {} failed.".format(element.name))
            print("Detail: {}".format(e))
        # To avoid quota error
        time.sleep(delay_seconds)


@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE)
def create_annotation_spec_set(project_id):
    """Create an annotation spec set for ``project_id`` via the sample module.

    The call is retried with exponential backoff whenever it raises
    ``DeadlineExceeded``, bounded by ``RETRY_DEADLINE``.
    """
    created = annotation_spec_set_sample.create_annotation_spec_set(project_id)
    return created
Expand Down

0 comments on commit ebde329

Please sign in to comment.