diff --git a/.circleci/config.yml b/.circleci/config.yml
index a6969bf5..03f71d0a 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -29,7 +29,7 @@ jobs:
           name: 'Unit Tests'
           command: |
             source /usr/local/share/virtualenvs/tap-github/bin/activate
-            pip install nose coverage
+            pip install nose coverage parameterized
            nosetests --with-coverage --cover-erase --cover-package=tap_github --cover-html-dir=htmlcov tests/unittests
            coverage html
           when: always
diff --git a/MANIFEST.in b/MANIFEST.in
index 374734bd..68d81181 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,3 @@
 include LICENSE
 include tap_github/schemas/*.json
+include tap_github/schemas/shared/*.json
diff --git a/README.md b/README.md
index 3e956789..e8c4df01 100644
--- a/README.md
+++ b/README.md
@@ -53,15 +53,18 @@ This tap:
 3. Create the config file

    Create a JSON file containing the start date, access token you just created
-   and the path to one or multiple repositories that you want to extract data from. Each repo path should be space delimited. The repo path is relative to
-   `https://github.com/`. For example the path for this repository is
+   and the path to one or multiple repositories that you want to extract data from. Each repo path should be space delimited. The repo path is relative to `"base_url"`
+   (Default: `https://github.com/`). For example, the path for this repository is
    `singer-io/tap-github`. You can also add an optional `request_timeout` parameter to set the timeout for requests (default: 300 seconds).

    ```json
-   {"access_token": "your-access-token",
-    "repository": "singer-io/tap-github singer-io/getting-started",
-    "start_date": "2021-01-01T00:00:00Z",
-    "request_timeout": 300}
+   {
+     "access_token": "your-access-token",
+     "repository": "singer-io/tap-github singer-io/getting-started",
+     "start_date": "2021-01-01T00:00:00Z",
+     "request_timeout": 300,
+     "base_url": "https://api.github.com"
+   }
    ```

 4. Run the tap in discovery mode to get the properties.json file, as shown below.
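   A typical invocation of steps 4 and 5 looks like the following (a sketch, assuming the tap is installed in the active virtualenv and the config above is saved as `config.json`; the file names are illustrative):

   ```bash
   # Discover the available streams and write the catalog
   tap-github --config config.json --discover > properties.json

   # Run a sync using the generated properties file
   tap-github --config config.json --properties properties.json
   ```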
diff --git a/config.sample.json b/config.sample.json
index aff4e2f1..61df3707 100644
--- a/config.sample.json
+++ b/config.sample.json
@@ -2,5 +2,6 @@
   "access_token": "abcdefghijklmnopqrstuvwxyz1234567890ABCD",
   "repository": "singer-io/target-stitch",
   "start_date": "2021-01-01T00:00:00Z",
-  "request_timeout": 300
+  "request_timeout": 300,
+  "base_url": "https://api.github.com"
 }
diff --git a/tap_github/__init__.py b/tap_github/__init__.py
index 5c2d768a..cc93a061 100644
--- a/tap_github/__init__.py
+++ b/tap_github/__init__.py
@@ -1,1193 +1,42 @@
-import os
 import json
-import collections
-import time
-import requests
-import backoff
+import sys
 import singer
+from tap_github.discover import discover as _discover
+from tap_github.client import GithubClient
+from tap_github.sync import sync as _sync

-from singer import (bookmarks, metrics, metadata)
-from simplejson import JSONDecodeError
-
-session = requests.Session()
-logger = singer.get_logger()
-
-# set default timeout of 300 seconds
-REQUEST_TIMEOUT = 300
+LOGGER = singer.get_logger()

 REQUIRED_CONFIG_KEYS = ['start_date', 'access_token', 'repository']

-KEY_PROPERTIES = {
-    'commits': ['sha'],
-    'comments': ['id'],
-    'issues': ['id'],
-    'assignees': ['id'],
-    'collaborators': ['id'],
-    'pull_requests':['id'],
-    'stargazers': ['user_id'],
-    'releases': ['id'],
-    'reviews': ['id'],
-    'review_comments': ['id'],
-    'pr_commits': ['id'],
-    'events': ['id'],
-    'issue_events': ['id'],
-    'issue_labels': ['id'],
-    'issue_milestones': ['id'],
-    'commit_comments': ['id'],
-    'projects': ['id'],
-    'project_columns': ['id'],
-    'project_cards': ['id'],
-    'repos': ['id'],
-    'teams': ['id'],
-    'team_members': ['id', 'team_slug'],
-    'team_memberships': ['url']
-}
-
-DEFAULT_SLEEP_SECONDS = 600
-MAX_SLEEP_SECONDS = DEFAULT_SLEEP_SECONDS
-
-class GithubException(Exception):
-    pass
-
-class BadCredentialsException(GithubException):
-    pass
-
-class AuthException(GithubException):
-    pass
-
-class NotFoundException(GithubException):
-    pass
-
-class BadRequestException(GithubException):
-    pass
-
-class InternalServerError(GithubException):
-    pass
-
-class UnprocessableError(GithubException):
-    pass
-
-class NotModifiedError(GithubException):
-    pass
-
-class MovedPermanentlyError(GithubException):
-    pass
-
-class ConflictError(GithubException):
-    pass
-
-class RateLimitExceeded(GithubException):
-    pass
-
-ERROR_CODE_EXCEPTION_MAPPING = {
-    301: {
-        "raise_exception": MovedPermanentlyError,
-        "message": "The resource you are looking for is moved to another URL."
-    },
-    304: {
-        "raise_exception": NotModifiedError,
-        "message": "The requested resource has not been modified since the last time you accessed it."
-    },
-    400:{
-        "raise_exception": BadRequestException,
-        "message": "The request is missing or has a bad parameter."
-    },
-    401: {
-        "raise_exception": BadCredentialsException,
-        "message": "Invalid authorization credentials."
-    },
-    403: {
-        "raise_exception": AuthException,
-        "message": "User doesn't have permission to access the resource."
-    },
-    404: {
-        "raise_exception": NotFoundException,
-        "message": "The resource you have specified cannot be found. Alternatively the access_token is not valid for the resource"
-    },
-    409: {
-        "raise_exception": ConflictError,
-        "message": "The request could not be completed due to a conflict with the current state of the server."
-    },
-    422: {
-        "raise_exception": UnprocessableError,
-        "message": "The request was not able to process right now."
- }, - 500: { - "raise_exception": InternalServerError, - "message": "An error has occurred at Github's end." - } -} - -def translate_state(state, catalog, repositories): - ''' - This tap used to only support a single repository, in which case the - state took the shape of: - { - "bookmarks": { - "commits": { - "since": "2018-11-14T13:21:20.700360Z" - } - } - } - The tap now supports multiple repos, so this function should be called - at the beginning of each run to ensure the state is translate to the - new format: - { - "bookmarks": { - "singer-io/tap-adwords": { - "commits": { - "since": "2018-11-14T13:21:20.700360Z" - } - } - "singer-io/tap-salesforce": { - "commits": { - "since": "2018-11-14T13:21:20.700360Z" - } - } - } - } - ''' - nested_dict = lambda: collections.defaultdict(nested_dict) - new_state = nested_dict() - - for stream in catalog['streams']: - stream_name = stream['tap_stream_id'] - for repo in repositories: - if bookmarks.get_bookmark(state, repo, stream_name): - return state - if bookmarks.get_bookmark(state, stream_name, 'since'): - new_state['bookmarks'][repo][stream_name]['since'] = bookmarks.get_bookmark(state, stream_name, 'since') - - return new_state - - -def get_bookmark(state, repo, stream_name, bookmark_key, start_date): - repo_stream_dict = bookmarks.get_bookmark(state, repo, stream_name) - if repo_stream_dict: - return repo_stream_dict.get(bookmark_key) - if start_date: - return start_date - return None - -def raise_for_error(resp, source): - error_code = resp.status_code - try: - response_json = resp.json() - except JSONDecodeError: - response_json = {} - - if error_code == 404: - details = ERROR_CODE_EXCEPTION_MAPPING.get(error_code).get("message") - if source == "teams": - details += ' or it is a personal account repository' - message = "HTTP-error-code: 404, Error: {}. Please refer \'{}\' for more details.".format(details, response_json.get("documentation_url")) - logger.info(message) - # don't raise a NotFoundException - return None - - message = "HTTP-error-code: {}, Error: {}".format( - error_code, ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get("message", "Unknown Error") if response_json == {} else response_json) - - exc = ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get("raise_exception", GithubException) - raise exc(message) from None - -def calculate_seconds(epoch): - current = time.time() - return int(round((epoch - current), 0)) - -def rate_throttling(response): - if int(response.headers['X-RateLimit-Remaining']) == 0: - seconds_to_sleep = calculate_seconds(int(response.headers['X-RateLimit-Reset'])) - - if seconds_to_sleep > MAX_SLEEP_SECONDS: - message = "API rate limit exceeded, please try after {} seconds.".format(seconds_to_sleep) - raise RateLimitExceeded(message) from None - - logger.info("API rate limit exceeded. Tap will retry the data collection after %s seconds.", seconds_to_sleep) - time.sleep(seconds_to_sleep) - -# pylint: disable=dangerous-default-value -# during 'Timeout' error there is also possibility of 'ConnectionError', -# hence added backoff for 'ConnectionError' too. 
-@backoff.on_exception(backoff.expo, (requests.Timeout, requests.ConnectionError), max_tries=5, factor=2) -def authed_get(source, url, headers={}): - with metrics.http_request_timer(source) as timer: - session.headers.update(headers) - resp = session.request(method='get', url=url, timeout=get_request_timeout()) - if resp.status_code != 200: - raise_for_error(resp, source) - timer.tags[metrics.Tag.http_status_code] = resp.status_code - rate_throttling(resp) - if resp.status_code == 404: - # return an empty response body since we're not raising a NotFoundException - resp._content = b'{}' # pylint: disable=protected-access - return resp - -def authed_get_all_pages(source, url, headers={}): - while True: - r = authed_get(source, url, headers) - yield r - if 'next' in r.links: - url = r.links['next']['url'] - else: - break - -def get_abs_path(path): - return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) - - -def generate_pr_commit_schema(commit_schema): - pr_commit_schema = commit_schema.copy() - pr_commit_schema['properties']['pr_number'] = { - "type": ["null", "integer"] - } - pr_commit_schema['properties']['pr_id'] = { - "type": ["null", "string"] - } - pr_commit_schema['properties']['id'] = { - "type": ["null", "string"] - } - - return pr_commit_schema - -def load_schemas(): - schemas = {} - - for filename in os.listdir(get_abs_path('schemas')): - path = get_abs_path('schemas') + '/' + filename - file_raw = filename.replace('.json', '') - with open(path, encoding='utf-8') as file: - schemas[file_raw] = json.load(file) - - schemas['pr_commits'] = generate_pr_commit_schema(schemas['commits']) - return schemas - -class DependencyException(Exception): - pass - -def validate_dependencies(selected_stream_ids): - errs = [] - msg_tmpl = ("Unable to extract '{0}' data, " - "to receive '{0}' data, you also need to select '{1}'.") - - for main_stream, sub_streams in SUB_STREAMS.items(): - if main_stream not in selected_stream_ids: - for sub_stream in sub_streams: - if sub_stream in selected_stream_ids: - errs.append(msg_tmpl.format(sub_stream, main_stream)) - - if errs: - raise DependencyException(" ".join(errs)) - - -def write_metadata(mdata, values, breadcrumb): - mdata.append( - { - 'metadata': values, - 'breadcrumb': breadcrumb - } - ) - -def populate_metadata(schema_name, schema): - mdata = metadata.new() - #mdata = metadata.write(mdata, (), 'forced-replication-method', KEY_PROPERTIES[schema_name]) - mdata = metadata.write(mdata, (), 'table-key-properties', KEY_PROPERTIES[schema_name]) - - for field_name in schema['properties'].keys(): - if field_name in KEY_PROPERTIES[schema_name]: - mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'automatic') - else: - mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'available') - - return mdata - -def get_catalog(): - raw_schemas = load_schemas() - streams = [] - - for schema_name, schema in raw_schemas.items(): - - # get metadata for each field - mdata = populate_metadata(schema_name, schema) - - # create and add catalog entry - catalog_entry = { - 'stream': schema_name, - 'tap_stream_id': schema_name, - 'schema': schema, - 'metadata' : metadata.to_list(mdata), - 'key_properties': KEY_PROPERTIES[schema_name], - } - streams.append(catalog_entry) - - return {'streams': streams} - -def get_all_repos(organizations: list) -> list: +def do_discover(client): """ - Retrieves all repositories for the provided organizations and - verifies basic access for them. 
- - Docs: https://docs.github.com/en/rest/reference/repos#list-organization-repositories + Call the discovery function. """ - repos = [] - - for org_path in organizations: - org = org_path.split('/')[0] - for response in authed_get_all_pages( - 'get_all_repos', - 'https://api.github.com/orgs/{}/repos?sort=created&direction=desc'.format(org) - ): - org_repos = response.json() - - for repo in org_repos: - repo_full_name = repo.get('full_name') - - logger.info("Verifying access of repository: %s", repo_full_name) - verify_repo_access( - 'https://api.github.com/repos/{}/commits'.format(repo_full_name), - repo - ) + catalog = _discover(client) + # Dump catalog + json.dump(catalog, sys.stdout, indent=2) - repos.append(repo_full_name) - - return repos - -def extract_repos_from_config(config: dict ) -> list: +@singer.utils.handle_top_exception(LOGGER) +def main(): """ - Extracts all repositories from the config and calls get_all_repos() - for organizations using the wildcard 'org/*' format. + Run discover mode or sync mode. """ - repo_paths = list(filter(None, config['repository'].split(' '))) - - orgs_with_all_repos = list(filter(lambda x: x.split('/')[1] == '*', repo_paths)) - - if orgs_with_all_repos: - # remove any wildcard "org/*" occurrences from `repo_paths` - repo_paths = list(set(repo_paths).difference(set(orgs_with_all_repos))) - - # get all repositores for an org in the config - all_repos = get_all_repos(orgs_with_all_repos) - - # update repo_paths - repo_paths.extend(all_repos) - - return repo_paths - -def verify_repo_access(url_for_repo, repo): - try: - authed_get("verifying repository access", url_for_repo) - except NotFoundException: - # throwing user-friendly error message as it checks token access - message = "HTTP-error-code: 404, Error: Please check the repository name \'{}\' or you do not have sufficient permissions to access this repository.".format(repo) - raise NotFoundException(message) from None - -def verify_access_for_repo(config): - - access_token = config['access_token'] - session.headers.update({'authorization': 'token ' + access_token, 'per_page': '1', 'page': '1'}) - - repositories = extract_repos_from_config(config) - - for repo in repositories: - logger.info("Verifying access of repository: %s", repo) - - url_for_repo = "https://api.github.com/repos/{}/commits".format(repo) - - # Verifying for Repo access - verify_repo_access(url_for_repo, repo) - -def do_discover(config): - verify_access_for_repo(config) - catalog = get_catalog() - # dump catalog - print(json.dumps(catalog, indent=2)) - -def get_all_teams(schemas, repo_path, state, mdata, _start_date): - org = repo_path.split('/')[0] - with metrics.record_counter('teams') as counter: - for response in authed_get_all_pages( - 'teams', - 'https://api.github.com/orgs/{}/teams?sort=created_at&direction=desc'.format(org) - ): - teams = response.json() - extraction_time = singer.utils.now() - - for r in teams: - team_slug = r.get('slug') - r['_sdc_repository'] = repo_path - - # transform and write release record - with singer.Transformer() as transformer: - rec = transformer.transform(r, schemas['teams'], metadata=metadata.to_map(mdata['teams'])) - singer.write_record('teams', rec, time_extracted=extraction_time) - counter.increment() - - if schemas.get('team_members'): - for team_members_rec in get_all_team_members(team_slug, schemas['team_members'], repo_path, state, mdata['team_members']): - singer.write_record('team_members', team_members_rec, time_extracted=extraction_time) - - if schemas.get('team_memberships'): 
- for team_memberships_rec in get_all_team_memberships(team_slug, schemas['team_memberships'], repo_path, state, mdata['team_memberships']): - singer.write_record('team_memberships', team_memberships_rec, time_extracted=extraction_time) - - return state - -def get_all_team_members(team_slug, schemas, repo_path, state, mdata): - org = repo_path.split('/')[0] - with metrics.record_counter('team_members') as counter: - for response in authed_get_all_pages( - 'team_members', - 'https://api.github.com/orgs/{}/teams/{}/members?sort=created_at&direction=desc'.format(org, team_slug) - ): - team_members = response.json() - for r in team_members: - r['_sdc_repository'] = repo_path - r['team_slug'] = team_slug - - # transform and write release record - with singer.Transformer() as transformer: - rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata)) - counter.increment() - - yield rec - - return state - -def get_all_team_memberships(team_slug, schemas, repo_path, state, mdata): - org = repo_path.split('/')[0] - for response in authed_get_all_pages( - 'team_members', - 'https://api.github.com/orgs/{}/teams/{}/members?sort=created_at&direction=desc'.format(org, team_slug) - ): - team_members = response.json() - with metrics.record_counter('team_memberships') as counter: - for r in team_members: - username = r['login'] - for res in authed_get_all_pages( - 'memberships', - 'https://api.github.com/orgs/{}/teams/{}/memberships/{}'.format(org, team_slug, username) - ): - team_membership = res.json() - team_membership['_sdc_repository'] = repo_path - with singer.Transformer() as transformer: - rec = transformer.transform(team_membership, schemas, metadata=metadata.to_map(mdata)) - counter.increment() - yield rec - return state - - -def get_all_issue_events(schemas, repo_path, state, mdata, start_date): - bookmark_value = get_bookmark(state, repo_path, "issue_events", "since", start_date) - if bookmark_value: - bookmark_time = singer.utils.strptime_to_utc(bookmark_value) - else: - bookmark_time = 0 - - - with metrics.record_counter('issue_events') as counter: - for response in authed_get_all_pages( - 'issue_events', - 'https://api.github.com/repos/{}/issues/events?sort=created_at&direction=desc'.format(repo_path) - ): - events = response.json() - extraction_time = singer.utils.now() - for event in events: - event['_sdc_repository'] = repo_path - # skip records that haven't been updated since the last run - # the GitHub API doesn't currently allow a ?since param for pulls - # once we find the first piece of old data we can return, thanks to - # the sorting - updated_at = event.get('created_at') if event.get('updated_at') is None else event.get('updated_at') - if bookmark_time and singer.utils.strptime_to_utc(updated_at) < bookmark_time: - return state - - # transform and write release record - with singer.Transformer() as transformer: - rec = transformer.transform(event, schemas, metadata=metadata.to_map(mdata)) - singer.write_record('issue_events', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'issue_events', {'since': singer.utils.strftime(extraction_time)}) - counter.increment() - - return state - - -def get_all_events(schemas, repo_path, state, mdata, start_date): - # Incremental sync off `created_at` - # https://developer.github.com/v3/issues/events/#list-events-for-a-repository - # 'https://api.github.com/repos/{}/issues/events?sort=created_at&direction=desc'.format(repo_path) - - bookmark_value = get_bookmark(state, repo_path, "events", "since", 
start_date) - if bookmark_value: - bookmark_time = singer.utils.strptime_to_utc(bookmark_value) - else: - bookmark_time = 0 - - with metrics.record_counter('events') as counter: - for response in authed_get_all_pages( - 'events', - 'https://api.github.com/repos/{}/events?sort=created_at&direction=desc'.format(repo_path) - ): - events = response.json() - extraction_time = singer.utils.now() - for r in events: - r['_sdc_repository'] = repo_path - - # skip records that haven't been updated since the last run - # the GitHub API doesn't currently allow a ?since param for pulls - # once we find the first piece of old data we can return, thanks to - # the sorting - updated_at = r.get('created_at') if r.get('updated_at') is None else r.get('updated_at') - if bookmark_time and singer.utils.strptime_to_utc(updated_at) < bookmark_time: - return state - - # transform and write release record - with singer.Transformer() as transformer: - rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata)) - singer.write_record('events', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'events', {'since': singer.utils.strftime(extraction_time)}) - counter.increment() - - return state - -def get_all_issue_milestones(schemas, repo_path, state, mdata, start_date): - # Incremental sync off `due on` ??? confirm. - # https://developer.github.com/v3/issues/milestones/#list-milestones-for-a-repository - # 'https://api.github.com/repos/{}/milestones?sort=created_at&direction=desc'.format(repo_path) - bookmark_value = get_bookmark(state, repo_path, "issue_milestones", "since", start_date) - if bookmark_value: - bookmark_time = singer.utils.strptime_to_utc(bookmark_value) - else: - bookmark_time = 0 - - with metrics.record_counter('issue_milestones') as counter: - for response in authed_get_all_pages( - 'milestones', - 'https://api.github.com/repos/{}/milestones?direction=desc'.format(repo_path) - ): - milestones = response.json() - extraction_time = singer.utils.now() - for r in milestones: - r['_sdc_repository'] = repo_path - - # skip records that haven't been updated since the last run - # the GitHub API doesn't currently allow a ?since param for pulls - # once we find the first piece of old data we can return, thanks to - # the sorting - if bookmark_time and r.get("due_on") and singer.utils.strptime_to_utc(r.get("due_on")) < bookmark_time: - continue - - # transform and write release record - with singer.Transformer() as transformer: - rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata)) - singer.write_record('issue_milestones', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'issue_milestones', {'since': singer.utils.strftime(extraction_time)}) - counter.increment() - - return state - -def get_all_issue_labels(schemas, repo_path, state, mdata, _start_date): - # https://developer.github.com/v3/issues/labels/ - # not sure if incremental key - # 'https://api.github.com/repos/{}/labels?sort=created_at&direction=desc'.format(repo_path) - - with metrics.record_counter('issue_labels') as counter: - for response in authed_get_all_pages( - 'issue_labels', - 'https://api.github.com/repos/{}/labels'.format(repo_path) - ): - issue_labels = response.json() - extraction_time = singer.utils.now() - for r in issue_labels: - r['_sdc_repository'] = repo_path - - # transform and write release record - with singer.Transformer() as transformer: - rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata)) - 
singer.write_record('issue_labels', rec, time_extracted=extraction_time) - counter.increment() - - return state - -def get_all_commit_comments(schemas, repo_path, state, mdata, start_date): - # https://developer.github.com/v3/repos/comments/ - # updated_at? incremental - # 'https://api.github.com/repos/{}/comments?sort=created_at&direction=desc'.format(repo_path) - bookmark_value = get_bookmark(state, repo_path, "commit_comments", "since", start_date) - if bookmark_value: - bookmark_time = singer.utils.strptime_to_utc(bookmark_value) - else: - bookmark_time = 0 - - with metrics.record_counter('commit_comments') as counter: - for response in authed_get_all_pages( - 'commit_comments', - 'https://api.github.com/repos/{}/comments?sort=created_at&direction=desc'.format(repo_path) - ): - commit_comments = response.json() - extraction_time = singer.utils.now() - for r in commit_comments: - r['_sdc_repository'] = repo_path - - # skip records that haven't been updated since the last run - # the GitHub API doesn't currently allow a ?since param for pulls - # once we find the first piece of old data we can return, thanks to - # the sorting - if bookmark_time and singer.utils.strptime_to_utc(r.get('updated_at')) < bookmark_time: - return state - - # transform and write release record - with singer.Transformer() as transformer: - rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata)) - singer.write_record('commit_comments', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'commit_comments', {'since': singer.utils.strftime(extraction_time)}) - counter.increment() - - return state - -def get_all_projects(schemas, repo_path, state, mdata, start_date): - bookmark_value = get_bookmark(state, repo_path, "projects", "since", start_date) - if bookmark_value: - bookmark_time = singer.utils.strptime_to_utc(bookmark_value) - else: - bookmark_time = 0 - - with metrics.record_counter('projects') as counter: - #pylint: disable=too-many-nested-blocks - for response in authed_get_all_pages( - 'projects', - 'https://api.github.com/repos/{}/projects?sort=created_at&direction=desc'.format(repo_path), - { 'Accept': 'application/vnd.github.inertia-preview+json' } - ): - projects = response.json() - extraction_time = singer.utils.now() - for r in projects: - r['_sdc_repository'] = repo_path - - # skip records that haven't been updated since the last run - # the GitHub API doesn't currently allow a ?since param for pulls - # once we find the first piece of old data we can return, thanks to - # the sorting - if bookmark_time and singer.utils.strptime_to_utc(r.get('updated_at')) < bookmark_time: - return state - - # transform and write release record - with singer.Transformer() as transformer: - rec = transformer.transform(r, schemas['projects'], metadata=metadata.to_map(mdata['projects'])) - singer.write_record('projects', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'projects', {'since': singer.utils.strftime(extraction_time)}) - counter.increment() - - project_id = r.get('id') - - - - # sync project_columns if that schema is present (only there if selected) - if schemas.get('project_columns'): - for project_column_rec in get_all_project_columns(project_id, schemas['project_columns'], repo_path, state, mdata['project_columns'], start_date): - singer.write_record('project_columns', project_column_rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'project_columns', {'since': singer.utils.strftime(extraction_time)}) - 
- # sync project_cards if that schema is present (only there if selected) - if schemas.get('project_cards'): - column_id = project_column_rec['id'] - for project_card_rec in get_all_project_cards(column_id, schemas['project_cards'], repo_path, state, mdata['project_cards'], start_date): - singer.write_record('project_cards', project_card_rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'project_cards', {'since': singer.utils.strftime(extraction_time)}) - return state - - -def get_all_project_cards(column_id, schemas, repo_path, state, mdata, start_date): - bookmark_value = get_bookmark(state, repo_path, "project_cards", "since", start_date) - if bookmark_value: - bookmark_time = singer.utils.strptime_to_utc(bookmark_value) - else: - bookmark_time = 0 - - with metrics.record_counter('project_cards') as counter: - for response in authed_get_all_pages( - 'project_cards', - 'https://api.github.com/projects/columns/{}/cards?sort=created_at&direction=desc'.format(column_id) - ): - project_cards = response.json() - for r in project_cards: - r['_sdc_repository'] = repo_path - - # skip records that haven't been updated since the last run - # the GitHub API doesn't currently allow a ?since param for pulls - # once we find the first piece of old data we can return, thanks to - # the sorting - if bookmark_time and singer.utils.strptime_to_utc(r.get('updated_at')) < bookmark_time: - return state - - # transform and write release record - with singer.Transformer() as transformer: - rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata)) - counter.increment() - yield rec - - return state - -def get_all_project_columns(project_id, schemas, repo_path, state, mdata, start_date): - bookmark_value = get_bookmark(state, repo_path, "project_columns", "since", start_date) - if bookmark_value: - bookmark_time = singer.utils.strptime_to_utc(bookmark_value) - else: - bookmark_time = 0 - - with metrics.record_counter('project_columns') as counter: - for response in authed_get_all_pages( - 'project_columns', - 'https://api.github.com/projects/{}/columns?sort=created_at&direction=desc'.format(project_id) - ): - project_columns = response.json() - for r in project_columns: - r['_sdc_repository'] = repo_path - - # skip records that haven't been updated since the last run - # the GitHub API doesn't currently allow a ?since param for pulls - # once we find the first piece of old data we can return, thanks to - # the sorting - if bookmark_time and singer.utils.strptime_to_utc(r.get('updated_at')) < bookmark_time: - return state - - # transform and write release record - with singer.Transformer() as transformer: - rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata)) - counter.increment() - yield rec - - return state - -def get_all_releases(schemas, repo_path, state, mdata, _start_date): - # Releases doesn't seem to have an `updated_at` property, yet can be edited. - # For this reason and since the volume of release can safely be considered low, - # bookmarks were ignored for releases. 
- - with metrics.record_counter('releases') as counter: - for response in authed_get_all_pages( - 'releases', - 'https://api.github.com/repos/{}/releases?sort=created_at&direction=desc'.format(repo_path) - ): - releases = response.json() - extraction_time = singer.utils.now() - for r in releases: - r['_sdc_repository'] = repo_path - - # transform and write release record - with singer.Transformer() as transformer: - rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata)) - singer.write_record('releases', rec, time_extracted=extraction_time) - counter.increment() - - return state - -def get_all_pull_requests(schemas, repo_path, state, mdata, start_date): - ''' - https://developer.github.com/v3/pulls/#list-pull-requests - ''' - - bookmark_value = get_bookmark(state, repo_path, "pull_requests", "since", start_date) - if bookmark_value: - bookmark_time = singer.utils.strptime_to_utc(bookmark_value) - else: - bookmark_time = 0 - - with metrics.record_counter('pull_requests') as counter: - with metrics.record_counter('reviews') as reviews_counter: - for response in authed_get_all_pages( - 'pull_requests', - 'https://api.github.com/repos/{}/pulls?state=all&sort=updated&direction=desc'.format(repo_path) - ): - pull_requests = response.json() - extraction_time = singer.utils.now() - for pr in pull_requests: - - - # skip records that haven't been updated since the last run - # the GitHub API doesn't currently allow a ?since param for pulls - # once we find the first piece of old data we can return, thanks to - # the sorting - if bookmark_time and singer.utils.strptime_to_utc(pr.get('updated_at')) < bookmark_time: - return state - - pr_num = pr.get('number') - pr_id = pr.get('id') - pr['_sdc_repository'] = repo_path - - # transform and write pull_request record - with singer.Transformer() as transformer: - rec = transformer.transform(pr, schemas['pull_requests'], metadata=metadata.to_map(mdata['pull_requests'])) - singer.write_record('pull_requests', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'pull_requests', {'since': singer.utils.strftime(extraction_time)}) - counter.increment() - - # sync reviews if that schema is present (only there if selected) - if schemas.get('reviews'): - for review_rec in get_reviews_for_pr(pr_num, schemas['reviews'], repo_path, state, mdata['reviews']): - singer.write_record('reviews', review_rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'reviews', {'since': singer.utils.strftime(extraction_time)}) - - reviews_counter.increment() - - # sync review comments if that schema is present (only there if selected) - if schemas.get('review_comments'): - for review_comment_rec in get_review_comments_for_pr(pr_num, schemas['review_comments'], repo_path, state, mdata['review_comments']): - singer.write_record('review_comments', review_comment_rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'review_comments', {'since': singer.utils.strftime(extraction_time)}) - - if schemas.get('pr_commits'): - for pr_commit in get_commits_for_pr( - pr_num, - pr_id, - schemas['pr_commits'], - repo_path, - state, - mdata['pr_commits'] - ): - singer.write_record('pr_commits', pr_commit, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'pr_commits', {'since': singer.utils.strftime(extraction_time)}) - - return state - -def get_reviews_for_pr(pr_number, schema, repo_path, state, mdata): - for response in authed_get_all_pages( - 'reviews', - 
'https://api.github.com/repos/{}/pulls/{}/reviews'.format(repo_path,pr_number) - ): - reviews = response.json() - for review in reviews: - review['_sdc_repository'] = repo_path - with singer.Transformer() as transformer: - rec = transformer.transform(review, schema, metadata=metadata.to_map(mdata)) - yield rec - - - return state - -def get_review_comments_for_pr(pr_number, schema, repo_path, state, mdata): - for response in authed_get_all_pages( - 'comments', - 'https://api.github.com/repos/{}/pulls/{}/comments'.format(repo_path,pr_number) - ): - review_comments = response.json() - for comment in review_comments: - comment['_sdc_repository'] = repo_path - with singer.Transformer() as transformer: - rec = transformer.transform(comment, schema, metadata=metadata.to_map(mdata)) - yield rec - - - return state - -def get_commits_for_pr(pr_number, pr_id, schema, repo_path, state, mdata): - for response in authed_get_all_pages( - 'pr_commits', - 'https://api.github.com/repos/{}/pulls/{}/commits'.format(repo_path,pr_number) - ): - - commit_data = response.json() - for commit in commit_data: - commit['_sdc_repository'] = repo_path - commit['pr_number'] = pr_number - commit['pr_id'] = pr_id - commit['id'] = '{}-{}'.format(pr_id, commit['sha']) - with singer.Transformer() as transformer: - rec = transformer.transform(commit, schema, metadata=metadata.to_map(mdata)) - yield rec - - return state - - -def get_all_assignees(schema, repo_path, state, mdata, _start_date): - ''' - https://developer.github.com/v3/issues/assignees/#list-assignees - ''' - with metrics.record_counter('assignees') as counter: - for response in authed_get_all_pages( - 'assignees', - 'https://api.github.com/repos/{}/assignees'.format(repo_path) - ): - assignees = response.json() - extraction_time = singer.utils.now() - for assignee in assignees: - assignee['_sdc_repository'] = repo_path - with singer.Transformer() as transformer: - rec = transformer.transform(assignee, schema, metadata=metadata.to_map(mdata)) - singer.write_record('assignees', rec, time_extracted=extraction_time) - counter.increment() - - return state - -def get_all_collaborators(schema, repo_path, state, mdata, _start_date): - ''' - https://developer.github.com/v3/repos/collaborators/#list-collaborators - ''' - with metrics.record_counter('collaborators') as counter: - try: - responses = authed_get_all_pages( - 'collaborators', - 'https://api.github.com/repos/{}/collaborators'.format(repo_path) - ) - except NotFoundException as error: - logger.info( - 'Unable to retreive collaborators stream, check access_token is valid for %s. 
See full error message: %s', - repo_path, error - ) - else: - for response in responses: - collaborators = response.json() - extraction_time = singer.utils.now() - for collaborator in collaborators: - collaborator['_sdc_repository'] = repo_path - with singer.Transformer() as transformer: - rec = transformer.transform(collaborator, schema, metadata=metadata.to_map(mdata)) - singer.write_record('collaborators', rec, time_extracted=extraction_time) - counter.increment() - - return state - -def get_all_commits(schema, repo_path, state, mdata, start_date): - ''' - https://developer.github.com/v3/repos/commits/#list-commits-on-a-repository - ''' - bookmark = get_bookmark(state, repo_path, "commits", "since", start_date) - if bookmark: - query_string = '?since={}'.format(bookmark) - else: - query_string = '' - - with metrics.record_counter('commits') as counter: - for response in authed_get_all_pages( - 'commits', - 'https://api.github.com/repos/{}/commits{}'.format(repo_path, query_string) - ): - commits = response.json() - extraction_time = singer.utils.now() - for commit in commits: - commit['_sdc_repository'] = repo_path - with singer.Transformer() as transformer: - rec = transformer.transform(commit, schema, metadata=metadata.to_map(mdata)) - singer.write_record('commits', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'commits', {'since': singer.utils.strftime(extraction_time)}) - counter.increment() - - return state - -def get_all_issues(schema, repo_path, state, mdata, start_date): - ''' - https://developer.github.com/v3/issues/#list-issues-for-a-repository - ''' - - bookmark = get_bookmark(state, repo_path, "issues", "since", start_date) - if bookmark: - query_string = '&since={}'.format(bookmark) - else: - query_string = '' - - with metrics.record_counter('issues') as counter: - for response in authed_get_all_pages( - 'issues', - 'https://api.github.com/repos/{}/issues?state=all&sort=updated&direction=asc{}'.format(repo_path, query_string) - ): - issues = response.json() - extraction_time = singer.utils.now() - for issue in issues: - issue['_sdc_repository'] = repo_path - with singer.Transformer() as transformer: - rec = transformer.transform(issue, schema, metadata=metadata.to_map(mdata)) - singer.write_record('issues', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'issues', {'since': singer.utils.strftime(extraction_time)}) - counter.increment() - return state - -def get_all_comments(schema, repo_path, state, mdata, start_date): - ''' - https://developer.github.com/v3/issues/comments/#list-comments-in-a-repository - ''' - - bookmark = get_bookmark(state, repo_path, "comments", "since", start_date) - if bookmark: - query_string = '&since={}'.format(bookmark) - else: - query_string = '' - - with metrics.record_counter('comments') as counter: - for response in authed_get_all_pages( - 'comments', - 'https://api.github.com/repos/{}/issues/comments?sort=updated&direction=asc{}'.format(repo_path, query_string) - ): - comments = response.json() - extraction_time = singer.utils.now() - for comment in comments: - comment['_sdc_repository'] = repo_path - with singer.Transformer() as transformer: - rec = transformer.transform(comment, schema, metadata=metadata.to_map(mdata)) - singer.write_record('comments', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'comments', {'since': singer.utils.strftime(extraction_time)}) - counter.increment() - return state - -def get_all_stargazers(schema, repo_path, 
state, mdata, _start_date): - ''' - https://developer.github.com/v3/activity/starring/#list-stargazers - ''' - - stargazers_headers = {'Accept': 'application/vnd.github.v3.star+json'} - - with metrics.record_counter('stargazers') as counter: - for response in authed_get_all_pages( - 'stargazers', - 'https://api.github.com/repos/{}/stargazers'.format(repo_path), stargazers_headers - ): - stargazers = response.json() - extraction_time = singer.utils.now() - for stargazer in stargazers: - user_id = stargazer['user']['id'] - stargazer['_sdc_repository'] = repo_path - with singer.Transformer() as transformer: - rec = transformer.transform(stargazer, schema, metadata=metadata.to_map(mdata)) - rec['user_id'] = user_id - singer.write_record('stargazers', rec, time_extracted=extraction_time) - counter.increment() - - return state - -def get_selected_streams(catalog): - ''' - Gets selected streams. Checks schema's 'selected' - first -- and then checks metadata, looking for an empty - breadcrumb and mdata with a 'selected' entry - ''' - selected_streams = [] - for stream in catalog['streams']: - stream_metadata = stream['metadata'] - if stream['schema'].get('selected', False): - selected_streams.append(stream['tap_stream_id']) - else: - for entry in stream_metadata: - # stream metadata will have empty breadcrumb - if not entry['breadcrumb'] and entry['metadata'].get('selected',None): - selected_streams.append(stream['tap_stream_id']) - - return selected_streams - -def get_stream_from_catalog(stream_id, catalog): - for stream in catalog['streams']: - if stream['tap_stream_id'] == stream_id: - return stream - return None - -# return the 'timeout' -def get_request_timeout(): - args = singer.utils.parse_args([]) - # get the value of request timeout from config - config_request_timeout = args.config.get('request_timeout') - - # only return the timeout value if it is passed in the config and the value is not 0, "0" or "" - if config_request_timeout and float(config_request_timeout): - # return the timeout from config - return float(config_request_timeout) - - # return default timeout - return REQUEST_TIMEOUT - -SYNC_FUNCTIONS = { - 'commits': get_all_commits, - 'comments': get_all_comments, - 'issues': get_all_issues, - 'assignees': get_all_assignees, - 'collaborators': get_all_collaborators, - 'pull_requests': get_all_pull_requests, - 'releases': get_all_releases, - 'stargazers': get_all_stargazers, - 'events': get_all_events, - 'issue_events': get_all_issue_events, - 'issue_milestones': get_all_issue_milestones, - 'issue_labels': get_all_issue_labels, - 'projects': get_all_projects, - 'commit_comments': get_all_commit_comments, - 'teams': get_all_teams -} - -SUB_STREAMS = { - 'pull_requests': ['reviews', 'review_comments', 'pr_commits'], - 'projects': ['project_cards', 'project_columns'], - 'teams': ['team_members', 'team_memberships'] -} - -def do_sync(config, state, catalog): - access_token = config['access_token'] - session.headers.update({'authorization': 'token ' + access_token}) - - start_date = config['start_date'] if 'start_date' in config else None - # get selected streams, make sure stream dependencies are met - selected_stream_ids = get_selected_streams(catalog) - validate_dependencies(selected_stream_ids) - - repositories = extract_repos_from_config(config) - - state = translate_state(state, catalog, repositories) - singer.write_state(state) - - #pylint: disable=too-many-nested-blocks - for repo in repositories: - logger.info("Starting sync of repository: %s", repo) - for stream in 
catalog['streams']: - stream_id = stream['tap_stream_id'] - stream_schema = stream['schema'] - mdata = stream['metadata'] - - # if it is a "sub_stream", it will be sync'd by its parent - if not SYNC_FUNCTIONS.get(stream_id): - continue - - # if stream is selected, write schema and sync - if stream_id in selected_stream_ids: - singer.write_schema(stream_id, stream_schema, stream['key_properties']) - - # get sync function and any sub streams - sync_func = SYNC_FUNCTIONS[stream_id] - sub_stream_ids = SUB_STREAMS.get(stream_id, None) - - # sync stream - if not sub_stream_ids: - state = sync_func(stream_schema, repo, state, mdata, start_date) - - # handle streams with sub streams - else: - stream_schemas = {stream_id: stream_schema} - stream_mdata = {stream_id: mdata} - - # get and write selected sub stream schemas - for sub_stream_id in sub_stream_ids: - if sub_stream_id in selected_stream_ids: - sub_stream = get_stream_from_catalog(sub_stream_id, catalog) - stream_schemas[sub_stream_id] = sub_stream['schema'] - stream_mdata[sub_stream_id] = sub_stream['metadata'] - singer.write_schema(sub_stream_id, sub_stream['schema'], - sub_stream['key_properties']) - - # sync stream and it's sub streams - state = sync_func(stream_schemas, repo, state, stream_mdata, start_date) - - singer.write_state(state) - -@singer.utils.handle_top_exception(logger) -def main(): args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS) - # get optional config key `max_sleep_seconds` - config_max_sleep = args.config.get('max_sleep_seconds') + config = args.config + + client = GithubClient(config) - # set global `MAX_SLEEP_SECONDS` for rate_throttling function or use default - global MAX_SLEEP_SECONDS #pylint: disable=global-statement - MAX_SLEEP_SECONDS = config_max_sleep if config_max_sleep else DEFAULT_SLEEP_SECONDS + state = {} + if args.state: + state = args.state if args.discover: - do_discover(args.config) + do_discover(client) else: - catalog = args.properties if args.properties else get_catalog() - do_sync(args.config, args.state, catalog) + catalog = args.properties if args.properties else _discover(client) + _sync(client, config, state, catalog) if __name__ == '__main__': main() diff --git a/tap_github/client.py b/tap_github/client.py new file mode 100644 index 00000000..9913a8c2 --- /dev/null +++ b/tap_github/client.py @@ -0,0 +1,344 @@ +import time +import requests +import backoff +from simplejson import JSONDecodeError +import singer +from singer import metrics + +LOGGER = singer.get_logger() +DEFAULT_SLEEP_SECONDS = 600 +DEFAULT_DOMAIN = "https://api.github.com" + +# Set default timeout of 300 seconds +REQUEST_TIMEOUT = 300 + +class GithubException(Exception): + pass + +class Server5xxError(GithubException): + pass + +class BadCredentialsException(GithubException): + pass + +class AuthException(GithubException): + pass + +class NotFoundException(GithubException): + pass + +class BadRequestException(GithubException): + pass + +class InternalServerError(Server5xxError): + pass + +class UnprocessableError(GithubException): + pass + +class NotModifiedError(GithubException): + pass + +class MovedPermanentlyError(GithubException): + pass + +class ConflictError(GithubException): + pass + +class RateLimitExceeded(GithubException): + pass + +class TooManyRequests(GithubException): + pass + + +ERROR_CODE_EXCEPTION_MAPPING = { + 301: { + "raise_exception": MovedPermanentlyError, + "message": "The resource you are looking for is moved to another URL." 
+    },
+    304: {
+        "raise_exception": NotModifiedError,
+        "message": "The requested resource has not been modified since the last time you accessed it."
+    },
+    400: {
+        "raise_exception": BadRequestException,
+        "message": "The request is missing or has a bad parameter."
+    },
+    401: {
+        "raise_exception": BadCredentialsException,
+        "message": "Invalid authorization credentials."
+    },
+    403: {
+        "raise_exception": AuthException,
+        "message": "User doesn't have permission to access the resource."
+    },
+    404: {
+        "raise_exception": NotFoundException,
+        "message": "The resource you have specified cannot be found. Alternatively the access_token is not valid for the resource"
+    },
+    409: {
+        "raise_exception": ConflictError,
+        "message": "The request could not be completed due to a conflict with the current state of the server."
+    },
+    422: {
+        "raise_exception": UnprocessableError,
+        "message": "The request could not be processed right now."
+    },
+    429: {
+        "raise_exception": TooManyRequests,
+        "message": "Too many requests occurred."
+    },
+    500: {
+        "raise_exception": InternalServerError,
+        "message": "An error has occurred at Github's end."
+    }
+}
+
+def raise_for_error(resp, source, stream, client, should_skip_404):
+    """
+    Retrieve the error code and the error message from the response and raise custom exceptions accordingly.
+    """
+    error_code = resp.status_code
+    try:
+        response_json = resp.json()
+    except JSONDecodeError:
+        response_json = {}
+
+    if error_code == 404 and should_skip_404:
+        # Add the inaccessible stream to the list.
+        client.not_accessible_repos.add(stream)
+        details = ERROR_CODE_EXCEPTION_MAPPING.get(error_code).get("message")
+        if source == "teams":
+            details += ' or it is a personal account repository'
+        message = "HTTP-error-code: 404, Error: {}. Please refer \'{}\' for more details.".format(details, response_json.get("documentation_url"))
+        LOGGER.warning(message)
+        # Don't raise a NotFoundException
+        return None
+
+    message = "HTTP-error-code: {}, Error: {}".format(
+        error_code, ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get("message", "Unknown Error") if response_json == {} else response_json)
+
+    if error_code > 500:
+        raise Server5xxError(message) from None
+
+    exc = ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get("raise_exception", GithubException)
+    raise exc(message) from None
+
+def calculate_seconds(epoch):
+    """
+    Calculate the seconds to sleep before making a new request.
+    """
+    current = time.time()
+    return int(round((epoch - current), 0))
+
+def rate_throttling(response, max_sleep_seconds):
+    """
+    For rate limit errors, get the remaining time before retrying and calculate the time to sleep before making a new request.
+    """
+    if 'X-RateLimit-Remaining' in response.headers:
+        if int(response.headers['X-RateLimit-Remaining']) == 0:
+            seconds_to_sleep = calculate_seconds(int(response.headers['X-RateLimit-Reset']))
+
+            if seconds_to_sleep > max_sleep_seconds:
+                message = "API rate limit exceeded, please try after {} seconds.".format(seconds_to_sleep)
+                raise RateLimitExceeded(message) from None
+
+            LOGGER.info("API rate limit exceeded. Tap will retry the data collection after %s seconds.", seconds_to_sleep)
+            time.sleep(seconds_to_sleep)
+    else:
+        # Raise an exception if `X-RateLimit-Remaining` is not found in the headers.
+        # The API does not include this header if the provided base URL is not a valid GitHub custom domain.
+        raise GithubException("The API call using the specified base url was unsuccessful. Please double-check the provided base URL.")
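To make the throttling above concrete, the following standalone sketch (hypothetical header values, and a plain exception instead of the tap's classes) shows how the reset epoch becomes a sleep:

```python
import time

# Hypothetical GitHub rate-limit headers, as rate_throttling() reads them.
headers = {
    "X-RateLimit-Remaining": "0",                     # no calls left in this window
    "X-RateLimit-Reset": str(int(time.time()) + 42),  # epoch seconds when the window resets
}

if int(headers["X-RateLimit-Remaining"]) == 0:
    # calculate_seconds(): the reset epoch minus the current time, rounded to whole seconds
    seconds_to_sleep = int(round(int(headers["X-RateLimit-Reset"]) - time.time(), 0))
    if seconds_to_sleep > 600:  # DEFAULT_SLEEP_SECONDS; configurable via `max_sleep_seconds`
        raise RuntimeError("API rate limit exceeded, please try after {} seconds.".format(seconds_to_sleep))
    time.sleep(seconds_to_sleep)  # wait out the window, then resume requests
```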
+
+class GithubClient:
+    """
+    The client class used for making REST calls to the Github API.
+    """
+    def __init__(self, config):
+        self.config = config
+        self.session = requests.Session()
+        self.base_url = config['base_url'] if config.get('base_url') else DEFAULT_DOMAIN
+        self.max_sleep_seconds = self.config.get('max_sleep_seconds', DEFAULT_SLEEP_SECONDS)
+        self.set_auth_in_session()
+        self.not_accessible_repos = set()
+
+    def get_request_timeout(self):
+        """
+        Get the request timeout from the config; if not present, use the default of 300 seconds.
+        """
+        # Get the value of request timeout from config
+        config_request_timeout = self.config.get('request_timeout')
+
+        # Only return the timeout value if it is passed in the config and the value is not 0, "0" or ""
+        if config_request_timeout and float(config_request_timeout):
+            return float(config_request_timeout)
+
+        # Return default timeout
+        return REQUEST_TIMEOUT
+
+    def set_auth_in_session(self):
+        """
+        Set the access token in the session headers for authorization.
+        """
+        access_token = self.config['access_token']
+        self.session.headers.update({'authorization': 'token ' + access_token})
+
+    # pylint: disable=dangerous-default-value
+    # During a 'Timeout' error there is also the possibility of a 'ConnectionError',
+    # hence backoff is added for 'ConnectionError' too.
+    @backoff.on_exception(backoff.expo, (requests.Timeout, requests.ConnectionError, Server5xxError, TooManyRequests), max_tries=5, factor=2)
+    def authed_get(self, source, url, headers={}, stream="", should_skip_404=True):
+        """
+        Call the REST API and return the response if the status code is 200.
+        """
+        with metrics.http_request_timer(source) as timer:
+            self.session.headers.update(headers)
+            resp = self.session.request(method='get', url=url, timeout=self.get_request_timeout())
+            if resp.status_code != 200:
+                raise_for_error(resp, source, stream, self, should_skip_404)
+            timer.tags[metrics.Tag.http_status_code] = resp.status_code
+            rate_throttling(resp, self.max_sleep_seconds)
+            if resp.status_code == 404:
+                # Return an empty response body since we're not raising a NotFoundException
+                resp._content = b'{}'  # pylint: disable=protected-access
+
+            return resp
+
+    def authed_get_all_pages(self, source, url, headers={}, stream="", should_skip_404=True):
+        """
+        Fetch all pages of records and return them.
+        """
+        while True:
+            r = self.authed_get(source, url, headers, stream, should_skip_404)
+            yield r
+
+            # Fetch the next page if a `next` link is found in the response.
+            if 'next' in r.links:
+                url = r.links['next']['url']
+            else:
+                # Break the loop once all pages are fetched.
+                break
+
+    def verify_repo_access(self, url_for_repo, repo):
+        """
+        Call the REST API to verify that the user has sufficient permissions to access this repository.
+        """
+        try:
+            self.authed_get("verifying repository access", url_for_repo)
+        except NotFoundException:
+            # Throwing a user-friendly error message as it checks token access
+            message = "HTTP-error-code: 404, Error: Please check the repository name \'{}\' or you do not have sufficient permissions to access this repository.".format(repo)
+            raise NotFoundException(message) from None
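Taken together, these methods form a small authenticated, retrying, paginating HTTP layer. A usage sketch (hypothetical token and repository, not part of this diff; `base_url` falls back to `https://api.github.com` when not configured):

```python
from tap_github.client import GithubClient

# Hypothetical config; only `access_token` is required by the client itself.
config = {
    "access_token": "ghp_example_token",  # placeholder token
    "repository": "singer-io/tap-github",
}
client = GithubClient(config)

# authed_get_all_pages() yields one response per page, following the
# `Link: rel="next"` header until no next page remains.
for response in client.authed_get_all_pages(
        "commits", "{}/repos/singer-io/tap-github/commits".format(client.base_url)):
    for commit in response.json():
        print(commit["sha"])
```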
+    def verify_access_for_repo(self):
+        """
+        For all the repositories mentioned in the config, check access for each repo.
+        """
+        repositories, org = self.extract_repos_from_config() # pylint: disable=unused-variable
+
+        for repo in repositories:
+
+            url_for_repo = "{}/repos/{}/commits".format(self.base_url, repo)
+            LOGGER.info("Verifying access of repository: %s", repo)
+
+            # Verifying for Repo access
+            self.verify_repo_access(url_for_repo, repo)
+
+    def extract_orgs_from_config(self):
+        """
+        Extracts all organizations from the config.
+        """
+        repo_paths = list(filter(None, self.config['repository'].split(' ')))
+        orgs_paths = [repo.split('/')[0] for repo in repo_paths]
+
+        return set(orgs_paths)
+
+    def extract_repos_from_config(self):
+        """
+        Extracts all repositories from the config and calls get_all_repos()
+        for organizations using the wildcard 'org/*' format.
+        """
+        repo_paths = list(filter(None, self.config['repository'].split(' ')))
+
+        unique_repos = set()
+        # Collect any repo paths that occur more than once in the config.
+        duplicate_repos = [x for x in repo_paths if x in unique_repos or (unique_repos.add(x) or False)]
+        if duplicate_repos:
+            LOGGER.warning("Duplicate repositories found: %s. They will be synced only once.", duplicate_repos)
+
+        repo_paths = list(set(repo_paths))
+
+        orgs_with_all_repos = []
+        orgs = []
+        repos_with_errors = []
+        for repo in repo_paths:
+            # Split the repo_path by `/` as we are passing org/repo_name in the config.
+            split_repo_path = repo.split('/')
+            # Prepare the list of organizations
+            orgs.append(split_repo_path[0])
+            # Check the second element of the split list only if the length is greater than 1 and the first/second
+            # element is not empty (for scenarios such as `org/` or `/repo`, which are invalid)
+            if len(split_repo_path) > 1 and split_repo_path[1] != '' and split_repo_path[0] != '':
+                # If the second element is *, we need to check access for all the repos.
+                if split_repo_path[1] == '*':
+                    orgs_with_all_repos.append(repo)
+            else:
+                # The path is missing the `/`, the repo name, or the organization, so record it as an error.
+                repos_with_errors.append(repo)
+
+        # If any repos were collected in repos_with_errors, raise an exception
+        if repos_with_errors:
+            raise GithubException("Please provide valid organization/repository for: {}".format(sorted(repos_with_errors)))
+
+        if orgs_with_all_repos:
+            # Remove any wildcard "org/*" occurrences from `repo_paths`
+            repo_paths = list(set(repo_paths).difference(set(orgs_with_all_repos)))
+
+            # Get all repositories for each org in the config
+            all_repos = self.get_all_repos(orgs_with_all_repos)
+
+            # Update repo_paths
+            repo_paths.extend(all_repos)
+
+        return repo_paths, set(orgs)
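The parsing rules are easiest to see with a worked example. The following standalone sketch mirrors (rather than imports) the logic above, with a hypothetical `repository` value:

```python
# A hypothetical `repository` config value exercising each rule.
repository = "singer-io/tap-github singer-io/tap-github singer-io/* org/ /repo"

repo_paths = list(filter(None, repository.split(" ")))
# - "singer-io/tap-github" appears twice: logged as a duplicate, synced only once
# - "singer-io/*" is a wildcard: expanded via get_all_repos() to every repo in the org
# - "org/" and "/repo" are malformed: collected and reported together in one GithubException
print(repo_paths)
```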
+    def get_all_repos(self, organizations: list):
+        """
+        Retrieves all repositories for the provided organizations and
+        verifies basic access for them.
+
+        Docs: https://docs.github.com/en/rest/reference/repos#list-organization-repositories
+        """
+        repos = []
+
+        for org_path in organizations:
+            org = org_path.split('/')[0]
+            try:
+                for response in self.authed_get_all_pages(
+                        'get_all_repos',
+                        '{}/orgs/{}/repos?sort=created&direction=desc'.format(self.base_url, org),
+                        should_skip_404=False
+                ):
+                    org_repos = response.json()
+                    LOGGER.info("Collected repos for organization: %s", org)
+
+                    for repo in org_repos:
+                        repo_full_name = repo.get('full_name')
+                        LOGGER.info("Verifying access of repository: %s", repo_full_name)
+
+                        self.verify_repo_access(
+                            '{}/repos/{}/commits'.format(self.base_url, repo_full_name),
+                            repo
+                        )
+
+                        repos.append(repo_full_name)
+            except NotFoundException:
+                # Throwing a user-friendly error message as it checks token access
+                message = "HTTP-error-code: 404, Error: Please check the organization name \'{}\' or you do not have sufficient permissions to access this organization.".format(org)
+                raise NotFoundException(message) from None
+
+        return repos
+
+    def __exit__(self, exception_type, exception_value, traceback):
+        # Kill the session instance.
+        self.session.close()
diff --git a/tap_github/discover.py b/tap_github/discover.py
new file mode 100644
index 00000000..386857ee
--- /dev/null
+++ b/tap_github/discover.py
@@ -0,0 +1,36 @@
+import singer
+from singer.catalog import Catalog, CatalogEntry, Schema
+from tap_github.schema import get_schemas
+
+LOGGER = singer.get_logger()
+
+def discover(client):
+    """
+    Run the discovery mode, prepare the catalog file, and return the catalog.
+    """
+    # Check credentials in discover mode.
+    client.verify_access_for_repo()
+
+    schemas, field_metadata = get_schemas()
+    catalog = Catalog([])
+
+    for stream_name, schema_dict in schemas.items():
+        try:
+            schema = Schema.from_dict(schema_dict)
+            mdata = field_metadata[stream_name]
+        except Exception as err:
+            LOGGER.error(err)
+            LOGGER.error('stream_name: %s', stream_name)
+            LOGGER.error('type schema_dict: %s', type(schema_dict))
+            raise err
+
+        key_properties = mdata[0]['metadata'].get('table-key-properties')
+        catalog.streams.append(CatalogEntry(
+            stream=stream_name,
+            tap_stream_id=stream_name,
+            key_properties=key_properties,
+            schema=schema,
+            metadata=mdata
+        ))
+
+    return catalog.to_dict()
diff --git a/tap_github/schema.py b/tap_github/schema.py
new file mode 100644
index 00000000..6b65176c
--- /dev/null
+++ b/tap_github/schema.py
@@ -0,0 +1,68 @@
+import os
+import json
+from singer import metadata
+import singer
+from tap_github.streams import STREAMS
+
+def get_abs_path(path):
+    """
+    Get the absolute path for the schema files.
+    """
+    return os.path.join(os.path.dirname(os.path.realpath(__file__)), path)
+
+def load_schema_references():
+    """
+    Load the schema files from the schemas/shared folder and return the schema references.
+    """
+    shared_schema_path = get_abs_path('schemas/shared')
+
+    shared_file_names = [f for f in os.listdir(shared_schema_path)
+                         if os.path.isfile(os.path.join(shared_schema_path, f))]
+
+    refs = {}
+    for shared_schema_file in shared_file_names:
+        with open(os.path.join(shared_schema_path, shared_schema_file)) as data_file:
+            refs['shared/' + shared_schema_file] = json.load(data_file)
+
+    return refs
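To make the reference mechanism concrete, here is a minimal sketch of how these refs are consumed, using a hypothetical `shared/user.json` (the diff's own `get_schemas()` below does the same via `singer.resolve_schema_references`):

```python
import singer

# Hypothetical shared schema, keyed the way load_schema_references() keys it.
refs = {
    "shared/user.json": {
        "type": ["null", "object"],
        "properties": {"id": {"type": ["null", "integer"]}},
    }
}

# A stream schema points at the shared definition with a standard JSON Schema $ref...
schema = {"type": ["null", "object"],
          "properties": {"user": {"$ref": "shared/user.json"}}}

# ...and resolve_schema_references() inlines it before metadata is generated.
resolved = singer.resolve_schema_references(schema, refs)
print(resolved["properties"]["user"]["properties"]["id"])  # {'type': ['null', 'integer']}
```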
+ """ + schemas = {} + field_metadata = {} + + refs = load_schema_references() + for stream_name, stream_metadata in STREAMS.items(): + schema_path = get_abs_path('schemas/{}.json'.format(stream_name)) + + with open(schema_path) as file: + schema = json.load(file) + + schemas[stream_name] = schema + schema = singer.resolve_schema_references(schema, refs) + + mdata = metadata.new() + mdata = metadata.get_standard_metadata( + schema=schema, + key_properties = (hasattr(stream_metadata, 'key_properties') or None) and stream_metadata.key_properties, + valid_replication_keys = (hasattr(stream_metadata, 'replication_keys') or None) and stream_metadata.replication_keys, + replication_method = (hasattr(stream_metadata, 'replication_method') or None) and stream_metadata.replication_method + ) + mdata = metadata.to_map(mdata) + + # Loop through all keys and make replication keys and primary keys of child stream which are not automatic in parent stream of automatic inclusion + for field_name in schema['properties'].keys(): + + pk_child_fields = (hasattr(stream_metadata, 'pk_child_fields') or None) and stream_metadata.pk_child_fields + replication_keys = (hasattr(stream_metadata, 'replication_keys') or None) and stream_metadata.replication_keys + if (replication_keys and field_name in replication_keys) or (pk_child_fields and field_name in pk_child_fields): + mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'automatic') + + + mdata = metadata.to_list(mdata) + field_metadata[stream_name] = mdata + + + return schemas, field_metadata diff --git a/tap_github/schemas/assignees.json b/tap_github/schemas/assignees.json index d6162a7a..5c600dd6 100644 --- a/tap_github/schemas/assignees.json +++ b/tap_github/schemas/assignees.json @@ -2,15 +2,66 @@ "type": ["null", "object"], "additionalProperties": false, "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, "login": { "type": ["null", "string"] }, "id": { "type": ["null", "integer"] }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, "url": { "type": ["null", "string"] }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] + }, "type": { "type": ["null", "string"] }, diff --git a/tap_github/schemas/collaborators.json b/tap_github/schemas/collaborators.json index d6162a7a..9f71ac07 100644 --- a/tap_github/schemas/collaborators.json +++ b/tap_github/schemas/collaborators.json @@ -8,12 +8,66 @@ "id": { "type": ["null", "integer"] }, + "email": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, "url": { "type": ["null", "string"] }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", 
"string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, "type": { "type": ["null", "string"] }, + "site_admin": { + "type": ["null", "boolean"] + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "role_name": { + "type": ["null", "string"] + }, "_sdc_repository": { "type": ["string"] } diff --git a/tap_github/schemas/comments.json b/tap_github/schemas/comments.json index 7b14a643..ddded9dc 100644 --- a/tap_github/schemas/comments.json +++ b/tap_github/schemas/comments.json @@ -1,5 +1,5 @@ { - "type": "object", + "type": ["null", "object"], "properties": { "id": { "type": ["null", "integer"] @@ -16,6 +16,12 @@ "body": { "type": ["null", "string"] }, + "body_text": { + "type": ["null", "string"] + }, + "body_html": { + "type": ["null", "string"] + }, "html_url": { "type": ["null", "string"] }, @@ -27,63 +33,71 @@ }, "user": { "type": ["null", "object"], - "additionalProperties": false, - "properties": { - "login": { - "type": ["null", "string"] - }, - "id": { - "type": ["null", "integer"] - }, - "node_id": { - "type": ["null", "string"] - }, - "avatar_url": { - "type": ["null", "string"] - }, - "gravatar_id": { - "type": ["null", "string"] - }, - "url": { - "type": ["null", "string"] - }, - "html_url": { - "type": ["null", "string"] - }, - "followers_url": { - "type": ["null", "string"] - }, - "following_url": { - "type": ["null", "string"] - }, - "gists_url": { - "type": ["null", "string"] - }, - "starred_url": { - "type": ["null", "string"] - }, - "subscriptions_url": { - "type": ["null", "string"] - }, - "organizations_url": { - "type": ["null", "string"] - }, - "repos_url": { - "type": ["null", "string"] - }, - "events_url": { - "type": ["null", "string"] - }, - "received_events_url": { - "type": ["null", "string"] - }, - "type": { - "type": ["null", "string"] - }, - "site_admin": { - "type": ["null", "string"] + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "string"] + }, + "starred_at": { + "type": ["null", "string"] + } } - } }, "created_at": { "type": ["null", "string"], @@ -93,6 +107,12 @@ "type": ["null", "string"], "format": "date-time" }, + "performed_via_github_app": { + "$ref": 
"shared/performed_via_github_app.json#/" + }, + "reactions": { + "$ref": "shared/reactions.json#/" + }, "_sdc_repository": { "type": ["string"] } diff --git a/tap_github/schemas/commit_comments.json b/tap_github/schemas/commit_comments.json index c4c01222..408448dc 100644 --- a/tap_github/schemas/commit_comments.json +++ b/tap_github/schemas/commit_comments.json @@ -1,192 +1,114 @@ { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "html_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "body": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "path": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "position": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "line": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "commit_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "user": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, "login": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "avatar_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "gravatar_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "html_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "followers_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "following_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "gists_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "starred_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "subscriptions_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "organizations_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "repos_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "events_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "received_events_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "type": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "site_admin": { - "type": [ - "null", - "boolean" - ] + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] } } }, "created_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "updated_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" + }, + "author_association": { + "type": ["null", "string"] + }, + "reactions": { + "$ref": "shared/reactions.json#/" } } - } \ No newline at end of file +} diff --git a/tap_github/schemas/commits.json b/tap_github/schemas/commits.json index 0e611d11..cf873448 100644 --- a/tap_github/schemas/commits.json +++ b/tap_github/schemas/commits.json @@ -1,13 +1,27 @@ { "type": 
["null", "object"], - "additionalProperties": false, "properties": { "_sdc_repository": { "type": ["string"] }, - "sha": { + "node_id": { + "type": ["null", "string"] + }, + "pr_id": { + "type": ["null", "string"] + }, + "pr_number": { + "type": ["null", "integer"] + }, + "id": { + "type": ["null", "string"] + }, + "updated_at": { "type": ["null", "string"], - "description": "The git commit hash" + "format": "date-time" + }, + "sha": { + "type": ["null", "string"] }, "url": { "type": ["null", "string"] @@ -19,97 +33,61 @@ "additionalProperties": false, "properties": { "sha": { - "type": ["null", "string"], - "description": "The git hash of the parent commit" + "type": ["null", "string"] }, "url": { - "type": ["null", "string"], - "description": "The URL to the parent commit" + "type": ["null", "string"] }, "html_url": { - "type": ["null", "string"], - "description": "The HTML URL to the parent commit" + "type": ["null", "string"] } } } }, "files": { - "type": [ - "null", - "array" - ], + "type": ["null", "array"], "items": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "filename": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "additions": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "deletions": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "changes": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "status": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "raw_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "blob_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "patch": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] } } } }, "html_url": { - "type": ["null", "string"], - "description": "The HTML URL to the commit" + "type": ["null", "string"] }, "comments_url": { - "type": ["null", "string"], - "description": "The URL to the commit's comments page" + "type": ["null", "string"] }, "commit": { "type": ["null", "object"], "additionalProperties": false, "properties": { "url": { - "type": ["null", "string"], - "description": "The URL to the commit" + "type": ["null", "string"] }, "tree": { "type": ["null", "object"], @@ -125,57 +103,173 @@ }, "author": { "type": ["null", "object"], - "additionalProperties": false, "properties": { - "date": { - "type": ["null", "string"], - "format": "date-time", - "description": "The date the author committed the change" - }, "name": { - "type": ["null", "string"], - "description": "The author's name" + "type": ["null", "string"] }, "email": { - "type": ["null", "string"], - "description": "The author's email" + "type": ["null", "string"] }, "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { 
+ "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] + }, + "date": { "type": ["null", "string"], - "description": "The author's login" + "format": "date-time" } } }, "message": { - "type": ["null", "string"], - "description": "The commit message" + "type": ["null", "string"] }, "committer": { "type": ["null", "object"], - "additionalProperties": false, "properties": { - "date": { - "type": ["null", "string"], - "format": "date-time", - "description": "The date the committer committed the change" - }, "name": { - "type": ["null", "string"], - "description": "The committer's name" + "type": ["null", "string"] }, "email": { - "type": ["null", "string"], - "description": "The committer's email" + "type": ["null", "string"] }, "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] + }, + "date": { "type": ["null", "string"], - "description": "The committer's login" + "format": "date-time" } } }, "comment_count": { - "type": ["null", "integer"], - "description": "The number of comments on the commit" + "type": ["null", "integer"] + } + } + }, + "committer": { + "$ref": "shared/user.json#/" + }, + "author": { + "$ref": "shared/user.json#/" + }, + "stats": { + "type": ["null", "object"], + "properties": { + "additions": { + "type": ["null", "integer"] + }, + "deletions": { + "type": ["null", "integer"] + }, + "total": { + "type": ["null", "integer"] } } } diff --git a/tap_github/schemas/events.json b/tap_github/schemas/events.json index 985a39d0..266ef2c8 100644 --- a/tap_github/schemas/events.json +++ b/tap_github/schemas/events.json @@ -1,223 +1,1015 @@ { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "_sdc_repository": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "actor": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "avatar_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "display_login": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "gravatar_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "login": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] } } }, "created_at": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", 
"number"] }, "org": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "avatar_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "gravatar_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "login": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] + }, + "display_login": { + "type": ["null", "string"] } } }, "payload": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "before": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "action": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "comment": { - "type": [ - "null", - "string" - ] + "type": ["null", "object", "string"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "body": { + "type": ["null", "string"] + }, + "body_text": { + "type": ["null", "string"] + }, + "body_html": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "user": { + "$ref": "shared/user.json#/" + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "issue_url": { + "type": ["null", "string"], + "format": "uri" + }, + "author_association": { + "type": ["null", "string"] + }, + "performed_via_github_app": { + "$ref": "shared/performed_via_github_app.json#/" + }, + "reactions": { + "$ref": "shared/reactions.json#/" + } + } }, "issue": { - "type": [ - "null", - "string" - ] + "type": ["null", "object", "string"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "repository_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "comments_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "number": { + "type": ["null", "integer"] + }, + "state": { + "type": ["null", "string"] + }, + "state_reason": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "body": { + "type": ["null", "string"] + }, + "user": { + "$ref": "shared/user.json#/" + }, + "labels": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"], + "format": "int64" + }, + "node_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"], + "format": "uri" + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "color": { + "type": ["null", "string"] + }, + "default": { + "type": ["null", "boolean"] + } + } + }, + "assignee": { + "$ref": "shared/user.json#/" + }, + "assignees": { + "type": ["null", "array"], + "items": { + "$ref": "shared/user.json#/" + } + }, + "milestone": { + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "number": { + "type": ["null", "integer"] + }, + 
"state": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "creator": { + "$ref": "shared/user.json#/" + }, + "open_issues": { + "type": ["null", "integer"] + }, + "closed_issues": { + "type": ["null", "integer"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "closed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "due_on": { + "type": ["null", "string"], + "format": "date-time" + } + } + }, + "locked": { + "type": ["null", "boolean"] + }, + "active_lock_reason": { + "type": ["null", "string"] + }, + "comments": { + "type": ["null", "integer"] + }, + "pull_request": { + "type": ["null", "object"], + "properties": { + "merged_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "diff_url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "patch_url": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + } + } + }, + "closed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "draft": { + "type": ["null", "boolean"] + }, + "closed_by": { + "$ref": "shared/user.json#/" + }, + "body_html": { + "type": ["null", "string"] + }, + "body_text": { + "type": ["null", "string"] + }, + "timeline_url": { + "type": ["null", "string"] + }, + "repository": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "full_name": { + "type": ["null", "string"] + }, + "license": { + "type": ["null", "object"], + "properties": { + "key": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "spdx_id": { + "type": ["null", "string"] + }, + "node_id": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "organization": { + "$ref": "shared/user.json#/" + }, + "forks": { + "type": ["null", "integer"] + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "owner": { + "$ref": "shared/user.json#/" + }, + "private": { + "type": ["null", "boolean"] + }, + "html_url": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "fork": { + "type": ["null", "boolean"] + }, + "url": { + "type": ["null", "string"] + }, + "archive_url": { + "type": ["null", "string"] + }, + "assignees_url": { + "type": ["null", "string"] + }, + "blobs_url": { + "type": ["null", "string"] + }, + "branches_url": { + "type": ["null", "string"] + }, + "collaborators_url": { + "type": ["null", "string"] + }, + "comments_url": { + "type": ["null", "string"] + }, + "commits_url": { + "type": ["null", "string"] + }, + "compare_url": { + "type": ["null", "string"] + }, + "contents_url": { + "type": ["null", "string"] + }, + "contributors_url": { + "type": ["null", "string"] + }, + "deployments_url": { + "type": ["null", "string"] + }, + "downloads_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "forks_url": { + "type": ["null", "string"] + }, + "git_commits_url": { + "type": ["null", "string"] + }, + "git_refs_url": { + "type": ["null", "string"] 
+ }, + "git_tags_url": { + "type": ["null", "string"] + }, + "git_url": { + "type": ["null", "string"] + }, + "issue_comment_url": { + "type": ["null", "string"] + }, + "issue_events_url": { + "type": ["null", "string"] + }, + "issues_url": { + "type": ["null", "string"] + }, + "keys_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "languages_url": { + "type": ["null", "string"] + }, + "merges_url": { + "type": ["null", "string"] + }, + "milestones_url": { + "type": ["null", "string"] + }, + "notifications_url": { + "type": ["null", "string"] + }, + "pulls_url": { + "type": ["null", "string"] + }, + "releases_url": { + "type": ["null", "string"] + }, + "ssh_url": { + "type": ["null", "string"] + }, + "stargazers_url": { + "type": ["null", "string"] + }, + "statuses_url": { + "type": ["null", "string"] + }, + "subscribers_url": { + "type": ["null", "string"] + }, + "subscription_url": { + "type": ["null", "string"] + }, + "tags_url": { + "type": ["null", "string"] + }, + "teams_url": { + "type": ["null", "string"] + }, + "trees_url": { + "type": ["null", "string"] + }, + "clone_url": { + "type": ["null", "string"] + }, + "mirror_url": { + "type": ["null", "string"] + }, + "hooks_url": { + "type": ["null", "string"] + }, + "svn_url": { + "type": ["null", "string"] + }, + "homepage": { + "type": ["null", "string"] + }, + "language": { + "type": ["null", "string"] + }, + "forks_count": { + "type": ["null", "integer"] + }, + "stargazers_count": { + "type": ["null", "integer"] + }, + "watchers_count": { + "type": ["null", "integer"] + }, + "size": { + "type": ["null", "integer"] + }, + "default_branch": { + "type": ["null", "string"] + }, + "open_issues_count": { + "type": ["null", "integer"] + }, + "is_template": { + "type": ["null", "boolean"] + }, + "topics": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "has_issues": { + "type": ["null", "boolean"] + }, + "has_projects": { + "type": ["null", "boolean"] + }, + "has_wiki": { + "type": ["null", "boolean"] + }, + "has_pages": { + "type": ["null", "boolean"] + }, + "has_downloads": { + "type": ["null", "boolean"] + }, + "archived": { + "type": ["null", "boolean"] + }, + "disabled": { + "type": ["null", "boolean"] + }, + "visibility": { + "type": ["null", "string"] + }, + "pushed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "allow_rebase_merge": { + "type": ["null", "boolean"] + }, + "template_repository": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "full_name": { + "type": ["null", "string"] + }, + "owner": { + "type": ["null", "object"], + "properties": { + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", 
"string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + } + } + }, + "private": { + "type": ["null", "boolean"] + }, + "html_url": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "fork": { + "type": ["null", "boolean"] + }, + "url": { + "type": ["null", "string"] + }, + "archive_url": { + "type": ["null", "string"] + }, + "assignees_url": { + "type": ["null", "string"] + }, + "blobs_url": { + "type": ["null", "string"] + }, + "branches_url": { + "type": ["null", "string"] + }, + "collaborators_url": { + "type": ["null", "string"] + }, + "comments_url": { + "type": ["null", "string"] + }, + "commits_url": { + "type": ["null", "string"] + }, + "compare_url": { + "type": ["null", "string"] + }, + "contents_url": { + "type": ["null", "string"] + }, + "contributors_url": { + "type": ["null", "string"] + }, + "deployments_url": { + "type": ["null", "string"] + }, + "downloads_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "forks_url": { + "type": ["null", "string"] + }, + "git_commits_url": { + "type": ["null", "string"] + }, + "git_refs_url": { + "type": ["null", "string"] + }, + "git_tags_url": { + "type": ["null", "string"] + }, + "git_url": { + "type": ["null", "string"] + }, + "issue_comment_url": { + "type": ["null", "string"] + }, + "issue_events_url": { + "type": ["null", "string"] + }, + "issues_url": { + "type": ["null", "string"] + }, + "keys_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "languages_url": { + "type": ["null", "string"] + }, + "merges_url": { + "type": ["null", "string"] + }, + "milestones_url": { + "type": ["null", "string"] + }, + "notifications_url": { + "type": ["null", "string"] + }, + "pulls_url": { + "type": ["null", "string"] + }, + "releases_url": { + "type": ["null", "string"] + }, + "ssh_url": { + "type": ["null", "string"] + }, + "stargazers_url": { + "type": ["null", "string"] + }, + "statuses_url": { + "type": ["null", "string"] + }, + "subscribers_url": { + "type": ["null", "string"] + }, + "subscription_url": { + "type": ["null", "string"] + }, + "tags_url": { + "type": ["null", "string"] + }, + "teams_url": { + "type": ["null", "string"] + }, + "trees_url": { + "type": ["null", "string"] + }, + "clone_url": { + "type": ["null", "string"] + }, + "mirror_url": { + "type": ["null", "string"] + }, + "hooks_url": { + "type": ["null", "string"] + }, + "svn_url": { + "type": ["null", "string"] + }, + "homepage": { + "type": ["null", "string"] + }, + "language": { + "type": ["null", "string"] + }, + "forks_count": { + "type": ["null", "integer"] + }, + "stargazers_count": { + "type": ["null", "integer"] + }, + "watchers_count": { + "type": ["null", "integer"] + }, + "size": { + "type": ["null", "integer"] + }, + "default_branch": { + "type": ["null", "string"] + }, + "open_issues_count": { + "type": ["null", "integer"] + }, + "is_template": { + "type": ["null", "boolean"] + }, + "topics": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "has_issues": { + "type": ["null", "boolean"] + }, + "has_projects": { + "type": ["null", "boolean"] + }, + "has_wiki": { + "type": ["null", "boolean"] + }, + "has_pages": { + 
"type": ["null", "boolean"] + }, + "has_downloads": { + "type": ["null", "boolean"] + }, + "archived": { + "type": ["null", "boolean"] + }, + "disabled": { + "type": ["null", "boolean"] + }, + "visibility": { + "type": ["null", "string"] + }, + "pushed_at": { + "type": ["null", "string"] + }, + "created_at": { + "type": ["null", "string"] + }, + "updated_at": { + "type": ["null", "string"] + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "allow_rebase_merge": { + "type": ["null", "boolean"] + }, + "temp_clone_token": { + "type": ["null", "string"] + }, + "allow_squash_merge": { + "type": ["null", "boolean"] + }, + "allow_auto_merge": { + "type": ["null", "boolean"] + }, + "delete_branch_on_merge": { + "type": ["null", "boolean"] + }, + "allow_update_branch": { + "type": ["null", "boolean"] + }, + "use_squash_pr_title_as_default": { + "type": ["null", "boolean"] + }, + "allow_merge_commit": { + "type": ["null", "boolean"] + }, + "subscribers_count": { + "type": ["null", "integer"] + }, + "network_count": { + "type": ["null", "integer"] + } + } + }, + "temp_clone_token": { + "type": ["null", "string"] + }, + "allow_squash_merge": { + "type": ["null", "boolean"] + }, + "allow_auto_merge": { + "type": ["null", "boolean"] + }, + "delete_branch_on_merge": { + "type": ["null", "boolean"] + }, + "allow_update_branch": { + "type": ["null", "boolean"] + }, + "use_squash_pr_title_as_default": { + "type": ["null", "boolean"] + }, + "allow_merge_commit": { + "type": ["null", "boolean"] + }, + "allow_forking": { + "type": ["null", "boolean"] + }, + "subscribers_count": { + "type": ["null", "integer"] + }, + "network_count": { + "type": ["null", "integer"] + }, + "open_issues": { + "type": ["null", "integer"] + }, + "watchers": { + "type": ["null", "integer"] + }, + "master_branch": { + "type": ["null", "string"] + }, + "starred_at": { + "type": ["null", "string"] + } + } + }, + "performed_via_github_app": { + "$ref": "shared/performed_via_github_app.json#/" + }, + "author_association": { + "type": ["null", "string"] + }, + "reactions": { + "$ref": "shared/reactions.json#/" + } + } + } }, "description": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "master_branch": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "pusher_type": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "ref": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "ref_type": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "commits": { - "type": [ - "null", - "array" - ], + "type": ["null", "array"], "items": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "author": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "email": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "name": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] } } }, "distinct": { - "type": [ - "null", - "boolean" - ] + "type": ["null", "boolean"] }, "message": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "sha": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] + } + } + } + }, + "pages": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "page_name": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "summary": { + "type": 
["null", "string"] + }, + "action": { + "type": ["null", "string"] + }, + "sha": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] } } } @@ -225,72 +1017,39 @@ } }, "distinct_size": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "head": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "push_id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "ref": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "size": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "public": { - "type": [ - "null", - "boolean" - ] + "type": ["null", "boolean"] }, "repo": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "name": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] } } }, "type": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] } } } \ No newline at end of file diff --git a/tap_github/schemas/issue_events.json b/tap_github/schemas/issue_events.json index ddd494ee..711b7f71 100644 --- a/tap_github/schemas/issue_events.json +++ b/tap_github/schemas/issue_events.json @@ -1,1473 +1,963 @@ { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "commit_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "created_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "commit_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "event": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "issue": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "user": { - "type": [ - "null", - "object" - ], - "properties": { - "gravatar_id": { - "type": [ - "null", - "string" - ] - }, - "received_events_url": { - "type": [ - "null", - "string" - ] - }, - "url": { - "type": [ - "null", - "string" - ] - }, - "subscriptions_url": { - "type": [ - "null", - "string" - ] - }, - "gists_url": { - "type": [ - "null", - "string" - ] - }, - "html_url": { - "type": [ - "null", - "string" - ] - }, - "repos_url": { - "type": [ - "null", - "string" - ] - }, - "events_url": { - "type": [ - "null", - "string" - ] - }, - "login": { - "type": [ - "null", - "string" - ] - }, - "followers_url": { - "type": [ - "null", - "string" - ] - }, - "starred_url": { - "type": [ - "null", - "string" - ] - }, - "avatar_url": { - "type": [ - "null", - "string" - ] - }, - "id": { - "type": [ - "null", - "integer" - ] - }, - "type": { - "type": [ - "null", - "string" - ] - }, - "site_admin": { - "type": [ - "null", - "boolean" - ] - }, - "node_id": { - "type": [ - "null", - "string" - ] - }, - "organizations_url": { - "type": [ - "null", - "string" - ] - }, - "following_url": { - "type": [ - "null", - "string" - ] - } - } + "$ref": "shared/user.json#/" }, "comments": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, "author_association": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "milestone": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "closed_at": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"], + "format": "date-time" }, 
"closed_issues": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, "created_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "creator": { - "type": [ - "null", - "object" - ], - "properties": { - "avatar_url": { - "type": [ - "null", - "string" - ] - }, - "events_url": { - "type": [ - "null", - "string" - ] - }, - "followers_url": { - "type": [ - "null", - "string" - ] - }, - "following_url": { - "type": [ - "null", - "string" - ] - }, - "gists_url": { - "type": [ - "null", - "string" - ] - }, - "gravatar_id": { - "type": [ - "null", - "string" - ] - }, - "html_url": { - "type": [ - "null", - "string" - ] - }, - "id": { - "type": [ - "null", - "integer" - ] - }, - "login": { - "type": [ - "null", - "string" - ] - }, - "node_id": { - "type": [ - "null", - "string" - ] - }, - "organizations_url": { - "type": [ - "null", - "string" - ] - }, - "received_events_url": { - "type": [ - "null", - "string" - ] - }, - "repos_url": { - "type": [ - "null", - "string" - ] - }, - "site_admin": { - "type": [ - "null", - "boolean" - ] - }, - "starred_url": { - "type": [ - "null", - "string" - ] - }, - "subscriptions_url": { - "type": [ - "null", - "string" - ] - }, - "type": { - "type": [ - "null", - "string" - ] - }, - "url": { - "type": [ - "null", - "string" - ] - } - } + "$ref": "shared/user.json#/" }, "description": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "due_on": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "html_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, "labels_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "number": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, "open_issues": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, "state": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "title": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "updated_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] } } }, + "closed_by": { + "$ref": "shared/user.json#/" + }, "closed_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "html_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "body": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "active_lock_reason": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "performed_via_github_app": { - "type": [ - "null", - "string" - ] + "$ref": "shared/performed_via_github_app.json#/" }, "assignee": { - "type": [ - "null", - "object" - ], - "properties": { - "gravatar_id": { - "type": [ - "null", - "string" - ] - }, - "received_events_url": { - "type": [ - "null", - "string" - ] - }, - "url": { - "type": [ - "null", - "string" - ] - }, - "subscriptions_url": { - "type": [ - "null", - "string" - ] - }, - "gists_url": { - "type": [ - "null", - "string" - ] - }, - "html_url": { - "type": [ - "null", - "string" - ] - }, - "repos_url": { - "type": [ - "null", - "string" - ] - }, - "events_url": { - "type": [ - "null", - "string" - ] - }, - "login": { - "type": [ - "null", 
- "string" - ] - }, - "followers_url": { - "type": [ - "null", - "string" - ] - }, - "starred_url": { - "type": [ - "null", - "string" - ] - }, - "avatar_url": { - "type": [ - "null", - "string" - ] - }, - "id": { - "type": [ - "null", - "integer" - ] - }, - "type": { - "type": [ - "null", - "string" - ] - }, - "site_admin": { - "type": [ - "null", - "boolean" - ] - }, - "node_id": { - "type": [ - "null", - "string" - ] - }, - "organizations_url": { - "type": [ - "null", - "string" - ] - }, - "following_url": { - "type": [ - "null", - "string" - ] - } - } + "$ref": "shared/user.json#/" }, "id": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, "pull_request": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "diff_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "html_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "patch_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] + }, + "merged_at": { + "type": ["null", "string"], + "format": "date-time" } } }, "comments_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "locked": { - "type": [ - "null", - "boolean" - ] + "type": ["null", "boolean"] }, "labels_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "events_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "updated_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "state": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "repository_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "number": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, "created_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "title": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "labels": { - "type": [ - "null", - "array" - ], + "type": ["null", "array"], "items": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "name": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "description": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "color": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "default": { - "type": [ - "null", - "boolean" - ] + "type": ["null", "boolean"] } } } }, - "assignees": { - "type": [ - "null", - "array" - ], - "items": { - "type": [ - "null", - "object" - ], - "properties": { - "gravatar_id": { - "type": [ - "null", - "string" - ] - }, - "received_events_url": { - "type": [ - "null", - "string" - ] - }, - "url": { - "type": [ - "null", - "string" - ] - }, - "subscriptions_url": { - "type": [ - "null", - "string" - ] - }, - "gists_url": { - "type": [ - "null", - "string" - ] - }, - "html_url": { - "type": [ - "null", - "string" - ] - }, - "repos_url": { - "type": [ - "null", - "string" - ] - }, - "events_url": { - 
"type": [ - "null", - "string" - ] - }, - "login": { - "type": [ - "null", - "string" - ] - }, - "followers_url": { - "type": [ - "null", - "string" - ] - }, - "starred_url": { - "type": [ - "null", - "string" - ] - }, - "avatar_url": { - "type": [ - "null", - "string" - ] - }, - "id": { - "type": [ - "null", - "integer" - ] - }, - "type": { - "type": [ - "null", - "string" - ] - }, - "site_admin": { - "type": [ - "null", - "boolean" - ] - }, - "node_id": { - "type": [ - "null", - "string" - ] - }, - "organizations_url": { - "type": [ - "null", - "string" - ] - }, - "following_url": { - "type": [ - "null", - "string" - ] + "reactions": { + "$ref": "shared/reactions.json#/" + }, + "repository": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "full_name": { + "type": ["null", "string"] + }, + "license": { + "type": ["null", "object"], + "properties": { + "key": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "spdx_id": { + "type": ["null", "string"] + }, + "node_id": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "organization": { + "$ref": "shared/user.json#/" + }, + "forks": { + "type": ["null", "integer"] + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "owner": { + "$ref": "shared/user.json#/" + }, + "private": { + "type": ["null", "boolean"] + }, + "html_url": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "fork": { + "type": ["null", "boolean"] + }, + "url": { + "type": ["null", "string"] + }, + "archive_url": { + "type": ["null", "string"] + }, + "assignees_url": { + "type": ["null", "string"] + }, + "blobs_url": { + "type": ["null", "string"] + }, + "branches_url": { + "type": ["null", "string"] + }, + "collaborators_url": { + "type": ["null", "string"] + }, + "comments_url": { + "type": ["null", "string"] + }, + "commits_url": { + "type": ["null", "string"] + }, + "compare_url": { + "type": ["null", "string"] + }, + "contents_url": { + "type": ["null", "string"] + }, + "contributors_url": { + "type": ["null", "string"] + }, + "deployments_url": { + "type": ["null", "string"] + }, + "downloads_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "forks_url": { + "type": ["null", "string"] + }, + "git_commits_url": { + "type": ["null", "string"] + }, + "git_refs_url": { + "type": ["null", "string"] + }, + "git_tags_url": { + "type": ["null", "string"] + }, + "git_url": { + "type": ["null", "string"] + }, + "issue_comment_url": { + "type": ["null", "string"] + }, + "issue_events_url": { + "type": ["null", "string"] + }, + "issues_url": { + "type": ["null", "string"] + }, + "keys_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "languages_url": { + "type": ["null", "string"] + }, + "merges_url": { + "type": ["null", "string"] + }, + "milestones_url": { + "type": ["null", "string"] + }, + "notifications_url": { + "type": ["null", "string"] + }, + "pulls_url": { + "type": ["null", "string"] + }, + "releases_url": { + "type": ["null", "string"] + }, + "ssh_url": { + "type": ["null", "string"] + }, + "stargazers_url": { + "type": ["null", "string"] + }, + "statuses_url": { + "type": ["null", "string"] + }, + "subscribers_url": { + "type": ["null", "string"] + }, + 
"subscription_url": { + "type": ["null", "string"] + }, + "tags_url": { + "type": ["null", "string"] + }, + "teams_url": { + "type": ["null", "string"] + }, + "trees_url": { + "type": ["null", "string"] + }, + "clone_url": { + "type": ["null", "string"] + }, + "mirror_url": { + "type": ["null", "string"] + }, + "hooks_url": { + "type": ["null", "string"] + }, + "svn_url": { + "type": ["null", "string"] + }, + "homepage": { + "type": ["null", "string"] + }, + "language": { + "type": ["null", "string"] + }, + "forks_count": { + "type": ["null", "integer"] + }, + "stargazers_count": { + "type": ["null", "integer"] + }, + "watchers_count": { + "type": ["null", "integer"] + }, + "size": { + "type": ["null", "integer"] + }, + "default_branch": { + "type": ["null", "string"] + }, + "open_issues_count": { + "type": ["null", "integer"] + }, + "is_template": { + "type": ["null", "boolean"] + }, + "topics": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "has_issues": { + "type": ["null", "boolean"] + }, + "has_projects": { + "type": ["null", "boolean"] + }, + "has_wiki": { + "type": ["null", "boolean"] + }, + "has_pages": { + "type": ["null", "boolean"] + }, + "has_downloads": { + "type": ["null", "boolean"] + }, + "archived": { + "type": ["null", "boolean"] + }, + "disabled": { + "type": ["null", "boolean"] + }, + "visibility": { + "type": ["null", "string"] + }, + "pushed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "allow_rebase_merge": { + "type": ["null", "boolean"] + }, + "template_repository": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "full_name": { + "type": ["null", "string"] + }, + "owner": { + "$ref": "shared/user.json#/" + }, + "private": { + "type": ["null", "boolean"] + }, + "html_url": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "fork": { + "type": ["null", "boolean"] + }, + "url": { + "type": ["null", "string"] + }, + "archive_url": { + "type": ["null", "string"] + }, + "assignees_url": { + "type": ["null", "string"] + }, + "blobs_url": { + "type": ["null", "string"] + }, + "branches_url": { + "type": ["null", "string"] + }, + "collaborators_url": { + "type": ["null", "string"] + }, + "comments_url": { + "type": ["null", "string"] + }, + "commits_url": { + "type": ["null", "string"] + }, + "compare_url": { + "type": ["null", "string"] + }, + "contents_url": { + "type": ["null", "string"] + }, + "contributors_url": { + "type": ["null", "string"] + }, + "deployments_url": { + "type": ["null", "string"] + }, + "downloads_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "forks_url": { + "type": ["null", "string"] + }, + "git_commits_url": { + "type": ["null", "string"] + }, + "git_refs_url": { + "type": ["null", "string"] + }, + "git_tags_url": { + "type": ["null", "string"] + }, + "git_url": { + "type": ["null", "string"] + }, + "issue_comment_url": { + "type": ["null", "string"] + }, + "issue_events_url": { + "type": ["null", "string"] + }, + "issues_url": { + "type": ["null", "string"] + }, + "keys_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "languages_url": { + "type": ["null", 
"string"] + }, + "merges_url": { + "type": ["null", "string"] + }, + "milestones_url": { + "type": ["null", "string"] + }, + "notifications_url": { + "type": ["null", "string"] + }, + "pulls_url": { + "type": ["null", "string"] + }, + "releases_url": { + "type": ["null", "string"] + }, + "ssh_url": { + "type": ["null", "string"] + }, + "stargazers_url": { + "type": ["null", "string"] + }, + "statuses_url": { + "type": ["null", "string"] + }, + "subscribers_url": { + "type": ["null", "string"] + }, + "subscription_url": { + "type": ["null", "string"] + }, + "tags_url": { + "type": ["null", "string"] + }, + "teams_url": { + "type": ["null", "string"] + }, + "trees_url": { + "type": ["null", "string"] + }, + "clone_url": { + "type": ["null", "string"] + }, + "mirror_url": { + "type": ["null", "string"] + }, + "hooks_url": { + "type": ["null", "string"] + }, + "svn_url": { + "type": ["null", "string"] + }, + "homepage": { + "type": ["null", "string"] + }, + "language": { + "type": ["null", "string"] + }, + "forks_count": { + "type": ["null", "integer"] + }, + "stargazers_count": { + "type": ["null", "integer"] + }, + "watchers_count": { + "type": ["null", "integer"] + }, + "size": { + "type": ["null", "integer"] + }, + "default_branch": { + "type": ["null", "string"] + }, + "open_issues_count": { + "type": ["null", "integer"] + }, + "is_template": { + "type": ["null", "boolean"] + }, + "topics": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "has_issues": { + "type": ["null", "boolean"] + }, + "has_projects": { + "type": ["null", "boolean"] + }, + "has_wiki": { + "type": ["null", "boolean"] + }, + "has_pages": { + "type": ["null", "boolean"] + }, + "has_downloads": { + "type": ["null", "boolean"] + }, + "archived": { + "type": ["null", "boolean"] + }, + "disabled": { + "type": ["null", "boolean"] + }, + "visibility": { + "type": ["null", "string"] + }, + "pushed_at": { + "type": ["null", "string"] + }, + "created_at": { + "type": ["null", "string"] + }, + "updated_at": { + "type": ["null", "string"] + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "allow_rebase_merge": { + "type": ["null", "boolean"] + }, + "temp_clone_token": { + "type": ["null", "string"] + }, + "allow_squash_merge": { + "type": ["null", "boolean"] + }, + "allow_auto_merge": { + "type": ["null", "boolean"] + }, + "delete_branch_on_merge": { + "type": ["null", "boolean"] + }, + "allow_update_branch": { + "type": ["null", "boolean"] + }, + "use_squash_pr_title_as_default": { + "type": ["null", "boolean"] + }, + "allow_merge_commit": { + "type": ["null", "boolean"] + }, + "subscribers_count": { + "type": ["null", "integer"] + }, + "network_count": { + "type": ["null", "integer"] + } } + }, + "temp_clone_token": { + "type": ["null", "string"] + }, + "allow_squash_merge": { + "type": ["null", "boolean"] + }, + "allow_auto_merge": { + "type": ["null", "boolean"] + }, + "delete_branch_on_merge": { + "type": ["null", "boolean"] + }, + "allow_update_branch": { + "type": ["null", "boolean"] + }, + "use_squash_pr_title_as_default": { + "type": ["null", "boolean"] + }, + "allow_merge_commit": { + "type": ["null", "boolean"] + }, + "allow_forking": { + "type": ["null", "boolean"] + }, + "subscribers_count": { + "type": ["null", "integer"] + }, + "network_count": { + "type": ["null", "integer"] + }, + "open_issues": { + "type": ["null", "integer"] + }, + "watchers": { + "type": ["null", "integer"] + }, + "master_branch": { + "type": ["null", "string"] + }, + "starred_at": { + 
"type": ["null", "string"] } } + }, + "assignees": { + "type": ["null", "array"], + "items": { + "$ref": "shared/user.json#/" + } } } }, "id": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, "actor": { - "type": [ - "null", - "object" - ], - "properties": { - "gravatar_id": { - "type": [ - "null", - "string" - ] - }, - "received_events_url": { - "type": [ - "null", - "string" - ] - }, - "url": { - "type": [ - "null", - "string" - ] - }, - "subscriptions_url": { - "type": [ - "null", - "string" - ] - }, - "gists_url": { - "type": [ - "null", - "string" - ] - }, - "html_url": { - "type": [ - "null", - "string" - ] - }, - "repos_url": { - "type": [ - "null", - "string" - ] - }, - "events_url": { - "type": [ - "null", - "string" - ] - }, - "login": { - "type": [ - "null", - "string" - ] - }, - "followers_url": { - "type": [ - "null", - "string" - ] - }, - "starred_url": { - "type": [ - "null", - "string" - ] - }, - "avatar_url": { - "type": [ - "null", - "string" - ] - }, - "id": { - "type": [ - "null", - "integer" - ] - }, - "type": { - "type": [ - "null", - "string" - ] - }, - "site_admin": { - "type": [ - "null", - "boolean" - ] - }, - "node_id": { - "type": [ - "null", - "string" - ] - }, - "organizations_url": { - "type": [ - "null", - "string" - ] - }, - "following_url": { - "type": [ - "null", - "string" - ] - } - } + "$ref": "shared/user.json#/" }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "performed_via_github_app": { - "type": [ - "null", - "string" - ] + "$ref": "shared/performed_via_github_app.json#/" }, "_sdc_repository": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "rename": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "to": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "from": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] } } }, "label": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "name": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "color": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] } } }, "requested_reviewer": { - "type": [ - "null", - "object" - ], + "$ref": "shared/user.json#/" + }, + "review_requester": { + "$ref": "shared/user.json#/" + }, + "requested_team": { + "type": ["null", "object"], "properties": { - "gravatar_id": { - "type": [ - "null", - "string" - ] - }, - "received_events_url": { - "type": [ - "null", - "string" - ] - }, - "url": { - "type": [ - "null", - "string" - ] - }, - "subscriptions_url": { - "type": [ - "null", - "string" - ] - }, - "gists_url": { - "type": [ - "null", - "string" - ] - }, - "html_url": { - "type": [ - "null", - "string" - ] + "id": { + "type": ["null", "integer"] }, - "repos_url": { - "type": [ - "null", - "string" - ] + "node_id": { + "type": ["null", "string"] }, - "events_url": { - "type": [ - "null", - "string" - ] + "name": { + "type": ["null", "string"] }, - "login": { - "type": [ - "null", - "string" - ] + "slug": { + "type": ["null", "string"] }, - "followers_url": { - "type": [ - "null", - "string" - ] + "description": { + "type": ["null", "string"] }, - "starred_url": { - "type": [ - "null", - "string" - ] + "privacy": { + "type": ["null", "string"] }, - "avatar_url": { - "type": [ - "null", - "string" - ] + "permission": { + "type": ["null", "string"] }, - "id": { - "type": [ - "null", - "integer" - ] + "permissions": { + "$ref": 
"shared/pull_permissions.json#/" }, - "type": { - "type": [ - "null", - "string" - ] + "url": { + "type": ["null", "string"] }, - "site_admin": { - "type": [ - "null", - "boolean" - ] + "html_url": { + "type": ["null", "string"] }, - "node_id": { - "type": [ - "null", - "string" - ] + "members_url": { + "type": ["null", "string"] }, - "organizations_url": { - "type": [ - "null", - "string" - ] + "repositories_url": { + "type": ["null", "string"] }, - "following_url": { - "type": [ - "null", - "string" - ] + "parent": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "members_url": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "permission": { + "type": ["null", "string"] + }, + "privacy": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "repositories_url": { + "type": ["null", "string"] + }, + "slug": { + "type": ["null", "string"] + }, + "ldap_dn": { + "type": ["null", "string"] + } + } } } }, - "review_requester": { - "type": [ - "null", - "object" - ], + "dismissed_review": { + "type": ["null", "object"], "properties": { - "gravatar_id": { - "type": [ - "null", - "string" - ] - }, - "received_events_url": { - "type": [ - "null", - "string" - ] - }, - "url": { - "type": [ - "null", - "string" - ] - }, - "subscriptions_url": { - "type": [ - "null", - "string" - ] - }, - "gists_url": { - "type": [ - "null", - "string" - ] - }, - "html_url": { - "type": [ - "null", - "string" - ] - }, - "repos_url": { - "type": [ - "null", - "string" - ] - }, - "events_url": { - "type": [ - "null", - "string" - ] - }, - "login": { - "type": [ - "null", - "string" - ] - }, - "followers_url": { - "type": [ - "null", - "string" - ] - }, - "starred_url": { - "type": [ - "null", - "string" - ] - }, - "avatar_url": { - "type": [ - "null", - "string" - ] - }, - "id": { - "type": [ - "null", - "integer" - ] - }, - "type": { - "type": [ - "null", - "string" - ] - }, - "site_admin": { - "type": [ - "null", - "boolean" - ] + "state": { + "type": ["null", "string"] }, - "node_id": { - "type": [ - "null", - "string" - ] + "review_id": { + "type": ["null", "integer"] }, - "organizations_url": { - "type": [ - "null", - "string" - ] + "dismissal_message": { + "type": ["null", "string"] }, - "following_url": { - "type": [ - "null", - "string" - ] + "dismissal_commit_id": { + "type": ["null", "string"] } } }, - "assignee": { - "type": [ - "null", - "object" - ], + "milestone": { + "type": ["null", "object"], "properties": { - "gravatar_id": { - "type": [ - "null", - "string" - ] - }, - "received_events_url": { - "type": [ - "null", - "string" - ] - }, - "url": { - "type": [ - "null", - "string" - ] - }, - "subscriptions_url": { - "type": [ - "null", - "string" - ] - }, - "gists_url": { - "type": [ - "null", - "string" - ] - }, - "html_url": { - "type": [ - "null", - "string" - ] - }, - "repos_url": { - "type": [ - "null", - "string" - ] - }, - "events_url": { - "type": [ - "null", - "string" - ] - }, - "login": { - "type": [ - "null", - "string" - ] - }, - "followers_url": { - "type": [ - "null", - "string" - ] - }, - "starred_url": { - "type": [ - "null", - "string" - ] - }, - "avatar_url": { - "type": [ - "null", - "string" - ] - }, - "id": { - "type": [ - "null", - "integer" - ] - }, - "type": { - "type": [ - "null", - "string" - ] - }, - "site_admin": { - 
"type": [ - "null", - "boolean" - ] - }, - "node_id": { - "type": [ - "null", - "string" - ] - }, - "organizations_url": { - "type": [ - "null", - "string" - ] - }, - "following_url": { - "type": [ - "null", - "string" - ] + "title": { + "type":["null", "string"] } } }, - "assigner": { - "type": [ - "null", - "object" - ], + "project_card": { + "type": ["null", "object"], "properties": { - "gravatar_id": { - "type": [ - "null", - "string" - ] - }, - "received_events_url": { - "type": [ - "null", - "string" - ] - }, "url": { - "type": [ - "null", - "string" - ] - }, - "subscriptions_url": { - "type": [ - "null", - "string" - ] - }, - "gists_url": { - "type": [ - "null", - "string" - ] - }, - "html_url": { - "type": [ - "null", - "string" - ] - }, - "repos_url": { - "type": [ - "null", - "string" - ] - }, - "events_url": { - "type": [ - "null", - "string" - ] - }, - "login": { - "type": [ - "null", - "string" - ] - }, - "followers_url": { - "type": [ - "null", - "string" - ] - }, - "starred_url": { - "type": [ - "null", - "string" - ] - }, - "avatar_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, - "type": { - "type": [ - "null", - "string" - ] + "project_url": { + "type": ["null", "string"] }, - "site_admin": { - "type": [ - "null", - "boolean" - ] + "project_id": { + "type": ["null", "integer"] }, - "node_id": { - "type": [ - "null", - "string" - ] - }, - "organizations_url": { - "type": [ - "null", - "string" - ] + "column_name": { + "type": ["null", "string"] }, - "following_url": { - "type": [ - "null", - "string" - ] + "previous_column_name": { + "type": ["null", "string"] } } + }, + "draft": { + "type": ["null", "boolean"] + }, + "author_association": { + "type": ["null", "string"] + }, + "lock_reason": { + "type": ["null", "string"] + }, + "assignee": { + "$ref": "shared/user.json#/" + }, + "assigner": { + "$ref": "shared/user.json#/" } } } \ No newline at end of file diff --git a/tap_github/schemas/issue_labels.json b/tap_github/schemas/issue_labels.json index d1962337..32a097df 100644 --- a/tap_github/schemas/issue_labels.json +++ b/tap_github/schemas/issue_labels.json @@ -1,56 +1,29 @@ { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "_sdc_repository": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "name": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "description": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "color": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "default": { - "type": [ - "null", - "boolean" - ] + "type": ["null", "boolean"] } } - } \ No newline at end of file +} diff --git a/tap_github/schemas/issue_milestones.json b/tap_github/schemas/issue_milestones.json index d2c2f372..eb14f446 100644 --- a/tap_github/schemas/issue_milestones.json +++ b/tap_github/schemas/issue_milestones.json @@ -1,224 +1,125 @@ { - "type": [ - "null", - "object" - ], - "properties": { - "_sdc_repository": { - "type": [ - "null", - "string" - ] - }, - "url": { - "type": [ - "null", - "string" - ] - }, - "html_url": { - "type": [ - "null", - "string" - ] - }, - "labels_url": { - "type": [ - "null", - "string" - ] - }, 
- "id": { - "type": [ - "null", - "number" - ] - }, - "node_id": { - "type": [ - "null", - "string" - ] - }, - "number": { - "type": [ - "null", - "number" - ] - }, - "state": { - "type": [ - "null", - "string" - ] - }, - "title": { - "type": [ - "null", - "string" - ] - }, - "description": { - "type": [ - "null", - "string" - ] - }, - "creator": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "number"] + }, + "node_id": { + "type": ["null", "string"] + }, + "number": { + "type": ["null", "number"] + }, + "state": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "creator": { + "type": ["null", "object"], "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, "login": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "avatar_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "gravatar_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "html_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "followers_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "following_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "gists_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "starred_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "subscriptions_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "organizations_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "repos_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "events_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "received_events_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "type": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "site_admin": { - "type": [ - "null", - "boolean" - ] + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] } } - }, - "open_issues": { - "type": [ - "null", - "number" - ] - }, - "closed_issues": { - "type": [ - "null", - "number" - ] - }, - "created_at": { - "type": [ - "null", - "string" - ], - "format": "date-time" - }, - "updated_at": { - "type": [ - "null", - "string" - ], - "format": "date-time" - }, - "closed_at": { - "type": [ - "null", - "string" - ], - "format": "date-time" - }, - "due_on": { - "type": [ - "null", - "string" - ], - "format": "date-time" - } + }, + "open_issues": { + "type": ["null", "number"] + }, + "closed_issues": { + "type": ["null", "number"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "closed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "due_on": { + "type": ["null", "string"], + "format": "date-time" } - } \ No 
newline at end of file + } +} diff --git a/tap_github/schemas/issues.json b/tap_github/schemas/issues.json index 81737ce1..93365708 100644 --- a/tap_github/schemas/issues.json +++ b/tap_github/schemas/issues.json @@ -1,21 +1,18 @@ { "properties": { "state": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] + }, + "state_reason": { + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "labels": { "type": ["null", "array"], "items": { - "type": "object", + "type": ["null", "object"], "properties": { "id": { "type": ["null", "integer"] @@ -42,265 +39,238 @@ } }, "repository_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "number": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, "closed_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "labels_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "title": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "assignee": { - "type": [ - "null", - "object" - ], - "properties": {} + "$ref": "shared/user.json#/" + }, + "assignees": { + "type": ["null", "array"], + "items": { + "$ref": "shared/user.json#/" + } + }, + "milestone": { + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "number": { + "type": ["null", "integer"] + }, + "state": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "creator": { + "$ref": "shared/user.json#/" + }, + "open_issues": { + "type": ["null", "integer"] + }, + "closed_issues": { + "type": ["null", "integer"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "closed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "due_on": { + "type": ["null", "string"], + "format": "date-time" + } + } + }, + "reactions": { + "$ref": "shared/reactions.json#/" + }, + "active_lock_reason": { + "type": ["null", "string"] + }, + "body_html": { + "type": ["null", "string"] + }, + "performed_via_github_app": { + "$ref": "shared/performed_via_github_app.json#/" + }, + "timeline_url": { + "type": ["null", "string"] + }, + "closed_by": { + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": 
["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] + } + } }, "updated_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "html_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "author_association": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "locked": { - "type": [ - "null", - "boolean" - ] + "type": ["null", "boolean"] }, "events_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "pull_request": { "properties": { "diff_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "html_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "patch_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] + }, + "merged_at": { + "type": ["null", "string"], + "format": "date-time" } }, - "type": [ - "null", - "object" - ] + "type": ["null", "object"] }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "body": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "comments": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, "created_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "_sdc_repository": { "type": ["string"] }, "user": { - "properties": { - "repos_url": { - "type": [ - "null", - "string" - ] - }, - "starred_url": { - "type": [ - "null", - "string" - ] - }, - "url": { - "type": [ - "null", - "string" - ] - }, - "received_events_url": { - "type": [ - "null", - "string" - ] - }, - "site_admin": { - "type": [ - "null", - "boolean" - ] - }, - "gravatar_id": { - "type": [ - "null", - "string" - ] - }, - "following_url": { - "type": [ - "null", - "string" - ] - }, - "avatar_url": { - "type": [ - "null", - "string" - ] - }, - "events_url": { - "type": [ - "null", - "string" - ] - }, - "id": { - "type": [ - "null", - "integer" - ] - }, - "login": { - "type": [ - "null", - "string" - ] - }, - "organizations_url": { - "type": [ - "null", - "string" - ] - }, - "html_url": { - "type": [ - "null", - "string" - ] - }, - "type": { - "type": [ - "null", - "string" - ] - }, - "subscriptions_url": { - "type": [ - "null", - "string" - ] - }, - "node_id": { - "type": [ - "null", - "string" - ] - }, - "followers_url": { - "type": [ - "null", - "string" - ] - }, - "gists_url": { - "type": [ - "null", - "string" - ] - } - }, - "type": [ - "null", - "object" - ] + "$ref": "shared/user.json#/" }, "id": { - "type": [ - "null", - "integer" - ] + "type": ["null", "integer"] }, "comments_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] + }, + "body_text": { + "type": ["null", "string"] + }, + "draft": { + "type": ["null", "boolean"] } }, - "type": [ - "null", - "object" - ] + "type": ["null", "object"] } diff --git a/tap_github/schemas/pr_commits.json b/tap_github/schemas/pr_commits.json new file mode 100644 index 00000000..f4fa2f82 --- /dev/null +++ b/tap_github/schemas/pr_commits.json @@ -0,0 +1,323 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["string"] + }, + "sha": { + "type": ["null", "string"], + "description": "The git commit hash" + }, + "node_id": { + "type": ["null","string"] + }, + "url": { + "type": ["null", "string"] + }, + "parents": 
{ + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "additionalProperties": false, + "properties": { + "sha": { + "type": ["null", "string"], + "description": "The git hash of the parent commit" + }, + "url": { + "type": ["null", "string"], + "description": "The URL to the parent commit" + }, + "html_url": { + "type": ["null", "string"], + "description": "The HTML URL to the parent commit" + } + } + } + }, + "files": { + "type": ["null","array"], + "items": { + "type": ["null","object"], + "properties": { + "filename": { + "type": ["null","string"] + }, + "additions": { + "type": ["null","number"] + }, + "deletions": { + "type": ["null","number"] + }, + "changes": { + "type": ["null","number"] + }, + "status": { + "type": ["null","string"] + }, + "raw_url": { + "type": ["null","string"] + }, + "blob_url": { + "type": ["null","string"] + }, + "contents_url": { + "type": ["null","string"] + }, + "sha": { + "type": ["null","string"] + }, + "patch": { + "type": ["null","string"] + } + } + } + }, + "html_url": { + "type": ["null", "string"], + "description": "The HTML URL to the commit" + }, + "comments_url": { + "type": ["null", "string"], + "description": "The URL to the commit's comments page" + }, + "commit": { + "type": ["null", "object"], + "additionalProperties": false, + "properties": { + "url": { + "type": ["null", "string"], + "description": "The URL to the commit" + }, + "tree": { + "type": ["null", "object"], + "additionalProperties": false, + "properties": { + "sha": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + } + } + }, + "author": { + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] + }, + "date": { + "type": ["null", "string"], + "format": "date-time" + } + } + }, + "verification": { + "type": ["null", "object"], + "properties": { + "verified": { + "type": ["null", "boolean"] + }, + "reason": { + "type": ["null", "string"] + }, + "payload": { + "type": ["null", "string"] + }, + "signature": { + "type": ["null", "string"] + } + } + }, + "message": { + "type": ["null", "string"], + "description": "The commit message" + }, + "comment_count": { + "type": ["null", "integer"] + }, + "committer": { + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] + }, + "date": { + "type": ["null", "string"], + "format": "date-time" + } + } + } + } + }, + 
"committer": { + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] + }, + "date": { + "type": ["null", "string"], + "format": "date-time" + } + } + }, + "pr_number": { + "type": ["null", "integer"] + }, + "pr_id": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "string"] + }, + "author": { + "$ref": "shared/user.json#/" + }, + "stats": { + "type": ["null", "object"], + "properties": { + "additions": { + "type": ["null", "integer"] + }, + "deletions": { + "type": ["null", "integer"] + }, + "total": { + "type": ["null", "integer"] + } + } + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + } + } +} \ No newline at end of file diff --git a/tap_github/schemas/project_cards.json b/tap_github/schemas/project_cards.json index f0bc15ab..bb62bdf2 100644 --- a/tap_github/schemas/project_cards.json +++ b/tap_github/schemas/project_cards.json @@ -1,108 +1,120 @@ { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "_sdc_repository": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] + }, + "project_id": { + "type": ["null", "string"] + }, + "column_name": { + "type": ["null", "string"] }, "cards_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "name": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "note": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "creator": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, "login": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + 
"subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] } } }, "created_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "updated_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "archived": { - "type": [ - "null", - "boolean" - ] + "type": ["null", "boolean"] }, "column_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "content_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "project_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] } } - } \ No newline at end of file +} \ No newline at end of file diff --git a/tap_github/schemas/project_columns.json b/tap_github/schemas/project_columns.json index 1ebe6782..87e72543 100644 --- a/tap_github/schemas/project_columns.json +++ b/tap_github/schemas/project_columns.json @@ -1,64 +1,34 @@ { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "_sdc_repository": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "project_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "cards_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "name": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "created_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" }, "updated_at": { - "type": [ - "null", - "string" - ], + "type": ["null", "string"], "format": "date-time" } } - } \ No newline at end of file +} diff --git a/tap_github/schemas/projects.json b/tap_github/schemas/projects.json index 3b4f5d89..e8d659ad 100644 --- a/tap_github/schemas/projects.json +++ b/tap_github/schemas/projects.json @@ -1,102 +1,117 @@ { - "type": [ - "null", - "object" - ], - "properties": { - "owner_url": { - "type": [ - "null", - "string" - ] - }, - "url": { - "type": [ - "null", - "string" - ] - }, - "html_url": { - "type": [ - "null", - "string" - ] - }, - "columns_url": { - "type": [ - "null", - "string" - ] - }, - "id": { - "type": [ - "null", - "number" - ] - }, - "node_id": { - "type": [ - "null", - "string" - ] - }, - "name": { - "type": [ - "null", - "string" - ] - }, - "body": { - "type": [ - "null", - "string" - ] - }, - "number": { - "type": [ - "null", - "number" - ] - }, - "state": { - "type": [ - "null", - "string" - ] - }, - "creator": { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], + "properties": { + "owner_url": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "columns_url": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "number"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "body": { + "type": ["null", "string"] + }, + "number": { + "type": 
["null", "number"] + }, + "state": { + "type": ["null", "string"] + }, + "creator": { + "type": ["null", "object"], "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, "login": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] } } - }, - "created_at": { - "type": [ - "null", - "string" - ], - "format": "date-time" - }, - "updated_at": { - "type": [ - "null", - "string" - ], - "format": "date-time" - } + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "organization_permission": { + "type": ["null", "string"] + }, + "private": { + "type": ["null", "boolean"] } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/tap_github/schemas/pull_requests.json b/tap_github/schemas/pull_requests.json index 91c0c4bb..cbb0128d 100644 --- a/tap_github/schemas/pull_requests.json +++ b/tap_github/schemas/pull_requests.json @@ -8,6 +8,999 @@ "id": { "type": ["null", "string"] }, + "node_id": { + "type": ["null", "string"] + }, + "statuses_url": { + "type": ["null", "string"] + }, + "draft": { + "type": ["null", "boolean"] + }, + "requested_reviewers": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] + } + } + } + }, + "merge_commit_sha": { + "type": ["null", "string"] + }, + "review_comments_url": { + "type": ["null", "string"] + }, + 
"active_lock_reason": { + "type": ["null", "string"] + }, + "author_association": { + "type": ["null", "string"] + }, + "diff_url": { + "type": ["null", "string"] + }, + "assignee": { + "$ref": "shared/user.json#/" + }, + "comments_url": { + "type": ["null", "string"] + }, + "head": { + "type": ["null", "object"], + "properties": { + "label": { + "type": ["null", "string"] + }, + "ref": { + "type": ["null", "string"] + }, + "repo": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "full_name": { + "type": ["null", "string"] + }, + "license": { + "type": ["null", "object"], + "properties": { + "key": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "spdx_id": { + "type": ["null", "string"] + }, + "node_id": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "organization": { + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] + } + } + }, + "forks": { + "type": ["null", "integer"] + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "owner": { + "$ref": "shared/user.json#/" + }, + "private": { + "type": ["null", "boolean"] + }, + "html_url": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "fork": { + "type": ["null", "boolean"] + }, + "url": { + "type": ["null", "string"] + }, + "archive_url": { + "type": ["null", "string"] + }, + "assignees_url": { + "type": ["null", "string"] + }, + "blobs_url": { + "type": ["null", "string"] + }, + "branches_url": { + "type": ["null", "string"] + }, + "collaborators_url": { + "type": ["null", "string"] + }, + "comments_url": { + "type": ["null", "string"] + }, + "commits_url": { + "type": ["null", "string"] + }, + "compare_url": { + "type": ["null", "string"] + }, + "contents_url": { + "type": ["null", "string"] + }, + "contributors_url": { + "type": ["null", "string"] + }, + "deployments_url": { + "type": ["null", "string"] + }, + "downloads_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "forks_url": { + "type": ["null", "string"] + }, + "git_commits_url": { + "type": ["null", "string"] + }, + "git_refs_url": { + "type": ["null", "string"] + }, + "git_tags_url": { + "type": ["null", "string"] + }, + "git_url": { + "type": 
["null", "string"] + }, + "issue_comment_url": { + "type": ["null", "string"] + }, + "issue_events_url": { + "type": ["null", "string"] + }, + "issues_url": { + "type": ["null", "string"] + }, + "keys_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "languages_url": { + "type": ["null", "string"] + }, + "merges_url": { + "type": ["null", "string"] + }, + "milestones_url": { + "type": ["null", "string"] + }, + "notifications_url": { + "type": ["null", "string"] + }, + "pulls_url": { + "type": ["null", "string"] + }, + "releases_url": { + "type": ["null", "string"] + }, + "ssh_url": { + "type": ["null", "string"] + }, + "stargazers_url": { + "type": ["null", "string"] + }, + "statuses_url": { + "type": ["null", "string"] + }, + "subscribers_url": { + "type": ["null", "string"] + }, + "subscription_url": { + "type": ["null", "string"] + }, + "tags_url": { + "type": ["null", "string"] + }, + "teams_url": { + "type": ["null", "string"] + }, + "trees_url": { + "type": ["null", "string"] + }, + "clone_url": { + "type": ["null", "string"] + }, + "mirror_url": { + "type": ["null", "string"] + }, + "hooks_url": { + "type": ["null", "string"] + }, + "svn_url": { + "type": ["null", "string"] + }, + "homepage": { + "type": ["null", "string"] + }, + "language": { + "type": ["null", "string"] + }, + "forks_count": { + "type": ["null", "integer"] + }, + "stargazers_count": { + "type": ["null", "integer"] + }, + "watchers_count": { + "type": ["null", "integer"] + }, + "size": { + "type": ["null", "integer"] + }, + "default_branch": { + "type": ["null", "string"] + }, + "open_issues_count": { + "type": ["null", "integer"] + }, + "is_template": { + "type": ["null", "boolean"] + }, + "topics": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "has_issues": { + "type": ["null", "boolean"] + }, + "has_projects": { + "type": ["null", "boolean"] + }, + "has_wiki": { + "type": ["null", "boolean"] + }, + "has_pages": { + "type": ["null", "boolean"] + }, + "has_downloads": { + "type": ["null", "boolean"] + }, + "archived": { + "type": ["null", "boolean"] + }, + "disabled": { + "type": ["null", "boolean"] + }, + "visibility": { + "type": ["null", "string"] + }, + "pushed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "allow_rebase_merge": { + "type": ["null", "boolean"] + }, + "template_repository": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "full_name": { + "type": ["null", "string"] + }, + "owner": { + "$ref": "shared/user.json#/" + }, + "private": { + "type": ["null", "boolean"] + }, + "html_url": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "fork": { + "type": ["null", "boolean"] + }, + "url": { + "type": ["null", "string"] + }, + "archive_url": { + "type": ["null", "string"] + }, + "assignees_url": { + "type": ["null", "string"] + }, + "blobs_url": { + "type": ["null", "string"] + }, + "branches_url": { + "type": ["null", "string"] + }, + "collaborators_url": { + "type": ["null", "string"] + }, + "comments_url": { + "type": ["null", "string"] + }, + "commits_url": { + "type": ["null", "string"] + }, + "compare_url": { + "type": ["null", "string"] + }, + "contents_url": 
{ + "type": ["null", "string"] + }, + "contributors_url": { + "type": ["null", "string"] + }, + "deployments_url": { + "type": ["null", "string"] + }, + "downloads_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "forks_url": { + "type": ["null", "string"] + }, + "git_commits_url": { + "type": ["null", "string"] + }, + "git_refs_url": { + "type": ["null", "string"] + }, + "git_tags_url": { + "type": ["null", "string"] + }, + "git_url": { + "type": ["null", "string"] + }, + "issue_comment_url": { + "type": ["null", "string"] + }, + "issue_events_url": { + "type": ["null", "string"] + }, + "issues_url": { + "type": ["null", "string"] + }, + "keys_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "languages_url": { + "type": ["null", "string"] + }, + "merges_url": { + "type": ["null", "string"] + }, + "milestones_url": { + "type": ["null", "string"] + }, + "notifications_url": { + "type": ["null", "string"] + }, + "pulls_url": { + "type": ["null", "string"] + }, + "releases_url": { + "type": ["null", "string"] + }, + "ssh_url": { + "type": ["null", "string"] + }, + "stargazers_url": { + "type": ["null", "string"] + }, + "statuses_url": { + "type": ["null", "string"] + }, + "subscribers_url": { + "type": ["null", "string"] + }, + "subscription_url": { + "type": ["null", "string"] + }, + "tags_url": { + "type": ["null", "string"] + }, + "teams_url": { + "type": ["null", "string"] + }, + "trees_url": { + "type": ["null", "string"] + }, + "clone_url": { + "type": ["null", "string"] + }, + "mirror_url": { + "type": ["null", "string"] + }, + "hooks_url": { + "type": ["null", "string"] + }, + "svn_url": { + "type": ["null", "string"] + }, + "homepage": { + "type": ["null", "string"] + }, + "language": { + "type": ["null", "string"] + }, + "forks_count": { + "type": ["null", "integer"] + }, + "stargazers_count": { + "type": ["null", "integer"] + }, + "watchers_count": { + "type": ["null", "integer"] + }, + "size": { + "type": ["null", "integer"] + }, + "default_branch": { + "type": ["null", "string"] + }, + "open_issues_count": { + "type": ["null", "integer"] + }, + "is_template": { + "type": ["null", "boolean"] + }, + "topics": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "has_issues": { + "type": ["null", "boolean"] + }, + "has_projects": { + "type": ["null", "boolean"] + }, + "has_wiki": { + "type": ["null", "boolean"] + }, + "has_pages": { + "type": ["null", "boolean"] + }, + "has_downloads": { + "type": ["null", "boolean"] + }, + "archived": { + "type": ["null", "boolean"] + }, + "disabled": { + "type": ["null", "boolean"] + }, + "visibility": { + "type": ["null", "string"] + }, + "pushed_at": { + "type": ["null", "string"] + }, + "created_at": { + "type": ["null", "string"] + }, + "updated_at": { + "type": ["null", "string"] + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "allow_rebase_merge": { + "type": ["null", "boolean"] + }, + "temp_clone_token": { + "type": ["null", "string"] + }, + "allow_squash_merge": { + "type": ["null", "boolean"] + }, + "allow_auto_merge": { + "type": ["null", "boolean"] + }, + "delete_branch_on_merge": { + "type": ["null", "boolean"] + }, + "allow_update_branch": { + "type": ["null", "boolean"] + }, + "use_squash_pr_title_as_default": { + "type": ["null", "boolean"] + }, + "allow_merge_commit": { + "type": ["null", "boolean"] + }, + "subscribers_count": { + "type": ["null", "integer"] + }, + "network_count": { 
+ "type": ["null", "integer"] + } + } + }, + "temp_clone_token": { + "type": ["null", "string"] + }, + "allow_squash_merge": { + "type": ["null", "boolean"] + }, + "allow_auto_merge": { + "type": ["null", "boolean"] + }, + "delete_branch_on_merge": { + "type": ["null", "boolean"] + }, + "allow_update_branch": { + "type": ["null", "boolean"] + }, + "use_squash_pr_title_as_default": { + "type": ["null", "boolean"] + }, + "allow_merge_commit": { + "type": ["null", "boolean"] + }, + "allow_forking": { + "type": ["null", "boolean"] + }, + "subscribers_count": { + "type": ["null", "integer"] + }, + "network_count": { + "type": ["null", "integer"] + }, + "open_issues": { + "type": ["null", "integer"] + }, + "watchers": { + "type": ["null", "integer"] + }, + "master_branch": { + "type": ["null", "string"] + }, + "starred_at": { + "type": ["null", "string"] + } + } + }, + "sha": { + "type": ["null", "string"] + }, + "user": { + "$ref": "shared/user.json#/" + } + } + }, + "commits_url": { + "type": ["null", "string"] + }, + "auto_merge": { + "type": ["null", "object"], + "properties": { + "enabled_by": { + "$ref": "shared/user.json#/" + }, + "merge_method": { + "type": ["null", "string"] + }, + "commit_title": { + "type": ["null", "string"] + }, + "commit_message": { + "type": ["null", "string"] + } + } + }, + "locked": { + "type": ["null", "boolean"] + }, + "assignees": { + "type": ["null", "array"], + "items": { + "$ref": "shared/user.json#/" + } + }, + "issues_url": { + "type": ["null", "string"] + }, + "milestone": { + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "number": { + "type": ["null", "integer"] + }, + "state": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "creator": { + "$ref": "shared/user.json#/" + }, + "open_issues": { + "type": ["null", "integer"] + }, + "closed_issues": { + "type": ["null", "integer"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "closed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "due_on": { + "type": ["null", "string"], + "format": "date-time" + } + } + }, + "_links": { + "type": ["null", "object"], + "properties": { + "comments": { + "type": ["null", "object"], + "properties": { + "href": { + "type": ["null", "string"] + } + } + }, + "commits": { + "type": ["null", "object"], + "properties": { + "href": { + "type": ["null", "string"] + } + } + }, + "statuses": { + "type": ["null", "object"], + "properties": { + "href": { + "type": ["null", "string"] + } + } + }, + "html": { + "type": ["null", "object"], + "properties": { + "href": { + "type": ["null", "string"] + } + } + }, + "issue": { + "type": ["null", "object"], + "properties": { + "href": { + "type": ["null", "string"] + } + } + }, + "review_comments": { + "type": ["null", "object"], + "properties": { + "href": { + "type": ["null", "string"] + } + } + }, + "review_comment": { + "type": ["null", "object"], + "properties": { + "href": { + "type": ["null", "string"] + } + } + }, + "self": { + "type": ["null", "object"], + "properties": { + "href": { + "type": ["null", "string"] + } + } + } + } + }, + "html_url": { + "type": ["null", "string"] 
+ }, + "requested_teams": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "slug": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "privacy": { + "type": ["null", "string"] + }, + "permission": { + "type": ["null", "string"] + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "members_url": { + "type": ["null", "string"] + }, + "repositories_url": { + "type": ["null", "string"] + }, + "parent": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "members_url": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "permission": { + "type": ["null", "string"] + }, + "privacy": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "repositories_url": { + "type": ["null", "string"] + }, + "slug": { + "type": ["null", "string"] + }, + "ldap_dn": { + "type": ["null", "string"] + } + } + } + } + } + }, + "patch_url": { + "type": ["null", "string"] + }, "url": { "type": ["null", "string"] }, @@ -53,42 +1046,583 @@ } }, "user": { - "type": ["null", "object"], - "additionalProperties": false, - "properties": { - "login": { - "type": ["null", "string"] - }, - "id": { - "type": ["null", "integer"] - } - } + "$ref": "shared/user.json#/" }, "base": { "type": ["null", "object"], "properties": { - "ref": { + "label": { "type": ["null", "string"] }, - "label": { + "ref": { "type": ["null", "string"] }, "repo": { "type": ["null", "object"], "properties": { "id": { - "type": [ "null", "integer" ] + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] }, "name": { - "type": [ "null", "string" ] + "type": ["null", "string"] + }, + "full_name": { + "type": ["null", "string"] + }, + "license": { + "type": ["null", "object"], + "properties": { + "key": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "spdx_id": { + "type": ["null", "string"] + }, + "node_id": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "organization": { + "$ref": "shared/user.json#/" + }, + "forks": { + "type": ["null", "integer"] + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "owner": { + "$ref": "shared/user.json#/" + }, + "private": { + "type": ["null", "boolean"] + }, + "html_url": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "fork": { + "type": ["null", "boolean"] }, "url": { - "type": [ "null", "string" ] + "type": ["null", "string"] + }, + "archive_url": { + "type": ["null", "string"] + }, + "assignees_url": { + "type": ["null", "string"] + }, + "blobs_url": { + "type": ["null", "string"] + }, + "branches_url": { + "type": ["null", "string"] + }, + "collaborators_url": { + "type": ["null", "string"] + }, + "comments_url": { + "type": ["null", "string"] + }, + "commits_url": { + "type": ["null", "string"] + }, + "compare_url": { + "type": ["null", "string"] + }, + "contents_url": { + "type": ["null", "string"] + }, + 
"contributors_url": { + "type": ["null", "string"] + }, + "deployments_url": { + "type": ["null", "string"] + }, + "downloads_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "forks_url": { + "type": ["null", "string"] + }, + "git_commits_url": { + "type": ["null", "string"] + }, + "git_refs_url": { + "type": ["null", "string"] + }, + "git_tags_url": { + "type": ["null", "string"] + }, + "git_url": { + "type": ["null", "string"] + }, + "issue_comment_url": { + "type": ["null", "string"] + }, + "issue_events_url": { + "type": ["null", "string"] + }, + "issues_url": { + "type": ["null", "string"] + }, + "keys_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "languages_url": { + "type": ["null", "string"] + }, + "merges_url": { + "type": ["null", "string"] + }, + "milestones_url": { + "type": ["null", "string"] + }, + "notifications_url": { + "type": ["null", "string"] + }, + "pulls_url": { + "type": ["null", "string"] + }, + "releases_url": { + "type": ["null", "string"] + }, + "ssh_url": { + "type": ["null", "string"] + }, + "stargazers_url": { + "type": ["null", "string"] + }, + "statuses_url": { + "type": ["null", "string"] + }, + "subscribers_url": { + "type": ["null", "string"] + }, + "subscription_url": { + "type": ["null", "string"] + }, + "tags_url": { + "type": ["null", "string"] + }, + "teams_url": { + "type": ["null", "string"] + }, + "trees_url": { + "type": ["null", "string"] + }, + "clone_url": { + "type": ["null", "string"] + }, + "mirror_url": { + "type": ["null", "string"] + }, + "hooks_url": { + "type": ["null", "string"] + }, + "svn_url": { + "type": ["null", "string"] + }, + "homepage": { + "type": ["null", "string"] + }, + "language": { + "type": ["null", "string"] + }, + "forks_count": { + "type": ["null", "integer"] + }, + "stargazers_count": { + "type": ["null", "integer"] + }, + "watchers_count": { + "type": ["null", "integer"] + }, + "size": { + "type": ["null", "integer"] + }, + "default_branch": { + "type": ["null", "string"] + }, + "open_issues_count": { + "type": ["null", "integer"] + }, + "is_template": { + "type": ["null", "boolean"] + }, + "topics": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "has_issues": { + "type": ["null", "boolean"] + }, + "has_projects": { + "type": ["null", "boolean"] + }, + "has_wiki": { + "type": ["null", "boolean"] + }, + "has_pages": { + "type": ["null", "boolean"] + }, + "has_downloads": { + "type": ["null", "boolean"] + }, + "archived": { + "type": ["null", "boolean"] + }, + "disabled": { + "type": ["null", "boolean"] + }, + "visibility": { + "type": ["null", "string"] + }, + "pushed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "allow_rebase_merge": { + "type": ["null", "boolean"] + }, + "template_repository": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "full_name": { + "type": ["null", "string"] + }, + "owner": { + "$ref": "shared/user.json#/" + }, + "private": { + "type": ["null", "boolean"] + }, + "html_url": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "fork": { + "type": ["null", "boolean"] + }, + "url": { + "type": ["null", 
"string"] + }, + "archive_url": { + "type": ["null", "string"] + }, + "assignees_url": { + "type": ["null", "string"] + }, + "blobs_url": { + "type": ["null", "string"] + }, + "branches_url": { + "type": ["null", "string"] + }, + "collaborators_url": { + "type": ["null", "string"] + }, + "comments_url": { + "type": ["null", "string"] + }, + "commits_url": { + "type": ["null", "string"] + }, + "compare_url": { + "type": ["null", "string"] + }, + "contents_url": { + "type": ["null", "string"] + }, + "contributors_url": { + "type": ["null", "string"] + }, + "deployments_url": { + "type": ["null", "string"] + }, + "downloads_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "forks_url": { + "type": ["null", "string"] + }, + "git_commits_url": { + "type": ["null", "string"] + }, + "git_refs_url": { + "type": ["null", "string"] + }, + "git_tags_url": { + "type": ["null", "string"] + }, + "git_url": { + "type": ["null", "string"] + }, + "issue_comment_url": { + "type": ["null", "string"] + }, + "issue_events_url": { + "type": ["null", "string"] + }, + "issues_url": { + "type": ["null", "string"] + }, + "keys_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "languages_url": { + "type": ["null", "string"] + }, + "merges_url": { + "type": ["null", "string"] + }, + "milestones_url": { + "type": ["null", "string"] + }, + "notifications_url": { + "type": ["null", "string"] + }, + "pulls_url": { + "type": ["null", "string"] + }, + "releases_url": { + "type": ["null", "string"] + }, + "ssh_url": { + "type": ["null", "string"] + }, + "stargazers_url": { + "type": ["null", "string"] + }, + "statuses_url": { + "type": ["null", "string"] + }, + "subscribers_url": { + "type": ["null", "string"] + }, + "subscription_url": { + "type": ["null", "string"] + }, + "tags_url": { + "type": ["null", "string"] + }, + "teams_url": { + "type": ["null", "string"] + }, + "trees_url": { + "type": ["null", "string"] + }, + "clone_url": { + "type": ["null", "string"] + }, + "mirror_url": { + "type": ["null", "string"] + }, + "hooks_url": { + "type": ["null", "string"] + }, + "svn_url": { + "type": ["null", "string"] + }, + "homepage": { + "type": ["null", "string"] + }, + "language": { + "type": ["null", "string"] + }, + "forks_count": { + "type": ["null", "integer"] + }, + "stargazers_count": { + "type": ["null", "integer"] + }, + "watchers_count": { + "type": ["null", "integer"] + }, + "size": { + "type": ["null", "integer"] + }, + "default_branch": { + "type": ["null", "string"] + }, + "open_issues_count": { + "type": ["null", "integer"] + }, + "is_template": { + "type": ["null", "boolean"] + }, + "topics": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "has_issues": { + "type": ["null", "boolean"] + }, + "has_projects": { + "type": ["null", "boolean"] + }, + "has_wiki": { + "type": ["null", "boolean"] + }, + "has_pages": { + "type": ["null", "boolean"] + }, + "has_downloads": { + "type": ["null", "boolean"] + }, + "archived": { + "type": ["null", "boolean"] + }, + "disabled": { + "type": ["null", "boolean"] + }, + "visibility": { + "type": ["null", "string"] + }, + "pushed_at": { + "type": ["null", "string"] + }, + "created_at": { + "type": ["null", "string"] + }, + "updated_at": { + "type": ["null", "string"] + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "allow_rebase_merge": { + "type": ["null", "boolean"] + }, + "temp_clone_token": { + "type": ["null", "string"] + 
}, + "allow_squash_merge": { + "type": ["null", "boolean"] + }, + "allow_auto_merge": { + "type": ["null", "boolean"] + }, + "delete_branch_on_merge": { + "type": ["null", "boolean"] + }, + "allow_update_branch": { + "type": ["null", "boolean"] + }, + "use_squash_pr_title_as_default": { + "type": ["null", "boolean"] + }, + "allow_merge_commit": { + "type": ["null", "boolean"] + }, + "subscribers_count": { + "type": ["null", "integer"] + }, + "network_count": { + "type": ["null", "integer"] + } + } + }, + "temp_clone_token": { + "type": ["null", "string"] + }, + "allow_squash_merge": { + "type": ["null", "boolean"] + }, + "allow_auto_merge": { + "type": ["null", "boolean"] + }, + "delete_branch_on_merge": { + "type": ["null", "boolean"] + }, + "allow_update_branch": { + "type": ["null", "boolean"] + }, + "use_squash_pr_title_as_default": { + "type": ["null", "boolean"] + }, + "allow_merge_commit": { + "type": ["null", "boolean"] + }, + "allow_forking": { + "type": ["null", "boolean"] + }, + "subscribers_count": { + "type": ["null", "integer"] + }, + "network_count": { + "type": ["null", "integer"] + }, + "open_issues": { + "type": ["null", "integer"] + }, + "watchers": { + "type": ["null", "integer"] + }, + "master_branch": { + "type": ["null", "string"] + }, + "starred_at": { + "type": ["null", "string"] } } }, "sha": { "type": ["null", "string"] + }, + "user": { + "$ref": "shared/user.json#/" } } }, diff --git a/tap_github/schemas/releases.json b/tap_github/schemas/releases.json index 3b040003..b903a026 100644 --- a/tap_github/schemas/releases.json +++ b/tap_github/schemas/releases.json @@ -8,9 +8,151 @@ "id": { "type": ["null", "string"] }, + "node_id": { + "type": ["null", "string"] + }, "url": { "type": ["null", "string"] }, + "zipball_url": { + "type": ["null", "string"] + }, + "body_text": { + "type": ["null", "string"] + }, + "upload_url": { + "type": ["null", "string"] + }, + "assets_url": { + "type": ["null", "string"] + }, + "tarball_url": { + "type": ["null", "string"] + }, + "body_html": { + "type": ["null", "string"] + }, + "reactions": { + "$ref": "shared/reactions.json#/" + }, + "assets": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"] + }, + "browser_download_url": { + "type": ["null", "string"], + "format": "uri" + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "label": { + "type": ["null", "string"] + }, + "state": { + "type": ["null", "string"] + }, + "content_type": { + "type": ["null", "string"] + }, + "size": { + "type": ["null", "integer"] + }, + "download_count": { + "type": ["null", "integer"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "uploader": { + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", 
"string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] + } + } + } + } + } + }, + "mentions_count": { + "type": ["null", "integer"] + }, "html_url": { "type": ["null", "string"] }, @@ -33,16 +175,7 @@ "type": ["null", "boolean"] }, "author": { - "type": ["null", "object"], - "additionalProperties": false, - "properties": { - "login": { - "type": ["null", "string"] - }, - "id": { - "type": ["null", "integer"] - } - } + "$ref": "shared/user.json#/" }, "created_at": { "type": ["null", "string"], @@ -51,6 +184,10 @@ "published_at": { "type": ["null", "string"], "format": "date-time" + }, + "discussion_url": { + "type": ["null", "string"], + "format": "date-time" } } } \ No newline at end of file diff --git a/tap_github/schemas/review_comments.json b/tap_github/schemas/review_comments.json index 71452419..8eae9585 100644 --- a/tap_github/schemas/review_comments.json +++ b/tap_github/schemas/review_comments.json @@ -9,21 +9,71 @@ "type": ["null", "integer"] }, "user": { + "$ref": "shared/user.json#/" + }, + "body": { + "type": ["null", "string"] + }, + "node_id": { + "type": ["null", "string"] + }, + "_links": { "type": ["null", "object"], - "additionalProperties": false, "properties": { - "login": { - "type": ["null", "string"] + "self": { + "type": ["null", "object"], + "properties": { + "href": { + "type": ["null", "string"] + } + } + }, + "html": { + "type": ["null", "object"], + "properties": { + "href": { + "type": ["null", "string"] + } + } }, - "id": { - "type": ["null", "integer"] + "pull_request": { + "type": ["null", "object"], + "properties": { + "href": { + "type": ["null", "string"] + } + } } } }, - "body": { + "url": { "type": ["null", "string"] }, - "node_id": { + "original_start_line": { + "type": ["null", "integer"] + }, + "start_side": { + "type": ["null", "string"] + }, + "body_text": { + "type": ["null", "string"] + }, + "original_line": { + "type": ["null", "integer"] + }, + "reactions": { + "$ref": "shared/reactions.json#/" + }, + "start_line": { + "type": ["null", "integer"] + }, + "body_html": { + "type": ["null", "string"] + }, + "line": { + "type": ["null", "integer"] + }, + "side": { "type": ["null", "string"] }, "pull_request_review_id": { @@ -88,9 +138,6 @@ "head": { "type": ["null", "string"] }, - "html_url": { - "type": ["null", "string"] - }, "issue_url": { "type": ["null", "string"] }, @@ -123,6 +170,9 @@ }, "statuses_url": { "type": ["null", "string"] + }, + "pr_id": { + "type": ["null", "string"] } } } diff --git a/tap_github/schemas/reviews.json b/tap_github/schemas/reviews.json index b7ad05f9..e065a74e 100644 --- a/tap_github/schemas/reviews.json +++ b/tap_github/schemas/reviews.json @@ -8,18 +8,42 @@ "id": { "type": ["null", "integer"] }, - "user": { + "_links": { "type": ["null", "object"], - "additionalProperties": false, "properties": { - "login": { - "type": ["null", "string"] + "html": { + "type": ["null", "object"], + "properties": { + "href": { + "type": ["null", "string"] + } + } }, - "id": { - "type": ["null", "integer"] + "pull_request": { + "type": ["null", "object"], + "properties": { + "href": { + "type": 
["null", "string"] + } + } } } }, + "body_html": { + "type": ["null", "string"] + }, + "body_text": { + "type": ["null", "string"] + }, + "node_id": { + "type": ["null", "string"] + }, + "author_association": { + "type": ["null", "string"] + }, + "user": { + "$ref": "shared/user.json#/" + }, "body": { "type": ["null", "string"] }, @@ -38,6 +62,9 @@ "submitted_at": { "type": ["null", "string"], "format": "date-time" + }, + "pr_id": { + "type": ["null", "string"] } } } diff --git a/tap_github/schemas/shared/issue_permissions.json b/tap_github/schemas/shared/issue_permissions.json new file mode 100644 index 00000000..2ec35a46 --- /dev/null +++ b/tap_github/schemas/shared/issue_permissions.json @@ -0,0 +1,20 @@ +{ + "type": ["null", "object"], + "properties": { + "issues": { + "type": ["null", "string"] + }, + "checks": { + "type": ["null", "string"] + }, + "metadata": { + "type": ["null", "string"] + }, + "contents": { + "type": ["null", "string"] + }, + "deployments": { + "type": ["null", "string"] + } + } + } \ No newline at end of file diff --git a/tap_github/schemas/shared/performed_via_github_app.json b/tap_github/schemas/shared/performed_via_github_app.json new file mode 100644 index 00000000..eabc7b70 --- /dev/null +++ b/tap_github/schemas/shared/performed_via_github_app.json @@ -0,0 +1,61 @@ +{ + "type": ["null", "object", "string"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "slug": { + "type": ["null", "string"] + }, + "node_id": { + "type": ["null", "string"] + }, + "owner": { + "$ref": "shared/user.json#/" + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "external_url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "permissions": { + "$ref": "shared/issue_permissions.json#/" + }, + "events": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "installations_count": { + "type": ["null", "integer"] + }, + "client_id": { + "type": ["null", "string"] + }, + "client_secret": { + "type": ["null", "string"] + }, + "webhook_secret": { + "type": ["null", "string"] + }, + "pem": { + "type": ["null", "string"] + } + } +} \ No newline at end of file diff --git a/tap_github/schemas/shared/pull_permissions.json b/tap_github/schemas/shared/pull_permissions.json new file mode 100644 index 00000000..2eb4a910 --- /dev/null +++ b/tap_github/schemas/shared/pull_permissions.json @@ -0,0 +1,20 @@ +{ + "type": ["null", "object"], + "properties": { + "pull": { + "type": ["null", "boolean"] + }, + "triage": { + "type": ["null", "boolean"] + }, + "push": { + "type": ["null", "boolean"] + }, + "maintain": { + "type": ["null", "boolean"] + }, + "admin": { + "type": ["null", "boolean"] + } + } +} \ No newline at end of file diff --git a/tap_github/schemas/shared/reactions.json b/tap_github/schemas/shared/reactions.json new file mode 100644 index 00000000..543ae6ea --- /dev/null +++ b/tap_github/schemas/shared/reactions.json @@ -0,0 +1,35 @@ +{ + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"] + }, + "total_count": { + "type": ["null", "integer"] + }, + "+1": { + "type": ["null", "integer"] + }, + "-1": { + "type": ["null", "integer"] + }, + "laugh": { + "type": ["null", "integer"] + }, + "confused": { + "type": ["null", "integer"] + }, + "heart": { + "type": ["null", 
"integer"] + }, + "hooray": { + "type": ["null", "integer"] + }, + "eyes": { + "type": ["null", "integer"] + }, + "rocket": { + "type": ["null", "integer"] + } + } +} \ No newline at end of file diff --git a/tap_github/schemas/shared/user.json b/tap_github/schemas/shared/user.json new file mode 100644 index 00000000..45c45d0c --- /dev/null +++ b/tap_github/schemas/shared/user.json @@ -0,0 +1,68 @@ +{ + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] + } + } +} diff --git a/tap_github/schemas/stargazers.json b/tap_github/schemas/stargazers.json index d8d67b30..54e2d9e3 100644 --- a/tap_github/schemas/stargazers.json +++ b/tap_github/schemas/stargazers.json @@ -6,13 +6,7 @@ "type": ["string"] }, "user": { - "type": ["null", "object"], - "additionalProperties": false, - "properties": { - "id": { - "type": ["null", "integer"] - } - } + "$ref": "shared/user.json#/" }, "starred_at": { "type": ["null", "string"], diff --git a/tap_github/schemas/team_members.json b/tap_github/schemas/team_members.json index b707c5e3..d872bafa 100644 --- a/tap_github/schemas/team_members.json +++ b/tap_github/schemas/team_members.json @@ -1,128 +1,74 @@ { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "login": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "starred_at": { + "type": ["null", "string"] }, "avatar_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "gravatar_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "html_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "followers_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "following_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "gists_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "starred_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "subscriptions_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "organizations_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, 
"repos_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "events_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "received_events_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "type": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "site_admin": { - "type": [ - "null", - "boolean" - ] + "type": ["null", "boolean"] }, "_sdc_repository": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "team_slug": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] } } - } \ No newline at end of file +} diff --git a/tap_github/schemas/team_memberships.json b/tap_github/schemas/team_memberships.json index 98f80e25..1c6d89a6 100644 --- a/tap_github/schemas/team_memberships.json +++ b/tap_github/schemas/team_memberships.json @@ -1,32 +1,20 @@ { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "role": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "state": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] }, "_sdc_repository": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] } } } \ No newline at end of file diff --git a/tap_github/schemas/teams.json b/tap_github/schemas/teams.json index 43fdee44..15a26205 100644 --- a/tap_github/schemas/teams.json +++ b/tap_github/schemas/teams.json @@ -1,87 +1,47 @@ { - "type": [ - "null", - "object" - ], + "type": ["null", "object"], "properties": { "_sdc_repository": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "id": { - "type": [ - "null", - "number" - ] + "type": ["null", "number"] }, "node_id": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "html_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" }, "name": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "slug": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "description": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "privacy": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "permission": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "members_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "repositories_url": { - "type": [ - "null", - "string" - ] + "type": ["null", "string"] }, "parent": { - "type": [ - "null", - "object", - "string" - ] + "type": ["null", "object", "string"] } } } diff --git a/tap_github/streams.py b/tap_github/streams.py new file mode 100644 index 00000000..278dd05a --- /dev/null +++ b/tap_github/streams.py @@ -0,0 +1,768 @@ +from datetime import datetime +import singer +from singer import (metrics, bookmarks, metadata) + +LOGGER = singer.get_logger() +DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ' + +def get_bookmark(state, repo, stream_name, bookmark_key, start_date): + """ + Return bookmark value if available in the state otherwise return start date + """ + repo_stream_dict = bookmarks.get_bookmark(state, repo, stream_name) + if repo_stream_dict: + return repo_stream_dict.get(bookmark_key) + + return start_date + +def get_schema(catalog, stream_id): + """ + Return catalog of the 
specified stream. + """ + stream_catalog = [cat for cat in catalog if cat['tap_stream_id'] == stream_id][0] + return stream_catalog + +def get_child_full_url(domain, child_object, repo_path, parent_id, grand_parent_id): + """ + Build the child stream's URL based on the parent and the grandparent's ids. + """ + + if child_object.use_repository: + # The `use_repository` flag indicates that the url contains /repos and the repository name. + child_full_url = '{}/repos/{}/{}'.format( + domain, + repo_path, + child_object.path).format(*parent_id) + + elif child_object.use_organization: + # The `use_organization` flag indicates that the url contains the organization name. + child_full_url = '{}/{}'.format( + domain, + child_object.path).format(repo_path, *parent_id, *grand_parent_id) + + else: + # Build and return a url that contains neither /repos nor the organization name. + # Example: https://base_url/projects/{project_id}/columns + child_full_url = '{}/{}'.format( + domain, + child_object.path).format(*grand_parent_id) + LOGGER.info("Final url is: %s", child_full_url) + + return child_full_url + + +class Stream: + """ + A base class representing tap-github streams. + """ + tap_stream_id = None + replication_method = None + replication_keys = None + key_properties = [] + path = None + filter_param = False + id_keys = [] + use_organization = False + children = [] + pk_child_fields = [] + use_repository = False + headers = {'Accept': '*/*'} + parent = None + + def build_url(self, base_url, repo_path, bookmark): + """ + Build the full url with parameters and attributes. + """ + if self.filter_param: + # Add the since parameter for incremental streams + query_string = '?since={}'.format(bookmark) + else: + query_string = '' + + if self.use_organization: + # The `use_organization` flag indicates that the url contains the organization name. + full_url = '{}/{}'.format( + base_url, + self.path).format(repo_path) + else: + # The url contains /repos and the repository name. + full_url = '{}/repos/{}/{}{}'.format( + base_url, + repo_path, + self.path, + query_string) + + LOGGER.info("Final url is: %s", full_url) + return full_url + + def get_min_bookmark(self, stream, selected_streams, bookmark, repo_path, start_date, state): + """ + Get the minimum bookmark from the parent and its corresponding child bookmarks. + """ + + stream_obj = STREAMS[stream]() + min_bookmark = bookmark + if stream in selected_streams: + # Get the minimum of the stream's bookmark (start date in case of no bookmark) and min_bookmark + min_bookmark = min(min_bookmark, get_bookmark(state, repo_path, stream, "since", start_date)) + LOGGER.debug("New minimum bookmark is %s", min_bookmark) + + for child in stream_obj.children: + # Iterate through all children and return the minimum bookmark among all. + min_bookmark = min(min_bookmark, self.get_min_bookmark(child, selected_streams, min_bookmark, repo_path, start_date, state)) + + return min_bookmark + + def write_bookmarks(self, stream, selected_streams, bookmark_value, repo_path, state): + """Write the bookmark in the state corresponding to the stream.""" + stream_obj = STREAMS[stream]() + + # If the stream is selected, write the bookmark. + if stream in selected_streams: + singer.write_bookmark(state, repo_path, stream_obj.tap_stream_id, {"since": bookmark_value}) + + # For each child, write the bookmark if it is selected.
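+        # For illustration (based on the child lists defined on the stream classes below): when 'pull_requests' is the stream, this recursion also writes bookmarks for its selected children 'reviews', 'review_comments' and 'pr_commits'.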
+ for child in stream_obj.children: + self.write_bookmarks(child, selected_streams, bookmark_value, repo_path, state) + + # pylint: disable=no-self-use + def get_child_records(self, + client, + catalog, + child_stream, + grand_parent_id, + repo_path, + state, + start_date, + bookmark_dttm, + stream_to_sync, + selected_stream_ids, + parent_id = None, + parent_record = None): + """ + Retrieve and write all the child records for each updated parent based on the parent record and its ids. + """ + child_object = STREAMS[child_stream]() + + child_bookmark_value = get_bookmark(state, repo_path, child_object.tap_stream_id, "since", start_date) + + if not parent_id: + parent_id = grand_parent_id + + child_full_url = get_child_full_url(client.base_url, child_object, repo_path, parent_id, grand_parent_id) + stream_catalog = get_schema(catalog, child_object.tap_stream_id) + + with metrics.record_counter(child_object.tap_stream_id) as counter: + for response in client.authed_get_all_pages( + child_object.tap_stream_id, + child_full_url, + stream = child_object.tap_stream_id + ): + records = response.json() + extraction_time = singer.utils.now() + + if isinstance(records, list): + # Loop through all the records of the response + for record in records: + record['_sdc_repository'] = repo_path + child_object.add_fields_at_1st_level(record = record, parent_record = parent_record) + + with singer.Transformer() as transformer: + + rec = transformer.transform(record, stream_catalog['schema'], metadata=metadata.to_map(stream_catalog['metadata'])) + + if child_object.tap_stream_id in selected_stream_ids and record.get(child_object.replication_keys, start_date) >= child_bookmark_value: + singer.write_record(child_object.tap_stream_id, rec, time_extracted=extraction_time) + counter.increment() + + # Loop through each child and nested child of the parent and fetch all the child records. + for nested_child in child_object.children: + if nested_child in stream_to_sync: + # Collect the id of the child record to pass to the API of its sub-child. + child_id = tuple(record.get(key) for key in STREAMS[nested_child]().id_keys) + # Here, grand_parent_id is the id of the 1st-level parent (main parent), which is required + # by the API of the current child's sub-child. + child_object.get_child_records(client, catalog, nested_child, child_id, repo_path, state, start_date, bookmark_dttm, stream_to_sync, selected_stream_ids, grand_parent_id, record) + + else: + # Write the JSON response directly if it is a single record only. + records['_sdc_repository'] = repo_path + child_object.add_fields_at_1st_level(record = records, parent_record = parent_record) + + with singer.Transformer() as transformer: + + rec = transformer.transform(records, stream_catalog['schema'], metadata=metadata.to_map(stream_catalog['metadata'])) + if child_object.tap_stream_id in selected_stream_ids and records.get(child_object.replication_keys, start_date) >= child_bookmark_value: + + singer.write_record(child_object.tap_stream_id, rec, time_extracted=extraction_time) + + # pylint: disable=unnecessary-pass + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON. + """ + pass + +class FullTableStream(Stream): + def sync_endpoint(self, + client, + state, + catalog, + repo_path, + start_date, + selected_stream_ids, + stream_to_sync + ): + """ + A common function to sync full table streams.
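+        For example, the 'assignees', 'releases' and 'issue_labels' streams defined below are synced through this method.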
+ """ + + # build full url + full_url = self.build_url(client.base_url, repo_path, None) + + stream_catalog = get_schema(catalog, self.tap_stream_id) + + with metrics.record_counter(self.tap_stream_id) as counter: + for response in client.authed_get_all_pages( + self.tap_stream_id, + full_url, + self.headers, + stream = self.tap_stream_id + ): + records = response.json() + extraction_time = singer.utils.now() + # Loop through all records + for record in records: + + record['_sdc_repository'] = repo_path + self.add_fields_at_1st_level(record = record, parent_record = None) + + with singer.Transformer() as transformer: + rec = transformer.transform(record, stream_catalog['schema'], metadata=metadata.to_map(stream_catalog['metadata'])) + if self.tap_stream_id in selected_stream_ids: + + singer.write_record(self.tap_stream_id, rec, time_extracted=extraction_time) + + counter.increment() + + for child in self.children: + if child in stream_to_sync: + + parent_id = tuple(record.get(key) for key in STREAMS[child]().id_keys) + + # Sync child stream, if it is selected or its nested child is selected. + self.get_child_records(client, + catalog, + child, + parent_id, + repo_path, + state, + start_date, + record.get(self.replication_keys), + stream_to_sync, + selected_stream_ids, + parent_record = record) + + return state + +class IncrementalStream(Stream): + def sync_endpoint(self, + client, + state, + catalog, + repo_path, + start_date, + selected_stream_ids, + stream_to_sync + ): + + """ + A common function sync incremental streams. Sync an incremental stream for which records are not + in descending order. For, incremental streams iterate all records, write only newly updated records and + write the latest bookmark value. + """ + + parent_bookmark_value = get_bookmark(state, repo_path, self.tap_stream_id, "since", start_date) + current_time = datetime.today().strftime(DATE_FORMAT) + min_bookmark_value = self.get_min_bookmark(self.tap_stream_id, selected_stream_ids, current_time, repo_path, start_date, state) + + max_bookmark_value = min_bookmark_value + + # build full url + full_url = self.build_url(client.base_url, repo_path, min_bookmark_value) + + stream_catalog = get_schema(catalog, self.tap_stream_id) + + with metrics.record_counter(self.tap_stream_id) as counter: + for response in client.authed_get_all_pages( + self.tap_stream_id, + full_url, + self.headers, + stream = self.tap_stream_id + ): + records = response.json() + extraction_time = singer.utils.now() + # Loop through all records + for record in records: + + record['_sdc_repository'] = repo_path + self.add_fields_at_1st_level(record = record, parent_record = None) + + with singer.Transformer() as transformer: + if record.get(self.replication_keys): + if record[self.replication_keys] >= max_bookmark_value: + # Update max_bookmark_value + max_bookmark_value = record[self.replication_keys] + + bookmark_dttm = record[self.replication_keys] + + # Keep only records whose bookmark is after the last_datetime + if bookmark_dttm >= min_bookmark_value: + + if self.tap_stream_id in selected_stream_ids and bookmark_dttm >= parent_bookmark_value: + rec = transformer.transform(record, stream_catalog['schema'], metadata=metadata.to_map(stream_catalog['metadata'])) + + singer.write_record(self.tap_stream_id, rec, time_extracted=extraction_time) + counter.increment() + + for child in self.children: + if child in stream_to_sync: + + parent_id = tuple(record.get(key) for key in STREAMS[child]().id_keys) + + # Sync child stream, if it is selected or its 
nested child is selected. + self.get_child_records(client, + catalog, + child, + parent_id, + repo_path, + state, + start_date, + record.get(self.replication_keys), + stream_to_sync, + selected_stream_ids, + parent_record = record) + else: + LOGGER.warning("Skipping this record for %s stream with %s = %s as it is missing replication key %s.", + self.tap_stream_id, self.key_properties, record.get(self.key_properties[0]), self.replication_keys) + + + # Write bookmark for incremental stream. + self.write_bookmarks(self.tap_stream_id, selected_stream_ids, max_bookmark_value, repo_path, state) + + return state + +class IncrementalOrderedStream(Stream): + + def sync_endpoint(self, + client, + state, + catalog, + repo_path, + start_date, + selected_stream_ids, + stream_to_sync + ): + """ + A sync function for streams that have records in the descending order of replication key value. For such streams, + iterate only the latest records. + """ + bookmark_value = get_bookmark(state, repo_path, self.tap_stream_id, "since", start_date) + current_time = datetime.today().strftime(DATE_FORMAT) + + min_bookmark_value = self.get_min_bookmark(self.tap_stream_id, selected_stream_ids, current_time, repo_path, start_date, state) + bookmark_time = singer.utils.strptime_to_utc(min_bookmark_value) + + # Build full url + full_url = self.build_url(client.base_url, repo_path, bookmark_value) + synced_all_records = False + stream_catalog = get_schema(catalog, self.tap_stream_id) + + parent_bookmark_value = bookmark_value + record_counter = 0 + with metrics.record_counter(self.tap_stream_id) as counter: + for response in client.authed_get_all_pages( + self.tap_stream_id, + full_url, + stream = self.tap_stream_id + ): + records = response.json() + extraction_time = singer.utils.now() + for record in records: + record['_sdc_repository'] = repo_path + self.add_fields_at_1st_level(record = record, parent_record = None) + + updated_at = record.get(self.replication_keys) + + if record_counter == 0 and updated_at > bookmark_value: + # Take the replication key value of the 1st record as the bookmark value, + # because all records are in descending order of the replication key. + bookmark_value = updated_at + record_counter = record_counter + 1 + + if updated_at: + if bookmark_time and singer.utils.strptime_to_utc(updated_at) < bookmark_time: + # Stop here: the current record's bookmark value is less than the last saved + # bookmark value, and every record from now onwards will have an even smaller value. + synced_all_records = True + break + + if self.tap_stream_id in selected_stream_ids and updated_at >= parent_bookmark_value: + + # Transform and write record + with singer.Transformer() as transformer: + rec = transformer.transform(record, stream_catalog['schema'], metadata=metadata.to_map(stream_catalog['metadata'])) + singer.write_record(self.tap_stream_id, rec, time_extracted=extraction_time) + counter.increment() + + for child in self.children: + if child in stream_to_sync: + parent_id = tuple(record.get(key) for key in STREAMS[child]().id_keys) + + # Sync child stream, if it is selected or its nested child is selected.
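+                            # The parent record's replication key value is forwarded as bookmark_dttm to the child sync below.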
+ self.get_child_records(client, + catalog, + child, + parent_id, + repo_path, + state, + start_date, + record.get(self.replication_keys), + stream_to_sync, + selected_stream_ids, + parent_record = record) + else: + LOGGER.warning("Skipping this record for %s stream with %s = %s as it is missing replication key %s.", + self.tap_stream_id, self.key_properties, record.get(self.key_properties[0]), self.replication_keys) + + if synced_all_records: + break + + # Write bookmark for incremental stream. + self.write_bookmarks(self.tap_stream_id, selected_stream_ids, bookmark_value, repo_path, state) + + return state + +class Reviews(IncrementalStream): + ''' + https://docs.github.com/en/rest/reference/pulls#list-reviews-for-a-pull-request + ''' + tap_stream_id = "reviews" + replication_method = "INCREMENTAL" + replication_keys = "submitted_at" + key_properties = ["id"] + path = "pulls/{}/reviews" + use_repository = True + id_keys = ['number'] + parent = 'pull_requests' + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON. + """ + record['pr_id'] = parent_record['id'] + +class ReviewComments(IncrementalOrderedStream): + ''' + https://docs.github.com/en/rest/pulls/comments#get-a-review-comment-for-a-pull-request + ''' + tap_stream_id = "review_comments" + replication_method = "INCREMENTAL" + replication_keys = "updated_at" + key_properties = ["id"] + path = "pulls/{}/comments?sort=updated_at&direction=desc" + use_repository = True + id_keys = ['number'] + parent = 'pull_requests' + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON. + """ + record['pr_id'] = parent_record['id'] + +class PRCommits(IncrementalStream): + ''' + https://docs.github.com/en/rest/reference/pulls#list-commits-on-a-pull-request + ''' + tap_stream_id = "pr_commits" + replication_method = "INCREMENTAL" + replication_keys = "updated_at" + key_properties = ["id"] + path = "pulls/{}/commits" + use_repository = True + id_keys = ['number'] + parent = 'pull_requests' + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON.
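+        Here 'updated_at' is derived from commit.committer.date, and a surrogate 'id' of the form '<pr_id>-<sha>' is composed, as the assignments below show.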
+ """ + record['updated_at'] = record['commit']['committer']['date'] + + record['pr_number'] = parent_record.get('number') + record['pr_id'] = parent_record.get('id') + record['id'] = '{}-{}'.format(parent_record.get('id'), record.get('sha')) + +class PullRequests(IncrementalOrderedStream): + ''' + https://developer.github.com/v3/pulls/#list-pull-requests + ''' + tap_stream_id = "pull_requests" + replication_method = "INCREMENTAL" + replication_keys = "updated_at" + key_properties = ["id"] + path = "pulls?state=all&sort=updated&direction=desc" + children = ['reviews', 'review_comments', 'pr_commits'] + pk_child_fields = ["number"] + +class ProjectCards(IncrementalStream): + ''' + https://docs.github.com/en/rest/reference/projects#list-project-cards + ''' + tap_stream_id = "project_cards" + replication_method = "INCREMENTAL" + replication_keys = "updated_at" + key_properties = ["id"] + path = "projects/columns/{}/cards" + tap_stream_id = "project_cards" + parent = 'project_columns' + id_keys = ['id'] + +class ProjectColumns(IncrementalStream): + ''' + https://docs.github.com/en/rest/reference/projects#list-project-columns + ''' + tap_stream_id = "project_columns" + replication_method = "INCREMENTAL" + replication_keys = "updated_at" + key_properties = ["id"] + path = "projects/{}/columns" + children = ["project_cards"] + parent = "projects" + id_keys = ['id'] + has_children = True + +class Projects(IncrementalStream): + ''' + https://docs.github.com/en/rest/reference/projects#list-repository-projects + ''' + tap_stream_id = "projects" + replication_method = "INCREMENTAL" + replication_keys = "updated_at" + key_properties = ["id"] + path = "projects?state=all" + tap_stream_id = "projects" + children = ["project_columns"] + child_objects = [ProjectColumns()] + +class TeamMemberships(FullTableStream): + ''' + https://docs.github.com/en/rest/reference/teams#get-team-membership-for-a-user + ''' + tap_stream_id = "team_memberships" + replication_method = "FULL_TABLE" + key_properties = ["url"] + path = "orgs/{}/teams/{}/memberships/{}" + use_organization = True + parent = 'team_members' + id_keys = ["login"] + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON. + """ + record['login'] = parent_record['login'] + +class TeamMembers(FullTableStream): + ''' + https://docs.github.com/en/rest/reference/teams#list-team-members + ''' + tap_stream_id = "team_members" + replication_method = "FULL_TABLE" + key_properties = ["team_slug", "id"] + path = "orgs/{}/teams/{}/members" + use_organization = True + id_keys = ['slug'] + children= ["team_memberships"] + has_children = True + parent = 'teams' + pk_child_fields = ['login'] + + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON. 
+ """ + record['team_slug'] = parent_record['slug'] + +class Teams(FullTableStream): + ''' + https://docs.github.com/en/rest/reference/teams#list-teams + ''' + tap_stream_id = "teams" + replication_method = "FULL_TABLE" + key_properties = ["id"] + path = "orgs/{}/teams" + use_organization = True + children= ["team_members"] + pk_child_fields = ['slug'] + +class Commits(IncrementalStream): + ''' + https://docs.github.com/en/rest/commits/commits#list-commits-on-a-repository + ''' + tap_stream_id = "commits" + replication_method = "INCREMENTAL" + replication_keys = "updated_at" + key_properties = ["sha"] + path = "commits" + filter_param = True + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON. + """ + record['updated_at'] = record['commit']['committer']['date'] + +class Comments(IncrementalOrderedStream): + ''' + https://docs.github.com/en/rest/issues/comments#list-comments-in-a-repository + ''' + tap_stream_id = "comments" + replication_method = "INCREMENTAL" + replication_keys = "updated_at" + key_properties = ["id"] + filter_param = True + path = "issues/comments?sort=updated&direction=desc" + +class Issues(IncrementalOrderedStream): + ''' + https://docs.github.com/en/rest/issues/issues#list-repository-issues + ''' + tap_stream_id = "issues" + replication_method = "INCREMENTAL" + replication_keys = "updated_at" + key_properties = ["id"] + filter_param = True + path = "issues?state=all&sort=updated&direction=desc" + +class Assignees(FullTableStream): + ''' + https://docs.github.com/en/rest/issues/assignees#list-assignees + ''' + tap_stream_id = "assignees" + replication_method = "FULL_TABLE" + key_properties = ["id"] + path = "assignees" + +class Releases(FullTableStream): + ''' + https://docs.github.com/en/rest/releases/releases#list-releases + ''' + tap_stream_id = "releases" + replication_method = "FULL_TABLE" + key_properties = ["id"] + path = "releases?sort=created_at&direction=desc" + +class IssueLabels(FullTableStream): + ''' + https://docs.github.com/en/rest/issues/labels#list-labels-for-a-repository + ''' + tap_stream_id = "issue_labels" + replication_method = "FULL_TABLE" + key_properties = ["id"] + path = "labels" + +class IssueEvents(IncrementalOrderedStream): + ''' + https://docs.github.com/en/rest/reference/issues#list-issue-events-for-a-repository + ''' + tap_stream_id = "issue_events" + replication_method = "INCREMENTAL" + replication_keys = "created_at" + key_properties = ["id"] + path = "issues/events?sort=created_at&direction=desc" + +class Events(IncrementalStream): + ''' + https://docs.github.com/en/rest/activity/events#list-repository-events + ''' + tap_stream_id = "events" + replication_method = "INCREMENTAL" + replication_keys = "created_at" + key_properties = ["id"] + path = "events" + +class CommitComments(IncrementalStream): + ''' + https://docs.github.com/en/rest/commits/comments#list-commit-comments-for-a-repository + ''' + tap_stream_id = "commit_comments" + replication_method = "INCREMENTAL" + replication_keys = "updated_at" + key_properties = ["id"] + path = "comments" + +class IssueMilestones(IncrementalOrderedStream): + ''' + https://docs.github.com/en/rest/issues/milestones#list-milestones + ''' + tap_stream_id = "issue_milestones" + replication_method = "INCREMENTAL" + replication_keys = "updated_at" + key_properties = ["id"] + path = "milestones?direction=desc&sort=updated_at" + +class Collaborators(FullTableStream): + ''' + 
https://docs.github.com/en/rest/collaborators/collaborators#list-repository-collaborators + ''' + tap_stream_id = "collaborators" + replication_method = "FULL_TABLE" + key_properties = ["id"] + path = "collaborators" + +class StarGazers(FullTableStream): + ''' + https://docs.github.com/en/rest/activity/starring#list-stargazers + ''' + tap_stream_id = "stargazers" + replication_method = "FULL_TABLE" + key_properties = ["user_id"] + path = "stargazers" + headers = {'Accept': 'application/vnd.github.v3.star+json'} + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON. + """ + record['user_id'] = record['user']['id'] + + +# Dictionary of the stream classes +STREAMS = { + "commits": Commits, + "comments": Comments, + "issues": Issues, + "assignees": Assignees, + "releases": Releases, + "issue_labels": IssueLabels, + "issue_events": IssueEvents, + "events": Events, + "commit_comments": CommitComments, + "issue_milestones": IssueMilestones, + "projects": Projects, + "project_columns": ProjectColumns, + "project_cards": ProjectCards, + "pull_requests": PullRequests, + "reviews": Reviews, + "review_comments": ReviewComments, + "pr_commits": PRCommits, + "teams": Teams, + "team_members": TeamMembers, + "team_memberships": TeamMemberships, + "collaborators": Collaborators, + "stargazers": StarGazers +} diff --git a/tap_github/sync.py b/tap_github/sync.py new file mode 100644 index 00000000..a83610ad --- /dev/null +++ b/tap_github/sync.py @@ -0,0 +1,236 @@ +import collections +import singer +from singer import bookmarks +from tap_github.streams import STREAMS + +LOGGER = singer.get_logger() +STREAM_TO_SYNC_FOR_ORGS = ['teams', 'team_members', 'team_memberships'] + +def get_selected_streams(catalog): + ''' + Gets selected streams. Checks the schema's 'selected' + field first, and then checks metadata, looking for an empty + breadcrumb and metadata with a 'selected' entry. + ''' + selected_streams = [] + for stream in catalog['streams']: + stream_metadata = stream['metadata'] + for entry in stream_metadata: + # Stream metadata will have an empty breadcrumb + if not entry['breadcrumb'] and entry['metadata'].get('selected', None): + selected_streams.append(stream['tap_stream_id']) + + return selected_streams + +def update_currently_syncing(state, stream_name): + """ + Updates the currently syncing stream in the state. + """ + if not stream_name and singer.get_currently_syncing(state): + del state['currently_syncing'] + else: + singer.set_currently_syncing(state, stream_name) + singer.write_state(state) + +def update_currently_syncing_repo(state, repo_path): + """ + Updates the currently syncing repository in the state + and flushes `currently_syncing_repo` when all repositories are synced. + """ + if (not repo_path) and ('currently_syncing_repo' in state): + del state['currently_syncing_repo'] + else: + state['currently_syncing_repo'] = repo_path + singer.write_state(state) + +def get_ordered_stream_list(currently_syncing, streams_to_sync): + """ + Get an ordered list of streams: the remaining streams to sync first, followed by already synced streams. + """ + stream_list = list(sorted(streams_to_sync)) + if currently_syncing in stream_list: + index = stream_list.index(currently_syncing) + stream_list = stream_list[index:] + stream_list[:index] + return stream_list + +def get_ordered_repos(state, repositories): + """ + Get an ordered list of repos: the remaining repos to sync first, followed by already synced repos.
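+    e.g. repositories = ['org/a', 'org/b', 'org/c'] (illustrative names, not from the tap) with currently_syncing_repo = 'org/b' yields ['org/b', 'org/c', 'org/a'].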
+ """ + syncing_repo = state.get("currently_syncing_repo") + if syncing_repo in repositories: + index = repositories.index(syncing_repo) + repositories = repositories[index:] + repositories[:index] + return repositories + +def translate_state(state, catalog, repositories): + ''' + This tap used to only support a single repository, in which case the + the state took the shape of: + { + "bookmarks": { + "commits": { + "since": "2018-11-14T13:21:20.700360Z" + } + } + } + The tap now supports multiple repos, so this function should be called + at the beginning of each run to ensure the state is translated to the + new format: + { + "bookmarks": { + "singer-io/tap-adwords": { + "commits": { + "since": "2018-11-14T13:21:20.700360Z" + } + } + "singer-io/tap-salesforce": { + "commits": { + "since": "2018-11-14T13:21:20.700360Z" + } + } + } + } + ''' + nested_dict = lambda: collections.defaultdict(nested_dict) + new_state = nested_dict() + + # Collect keys(repo_name for update state or stream_name for older state) from state available in the `bookmarks`` + previous_state_keys = state.get('bookmarks', {}).keys() + # Collect stream names from the catalog + stream_names = [stream['tap_stream_id'] for stream in catalog['streams']] + + for key in previous_state_keys: + # Loop through each key of `bookmarks` available in the previous state. + + # Case 1: + # Older connections `bookmarks` contain stream names so check if it is the stream name or not. + # If the previous state's key is found in the stream name list then continue to check other keys. Because we want + # to migrate each stream's bookmark into the repo name as mentioned below: + # Example: {`bookmarks`: {`stream_a`: `bookmark_a`}} to {`bookmarks`: {`repo_a`: {`stream_a`: `bookmark_a`}}} + + # Case 2: + # Check if the key is available in the list of currently selected repo's list or not. Newer format `bookmarks` contain repo names. + # Return the state if the previous state's key is not found in the repo name list or stream name list. + + # If the state contains a bookmark for `repo_a` and `repo_b` and the user deselects these both repos and adds another repo + # then in that case this function was returning an empty state. Now this change will return the existing state instead of the empty state. + if key not in stream_names and key not in repositories: + # Return the existing state if all repos from the previous state are deselected(not found) in the current sync. + return state + + for stream in catalog['streams']: + stream_name = stream['tap_stream_id'] + for repo in repositories: + if bookmarks.get_bookmark(state, repo, stream_name): + return state + if bookmarks.get_bookmark(state, stream_name, 'since'): + new_state['bookmarks'][repo][stream_name]['since'] = bookmarks.get_bookmark(state, stream_name, 'since') + + return new_state + +def get_stream_to_sync(catalog): + """ + Get the streams for which the sync function should be called(the parent in case of selected child streams). + """ + streams_to_sync = [] + selected_streams = get_selected_streams(catalog) + for stream_name, stream_obj in STREAMS.items(): + if stream_name in selected_streams or is_any_child_selected(stream_obj, selected_streams): + # Append the selected stream or deselected parent stream into the list, if its child or nested child is selected. + streams_to_sync.append(stream_name) + return streams_to_sync + +def is_any_child_selected(stream_obj,selected_streams): + """ + Check if any of the child streams is selected for the parent. 
+ """ + if stream_obj.children: + for child in stream_obj.children: + if child in selected_streams: + return True + + if STREAMS[child].children: + return is_any_child_selected(STREAMS[child], selected_streams) + return False + +def write_schemas(stream_id, catalog, selected_streams): + """ + Write the schemas for each stream. + """ + stream_obj = STREAMS[stream_id]() + + if stream_id in selected_streams: + # Get catalog object for particular stream. + stream = [cat for cat in catalog['streams'] if cat['tap_stream_id'] == stream_id ][0] + singer.write_schema(stream_id, stream['schema'], stream['key_properties']) + + for child in stream_obj.children: + write_schemas(child, catalog, selected_streams) + +def sync(client, config, state, catalog): + """ + Sync selected streams. + """ + + start_date = config['start_date'] + + # Get selected streams, make sure stream dependencies are met + selected_stream_ids = get_selected_streams(catalog) + + streams_to_sync = get_stream_to_sync(catalog) + LOGGER.info('Sync stream %s', streams_to_sync) + + repositories, organizations = client.extract_repos_from_config() + + state = translate_state(state, catalog, repositories) + singer.write_state(state) + + # Sync `teams`, `team_members`and `team_memberships` streams just single time for any organization. + streams_to_sync_for_orgs = set(streams_to_sync).intersection(STREAM_TO_SYNC_FOR_ORGS) + # Loop through all organizations + if selected_stream_ids: + for orgs in organizations: + LOGGER.info("Starting sync of organization: %s", orgs) + do_sync(catalog, streams_to_sync_for_orgs, selected_stream_ids, client, start_date, state, orgs) + + # Sync other streams for all repos + streams_to_sync_for_repos = set(streams_to_sync) - streams_to_sync_for_orgs + # pylint: disable=too-many-nested-blocks + # Sync repositories only if any streams are selected + for repo in get_ordered_repos(state, repositories): + update_currently_syncing_repo(state, repo) + LOGGER.info("Starting sync of repository: %s", repo) + do_sync(catalog, streams_to_sync_for_repos, selected_stream_ids, client, start_date, state, repo) + + if client.not_accessible_repos: + # Give warning messages for a repo that is not accessible by a stream or is invalid. + message = "Please check the repository name \'{}\' or you do not have sufficient permissions to access this repository for following streams {}.".format(repo, ", ".join(client.not_accessible_repos)) + LOGGER.warning(message) + client.not_accessible_repos = set() + update_currently_syncing_repo(state, None) + +def do_sync(catalog, streams_to_sync, selected_stream_ids, client, start_date, state, repo): + """ + Sync all other streams except teams, team_members and team_memberships for each repo. 
+ """ + currently_syncing = singer.get_currently_syncing(state) + for stream_id in get_ordered_stream_list(currently_syncing, streams_to_sync): + stream_obj = STREAMS[stream_id]() + + # If it is a "sub_stream", it will be synced as part of the parent stream + if stream_id in streams_to_sync and not stream_obj.parent: + write_schemas(stream_id, catalog, selected_stream_ids) + update_currently_syncing(state, stream_id) + + state = stream_obj.sync_endpoint(client = client, + state = state, + catalog = catalog['streams'], + repo_path = repo, + start_date = start_date, + selected_stream_ids = selected_stream_ids, + stream_to_sync = streams_to_sync + ) + + singer.write_state(state) + update_currently_syncing(state, None) diff --git a/tests/base.py b/tests/base.py index 33c0478a..1d9eeb2f 100644 --- a/tests/base.py +++ b/tests/base.py @@ -4,9 +4,7 @@ from datetime import timedelta import time -import tap_tester.menagerie as menagerie -import tap_tester.connections as connections -import tap_tester.runner as runner +from tap_tester import menagerie, runner, connections, LOGGER class TestGithubBase(unittest.TestCase): @@ -15,14 +13,17 @@ class TestGithubBase(unittest.TestCase): INCREMENTAL = "INCREMENTAL" FULL = "FULL_TABLE" BOOKMARK = "bookmark" + PK_CHILD_FIELDS = "pk_child_fields" START_DATE_FORMAT = "%Y-%m-%dT00:00:00Z" # %H:%M:%SZ + BOOKMARK_FORMAT = "%Y-%m-%dT%H:%M:%SZ" + RECORD_REPLICATION_KEY_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" + EVENTS_RECORD_REPLICATION_KEY_FORMAT = "%Y-%m-%dT%H:%M:%SZ" DATETIME_FMT = { "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S.000000Z" } START_DATE = "" - FULL_TABLE_SUB_STREAMS = ['reviews', 'review_comments', 'pr_commits', 'team_members', 'team_memberships'] OBEYS_START_DATE = "obey-start-date" def setUp(self): @@ -151,7 +152,8 @@ def expected_metadata(self): self.PRIMARY_KEYS: {"id"}, self.REPLICATION_METHOD: self.INCREMENTAL, self.BOOKMARK: {"updated_at"}, - self.OBEYS_START_DATE: True + self.OBEYS_START_DATE: True, + self.PK_CHILD_FIELDS: {"number"} }, "releases": { self.PRIMARY_KEYS: {"id"}, @@ -178,7 +180,8 @@ def expected_metadata(self): "team_members": { self.PRIMARY_KEYS: {"id", "team_slug"}, self.REPLICATION_METHOD: self.FULL, - self.OBEYS_START_DATE: False + self.OBEYS_START_DATE: False, + self.PK_CHILD_FIELDS: {"login"} }, "team_memberships": { self.PRIMARY_KEYS: {"url"}, @@ -188,12 +191,16 @@ def expected_metadata(self): "teams": { self.PRIMARY_KEYS: {"id"}, self.REPLICATION_METHOD: self.FULL, - self.OBEYS_START_DATE: False + self.OBEYS_START_DATE: False, + self.PK_CHILD_FIELDS: {"slug"} } } def expected_replication_method(self): - """return a dictionary with key of table name and value of replication method""" + """ + Return a dictionary with key of table name + and value of replication method + """ return {table: properties.get(self.REPLICATION_METHOD, None) for table, properties in self.expected_metadata().items()} @@ -212,7 +219,7 @@ def expected_streams(self): def expected_primary_keys(self): """ - return a dictionary with key of table name + Return a dictionary with the key of the table name and value as a set of primary key fields """ return {table: properties.get(self.PRIMARY_KEYS, set()) @@ -220,7 +227,8 @@ def expected_primary_keys(self): in self.expected_metadata().items()} def expected_bookmark_keys(self): - """return a dictionary with key of table name + """ + Return a dictionary with the key of the table name and value as a set of bookmark key fields """ return {table: properties.get(self.BOOKMARK, set()) @@ -229,13 +237,32 @@ 
def expected_bookmark_keys(self): def expected_foreign_keys(self): """ - return dictionary with key of table name and - value is set of foreign keys + Return a dictionary with the key of table name and + the value as a set of foreign keys """ return {} + def expected_child_pk_keys(self): + """ + Return a dictionary with key of table name + and value as a set of child streams' primary key fields + which are not automatic in parent streams + """ + return {table: properties.get(self.PK_CHILD_FIELDS, set()) + for table, properties + in self.expected_metadata().items()} + + def expected_automatic_keys(self): + """ + Return a dictionary with the key of the table name + and value as a set of automatic key fields + """ + return {table: ((self.expected_primary_keys().get(table) or set()) | + (self.expected_bookmark_keys().get(table) or set()) | + (self.expected_child_pk_keys().get(table) or set())) + for table in self.expected_metadata()} - ######################### + ######################### # Helper Methods # ######################### @@ -245,10 +272,10 @@ def run_and_verify_check_mode(self, conn_id): This should be ran prior to field selection and initial sync. Return the connection id and found catalogs from menagerie. """ - # run in check mode + # Run in check mode check_job_name = runner.run_check_mode(self, conn_id) - # verify check exit codes + # Verify check exit codes exit_status = menagerie.get_exit_status(conn_id, check_job_name) menagerie.verify_check_exit_status(self, exit_status, check_job_name) @@ -256,9 +283,9 @@ def run_and_verify_check_mode(self, conn_id): self.assertGreater(len(found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id)) found_catalog_names = set(map(lambda c: c['stream_name'], found_catalogs)) - print(found_catalog_names) + LOGGER.info(found_catalog_names) self.assertSetEqual(self.expected_streams(), found_catalog_names, msg="discovered schemas do not match") - print("discovered schemas are OK") + LOGGER.info("discovered schemas are OK") return found_catalogs @@ -282,7 +309,7 @@ def run_and_verify_sync(self, conn_id): sum(sync_record_count.values()), 0, msg="failed to replicate any data: {}".format(sync_record_count) ) - print("total replicated row count: {}".format(sum(sync_record_count.values()))) + LOGGER.info("total replicated row count: {}".format(sum(sync_record_count.values()))) return sync_record_count @@ -311,7 +338,7 @@ def perform_and_verify_table_and_field_selection(self, # Verify all testable streams are selected selected = catalog_entry.get('annotated-schema').get('selected') - print("Validating selection on {}: {}".format(cat['stream_name'], selected)) + LOGGER.info("Validating selection on {}: {}".format(cat['stream_name'], selected)) if cat['stream_name'] not in expected_selected: self.assertFalse(selected, msg="Stream selected, but not testable.") continue # Skip remaining assertions if we aren't selecting this stream @@ -321,14 +348,14 @@ # Verify all fields within each selected stream are selected for field, field_props in catalog_entry.get('annotated-schema').get('properties').items(): field_selected = field_props.get('selected') - print("\tValidating selection on {}.{}: {}".format( + LOGGER.info("\tValidating selection on {}.{}: {}".format( cat['stream_name'], field, field_selected)) self.assertTrue(field_selected, msg="Field not selected.") else: # Verify only automatic fields are selected - expected_automatic_fields = 
self.expected_primary_keys().get(cat['stream_name']) + expected_automatic_keys = self.expected_automatic_keys().get(cat['stream_name']) selected_fields = self.get_selected_fields_from_metadata(catalog_entry['metadata']) - self.assertEqual(expected_automatic_fields, selected_fields) + self.assertEqual(expected_automatic_keys, selected_fields) @staticmethod def get_selected_fields_from_metadata(metadata): @@ -352,7 +379,7 @@ def select_all_streams_and_fields(conn_id, catalogs, select_all_fields: bool = T non_selected_properties = [] if not select_all_fields: - # get a list of all properties so that none are selected + # Get a list of all properties so that none are selected non_selected_properties = schema.get('annotated-schema', {}).get( 'properties', {}).keys() @@ -372,13 +399,10 @@ def timedelta_formatted(self, dtime, days=0): def is_incremental(self, stream): return self.expected_metadata()[stream][self.REPLICATION_METHOD] == self.INCREMENTAL - def is_full_table_sub_stream(self, stream): - return stream in self.FULL_TABLE_SUB_STREAMS + def is_incremental_sub_stream(self, stream): + return stream in self.INCREMENTAL_SUB_STREAMS - def dt_to_ts(self, dtime): - for date_format in self.DATETIME_FMT: - try: - date_stripped = int(time.mktime(dt.strptime(dtime, date_format).timetuple())) - return date_stripped - except ValueError: - continue + def dt_to_ts(self, dtime, format): + """Convert datetime with a format to timestamp""" + date_stripped = int(time.mktime(dt.strptime(dtime, format).timetuple())) + return date_stripped diff --git a/tests/test_github_all_fields.py b/tests/test_github_all_fields.py index 17173dc1..305a9151 100644 --- a/tests/test_github_all_fields.py +++ b/tests/test_github_all_fields.py @@ -4,6 +4,106 @@ from base import TestGithubBase +# As we are not able to generate the following fields via the GitHub UI, we removed them from the expectation list. +KNOWN_MISSING_FIELDS = { + 'events': { + 'ref', + 'head', + 'push_id', + 'distinct_size', + 'size' + }, + 'project_cards': { + 'name', + 'cards_url', + 'column_name', + 'project_id' + }, + 'commits': { + 'files', + 'pr_id', + 'id', + 'pr_number', + 'stats', + }, + 'pr_commits': { + 'files', + 'stats' + }, + 'review_comments': { + 'assignees', + 'commits_url', + 'diff_url', + 'head', + 'review_comments_url', + 'comments_url', + 'issue_url', + 'assignee', + 'requested_teams', + 'patch_url', + 'milestone', + 'review_comment_url', + 'statuses_url', + 'requested_reviewers', + 'labels', + 'base', + 'merge_commit_sha', + 'locked', + 'body_text', + 'body_html' + }, + 'comments': { + 'home_url', + 'body_text', + 'body_html' + }, + 'team_members': { + 'email', + 'starred_at', + 'name', + }, + 'issues': { + 'body_text', + 'closed_by', + 'body_html' + }, + 'releases': { + 'discussion_url', + 'body_html', + 'body_text', + 'mentions_count', + 'reactions' + }, + 'collaborators': { + 'email', + 'name' + }, + 'reviews': { + 'body_text', + 'body_html' + }, + 'teams': { + 'permissions' + }, + 'projects': { + 'organization_permission', + 'private' + }, + 'assignees': { + 'email', + 'starred_at', + 'name' + }, + 'pull_requests': { + 'issues_url' + }, + 'issue_events': { + 'dismissed_review', + 'requested_team', + 'author_association', + 'draft' + }, +} class TestGithubAllFields(TestGithubBase): """Test that with all fields selected for a stream automatic and available fields are replicated""" @@ -14,43 +114,26 @@ def name(): def test_run(self): """ - Ensure running the tap with all streams and fields selected results in the - replication of all fields. 
- - Verify no unexpected streams were replicated - - Verify that more than just the automatic fields are replicated for each stream. + • Verify no unexpected streams were replicated + • Verify that more than just the automatic fields are replicated for each stream. + • Verify all fields for each stream are replicated """ - # BUG TDL-16672 - # The excluded streams are not honoring all fields selection - excluded_streams = { - 'issue_events', - 'comments', - 'projects', - 'pr_commits', - 'events', - 'review_comments', - 'issues', - 'project_cards', - 'project_columns', - 'commits', - 'collaborators' - } - - expected_streams = self.expected_streams() - excluded_streams - - # instantiate connection + + expected_streams = self.expected_streams() + # Instantiate connection conn_id = connections.ensure_connection(self) - # run check mode + # Run check mode found_catalogs = self.run_and_verify_check_mode(conn_id) - # table and field selection + # Table and field selection test_catalogs_all_fields = [catalog for catalog in found_catalogs if catalog.get('stream_name') in expected_streams] self.perform_and_verify_table_and_field_selection( conn_id, test_catalogs_all_fields, select_all_fields=True, ) - # grab metadata after performing table-and-field selection to set expectations + # Grab metadata after performing table-and-field selection to set expectations stream_to_all_catalog_fields = dict() # used for asserting all fields are replicated for catalog in test_catalogs_all_fields: stream_id, stream_name = catalog['stream_id'], catalog['stream_name'] @@ -60,7 +143,7 @@ def test_run(self): if md_entry['breadcrumb'] != []] stream_to_all_catalog_fields[stream_name] = set(fields_from_field_level_md) - # run initial sync + # Run initial sync record_count_by_stream = self.run_and_verify_sync(conn_id) synced_records = runner.get_records_from_target_output() @@ -70,21 +153,22 @@ def test_run(self): for stream in expected_streams: with self.subTest(stream=stream): - # expected values - expected_automatic_keys = self.expected_primary_keys().get(stream) + # Expected values + expected_automatic_keys = self.expected_automatic_keys().get(stream) - # get all expected keys + # Get all expected keys expected_all_keys = stream_to_all_catalog_fields[stream] - # collect actual values messages = synced_records.get(stream) - actual_all_keys = [set(message['data'].keys()) for message in messages['messages'] - if message['action'] == 'upsert'][0] - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream.get(stream, -1), 0) - - # verify all fields for a stream were replicated + # Collect actual values + actual_all_keys = set() + for message in messages['messages']: + if message['action'] == 'upsert': + actual_all_keys.update(message['data'].keys()) + + expected_all_keys = expected_all_keys - KNOWN_MISSING_FIELDS.get(stream, set()) + + # Verify all fields for a stream were replicated self.assertGreater(len(expected_all_keys), len(expected_automatic_keys)) self.assertTrue(expected_automatic_keys.issubset(expected_all_keys), msg=f'{expected_automatic_keys-expected_all_keys} is not in "expected_all_keys"') self.assertSetEqual(expected_all_keys, actual_all_keys) diff --git a/tests/test_github_automatic_fields.py b/tests/test_github_automatic_fields.py index 7a5bc759..35b0de56 100644 --- a/tests/test_github_automatic_fields.py +++ b/tests/test_github_automatic_fields.py @@ -1,6 +1,3 @@ -""" -Test that with no fields selected for a stream automatic fields are still replicated -""" from 
tap_tester import runner, connections from base import TestGithubBase @@ -15,21 +12,19 @@ def name(): def test_run(self): """ - - Verify that for each stream you can get multiple pages of data - when no fields are selected. - - Verify that only the automatic fields are sent to the target. - - Verify that all replicated records have unique primary key values. + • Verify we can deselect all fields except when inclusion=automatic, which is handled by base.py methods + • Verify that only the automatic fields are sent to the target. + • Verify that all replicated records have unique primary key values. """ - # Exclude collaborators stream due to access issues in circle - expected_streams = self.expected_streams() - {'collaborators'} + expected_streams = self.expected_streams() - # instantiate connection + # Instantiate connection conn_id = connections.ensure_connection(self) - # run check mode + # Run check mode found_catalogs = self.run_and_verify_check_mode(conn_id) - # table and field selection + # Table and field selection test_catalogs_automatic_fields = [catalog for catalog in found_catalogs if catalog.get('stream_name') in expected_streams] @@ -37,20 +32,22 @@ def test_run(self): conn_id, test_catalogs_automatic_fields, select_all_fields=False, ) - # run initial sync + # Run initial sync record_count_by_stream = self.run_and_verify_sync(conn_id) synced_records = runner.get_records_from_target_output() for stream in expected_streams: with self.subTest(stream=stream): - # expected values - expected_keys = self.expected_primary_keys().get(stream) + + # Expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_keys = self.expected_automatic_keys().get(stream) - # collect actual values + # Collect actual values data = synced_records.get(stream, {}) record_messages_keys = [set(row.get('data').keys()) for row in data.get('messages', {})] primary_keys_list = [ - tuple(message.get('data').get(expected_pk) for expected_pk in expected_keys) + tuple(message.get('data').get(expected_pk) for expected_pk in expected_primary_keys) for message in data.get('messages') if message.get('action') == 'upsert'] unique_primary_keys_list = set(primary_keys_list) diff --git a/tests/test_github_bookmarks.py b/tests/test_github_bookmarks.py index 3520a9d8..9e2c4135 100644 --- a/tests/test_github_bookmarks.py +++ b/tests/test_github_bookmarks.py @@ -8,77 +8,54 @@ class TestGithubBookmarks(TestGithubBase): + """Test tap sets a bookmark and respects it for the next sync of a stream""" + @staticmethod def name(): return "tap_tester_github_bookmarks" - @staticmethod - def convert_state_to_utc(date_str): - """ - Convert a saved bookmark value of the form '2020-08-25T13:17:36-07:00' to - a string formatted utc datetime, - in order to compare against json formatted datetime values - """ - date_object = dateutil.parser.parse(date_str) - date_object_utc = date_object.astimezone(tz=pytz.UTC) - return datetime.datetime.strftime(date_object_utc, "%Y-%m-%dT%H:%M:%SZ") - def calculated_states_by_stream(self, current_state, synced_records, replication_keys): """ Look at the bookmarks from a previous sync and set a new bookmark value based off timedelta expectations. This ensures the subsequent sync will replicate at least 1 record but, fewer records than the previous sync. - - If the test data is changed in the future this will break expectations for this test. 
""" timedelta_by_stream = {stream: [90,0,0] # {stream_name: [days, hours, minutes], ...} for stream in self.expected_streams()} - timedelta_by_stream['comments'] = [7, 0, 0] - timedelta_by_stream['commit_comments'] = [0, 0, 1] - timedelta_by_stream['commits'] = [0, 17, 0] - timedelta_by_stream['issue_events'] = [1, 0, 0] - timedelta_by_stream['issue_milestones'] = [0, 1, 0] - timedelta_by_stream['issues'] = [7, 0, 0] - timedelta_by_stream['pull_requests'] = [7, 0, 0] repo = self.get_properties().get('repository') - stream_to_calculated_state = {stream: "" for stream in current_state['bookmarks'][repo].keys()} + stream_to_calculated_state = {repo: {stream: "" for stream in current_state['bookmarks'][repo].keys()}} for stream, state in current_state['bookmarks'][repo].items(): state_key, state_value = next(iter(state.keys())), next(iter(state.values())) - sync_messages = [record.get('data') for record in - synced_records.get(stream, {'messages': []}).get('messages') - if record.get('action') == 'upsert'] - - # the `commits` and `pr_commits` streams don't have a top level replication_key field - if stream in ('commits', 'pr_commits'): - max_record_values = [values.get('commit', {}).get('committer', {}).get('date') - for values in sync_messages] - max_value = max(max_record_values) - else: - replication_key = next(iter(replication_keys.get(stream))) - max_record_values = [values.get(replication_key) for values in sync_messages] - max_value = max(max_record_values) - - # this is because the tap uses `time_extracted` to bookmark with `since` at execution - new_state_value = min(max_value, state_value) - state_as_datetime = dateutil.parser.parse(new_state_value) + state_as_datetime = dateutil.parser.parse(state_value) days, hours, minutes = timedelta_by_stream[stream] calculated_state_as_datetime = state_as_datetime - datetime.timedelta(days=days, hours=hours, minutes=minutes) - state_format = '%Y-%m-%dT%H:%M:%S-00:00' + state_format = '%Y-%m-%dT%H:%M:%SZ' calculated_state_formatted = datetime.datetime.strftime(calculated_state_as_datetime, state_format) - stream_to_calculated_state[stream] = {state_key: calculated_state_formatted} + stream_to_calculated_state[repo][stream] = {state_key: calculated_state_formatted} return stream_to_calculated_state def test_run(self): - # Exclude collaborators stream due to access issues in circle - expected_streams = self.expected_streams() - {'collaborators'} + """ + • Verify that for each stream you can do a sync which records bookmarks. + • Verify that the bookmark is the maximum value sent to the target for the replication key. + • Verify that a second sync respects the bookmark + All data of the second sync is >= the bookmark from the first sync + The number of records in the 2nd sync is less then the first + • Verify that for full table stream, all data replicated in sync 1 is replicated again in sync 2. 
+ + PREREQUISITE + For EACH stream that is incrementally replicated there are multiple rows of data with + different values for the replication key + """ + expected_streams = self.expected_streams() expected_replication_keys = self.expected_bookmark_keys() expected_replication_methods = self.expected_replication_method() @@ -109,8 +86,8 @@ def test_run(self): new_states = {'bookmarks': dict()} simulated_states = self.calculated_states_by_stream(first_sync_bookmarks, first_sync_records, expected_replication_keys) - for stream, new_state in simulated_states.items(): - new_states['bookmarks'][stream] = new_state + for repo, new_state in simulated_states.items(): + new_states['bookmarks'][repo] = new_state menagerie.set_state(conn_id, new_states) ########################################################################## @@ -128,10 +105,10 @@ def test_run(self): for stream in expected_streams: with self.subTest(stream=stream): - # expected values + # Expected values expected_replication_method = expected_replication_methods[stream] - # collect information for assertions from syncs 1 & 2 base on expected values + # Collect information for assertions from syncs 1 & 2 base on expected values first_sync_count = first_sync_record_count.get(stream, 0) second_sync_count = second_sync_record_count.get(stream, 0) first_sync_messages = [record.get('data') for record in @@ -145,12 +122,15 @@ def test_run(self): if expected_replication_method == self.INCREMENTAL: - # collect information specific to incremental streams from syncs 1 & 2 + # Collect information specific to incremental streams from syncs 1 & 2 replication_key = next(iter(expected_replication_keys[stream])) first_bookmark_value = first_bookmark_key_value.get('since') second_bookmark_value = second_bookmark_key_value.get('since') - first_bookmark_value_utc = self.convert_state_to_utc(first_bookmark_value) - second_bookmark_value_utc = self.convert_state_to_utc(second_bookmark_value) + + first_bookmark_value_ts = self.dt_to_ts(first_bookmark_value, self.BOOKMARK_FORMAT) + second_bookmark_value_ts = self.dt_to_ts(second_bookmark_value, self.BOOKMARK_FORMAT) + + simulated_bookmark_value = self.dt_to_ts(new_states['bookmarks'][repo][stream]['since'], self.BOOKMARK_FORMAT) # Verify the first sync sets a bookmark of the expected form self.assertIsNotNone(first_bookmark_key_value) @@ -161,31 +141,34 @@ def test_run(self): self.assertIsNotNone(second_bookmark_key_value.get('since')) # Verify the second sync bookmark is Equal or Greater than the first sync bookmark - # the tap uses `time_extracted` and sets a bookmark using `since` for all real/pseudo incremental streams - self.assertGreaterEqual(second_bookmark_value, first_bookmark_value) - - for record in second_sync_messages: - # Verify the second sync bookmark value is the max replication key value for a given stream - if stream in ('commits', 'pr_commits'): - replication_key_value = record.get('commit', {}).get('committer', {}).get('date') - else: - replication_key_value = record.get(replication_key) - self.assertLessEqual( - replication_key_value, second_bookmark_value_utc, - msg="Second sync bookmark was set incorrectly, a record with a greater replication-key value was synced." 
- ) + self.assertGreaterEqual(second_bookmark_value_ts, first_bookmark_value_ts) + replication_key_format = self.RECORD_REPLICATION_KEY_FORMAT + # For events stream replication key value is coming in different format + if stream == 'events': + replication_key_format = self.EVENTS_RECORD_REPLICATION_KEY_FORMAT + for record in first_sync_messages: # Verify the first sync bookmark value is the max replication key value for a given stream - if stream in ('commits', 'pr_commits'): - replication_key_value = record.get('commit', {}).get('committer', {}).get('date') - else: - replication_key_value = record.get(replication_key) + replication_key_value = self.dt_to_ts(record.get(replication_key), replication_key_format) + self.assertLessEqual( - replication_key_value, first_bookmark_value_utc, + replication_key_value, first_bookmark_value_ts, msg="First sync bookmark was set incorrectly, a record with a greater replication-key value was synced." ) + for record in second_sync_messages: + # Verify the second sync bookmark value is the max replication key value for a given stream + replication_key_value = self.dt_to_ts(record.get(replication_key), replication_key_format) + + self.assertGreaterEqual(replication_key_value, simulated_bookmark_value, + msg="Second sync records do not respect the previous bookmark.") + + self.assertLessEqual( + replication_key_value, second_bookmark_value_ts, + msg="Second sync bookmark was set incorrectly, a record with a greater replication-key value was synced." + ) + # Verify the number of records in the 2nd sync is less then the first self.assertLessEqual(second_sync_count, first_sync_count) diff --git a/tests/test_github_discovery.py b/tests/test_github_discovery.py index 3d4c13f6..1fff7f0d 100644 --- a/tests/test_github_discovery.py +++ b/tests/test_github_discovery.py @@ -23,7 +23,7 @@ def test_run(self): • verify that primary keys are given the inclusion of automatic. • verify that all other fields have inclusion of available metadata. """ - streams_to_test = self.expected_streams() + expected_streams = self.expected_streams() conn_id = connections.ensure_connection(self) @@ -34,7 +34,7 @@ def test_run(self): self.assertTrue(all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]), msg="One or more streams don't follow standard naming") - for stream in streams_to_test: + for stream in expected_streams: with self.subTest(stream=stream): # Verify ensure the catalog is found for a given stream @@ -42,14 +42,15 @@ def test_run(self): if catalog["stream_name"] == stream])) self.assertIsNotNone(catalog) - # collecting expected values + # Collecting expected values expected_primary_keys = self.expected_primary_keys()[stream] - expected_automatic_fields = expected_primary_keys + expected_automatic_keys = self.expected_automatic_keys().get(stream) - # collecting actual values... + # Collecting actual values... 
schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) metadata = schema_and_metadata["metadata"] stream_properties = [item for item in metadata if item.get("breadcrumb") == []] + actual_fields = [md_entry.get("breadcrumb")[1] for md_entry in metadata if md_entry.get("breadcrumb") != []] actual_primary_keys = set( stream_properties[0].get( "metadata", {self.PRIMARY_KEYS: []}).get(self.PRIMARY_KEYS, []) @@ -60,24 +61,40 @@ def test_run(self): if item.get("metadata").get("inclusion") == "automatic" ) + actual_replication_method = stream_properties[0].get( + "metadata", {self.REPLICATION_METHOD: None}).get(self.REPLICATION_METHOD) + ########################################################################## ### metadata assertions ########################################################################## - # verify there is only 1 top level breadcrumb in metadata + # Verify there is only 1 top level breadcrumb in metadata self.assertTrue(len(stream_properties) == 1, msg="There is NOT only one top level breadcrumb for {}".format(stream) + \ "\nstream_properties | {}".format(stream_properties)) - # verify primary key(s) match expectations + # Verify there are no duplicate metadata entries + self.assertEqual(len(actual_fields), + len(set(actual_fields)), + msg = "duplication in the retrieved fields") + + # Verify primary key(s) match expectations self.assertSetEqual( expected_primary_keys, actual_primary_keys, ) - # verify that primary keys are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_automatic_fields, actual_automatic_fields) - - # verify that all other fields have inclusion of available + # Verify that primary keys and replication keys are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_automatic_keys, actual_automatic_fields) + + # Verify the actual replication matches our expected replication method + self.assertEqual( + self.expected_replication_method().get(stream, None), + actual_replication_method, + msg="The actual replication method {} doesn't match the expected {}".format( + actual_replication_method, + self.expected_replication_method().get(stream, None))) + + # Verify that all other fields have inclusion of available # This assumes there are no unsupported fields for SaaS sources self.assertTrue( all({item.get("metadata").get("inclusion") == "available" diff --git a/tests/test_github_interrupted_sync.py b/tests/test_github_interrupted_sync.py new file mode 100644 index 00000000..7c268604 --- /dev/null +++ b/tests/test_github_interrupted_sync.py @@ -0,0 +1,172 @@ +from tap_tester import connections, runner, menagerie +from base import TestGithubBase + + +class TestGithubInterruptedSync(TestGithubBase): + """Test tap's ability to recover from an interrupted sync""" + + @staticmethod + def name(): + return "tt_github_interrupted_sync_test" + + def get_properties(self): + """ + Maintain states for start_date and end_date + """ + return { + 'start_date' : '2021-10-01T00:00:00Z', + 'repository': 'singer-io/test-repo singer-io/singer-python' + } + + def test_run(self): + """ + Testing that if a sync job is interrupted and state is saved with `currently_syncing`(stream) and `currently_syncing_repo`, + the next sync job kicks off and the tap picks back up on that `currently_syncing` stream of `currently_syncing_repo`. 
+ """ + streams_to_test = {"issues", "stargazers", "pull_requests", "issue_events"} + conn_id = connections.ensure_connection(self) + expected_replication_methods = self.expected_replication_method() + expected_replication_keys = self.expected_bookmark_keys() + repo_key = "_sdc_repository" + + start_date = self.dt_to_ts(self.get_properties().get("start_date"), self.BOOKMARK_FORMAT) + + # Run a discovery job + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # Partition catalogs for use in table/field selection + test_catalogs = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in streams_to_test] + self.perform_and_verify_table_and_field_selection(conn_id, test_catalogs, select_all_fields=True) + + # Run a sync + self.run_and_verify_sync(conn_id) + + # Acquire records from the target output + full_sync_records = runner.get_records_from_target_output() + full_sync_state = menagerie.get_state(conn_id) + + # Set state in which all streams of one repo(singer-io/singer-python) have completed a sync. + # And one stream (pull_requests) of other repo(singer-io/test-repo) is syncing currently. + + interrupted_state = { + "currently_syncing": "pull_requests", + "currently_syncing_repo": "singer-io/test-repo", + "bookmarks": { + "singer-io/singer-python": { + "issues": { + "since": "2022-06-22T13:32:42Z" + }, + "pull_requests": { + "since": "2022-06-22T13:32:42Z" + }, + "issue_events": { + "since": "2022-06-22T13:32:42Z" + } + }, + "singer-io/test-repo": { + "issues": { + "since": "2022-07-13T09:21:19Z" + }, + "pull_requests": { + "since": "2022-06-30T05:33:24Z" + } + } + } + } + + menagerie.set_state(conn_id, interrupted_state) + + # Run another sync + self.run_and_verify_sync(conn_id) + + # acquire records from target output + interrupted_sync_records = runner.get_records_from_target_output() + final_state = menagerie.get_state(conn_id) + currently_syncing = final_state.get('currently_syncing') + + # Checking resuming sync resulted in a successfully saved state + with self.subTest(): + + # Verify sync is not interrupted by checking currently_syncing in the state for sync + self.assertIsNone(currently_syncing) + + # Verify bookmarks are saved + self.assertIsNotNone(final_state.get('bookmarks')) + + # Verify final_state is equal to uninterrupted sync's state + # (This is what the value would have been without an interruption and proves resuming succeeds) + self.assertDictEqual(final_state, full_sync_state) + + for repository in self.get_properties().get("repository").split(): + with self.subTest(repository=repository): + + full_sync_bookmark = full_sync_state["bookmarks"][repository] + final_bookmark = final_state["bookmarks"][repository] + interrupted_repo_bookmark = interrupted_state["bookmarks"][repository] + + for stream in streams_to_test: + with self.subTest(stream=stream): + + # Expected values + expected_replication_method = expected_replication_methods[stream] + expected_primary_keys = list(self.expected_primary_keys()[stream]) + + # Gather results + full_records = [message['data'] for message in + full_sync_records.get(stream, {}).get('messages', []) + if message['data'][repo_key] == repository] + full_record_count = len(full_records) + + interrupted_records = [message['data'] for message in + interrupted_sync_records.get(stream, {}).get('messages', []) + if message['data'][repo_key] == repository] + interrupted_record_count = len(interrupted_records) + + if expected_replication_method == self.INCREMENTAL: + expected_replication_key = 
next(iter(expected_replication_keys[stream])) + + if stream in interrupted_repo_bookmark.keys(): + interrupted_bookmark = self.dt_to_ts(interrupted_repo_bookmark[stream]["since"], self.BOOKMARK_FORMAT) + + if stream == interrupted_state['currently_syncing'] and repository == interrupted_state['currently_syncing_repo']: + + for record in interrupted_records: + rec_time = self.dt_to_ts(record[expected_replication_key], self.RECORD_REPLICATION_KEY_FORMAT) + self.assertGreaterEqual(rec_time, interrupted_bookmark) + + # Verify all interrupted recs are in full recs + self.assertIn(record, full_records, msg='incremental table record in interrupted sync not found in full sync') + + # Record count for all streams of interrupted sync match expectations + full_records_after_interrupted_bookmark = 0 + + for record in full_records: + rec_time = self.dt_to_ts(record[expected_replication_key], self.RECORD_REPLICATION_KEY_FORMAT) + self.assertGreaterEqual(rec_time, start_date) + + if (rec_time >= interrupted_bookmark): + full_records_after_interrupted_bookmark += 1 + + self.assertEqual(full_records_after_interrupted_bookmark, len(interrupted_records), \ + msg="Expected {} records in each sync".format(full_records_after_interrupted_bookmark)) + else: + # Verify we collected records that have the same replication value as a bookmark for streams that are already synced + self.assertGreaterEqual(interrupted_record_count, 0) + else: + # Verify resuming sync replicates all records that were found in the full sync (uninterrupted) + for record in interrupted_records: + with self.subTest(record_primary_key=record[expected_primary_keys[0]]): + self.assertIn(record, full_records, msg='Unexpected record replicated in resuming sync.') + for record in full_records: + with self.subTest(record_primary_key=record[expected_primary_keys[0]]): + self.assertIn(record, interrupted_records, msg='Record missing from resuming sync.' ) + else: + # Verify full table streams do not save bookmarked values at the conclusion of a successful sync + self.assertNotIn(stream, full_sync_bookmark.keys()) + self.assertNotIn(stream, final_bookmark.keys()) + + # Verify first and second sync have the same records + self.assertEqual(full_record_count, interrupted_record_count) + for rec in interrupted_records: + self.assertIn(rec, full_records, msg='full table record in interrupted sync not found in full sync') diff --git a/tests/test_github_interrupted_sync_add_stream.py b/tests/test_github_interrupted_sync_add_stream.py new file mode 100644 index 00000000..0b46d389 --- /dev/null +++ b/tests/test_github_interrupted_sync_add_stream.py @@ -0,0 +1,177 @@ +from tap_tester import connections, runner, menagerie +from base import TestGithubBase + + +class TestGithubInterruptedSyncAddStream(TestGithubBase): + """Test tap's ability to recover from an interrupted sync""" + + @staticmethod + def name(): + return "tt_github_interrupted_sync_add_stream_test" + + def get_properties(self): + """ + Maintain states for start_date and end_date + """ + return { + 'start_date' : '2021-10-01T00:00:00Z', + 'repository': 'singer-io/test-repo singer-io/singer-python' + } + + def test_run(self): + """ + Testing that if a sync job is interrupted and state is saved with `currently_syncing`(stream) and `currently_syncing_repo`, + the next sync job kicks off and the tap picks back up on that `currently_syncing` stream of `currently_syncing_repo`. 
+ - Verify behavior is consistent when an added stream is selected between initial and resuming sync + """ + streams_to_test = {"issues", "stargazers", "pull_requests"} + conn_id = connections.ensure_connection(self) + expected_replication_methods = self.expected_replication_method() + expected_replication_keys = self.expected_bookmark_keys() + repo_key = "_sdc_repository" + + start_date = self.dt_to_ts(self.get_properties().get("start_date"), self.BOOKMARK_FORMAT) + + # Run a discovery job + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # Partition catalogs for use in table/field selection + test_catalogs = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in streams_to_test] + self.perform_and_verify_table_and_field_selection(conn_id, test_catalogs, select_all_fields=True) + + # Run a sync + self.run_and_verify_sync(conn_id) + + # Acquire records from the target output + full_sync_records = runner.get_records_from_target_output() + full_sync_state = menagerie.get_state(conn_id) + + # Add a stream between syncs + added_stream = 'issue_events' + streams_to_test.add(added_stream) + test_catalogs = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in streams_to_test] + # Add new stream to selected list + self.perform_and_verify_table_and_field_selection(conn_id, test_catalogs, select_all_fields=True) + + # Set state in which all streams of one repo(singer-io/singer-python) have completed a sync. + # And one stream (pull_requests) of other repo(singer-io/test-repo) is syncing currently. + + interrupted_state = { + "currently_syncing": "pull_requests", + "currently_syncing_repo": "singer-io/test-repo", + "bookmarks": { + "singer-io/singer-python": { + "issues": { + "since": "2022-06-22T13:32:42Z" + }, + "pull_requests": { + "since": "2022-06-22T13:32:42Z" + } + }, + "singer-io/test-repo": { + "issues": { + "since": "2022-07-14T07:47:21Z" + }, + "pull_requests": { + "since": "2022-07-13T07:47:21Z" + } + } + } + } + + menagerie.set_state(conn_id, interrupted_state) + + # Run another sync + self.run_and_verify_sync(conn_id) + + # acquire records from target output + interrupted_sync_records = runner.get_records_from_target_output() + final_state = menagerie.get_state(conn_id) + currently_syncing = final_state.get('currently_syncing') + + # Checking resuming sync resulted in a successfully saved state + with self.subTest(): + + # Verify sync is not interrupted by checking currently_syncing in the state for sync + self.assertIsNone(currently_syncing) + + # Verify bookmarks are saved + self.assertIsNotNone(final_state.get('bookmarks')) + + for repository in self.get_properties().get("repository").split(): + with self.subTest(repository=repository): + + full_sync_bookmark = full_sync_state["bookmarks"][repository] + final_bookmark = final_state["bookmarks"][repository] + interrupted_repo_bookmark = interrupted_state["bookmarks"][repository] + + for stream in streams_to_test: + with self.subTest(stream=stream): + + # Expected values + expected_replication_method = expected_replication_methods[stream] + + # Gather results + if stream != added_stream: + full_records = [message['data'] for message in + full_sync_records.get(stream, {}).get('messages', []) + if message['data'][repo_key] == repository] + full_record_count = len(full_records) + + interrupted_records = [message['data'] for message in + interrupted_sync_records.get(stream, {}).get('messages', []) + if message['data'][repo_key] == repository] + interrupted_record_count = 
len(interrupted_records) + + if expected_replication_method == self.INCREMENTAL: + expected_replication_key = next(iter(expected_replication_keys[stream])) + + if stream in full_sync_bookmark.keys(): + full_sync_stream_bookmark = self.dt_to_ts(full_sync_bookmark.get(stream, {}).get("since"), self.BOOKMARK_FORMAT) + final_sync_stream_bookmark = self.dt_to_ts(final_bookmark.get(stream, {}).get("since"), self.BOOKMARK_FORMAT) + + if stream in interrupted_repo_bookmark.keys(): + interrupted_bookmark = self.dt_to_ts(interrupted_repo_bookmark[stream]["since"], self.BOOKMARK_FORMAT) + + for record in interrupted_records: + rec_time = self.dt_to_ts(record[expected_replication_key], self.RECORD_REPLICATION_KEY_FORMAT) + self.assertGreaterEqual(rec_time, interrupted_bookmark) + + else: + # verify we collected records that have the same replication value as a bookmark for streams that are already synced + self.assertGreater(interrupted_record_count, 0) + + if stream != added_stream: + + # Verify state ends with the same value for common streams after both full and interrupted syncs + self.assertEqual(full_sync_stream_bookmark, final_sync_stream_bookmark) + + for record in interrupted_records: + + # Verify all interrupted recs are in full recs + self.assertIn(record, full_records, msg='incremental table record in interrupted sync not found in full sync') + + # Record count for all streams of interrupted sync match expectations + full_records_after_interrupted_bookmark = 0 + + for record in full_records: + rec_time = self.dt_to_ts(record[expected_replication_key], self.RECORD_REPLICATION_KEY_FORMAT) + self.assertGreater(rec_time, start_date, msg=f"{expected_replication_key} {stream} {repository} {record}") + + if (rec_time >= interrupted_bookmark): + full_records_after_interrupted_bookmark += 1 + + self.assertGreaterEqual(full_records_after_interrupted_bookmark, interrupted_record_count, \ + msg="Expected max {} records in each sync".format(full_records_after_interrupted_bookmark)) + + else: + # Verify full table streams do not save bookmarked values after a successful sync + self.assertNotIn(stream, full_sync_bookmark.keys()) + self.assertNotIn(stream, final_bookmark.keys()) + + # Verify first and second sync have the same records + self.assertEqual(full_record_count, interrupted_record_count) + for rec in interrupted_records: + self.assertIn(rec, full_records, msg='full table record in interrupted sync not found in full sync') diff --git a/tests/test_github_interrupted_sync_remove_stream.py b/tests/test_github_interrupted_sync_remove_stream.py new file mode 100644 index 00000000..04ed54d6 --- /dev/null +++ b/tests/test_github_interrupted_sync_remove_stream.py @@ -0,0 +1,202 @@ +from tap_tester import connections, runner, menagerie +from base import TestGithubBase + + +class TestGithubInterruptedSyncRemoveStream(TestGithubBase): + """Test tap's ability to recover from an interrupted sync""" + + @staticmethod + def name(): + return "tt_github_interrupted_sync_remove_stream_test" + + def get_properties(self): + """ + Maintain states for start_date and end_date + """ + return { + 'start_date' : '2021-10-01T00:00:00Z', + 'repository': 'singer-io/test-repo singer-io/singer-python' + } + + def test_run(self): + + # Test for removing any stream from state + self.run_interrupted_sync("issue_events") + + # Test for removing currently syncing stream from state + self.run_interrupted_sync("pull_requests") + + def run_interrupted_sync(self, removed_stream): + """ + Testing that if a sync job is interrupted and 
state is saved with `currently_syncing`(stream) and `currently_syncing_repo`, + the next sync job kicks off and the tap picks back up on that `currently_syncing` stream of `currently_syncing_repo`. + - Verify behavior is consistent when a stream is removed from the selected list between initial and resuming sync. + """ + streams_to_test = {"issues", "stargazers", "pull_requests", "issue_events"} + conn_id = connections.ensure_connection(self) + expected_replication_methods = self.expected_replication_method() + expected_replication_keys = self.expected_bookmark_keys() + repo_key = "_sdc_repository" + + start_date = self.dt_to_ts(self.get_properties().get("start_date"), self.BOOKMARK_FORMAT) + + # Run a discovery job + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # Partition catalogs for use in table/field selection + test_catalogs = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in streams_to_test] + self.perform_and_verify_table_and_field_selection(conn_id, test_catalogs, select_all_fields=True) + + # Run a sync + self.run_and_verify_sync(conn_id) + + # Acquire records from target output + full_sync_records = runner.get_records_from_target_output() + full_sync_state = menagerie.get_state(conn_id) + + # Create new connection for another sync + conn_id_2 = connections.ensure_connection(self) + + # Add a stream between syncs + streams_to_test = streams_to_test - {removed_stream} + found_catalogs = self.run_and_verify_check_mode(conn_id_2) + + test_catalogs = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in streams_to_test] + + # Add new stream to selected list + self.perform_and_verify_table_and_field_selection(conn_id_2, test_catalogs, select_all_fields=True) + + # Set state in which all streams of one repo(singer-io/singer-python) have completed a sync. + # And one stream (pull_requests) of other repo(singer-io/test-repo) is syncing currently. 
+ + interrupted_state = { + "currently_syncing": "pull_requests", + "currently_syncing_repo": "singer-io/test-repo", + "bookmarks": { + "singer-io/singer-python": { + "issues": { + "since": "2022-06-22T13:32:42Z" + }, + "pull_requests": { + "since": "2022-06-22T13:32:42Z" + }, + "issue_events": { + "since": "2022-06-22T13:32:42Z" + } + }, + "singer-io/test-repo": { + "issues": { + "since": "2022-07-14T07:47:21Z" + }, + "pull_requests": { + "since": "2022-07-13T07:47:21Z" + } + } + } + } + + menagerie.set_state(conn_id_2, interrupted_state) + + # Run another sync + self.run_and_verify_sync(conn_id_2) + + # Acquire records from target output + interrupted_sync_records = runner.get_records_from_target_output() + final_state = menagerie.get_state(conn_id_2) + currently_syncing = final_state.get('currently_syncing') + + # Checking resuming sync resulted in a successfully saved state + with self.subTest(): + + # Verify sync is not interrupted by checking currently_syncing in the state for sync + self.assertIsNone(currently_syncing) + + # Verify bookmarks are saved + self.assertIsNotNone(final_state.get('bookmarks')) + + for repository in self.get_properties().get("repository").split(): + with self.subTest(repository=repository): + + full_sync_bookmark = full_sync_state["bookmarks"][repository] + final_bookmark = final_state["bookmarks"][repository] + interrupted_repo_bookmark = interrupted_state["bookmarks"][repository] + + for stream in list(streams_to_test) + [removed_stream]: + with self.subTest(stream=stream): + + # Expected values + expected_replication_method = expected_replication_methods[stream] + expected_primary_keys = list(self.expected_primary_keys()[stream]) + + # Gather results + full_records = [message['data'] for message in + full_sync_records.get(stream, {}).get('messages', []) + if message['data'][repo_key] == repository] + full_record_count = len(full_records) + + if stream != removed_stream: + interrupted_records = [message['data'] for message in + interrupted_sync_records.get(stream, {}).get('messages', []) + if message['data'][repo_key] == repository] + interrupted_record_count = len(interrupted_records) + else: + self.assertNotIn(stream, interrupted_sync_records.keys()) + + if expected_replication_method == self.INCREMENTAL: + expected_replication_key = next(iter(expected_replication_keys[stream])) + full_sync_stream_bookmark = self.dt_to_ts(full_sync_bookmark.get(stream, {}).get("since"), self.BOOKMARK_FORMAT) + + if stream in interrupted_repo_bookmark.keys(): + interrupted_bookmark = self.dt_to_ts(interrupted_repo_bookmark[stream]["since"], self.BOOKMARK_FORMAT) + final_sync_stream_bookmark = self.dt_to_ts(final_bookmark.get(stream, {}).get("since"), self.BOOKMARK_FORMAT) + + if stream != removed_stream: + + # Verify state ends with the same value for common streams after both full and interrupted syncs + self.assertEqual(full_sync_stream_bookmark, final_sync_stream_bookmark) + + # Verify resuming sync only replicates records with replication key values greater or equal to + # the interrupted_state for streams that were completed, replicated during the interrupted sync. 
+ for record in interrupted_records: + with self.subTest(record_primary_key=record[expected_primary_keys[0]]): + rec_time = self.dt_to_ts(record[expected_replication_key], self.RECORD_REPLICATION_KEY_FORMAT) + self.assertGreaterEqual(rec_time, interrupted_bookmark) + + # Verify all interrupted recs are in full recs + self.assertIn(record, full_records, msg='Incremental table record in interrupted sync not found in full sync') + + # Record count for all streams of interrupted sync match expectations + full_records_after_interrupted_bookmark = 0 + for record in full_records: + rec_time = self.dt_to_ts(record[expected_replication_key], self.RECORD_REPLICATION_KEY_FORMAT) + self.assertGreater(rec_time, start_date, msg=f"{expected_replication_key} {stream} {repository} {record}") + + if (rec_time >= interrupted_bookmark): + full_records_after_interrupted_bookmark += 1 + + self.assertGreaterEqual(full_records_after_interrupted_bookmark, interrupted_record_count, \ + msg="Expected max {} records in each sync".format(full_records_after_interrupted_bookmark)) + else: + # Verify the bookmark has not advanced for the removed stream + self.assertEqual(final_sync_stream_bookmark, interrupted_bookmark) + else: + # verify we collected records that have the same replication value as a bookmark for streams that are already synced + self.assertGreater(interrupted_record_count, 0) + + else: + # Verify full table streams do not save bookmarked values after a successful sync + self.assertNotIn(stream, full_sync_bookmark.keys()) + self.assertNotIn(stream, final_bookmark.keys()) + + # Verify first and second sync have the same records + self.assertEqual(full_record_count, interrupted_record_count) + for rec in interrupted_records: + self.assertIn(rec, full_records, msg='Full table record in interrupted sync not found in full sync') + + # Verify at least 1 record was replicated for each stream + if stream != removed_stream: + self.assertGreater(interrupted_record_count, 0) + + print(f"{stream} resumed sync records replicated: {interrupted_record_count}") diff --git a/tests/test_github_pagination.py b/tests/test_github_pagination.py index 6beed905..06a24abd 100644 --- a/tests/test_github_pagination.py +++ b/tests/test_github_pagination.py @@ -1,3 +1,5 @@ +from math import ceil + from tap_tester import runner, connections from base import TestGithubBase @@ -11,7 +13,7 @@ def name(): def get_properties(self, original: bool = True): return_value = { 'start_date' : '2020-01-01T00:00:00Z', - 'repository': 'singer-io/tap-github' + 'repository': self.repository_name } if original: return return_value @@ -21,15 +23,38 @@ def get_properties(self, original: bool = True): return return_value def test_run(self): - # page size for "pull_requests" + + streams_to_test = self.expected_streams() + + # Pagination is not supported for "team_memberships" by Github API. + # Skipping "teams" stream as it's RECORD count is <= 30. + untestable_streams = {'team_memberships', 'teams'} + + # For some streams RECORD count were not > 30 in same test-repo. + # So, separated streams on the basis of RECORD count. 
+ self.repository_name = 'singer-io/tap-github' + expected_stream_1 = {'comments', 'stargazers', 'commits', 'pull_requests', 'reviews', 'review_comments', 'pr_commits', 'issues'} + self.run_test(expected_stream_1) + + self.repository_name = 'singer-io/test-repo' + expected_stream_2 = streams_to_test - expected_stream_1 - untestable_streams + self.run_test(expected_stream_2) + + def run_test(self, streams): + """ + • Verify that for each stream you can get multiple pages of data. + This requires we ensure more than 1 page of data exists at all times for any given stream. + • Verify by pks that the data replicated matches the data we expect. + """ + + # Page size for pagination supported streams page_size = 30 conn_id = connections.ensure_connection(self) - # Checking pagination for "pull_requests" stream - expected_streams = ["pull_requests"] + expected_streams = streams found_catalogs = self.run_and_verify_check_mode(conn_id) - # table and field selection + # Table and field selection test_catalogs = [catalog for catalog in found_catalogs if catalog.get('stream_name') in expected_streams] @@ -39,27 +64,42 @@ def test_run(self): synced_records = runner.get_records_from_target_output() + # Verify no unexpected streams were replicated + synced_stream_names = set(synced_records.keys()) + self.assertSetEqual(expected_streams, synced_stream_names) + for stream in expected_streams: with self.subTest(stream=stream): - # expected values + # Expected values expected_primary_keys = self.expected_primary_keys()[stream] - # collect information for assertions from syncs 1 & 2 base on expected values + # Collect information for assertions from syncs 1 & 2 base on expected values record_count_sync = record_count_by_stream.get(stream, 0) primary_keys_list = [tuple(message.get('data').get(expected_pk) for expected_pk in expected_primary_keys) for message in synced_records.get(stream).get('messages') if message.get('action') == 'upsert'] - # verify records are more than page size so multiple page is working - self.assertGreater(record_count_sync, page_size) - - primary_keys_list_1 = primary_keys_list[:page_size] - primary_keys_list_2 = primary_keys_list[page_size:2*page_size] - - primary_keys_page_1 = set(primary_keys_list_1) - primary_keys_page_2 = set(primary_keys_list_2) - - # Verify by private keys that data is unique for page - self.assertEqual(len(primary_keys_page_1), page_size) - self.assertTrue(primary_keys_page_1.isdisjoint(primary_keys_page_2)) + # Verify that for each stream you can get multiple pages of data + self.assertGreater(record_count_sync, page_size, + msg="The number of records is not over the stream max limit") + + # Chunk the replicated records (just primary keys) into expected pages + pages = [] + page_count = ceil(len(primary_keys_list) / page_size) + for page_index in range(page_count): + page_start = page_index * page_size + page_end = (page_index + 1) * page_size + pages.append(set(primary_keys_list[page_start:page_end])) + + # Verify by primary keys that data is unique for each page + for current_index, current_page in enumerate(pages): + with self.subTest(current_page_primary_keys=current_page): + + for other_index, other_page in enumerate(pages): + if current_index == other_index: + continue # don't compare the page to itself + + self.assertTrue( + current_page.isdisjoint(other_page), msg=f'other_page_primary_keys={other_page}' + ) \ No newline at end of file diff --git a/tests/test_github_parent_child_independednt.py b/tests/test_github_parent_child_independednt.py new file 
mode 100644 index 00000000..eb28da8c --- /dev/null +++ b/tests/test_github_parent_child_independednt.py @@ -0,0 +1,48 @@ +from tap_tester import runner, connections +from base import TestGithubBase + +class GithubParentChildIndependentTest(TestGithubBase): + + def name(self): + return "tap_tester_github_parent_child_test" + + def test_first_level_child_streams(self): + """ + Test case to verify that tap is working fine if only first level child streams are selected + """ + # Select first_level_child_streams only and run test + first_level_child_streams = {"team_members", "project_columns", "reviews", "review_comments", "pr_commits"} + self.run_test(first_level_child_streams) + + def test_second_level_child_streams(self): + """ + Test case to verify that tap is working fine if only second level child streams are selected + """ + # Select second_level_child_streams only and run test + second_level_child_streams = {"team_memberships", "project_cards"} + self.run_test(second_level_child_streams) + + def run_test(self, child_streams): + """ + Testing that tap is working fine if only child streams are selected + • Verify that if only child streams are selected then only child streams are replicated. + """ + # Instantiate connection + conn_id = connections.ensure_connection(self) + + # Run check mode + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # Table and field selection + test_catalogs = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in child_streams] + + self.perform_and_verify_table_and_field_selection(conn_id, test_catalogs) + + # Run initial sync + record_count_by_stream = self.run_and_verify_sync(conn_id) + synced_records = runner.get_records_from_target_output() + + # Verify no unexpected streams were replicated + synced_stream_names = set(synced_records.keys()) + self.assertSetEqual(child_streams, synced_stream_names) \ No newline at end of file diff --git a/tests/test_github_start_date.py b/tests/test_github_start_date.py index 34065255..5ea10ced 100644 --- a/tests/test_github_start_date.py +++ b/tests/test_github_start_date.py @@ -1,12 +1,13 @@ import os import requests -from tap_tester import connections, runner +from tap_tester import connections, runner, LOGGER from base import TestGithubBase from datetime import datetime, timedelta class GithubStartDateTest(TestGithubBase): + """Test that the start_date configuration is respected""" start_date_1 = "" start_date_2 = "" @@ -31,9 +32,23 @@ def test_run(self): # generate data for 'events' stream self.generate_data() + date_1 = '2020-04-01T00:00:00Z' + date_2 = '2021-10-08T00:00:00Z' + expected_stream_1 = {'commits'} + self.run_test(date_1, date_2, expected_stream_1) + + date_2 = '2022-07-13T00:00:00Z' + expected_stream_2 = {'issue_milestones'} + self.run_test(date_1, date_2, expected_stream_2) + + date_2 = '2022-05-06T00:00:00Z' + expected_stream_3 = {'pull_requests', 'pr_commits', 'review_comments', 'reviews'} + self.run_test(date_1, date_2, expected_stream_3) + + date_2 = '2022-01-27T00:00:00Z' # run the test for all the streams excluding 'events' stream # as for 'events' stream we have to use dynamic dates - self.run_test('2020-04-01T00:00:00Z', '2021-10-08T00:00:00Z', self.expected_streams() - {'events'}) + self.run_test(date_1, date_2, self.expected_streams() - expected_stream_1 - expected_stream_2 - expected_stream_3 - {'events'}) # As per the Documentation: https://docs.github.com/en/rest/reference/activity#events # the 'events' of past 90 days will only be returned @@ -45,13 +60,21 @@ 
def test_run(self): self.run_test(date_1, date_2, {'events'}) def run_test(self, date_1, date_2, streams): - """Instantiate start date according to the desired data set and run the test""" + """ + • Verify that a sync with a later start date has at least one record synced + and less records than the 1st sync with a previous start date + • Verify that each stream has less records than the earlier start date sync + • Verify all data from later start data has bookmark values >= start_date + • Verify that the minimum bookmark sent to the target for the later start_date sync + is greater than or equal to the start date + • Verify by primary key values, that all records in the 1st sync are included in the 2nd sync. + """ self.start_date_1 = date_1 self.start_date_2 = date_2 - start_date_1_epoch = self.dt_to_ts(self.start_date_1) - start_date_2_epoch = self.dt_to_ts(self.start_date_2) + start_date_1_epoch = self.dt_to_ts(self.start_date_1, self.START_DATE_FORMAT) + start_date_2_epoch = self.dt_to_ts(self.start_date_2, self.START_DATE_FORMAT) self.START_DATE = self.start_date_1 @@ -66,8 +89,7 @@ def run_test(self, date_1, date_2, streams): # run check mode found_catalogs_1 = self.run_and_verify_check_mode(conn_id_1) - # print(found_catalogs_1) - + # table and field selection test_catalogs_1_all_fields = [catalog for catalog in found_catalogs_1 if catalog.get('stream_name') in expected_streams] @@ -81,7 +103,7 @@ def run_test(self, date_1, date_2, streams): ### Update START DATE Between Syncs ########################################################################## - print("REPLICATION START DATE CHANGE: {} ===>>> {} ".format(self.START_DATE, self.start_date_2)) + LOGGER.info("REPLICATION START DATE CHANGE: {} ===>>> {} ".format(self.START_DATE, self.start_date_2)) self.START_DATE = self.start_date_2 ########################################################################## @@ -103,19 +125,11 @@ def run_test(self, date_1, date_2, streams): record_count_by_stream_2 = self.run_and_verify_sync(conn_id_2) synced_records_2 = runner.get_records_from_target_output() - # Verify the total number of records replicated in sync 1 is greater than the number - # of records replicated in sync 2 + # verify that sync 2 has at least one record synced and less records than sync 1 + self.assertGreater(sum(record_count_by_stream_2.values()), 0) self.assertGreater(sum(record_count_by_stream_1.values()), sum(record_count_by_stream_2.values())) for stream in expected_streams: - - # There are no data or not enough data for testing for below streams - # commit_comments, releases -> No data in tap-github repositery - # issue_milestones -> One data for isuue_milestones so not able to pass incremental cases - # projects, projects_columns, project_cards -> One record for project so not able to pass incremental cases - if stream in ["commit_comments", "releases", "issue_milestones", "projects", "project_columns", "project_cards"]: - continue - with self.subTest(stream=stream): # expected values @@ -136,29 +150,42 @@ def run_test(self, date_1, date_2, streams): primary_keys_sync_1 = set(primary_keys_list_1) primary_keys_sync_2 = set(primary_keys_list_2) + # verify that sync 2 has at least one record synced + self.assertGreater(record_count_sync_2, 0) + if expected_metadata.get(self.OBEYS_START_DATE): - # Sub stream fetch all data for records of related incremental super stream. - # Data of commit doesn't contain created_at or updated_at field. - # Data of isuue_milestomes contains bookmark key(due_on) with null value also. 
- if not self.is_full_table_sub_stream(stream) and stream != 'commits': - - # Expected bookmark key is one element in set so directly access it - bookmark_keys_list_1 = [message.get('data').get(next(iter(expected_bookmark_keys))) for message in synced_records_1.get(stream).get('messages') - if message.get('action') == 'upsert'] - bookmark_keys_list_2 = [message.get('data').get(next(iter(expected_bookmark_keys))) for message in synced_records_2.get(stream).get('messages') - if message.get('action') == 'upsert'] - - bookmark_key_sync_1 = set(bookmark_keys_list_1) - bookmark_key_sync_2 = set(bookmark_keys_list_2) - - # Verify bookmark key values are greater than or equal to start date of sync 1 - for bookmark_key_value in bookmark_key_sync_1: - self.assertGreaterEqual(self.dt_to_ts(bookmark_key_value), start_date_1_epoch) - - # Verify bookmark key values are greater than or equal to start date of sync 2 - for bookmark_key_value in bookmark_key_sync_2: - self.assertGreaterEqual(self.dt_to_ts(bookmark_key_value), start_date_2_epoch) + # Expected bookmark key is one element in set so directly access it + bookmark_keys_list_1 = [message.get('data').get(next(iter(expected_bookmark_keys))) for message in synced_records_1.get(stream).get('messages') + if message.get('action') == 'upsert'] + bookmark_keys_list_2 = [message.get('data').get(next(iter(expected_bookmark_keys))) for message in synced_records_2.get(stream).get('messages') + if message.get('action') == 'upsert'] + + bookmark_key_sync_1 = set(bookmark_keys_list_1) + bookmark_key_sync_2 = set(bookmark_keys_list_2) + + replication_key_format = self.RECORD_REPLICATION_KEY_FORMAT + # For events stream replication key value is coming in different format + if stream == 'events': + replication_key_format = self.EVENTS_RECORD_REPLICATION_KEY_FORMAT + + # Verify bookmark key values are greater than or equal to start date of sync 1 + for bookmark_key_value in bookmark_key_sync_1: + self.assertGreaterEqual( + self.dt_to_ts(bookmark_key_value, replication_key_format), start_date_1_epoch, + msg="Report pertains to a date prior to our start date.\n" + + "Sync start_date: {}\n".format(self.start_date_1) + + "Record date: {} ".format(bookmark_key_value) + ) + + # Verify bookmark key values are greater than or equal to start date of sync 2 + for bookmark_key_value in bookmark_key_sync_2: + self.assertGreaterEqual( + self.dt_to_ts(bookmark_key_value, replication_key_format), start_date_2_epoch, + msg="Report pertains to a date prior to our start date.\n" + + "Sync start_date: {}\n".format(self.start_date_2) + + "Record date: {} ".format(bookmark_key_value) + ) # Verify the number of records replicated in sync 1 is greater than the number # of records replicated in sync 2 for stream diff --git a/tests/test_github_sync.py b/tests/test_github_sync.py index d8bde66a..244cab7f 100644 --- a/tests/test_github_sync.py +++ b/tests/test_github_sync.py @@ -8,6 +8,14 @@ class TestGithubSync(TestGithubBase): def name(): return "tap_tester_github_sync_test" + def get_properties(self): + + return { + 'start_date' : '2021-10-01T00:00:00Z', + 'base_url': 'https://api.github.com', + 'repository': 'singer-io/test-repo' + } + def test_run(self): """ Testing that sync creates the appropriate catalog with valid metadata. 
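The unit tests in the next file pin down the resume-ordering contract for interrupted syncs: streams are processed in sorted order, rotated so the interrupted stream is synced first, while repositories keep their configured order, rotated to start at `currently_syncing_repo`. As a reading aid, here is a minimal sketch of that behavior, inferred purely from the test expectations below; the function names come from the `tap_github.sync` import in the tests, but the bodies are an assumption, not the tap's actual implementation:

```python
def get_ordered_stream_list(currently_syncing, streams_to_sync):
    """Sort the selected streams, then rotate so the interrupted stream comes first."""
    ordered_list = sorted(streams_to_sync)
    if currently_syncing in ordered_list:
        index = ordered_list.index(currently_syncing)
        ordered_list = ordered_list[index:] + ordered_list[:index]
    return ordered_list

def get_ordered_repos(state, repositories):
    """Rotate the configured repo list to resume at currently_syncing_repo, if still present."""
    syncing_repo = state.get("currently_syncing_repo")
    if syncing_repo in repositories:
        index = repositories.index(syncing_repo)
        return repositories[index:] + repositories[:index]
    return repositories
```

Sorting makes the stream resume order deterministic regardless of catalog order, and falling back to the untouched list covers the case where the interrupted stream or repo was deselected or removed from the config between runs.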
diff --git a/tests/unittests/test_currently_syncing.py b/tests/unittests/test_currently_syncing.py
new file mode 100644
index 00000000..044ae951
--- /dev/null
+++ b/tests/unittests/test_currently_syncing.py
@@ -0,0 +1,114 @@
+import unittest
+from unittest import mock
+from tap_github.sync import (update_currently_syncing_repo, update_currently_syncing,
+                             get_ordered_stream_list, get_ordered_repos)
+
+class TestGetOrderedStreamList(unittest.TestCase):
+    """
+    Test `get_ordered_stream_list` function to get the ordered list of streams
+    """
+
+    streams_to_sync = ["commits", "pull_requests", "collaborators", "releases", "issue_labels", "assignees", "stargazers", "teams"]
+
+    def test_currently_syncing_not_in_list(self):
+        """Test that if the currently syncing stream is not in the `streams_to_sync` list, the function returns the sorted streams_to_sync list."""
+        expected_list = ['assignees', 'collaborators', 'commits', 'issue_labels',
+                         'pull_requests', 'releases', 'stargazers', 'teams']
+        final_list = get_ordered_stream_list("issues", self.streams_to_sync)

+        # Verify with expected ordered list of streams
+        self.assertEqual(final_list, expected_list)
+
+    def test_for_interrupted_sync(self):
+        """Test that when the sync was interrupted, the function returns an ordered list of streams starting with the 'currently_syncing' stream."""
+        expected_list = ['releases', 'stargazers', 'teams', 'assignees', 'collaborators',
+                         'commits', 'issue_labels', 'pull_requests']
+        final_list = get_ordered_stream_list("releases", self.streams_to_sync)
+
+        # Verify with expected ordered list of streams
+        self.assertEqual(final_list, expected_list)
+
+    def test_for_completed_sync(self):
+        """Test that when the sync was not interrupted, the function returns the sorted streams_to_sync list."""
+        expected_list = ['assignees', 'collaborators', 'commits', 'issue_labels',
+                         'pull_requests', 'releases', 'stargazers', 'teams']
+        final_list = get_ordered_stream_list(None, self.streams_to_sync)
+
+        # Verify with expected ordered list of streams
+        self.assertEqual(final_list, expected_list)
+
+class TestGetOrderedRepos(unittest.TestCase):
+
+    """
+    Test `get_ordered_repos` function to get the ordered list of repositories.
+ """ + repo_list = ["org/repo1", "org/repo2", "org/repo3", "org/repo4", "org/repo5"] + + def test_for_interupted_sync(self): + """Test when the sync was interrupted, the function returns ordered list of repositories starting with 'currently_syncing_repo'.""" + state = {"currently_syncing_repo": "org/repo3"} + expected_list = ["org/repo3", "org/repo4", "org/repo5", "org/repo1", "org/repo2"] + final_repo_list = get_ordered_repos(state, self.repo_list) + + # Verify with expected ordered list of repos + self.assertEqual(final_repo_list, expected_list) + + def test_currently_syncing_repo_removed_from_config(self): + """Test if currently syncing repo was removed from config.""" + state = {"currently_syncing_repo": "org/repo3"} + repo_list = ["org/repo1", "org/repo2", "org/repo4", "org/repo5"] + final_repo_list = get_ordered_repos(state, repo_list) + + # Verify with expected ordered list of repos + self.assertEqual(final_repo_list, repo_list) + + def test_for_completed_sync(self): + """Test when sync was not interrupted, the function returns repos list.""" + state = {} + final_repo_list = get_ordered_repos(state, self.repo_list) + + # Verify with expected ordered list of repos + self.assertEqual(final_repo_list, self.repo_list) + +@mock.patch("tap_github.sync.update_currently_syncing") +class TestUpdateCurrentlySyncingRepo(unittest.TestCase): + + """ + Test `update_currently_syncing_repo` function of sync. + """ + def test_adding_repo(self, mock_currently_syncing): + """Test for adding currently syncing repo in state""" + state = {"currently_syncing_repo": None} + update_currently_syncing_repo(state, "org/test-repo") + + # Verify with expected state + self.assertEqual(state, {"currently_syncing_repo": "org/test-repo"}) + + def test_flush_completed_repo(self, mock_currently_syncing): + """Test for removing currently syncing repo from state.""" + state = {"currently_syncing_repo": "org/test-repo"} + update_currently_syncing_repo(state, None) + + # Verify with expected state + self.assertEqual(state, {}) + +class TestUpdateCurrentlySyncing(unittest.TestCase): + + """ + Test `update_currently_syncing` function of sync. 
+ """ + def test_update_syncing_stream(self): + """Test for adding currently syncing stream in state.""" + state = {"currently_syncing": "assignees"} + update_currently_syncing(state, "issues") + + # Verify with expected state + self.assertEqual(state, {"currently_syncing": "issues"}) + + def test_flush_currently_syncing(self): + """Test for removing currently syncing stream from state.""" + state = {"currently_syncing": "assignees"} + update_currently_syncing(state, None) + + # Verify with expected state + self.assertEqual(state, {}) diff --git a/tests/unittests/test_custom_domain.py b/tests/unittests/test_custom_domain.py new file mode 100644 index 00000000..139b2426 --- /dev/null +++ b/tests/unittests/test_custom_domain.py @@ -0,0 +1,29 @@ +import unittest +from unittest import mock +from tap_github.client import GithubClient, DEFAULT_DOMAIN + +@mock.patch('tap_github.GithubClient.verify_access_for_repo', return_value = None) +class TestCustomDomain(unittest.TestCase): + """ + Test custom domain is supported in client + """ + + def test_config_without_domain(self, mock_verify_access): + """ + Test if the domain is not given in the config + """ + mock_config = {'repository': 'singer-io/test-repo', "access_token": ""} + test_client = GithubClient(mock_config) + + # Verify domain in client is default + self.assertEqual(test_client.base_url, DEFAULT_DOMAIN) + + def test_config_with_domain(self, mock_verify_access): + """ + Test if the domain is given in the config + """ + mock_config = {'repository': 'singer-io/test-repo', "base_url": "http://CUSTOM-git.com", "access_token": ""} + test_client = GithubClient(mock_config) + + # Verify domain in client is from config + self.assertEqual(test_client.base_url, mock_config["base_url"]) diff --git a/tests/unittests/test_exception_handling.py b/tests/unittests/test_exception_handling.py index e2c86120..8c381054 100644 --- a/tests/unittests/test_exception_handling.py +++ b/tests/unittests/test_exception_handling.py @@ -1,10 +1,14 @@ from unittest import mock import tap_github +from tap_github.client import GithubClient, raise_for_error, ConflictError, BadRequestException, BadCredentialsException, AuthException, InternalServerError import unittest import requests +from parameterized import parameterized class Mockresponse: - def __init__(self, status_code, json, raise_error, headers={'X-RateLimit-Remaining': 1}, text=None, content=None): + """ Mock response object class.""" + + def __init__(self, status_code, json, raise_error, headers={'X-RateLimit-Remaining': 1}, content=None): self.status_code = status_code self.raise_error = raise_error self.text = json @@ -18,106 +22,101 @@ def raise_for_status(self): raise requests.HTTPError("Sample message") def json(self): + """ Response JSON method.""" return self.text +def get_mock_http_response(status_code, contents): + """Return http mock response.""" + response = requests.Response() + response.status_code = status_code + response._content = contents.encode() + return response + def get_response(status_code, json={}, raise_error=False, content=None): + """ Returns required mock response. 
""" return Mockresponse(status_code, json, raise_error, content=content) +@mock.patch("time.sleep") +@mock.patch("tap_github.client.GithubClient.verify_access_for_repo", return_value = None) @mock.patch("requests.Session.request") @mock.patch("singer.utils.parse_args") class TestExceptionHandling(unittest.TestCase): - def test_zero_content_length(self, mocked_parse_args, mocked_request): - mocked_request.return_value = get_response(400, raise_error = True, content='') - - try: - tap_github.authed_get("", "") - except tap_github.BadRequestException as e: - self.assertEqual(str(e), "HTTP-error-code: 400, Error: The request is missing or has a bad parameter.") - - def test_400_error(self, mocked_parse_args, mocked_request): - mocked_request.return_value = get_response(400, raise_error = True) - - try: - tap_github.authed_get("", "") - except tap_github.BadRequestException as e: - self.assertEqual(str(e), "HTTP-error-code: 400, Error: The request is missing or has a bad parameter.") - def test_401_error(self, mocked_parse_args, mocked_request): - mocked_request.return_value = get_response(401, raise_error = True) + """ + Test Error handling for `authed_get` method in client. + """ + + config = {"access_token": "", "repository": "org/test-repo, singer-io12/*"} + + def test_json_decoder_error(self, mocked_parse_args, mocked_request, mock_verify_access, mock_sleep): + """ + Verify handling of JSONDecoderError from the response. + """ + + mock_response = get_mock_http_response(409, "json_error") + + with self.assertRaises(ConflictError) as e: + raise_for_error(mock_response, "", "", "", True) + + # Verifying the message formed for the custom exception + self.assertEqual(str(e.exception), "HTTP-error-code: 409, Error: The request could not be completed due to a conflict with the current state of the server.") + + @parameterized.expand([ + [400, "The request is missing or has a bad parameter.", BadRequestException, '', {}, 1], + [401, "Invalid authorization credentials.", BadCredentialsException, '', {}, 1], + [403, "User doesn't have permission to access the resource.", AuthException, '', {}, 1], + [500, "An error has occurred at Github's end.", InternalServerError, '', {}, 5], + [301, "The resource you are looking for is moved to another URL.", tap_github.client.MovedPermanentlyError, '', {}, 1], + [304, "The requested resource has not been modified since the last time you accessed it.", tap_github.client.NotModifiedError, '', {}, 1], + [409, "The request could not be completed due to a conflict with the current state of the server.", tap_github.client.ConflictError, '', {}, 1], + [422, "The request was not able to process right now.", tap_github.client.UnprocessableError, '', {}, 1], + [501, "Unknown Error", tap_github.client.Server5xxError, '', {}, 5], + [429, "Too many requests occurred.", tap_github.client.TooManyRequests, '', {}, 5], + ]) + def test_error_message_and_call_count(self, mocked_parse_args, mocked_request, mock_verify_access, mock_sleep, erro_code, error_msg, error_class, content, json_msg, call_count): + """ + - Verify that `authed_get` raises an error with the proper message for different error codes. + - Verify that tap retries 5 times for Server5xxError and RateLimitExceeded error. 
+ """ + mocked_request.return_value = get_response(erro_code, json = json_msg, raise_error = True, content = content) + test_client = GithubClient(self.config) + expected_error_message = "HTTP-error-code: {}, Error: {}".format(erro_code, error_msg) - try: - tap_github.authed_get("", "") - except tap_github.BadCredentialsException as e: - self.assertEqual(str(e), "HTTP-error-code: 401, Error: Invalid authorization credentials.") - - def test_403_error(self, mocked_parse_args, mocked_request): - mocked_request.return_value = get_response(403, raise_error = True) - - try: - tap_github.authed_get("", "") - except tap_github.AuthException as e: - self.assertEqual(str(e), "HTTP-error-code: 403, Error: User doesn't have permission to access the resource.") - - def test_404_error(self, mocked_parse_args, mocked_request): - json = {"message": "Not Found", "documentation_url": "https:/docs.github.com/"} - mocked_request.return_value = get_response(404, json = json, raise_error = True) + with self.assertRaises(error_class) as e: + test_client.authed_get("", "") - try: - tap_github.authed_get("", "") - except tap_github.NotFoundException as e: - self.assertEqual(str(e), "HTTP-error-code: 404, Error: The resource you have specified cannot be found. Please refer '{}' for more details.".format(json.get("documentation_url"))) + # Verifying the message formed for the custom exception + self.assertEqual(str(e.exception), expected_error_message) - def test_404_error_for_teams(self, mocked_parse_args, mocked_request): - json = {"message": "Not Found", "documentation_url": "https:/docs.github.com/"} - - try: - tap_github.raise_for_error(get_response(404, json = json, raise_error = True), "teams") - except tap_github.NotFoundException as e: - self.assertEqual(str(e), "HTTP-error-code: 404, Error: The resource you have specified cannot be found or it is a personal account repository. Please refer '{}' for more details.".format(json.get("documentation_url"))) - - def test_500_error(self, mocked_parse_args, mocked_request): - mocked_request.return_value = get_response(500, raise_error = True) - - try: - tap_github.authed_get("", "") - except tap_github.InternalServerError as e: - self.assertEqual(str(e), "HTTP-error-code: 500, Error: An error has occurred at Github's end.") - - def test_301_error(self, mocked_parse_args, mocked_request): - mocked_request.return_value = get_response(301, raise_error = True) + # Verify the call count for each error. + self.assertEquals(call_count, mocked_request.call_count) - try: - tap_github.authed_get("", "") - except tap_github.MovedPermanentlyError as e: - self.assertEqual(str(e), "HTTP-error-code: 301, Error: The resource you are looking for is moved to another URL.") - - def test_304_error(self, mocked_parse_args, mocked_request): - mocked_request.return_value = get_response(304, raise_error = True) - - try: - tap_github.authed_get("", "") - except tap_github.NotModifiedError as e: - self.assertEqual(str(e), "HTTP-error-code: 304, Error: The requested resource has not been modified since the last time you accessed it.") + @mock.patch("tap_github.client.LOGGER.warning") + def test_skip_404_error(self, mock_logger, mocked_parse_args, mocked_request, mock_verify_access, mock_sleep): + """ + Verify that `authed_get` skip 404 error and print the log message with the proper message. 
+ """ + json = {"message": "Not Found", "documentation_url": "https:/docs.github.com/"} + mocked_request.return_value = get_response(404, json = json, raise_error = True) + expected_message = "HTTP-error-code: 404, Error: The resource you have specified cannot be found. Alternatively the access_token is not valid for the resource. Please refer '{}' for more details.".format(json.get("documentation_url")) + test_client = GithubClient(self.config) - def test_422_error(self, mocked_parse_args, mocked_request): - mocked_request.return_value = get_response(422, raise_error = True) + test_client.authed_get("", "") - try: - tap_github.authed_get("", "") - except tap_github.UnprocessableError as e: - self.assertEqual(str(e), "HTTP-error-code: 422, Error: The request was not able to process right now.") + # Verifying the message formed for the custom exception + self.assertEqual(mock_logger.mock_calls[0], mock.call(expected_message)) - def test_409_error(self, mocked_parse_args, mocked_request): - mocked_request.return_value = get_response(409, raise_error = True) + def test_raise_404_error_for_invalid_repo(self, mocked_parse_args, mocked_request, mock_verify_access, mock_sleep): + """ + Verify that `extract_repos_from_config` raises 404 error if invalid organization in given in the config. + """ + config = {'repository': 'singer-io12/*', "access_token": "TOKEN"} + test_client = GithubClient(config) + mocked_request.return_value = get_response(404, raise_error = True) - try: - tap_github.authed_get("", "") - except tap_github.ConflictError as e: - self.assertEqual(str(e), "HTTP-error-code: 409, Error: The request could not be completed due to a conflict with the current state of the server.") + with self.assertRaises(tap_github.client.NotFoundException) as e: + test_client.extract_repos_from_config() - def test_200_success(self, mocked_parse_args, mocked_request): - json = {"key": "value"} - mocked_request.return_value = get_response(200, json) + # Verifying the message formed for the custom exception + self.assertEqual(str(e.exception), "HTTP-error-code: 404, Error: Please check the organization name 'singer-io12' or you do not have sufficient permissions to access this organization.") - resp = tap_github.authed_get("", "") - self.assertEqual(json, resp.json()) diff --git a/tests/unittests/test_extract_repos_from_config.py b/tests/unittests/test_extract_repos_from_config.py index 4a205696..9d5a84f4 100644 --- a/tests/unittests/test_extract_repos_from_config.py +++ b/tests/unittests/test_extract_repos_from_config.py @@ -1,32 +1,66 @@ +from email.headerregistry import ParameterizedMIMEHeader import unittest -import tap_github +from unittest import mock +from tap_github.client import GithubClient, GithubException +from parameterized import parameterized -@unittest.mock.patch('tap_github.get_all_repos') +@mock.patch('tap_github.client.GithubClient.verify_access_for_repo') +@mock.patch('tap_github.client.GithubClient.get_all_repos') class TestExtractReposFromConfig(unittest.TestCase): + """ + Test `extract_repos_from_config` method from client. 
+ """ - def test_single_repo(self, mocked_get_all_repos): - config = {'repository': 'singer-io/test-repo'} - expected_repositories = ['singer-io/test-repo'] - self.assertEqual(expected_repositories, tap_github.extract_repos_from_config(config)) - - def test_multiple_repos(self, mocked_get_all_repos): - config = {'repository': 'singer-io/test-repo singer-io/tap-github'} - expected_repositories = ['singer-io/test-repo', 'singer-io/tap-github'] - self.assertEqual(expected_repositories, tap_github.extract_repos_from_config(config)) - - def test_org_all_repos(self, mocked_get_all_repos): - config = {'repository': 'singer-io/test-repo test-org/*'} - expected_repositories = [ - 'singer-io/test-repo', - 'test-org/repo1', - 'test-org/repo2', - 'test-org/repo3' - ] - mocked_get_all_repos.return_value = [ - 'test-org/repo1', - 'test-org/repo2', - 'test-org/repo3' - ] - - self.assertEqual(expected_repositories, tap_github.extract_repos_from_config(config)) + @parameterized.expand([ + ['test_single_repo', 'singer-io/test-repo', [], ['singer-io/test-repo'], {'singer-io'}], + ['test_multiple_repos', 'singer-io/test-repo singer-io/tap-github', [], ['singer-io/tap-github', 'singer-io/test-repo'], {'singer-io'}], + ['test_org_all_repos', 'singer-io/test-repo test-org/*', ['test-org/repo1', 'test-org/repo2'], ['singer-io/test-repo', 'test-org/repo1', 'test-org/repo2'], {'singer-io', 'test-org'}] + ]) + def test_extract_repos_from_config(self, mocked_get_all_repos, mock_verify_access, name, repo_paths, all_repos, expected_repos, expected_orgs): + """ + Test `extract_repos_from_config` if only one repo path is given in config. + """ + config = {'repository': repo_paths, "access_token": "TOKEN"} + test_client = GithubClient(config) + mocked_get_all_repos.return_value = all_repos + + actual_repos, actual_orgs = test_client.extract_repos_from_config() + # Verify list of repo path with expected + self.assertEqual((sorted(expected_repos), sorted(expected_orgs)), (sorted(actual_repos), sorted(actual_orgs))) + + @parameterized.expand([ + ['test_organization_without_repo_in_config', 'singer-io', ['singer-io']], + ['test_organization_without_repo_with_slash_in_config', 'singer-io/', ['singer-io/']], + ['test_organization_with_only_slash_in_config', '/', ['/']], + ['test_organization_with_multiple_wrong_formatted_repo_path_in_config', 'singer-io/ /tap-github', ["singer-io/", "/tap-github"]] + ]) + def test_organization_without_repo_in_config(self, mocked_get_all_repos, mock_verify_access, name, repo_paths, expected_repo): + """ + Verify that the tap throws an exception with a proper error message for invalid organization names. 
+ """ + config = {'repository': repo_paths, "access_token": "TOKEN"} + test_client = GithubClient(config) + expected_error_message = "Please provide valid organization/repository for: {}".format(sorted(expected_repo)) + with self.assertRaises(GithubException) as exc: + test_client.extract_repos_from_config() + + # Verify that we get expected error message + self.assertEqual(str(exc.exception), expected_error_message) + + @mock.patch('tap_github.client.LOGGER.warning') + def test_organization_with_duplicate_repo_paths_in_config(self, mock_warn, mocked_get_all_repos, mock_verify_access): + """ + Verify that the tap logs proper warning message for duplicate repos in config and returns list without duplicates + """ + config = {'repository': 'singer-io/tap-github singer-io/tap-github singer-io/test-repo', "access_token": "TOKEN"} + test_client = GithubClient(config) + expected_repos = ['singer-io/tap-github', 'singer-io/test-repo'] + actual_repos, orgs = test_client.extract_repos_from_config() + expected_message = "Duplicate repositories found: %s and will be synced only once." + + # Verify that the logger is called with expected error message + mock_warn.assert_called_with(expected_message, ['singer-io/tap-github']) + + # Verify that extract_repos_from_config() returns repos without duplicates + self.assertEqual(sorted(expected_repos), sorted(actual_repos)) \ No newline at end of file diff --git a/tests/unittests/test_formatting_dates.py b/tests/unittests/test_formatting_dates.py deleted file mode 100644 index 72a70925..00000000 --- a/tests/unittests/test_formatting_dates.py +++ /dev/null @@ -1,120 +0,0 @@ -import unittest -from unittest import mock -import singer -import tap_github.__init__ as tap_github - -class Mockresponse: - def __init__(self, resp, not_list=False): - self.not_list = not_list - self.json_data = resp - self.content = "github" - - def json(self): - if self.not_list: - return self.json_data - return [self.json_data] - -def get_response(json, not_list=False): - if not_list: - yield Mockresponse(json, not_list) - else: - yield Mockresponse(resp=json) - -@mock.patch("tap_github.__init__.authed_get_all_pages") -class TestRateLimit(unittest.TestCase): - - def test_due_on_none_without_state(self, mocked_request): - """ - "due_on" is "None", - so we will get 1 records - """ - json = {"due_on": None} - - mocked_request.return_value = get_response(json) - - init_state = {} - repo_path = "singer-io/tap-github" - - final_state = tap_github.get_all_issue_milestones({}, repo_path, init_state, {}, "") - # as we will get 1 record and initial bookmark is empty, checking that if bookmark exists in state file returned - self.assertTrue(final_state["bookmarks"][repo_path]["issue_milestones"]["since"]) - - def test_due_on_none_with_state(self, mocked_request): - """ - "due_on" is "None", - so we will get 1 records - """ - json = {"due_on": None} - - mocked_request.return_value = get_response(json) - - repo_path = "singer-io/tap-github" - init_state = {'bookmarks': {'singer-io/tap-github': {'issue_milestones': {'since': '2021-05-05T07:20:36.887412Z'}}}} - init_bookmark = singer.utils.strptime_to_utc(init_state["bookmarks"][repo_path]["issue_milestones"]["since"]) - - final_state = tap_github.get_all_issue_milestones({}, repo_path, init_state, {}, "") - last_bookmark = singer.utils.strptime_to_utc(final_state["bookmarks"][repo_path]["issue_milestones"]["since"]) - # as we will get 1 record, final bookmark will be greater than initial bookmark - self.assertGreater(last_bookmark, init_bookmark) - - 
def test_due_on_not_none_1(self, mocked_request): - """ - Bookmark value is smaller than "due_on", - so we will get 1 records - """ - json = {"due_on": "2021-05-07T07:00:00Z"} - - mocked_request.return_value = get_response(json) - mocked_request.singer.write_record.side_effect = None - - repo_path = "singer-io/tap-github" - init_state = {'bookmarks': {'singer-io/tap-github': {'issue_milestones': {'since': '2021-05-05T07:20:36.887412Z'}}}} - init_bookmark = singer.utils.strptime_to_utc(init_state["bookmarks"][repo_path]["issue_milestones"]["since"]) - - final_state = tap_github.get_all_issue_milestones({}, repo_path, init_state, {}, "") - last_bookmark = singer.utils.strptime_to_utc(final_state["bookmarks"][repo_path]["issue_milestones"]["since"]) - # as we will get 1 record, final bookmark will be greater than initial bookmark - self.assertGreater(last_bookmark, init_bookmark) - - def test_due_on_not_none_2(self, mocked_request): - """ - Bookmark value is greater than "due_on", - so we will get 0 records - """ - json = {"due_on": "2021-05-07T07:00:00Z"} - - mocked_request.return_value = get_response(json) - - repo_path = "singer-io/tap-github" - init_state = {'bookmarks': {'singer-io/tap-github': {'issue_milestones': {'since': '2021-05-08T07:20:36.887412Z'}}}} - init_bookmark = init_state["bookmarks"][repo_path]["issue_milestones"]["since"] - - final_state = tap_github.get_all_issue_milestones({}, repo_path, init_state, {}, "") - # as we will get 0 records, initial and final bookmark will be same - self.assertEqual(init_bookmark, final_state["bookmarks"][repo_path]["issue_milestones"]["since"]) - - @mock.patch("singer.write_record") - def test_data_containing_both_values(self, mocked_write_record, mocked_request): - """ - As we have 3 records here, - -> due_on = None - -> due_on > Bookmark - -> due_on < Bookmark - so, here we will get 2 records, - -> due_on = None - -> due_on > Bookmark - """ - json = [{"due_on": "2021-05-07T07:00:00Z"}, {"due_on": "2021-05-09T07:00:00Z"}, {"due_on": None}] - - mocked_request.return_value = get_response(json, True) - - repo_path = "singer-io/tap-github" - init_state = {'bookmarks': {'singer-io/tap-github': {'issue_milestones': {'since': '2021-05-08T07:20:36.887412Z'}}}} - init_bookmark = singer.utils.strptime_to_utc(init_state["bookmarks"][repo_path]["issue_milestones"]["since"]) - - final_state = tap_github.get_all_issue_milestones({}, repo_path, init_state, {}, "") - last_bookmark = singer.utils.strptime_to_utc(final_state["bookmarks"][repo_path]["issue_milestones"]["since"]) - # as we will get 2 record, final bookmark will be greater than initial bookmark - self.assertGreater(last_bookmark, init_bookmark) - # as we will get 2 record, write_records will also be called 2 times - self.assertEqual(mocked_write_record.call_count, 2) diff --git a/tests/unittests/test_get_all_repos.py b/tests/unittests/test_get_all_repos.py index c8ca7a0b..9235acad 100644 --- a/tests/unittests/test_get_all_repos.py +++ b/tests/unittests/test_get_all_repos.py @@ -1,9 +1,10 @@ import unittest +from unittest import mock import requests import requests_mock import simplejson as json -import tap_github +from tap_github.client import GithubClient from itertools import cycle @@ -12,12 +13,23 @@ ADAPTER = requests_mock.Adapter() SESSION.mount('mock://', ADAPTER) +class MockResponse(): + """ Mock response object class.""" -@unittest.mock.patch('tap_github.verify_repo_access') -@unittest.mock.patch('tap_github.authed_get_all_pages') + def __init__(self, links): + self.links = links + 
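The `MockResponse(links)` helper above stands in for the `links` attribute that `requests` exposes on every response (the parsed GitHub `Link` pagination header). As a minimal sketch of the contract the pagination tests below assume, `authed_get_all_pages` presumably keeps following `links["next"]["url"]` until no next page remains; the function name here is illustrative, not the tap's exact implementation:

```python
import requests

# A minimal sketch, not the tap's actual code: follow GitHub-style
# Link-header pagination via `Response.links` until "next" disappears,
# yielding one response per page.
def follow_pages(session: requests.Session, url: str, params=None):
    while url:
        response = session.get(url, params=params, timeout=300)
        response.raise_for_status()
        yield response
        # `Response.links` looks like {"next": {"url": "..."}} while more pages exist
        url = response.links.get("next", {}).get("url")
```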
+@mock.patch('tap_github.client.GithubClient.verify_repo_access') +@mock.patch('tap_github.client.GithubClient.authed_get_all_pages') class TestGetAllRepos(unittest.TestCase): + """ + Test `get_all_repos` method from client. + """ + config = {"access_token": "", "repository": "test-org/repo1 test-org/repo2 test-org/repo3"} def test_single_organization(self, mocked_authed_get_all_pages, mocked_verify_repo_access): + """Verify for single organisation with all repos.""" + test_client = GithubClient(self.config) orgs = ['test-org/*'] repos = ['repo1', 'repo2', 'repo3'] @@ -39,9 +51,12 @@ def test_single_organization(self, mocked_authed_get_all_pages, mocked_verify_re ] mocked_authed_get_all_pages.return_value = [mocked_response] - self.assertEqual(expected_repositories, tap_github.get_all_repos(orgs)) + # Verify expected list of repo paths + self.assertEqual(expected_repositories, test_client.get_all_repos(orgs)) def test_multiple_organizations(self, mocked_authed_get_all_pages, mocked_verify_repo_access): + """Verify for multiple organisations with all repos.""" + test_client = GithubClient(self.config) orgs = ['test-org/*', 'singer-io/*'] repos = ['repo1', 'repo2', 'repo3'] @@ -58,7 +73,7 @@ def test_multiple_organizations(self, mocked_authed_get_all_pages, mocked_verify mocked_response = SESSION.get(mocked_url) mocked_authed_get_all_pages.return_value = [mocked_response] - call_response = tap_github.get_all_repos([org]) + call_response = test_client.get_all_repos([org]) side_effect.extend(call_response) @@ -71,4 +86,42 @@ def test_multiple_organizations(self, mocked_authed_get_all_pages, mocked_verify 'singer-io/repo3' ] + # Verify expected list of repo paths self.assertListEqual(expected_repositories, side_effect) + +@mock.patch('tap_github.client.GithubClient.verify_repo_access') +@mock.patch('tap_github.client.GithubClient.authed_get') +class TestAuthedGetAllPages(unittest.TestCase): + """ + Test `authed_get_all_pages` method from client. 
+ """ + config = {"access_token": "", "repository": "test-org/repo1"} + + def test_for_one_page(self, mock_auth_get, mock_verify_access): + + """Verify `authed_get` is called only once if one page is available.""" + + test_client = GithubClient(self.config) + mock_auth_get.return_value = MockResponse({}) + + list(test_client.authed_get_all_pages("", "mock_url", {})) + + # Verify `auth_get` call count + self.assertEqual(mock_auth_get.call_count, 1) + + def test_for_multiple_pages(self, mock_auth_get, mock_verify_access): + + """Verify `authed_get` is called equal number times as pages available.""" + + test_client = GithubClient(self.config) + mock_auth_get.side_effect = [MockResponse({"next": {"url": "mock_url_2"}}),MockResponse({"next": {"url": "mock_url_3"}}),MockResponse({})] + + list(test_client.authed_get_all_pages("", "mock_url_1", {})) + + # Verify `auth_get` call count + self.assertEqual(mock_auth_get.call_count, 3) + + # Verify `auth_get` calls with expected url + self.assertEqual(mock_auth_get.mock_calls[0], mock.call("", "mock_url_1", {}, '', True)) + self.assertEqual(mock_auth_get.mock_calls[1], mock.call("", "mock_url_2", {}, '', True)) + self.assertEqual(mock_auth_get.mock_calls[2], mock.call("", "mock_url_3", {}, '', True)) diff --git a/tests/unittests/test_get_streams_and_state_translate.py b/tests/unittests/test_get_streams_and_state_translate.py new file mode 100644 index 00000000..c862f7b3 --- /dev/null +++ b/tests/unittests/test_get_streams_and_state_translate.py @@ -0,0 +1,135 @@ +import unittest +from tap_github.sync import get_selected_streams, translate_state, get_stream_to_sync +from parameterized import parameterized + +def get_stream_catalog(stream_name, selected_in_metadata = False): + """Return catalog for stream""" + return { + "schema":{}, + "tap_stream_id": stream_name, + "key_properties": [], + "metadata": [ + { + "breadcrumb": [], + "metadata":{"selected": selected_in_metadata} + } + ] + } + +class TestTranslateState(unittest.TestCase): + """ + Testcase for `translate_state` in sync + """ + + catalog = { + "streams": [ + get_stream_catalog("comments"), + get_stream_catalog("releases"), + get_stream_catalog("issue_labels"), + get_stream_catalog("issue_events") + ] + } + + def test_newer_format_state_with_repo_name(self): + """Verify that `translate_state` return the state itself if a newer format bookmark is found.""" + state = { + "bookmarks": { + "org/test-repo" : { + "comments": {"since": "2019-01-01T00:00:00Z"} + }, + "org/test-repo2" : {} + } + } + + final_state = translate_state(state, self.catalog, ["org/test-repo", "org/test-repo2"]) + self.assertEqual(state, dict(final_state)) + + def test_older_format_state_without_repo_name(self): + """Verify that `translate_state` migrate each stream's bookmark into the repo name""" + older_format_state = { + "bookmarks": { + "comments": {"since": "2019-01-01T00:00:00Z"} + } + } + expected_state = { + "bookmarks": { + "org/test-repo" : { + "comments": {"since": "2019-01-01T00:00:00Z"} + }, + "org/test-repo2" : { + "comments": {"since": "2019-01-01T00:00:00Z"} + } + } + } + final_state = translate_state(older_format_state, self.catalog, ["org/test-repo", "org/test-repo2"]) + self.assertEqual(expected_state, dict(final_state)) + + def test_with_empty_state(self): + """Verify for empty state""" + + final_state = translate_state({}, self.catalog, ["org/test-repo"]) + + self.assertEqual({}, dict(final_state)) + + def test_state_with_no_previous_repo_name_newer_format_bookmark(self): + """Verify that `translate_state` 
returns the existing state if none of the previously synced repos is selected in the current sync.""" + newer_format_state = { + "bookmarks": { + "org/test-repo" : { + "comments": {"since": "2019-01-01T00:00:00Z"} + }, + "org/test-repo2" : {} + } + } + final_state = translate_state(newer_format_state, self.catalog, ["org/test-repo3", "org/test-repo4"]) + self.assertEqual(newer_format_state, dict(final_state)) + + def test_state_with_no_previous_repo_name_old_format_bookmark(self): + """Verify that `translate_state` migrates each stream's bookmark under the repo name.""" + older_format_state = { + "bookmarks": { + "comments": {"since": "2019-01-01T00:00:00Z"} + } + } + expected_state = { + "bookmarks": { + "org/test-repo3" : { + "comments": {"since": "2019-01-01T00:00:00Z"} + }, + "org/test-repo4" : { + "comments": {"since": "2019-01-01T00:00:00Z"} + } + } + } + final_state = translate_state(older_format_state, self.catalog, ["org/test-repo3", "org/test-repo4"]) + self.assertEqual(expected_state, dict(final_state)) + +class TestGetStreamsToSync(unittest.TestCase): + """ + Testcase for `get_stream_to_sync` in sync + """ + + def get_catalog(self, parent=False, mid_child = False, child = False): + return { + "streams": [ + get_stream_catalog("projects", selected_in_metadata=parent), + get_stream_catalog("project_columns", selected_in_metadata=mid_child), + get_stream_catalog("project_cards", selected_in_metadata=child), + get_stream_catalog("teams", selected_in_metadata=parent), + get_stream_catalog("team_members", selected_in_metadata=mid_child), + get_stream_catalog("team_memberships", selected_in_metadata=child), + get_stream_catalog("assignees", selected_in_metadata=parent), + ] + } + + @parameterized.expand([ + ['test_parent_selected', ["assignees", "projects", "teams"], True, False, False], + ['test_mid_child_selected', ["projects", "project_columns", "teams", "team_members"], False, True, False], + ['test_lowest_child_selected', ["projects", "project_columns", "project_cards", "teams", "team_members", "team_memberships"], False, False, True] + ]) + def test_stream_selection(self, name, expected_streams, is_parent, is_mid_child, is_child): + """Test that if only a child or mid-level child is selected in the catalog, `get_stream_to_sync` also returns the parent streams.""" + catalog = self.get_catalog(parent=is_parent, mid_child=is_mid_child, child=is_child) + sync_streams = get_stream_to_sync(catalog) + + self.assertEqual(sync_streams, expected_streams) diff --git a/tests/unittests/test_key_error.py b/tests/unittests/test_key_error.py deleted file mode 100644 index 7e5bb28c..00000000 --- a/tests/unittests/test_key_error.py +++ /dev/null @@ -1,160 +0,0 @@ -import unittest -from unittest import mock -import tap_github.__init__ as tap_github - -class Mockresponse: - def __init__(self, resp): - self.json_data = resp - self.content = "github" - - def json(self): - return [(self.json_data)] - -def get_response(json): - yield Mockresponse(resp=json) - -@mock.patch("tap_github.__init__.authed_get_all_pages") -class TestKeyErrorSlug(unittest.TestCase): - - @mock.patch("tap_github.__init__.get_all_team_members") - def test_slug_sub_stream_selected_slug_selected(self, mocked_team_members, mocked_request): - json = {"key": "value", "slug": "team-slug"} - - mocked_request.return_value = get_response(json) - - schemas = {"teams": "None", "team_members": "None"} - mdata_slug = [ - { - 'breadcrumb': [], - 'metadata': {'selected': True, 'table-key-properties': ['id']} - }, - { - 'breadcrumb': ['properties', 'slug'],
'metadata': {'inclusion': 'available'} - }, - { - "breadcrumb": [ "properties", "name"], - "metadata": {"inclusion": "available"} - }] - mdata = {"teams": mdata_slug, "team_members": mdata_slug} - tap_github.get_all_teams(schemas, "tap-github", {}, mdata, "") - self.assertEqual(mocked_team_members.call_count, 1) - - @mock.patch("tap_github.__init__.get_all_team_members") - def test_slug_sub_stream_not_selected_slug_selected(self, mocked_team_members, mocked_request): - json = {"key": "value", "slug": "team-slug"} - - mocked_request.return_value = get_response(json) - - schemas = {"teams": "None"} - mdata = {"teams": [ - { - 'breadcrumb': [], - 'metadata': {'selected': True, 'table-key-properties': ['id']} - }, - { - 'breadcrumb': ['properties', 'slug'], - 'metadata': {'inclusion': 'available'} - }, - { - "breadcrumb": [ "properties", "name"], - "metadata": {"inclusion": "available"} - }]} - tap_github.get_all_teams(schemas, "tap-github", {}, mdata, "") - self.assertEqual(mocked_team_members.call_count, 0) - - @mock.patch("tap_github.__init__.get_all_team_members") - def test_slug_sub_stream_selected_slug_not_selected(self, mocked_team_members, mocked_request): - json = {"key": "value", "slug": "team-slug"} - - mocked_request.return_value = get_response(json) - - schemas = {"teams": "None", "team_members": "None"} - mdata_slug = [ - { - 'breadcrumb': [], - 'metadata': {'selected': True, 'table-key-properties': ['id']} - }, - { - 'breadcrumb': ['properties', 'slug'], - 'metadata': {'inclusion': 'available', 'selected': False} - }, - { - "breadcrumb": [ "properties", "name"], - "metadata": {"inclusion": "available"} - }] - mdata = {"teams": mdata_slug, "team_members": mdata_slug} - tap_github.get_all_teams(schemas, "tap-github", {}, mdata, "") - self.assertEqual(mocked_team_members.call_count, 1) - - @mock.patch("tap_github.__init__.get_all_team_members") - def test_slug_sub_stream_not_selected_slug_not_selected(self, mocked_team_members, mocked_request): - json = {"key": "value", "slug": "team-slug"} - - mocked_request.return_value = get_response(json) - - schemas = {"teams": "None"} - mdata = {"teams": [ - { - 'breadcrumb': [], - 'metadata': {'selected': True, 'table-key-properties': ['id']} - }, - { - 'breadcrumb': ['properties', 'slug'], - 'metadata': {'inclusion': 'available', 'selected': False} - }, - { - "breadcrumb": [ "properties", "name"], - "metadata": {"inclusion": "available"} - }]} - tap_github.get_all_teams(schemas, "tap-github", {}, mdata, "") - self.assertEqual(mocked_team_members.call_count, 0) - -@mock.patch("tap_github.__init__.authed_get_all_pages") -class TestKeyErrorUser(unittest.TestCase): - - @mock.patch("singer.write_record") - def test_user_not_selected_in_stargazers(self, mocked_write_records, mocked_request): - json = {"key": "value", "user": {"id": 1}} - - mocked_request.return_value = get_response(json) - - schemas = {"teams": "None"} - mdata = [ - { - 'breadcrumb': [], - 'metadata': {'selected': True, 'table-key-properties': ['user_id']} - }, - { - "breadcrumb": ["properties", "user"], - "metadata": {"inclusion": "available", "selected": False} - }, - { - "breadcrumb": ["properties", "starred_at"], - "metadata": {"inclusion": "available"} - }] - tap_github.get_all_stargazers(schemas, "tap-github", {}, mdata, "") - self.assertEqual(mocked_write_records.call_count, 1) - - @mock.patch("singer.write_record") - def test_user_selected_in_stargazers(self, mocked_write_records, mocked_request): - json = {"key": "value", "user": {"id": 1}} - - 
mocked_request.return_value = get_response(json) - - schemas = {"stargazers": "None"} - mdata = [ - { - 'breadcrumb': [], - 'metadata': {'selected': True, 'table-key-properties': ['user_id']} - }, - { - "breadcrumb": ["properties", "user"], - "metadata": {"inclusion": "available"} - }, - { - "breadcrumb": ["properties", "starred_at"], - "metadata": {"inclusion": "available"} - }] - tap_github.get_all_stargazers(schemas, "tap-github", {}, mdata, "") - self.assertEqual(mocked_write_records.call_count, 1) diff --git a/tests/unittests/test_main.py b/tests/unittests/test_main.py new file mode 100644 index 00000000..44d5d22c --- /dev/null +++ b/tests/unittests/test_main.py @@ -0,0 +1,103 @@ +import unittest +from unittest import mock +from tap_github import main +from tap_github.discover import discover + +class MockArgs: + """Mock args object class""" + + def __init__(self, config = None, properties = None, state = None, discover = False) -> None: + self.config = config + self.properties = properties + self.state = state + self.discover = discover + +@mock.patch("tap_github.GithubClient") +@mock.patch("singer.utils.parse_args") +class TestDiscoverMode(unittest.TestCase): + """ + Test main function for discover mode + """ + + mock_config = {"start_date": "", "access_token": ""} + + @mock.patch("tap_github._discover") + def test_discover_with_config(self, mock_discover, mock_args, mock_client): + """Test `_discover` function is called for discover mode""" + mock_discover.return_value = dict() + mock_args.return_value = MockArgs(discover = True, config = self.mock_config) + main() + + self.assertTrue(mock_discover.called) + + +@mock.patch("tap_github.GithubClient") +@mock.patch("singer.utils.parse_args") +@mock.patch("tap_github._sync") +class TestSyncMode(unittest.TestCase): + """ + Test main function for sync mode + """ + + mock_config = {"start_date": "", "access_token": ""} + mock_catalog = {"streams": [{"stream": "teams", "schema": {}, "metadata": {}}]} + + @mock.patch("tap_github._discover") + def test_sync_with_properties(self, mock_discover, mock_sync, mock_args, mock_client): + """Test sync mode with properties given in args""" + + mock_client.return_value = "mock_client" + mock_args.return_value = MockArgs(config=self.mock_config, properties=self.mock_catalog) + main() + + # Verify `_sync` is called with expected arguments + mock_sync.assert_called_with("mock_client", self.mock_config, {}, self.mock_catalog) + + # verify `_discover` function is not called + self.assertFalse(mock_discover.called) + + @mock.patch("tap_github._discover") + def test_sync_without_properties(self, mock_discover, mock_sync, mock_args, mock_client): + """Test sync mode without properties given in args""" + + mock_discover.return_value = {"schema": "", "metadata": ""} + mock_client.return_value = "mock_client" + mock_args.return_value = MockArgs(config=self.mock_config) + main() + + # Verify `_sync` is called with expected arguments + mock_sync.assert_called_with("mock_client", self.mock_config, {}, {"schema": "", "metadata": ""}) + + # verify `_discover` function is called + self.assertTrue(mock_discover.called) + + def test_sync_with_state(self, mock_sync, mock_args, mock_client): + """Test sync mode with state given in args""" + mock_state = {"bookmarks": {"projects": ""}} + mock_client.return_value = "mock_client" + mock_args.return_value = MockArgs(config=self.mock_config, properties=self.mock_catalog, state=mock_state) + main() + + # Verify `_sync` is called with expected arguments + 
mock_sync.assert_called_with("mock_client", self.mock_config, mock_state, self.mock_catalog) + +@mock.patch("tap_github.GithubClient") +class TestDiscover(unittest.TestCase): + """Test `discover` function.""" + + def test_discover(self, mock_client): + + return_catalog = discover(mock_client) + + self.assertIsInstance(return_catalog, dict) + + @mock.patch("tap_github.discover.Schema") + @mock.patch("tap_github.discover.LOGGER.error") + def test_discover_error_handling(self, mock_logger, mock_schema, mock_client): + """Test discover function if exception arises.""" + mock_schema.from_dict.side_effect = [Exception] + with self.assertRaises(Exception): + discover(mock_client) + + # Verify logger called 3 times when an exception arises. + self.assertEqual(mock_logger.call_count, 3) diff --git a/tests/unittests/test_rate_limit.py b/tests/unittests/test_rate_limit.py index 7fb01873..987c60a0 100644 --- a/tests/unittests/test_rate_limit.py +++ b/tests/unittests/test_rate_limit.py @@ -1,17 +1,26 @@ -import tap_github.__init__ as tap_github +import tap_github +from tap_github.client import rate_throttling, GithubException import unittest from unittest import mock import time import requests +DEFAULT_SLEEP_SECONDS = 600 def api_call(): return requests.get("https://api.github.com/rate_limit") @mock.patch('time.sleep') class TestRateLimit(unittest.TestCase): + """ + Test `rate_throttling` function from client. + """ + config = {"access_token": "", "repository": "singer-io/tap-github"} def test_rate_limt_wait(self, mocked_sleep): + """ + Test `rate_throttling` for 'sleep_time' less than `MAX_SLEEP_SECONDS` + """ mocked_sleep.side_effect = None @@ -19,13 +28,17 @@ def test_rate_limt_wait(self, mocked_sleep): resp.headers["X-RateLimit-Reset"] = int(round(time.time(), 0)) + 120 resp.headers["X-RateLimit-Remaining"] = 0 - tap_github.rate_throttling(resp) + rate_throttling(resp, DEFAULT_SLEEP_SECONDS) + # Verify `time.sleep` is called with expected seconds in response mocked_sleep.assert_called_with(120) self.assertTrue(mocked_sleep.called) def test_rate_limit_exception(self, mocked_sleep): + """ + Test `rate_throttling` for 'sleep_time' greater than `MAX_SLEEP_SECONDS` + """ mocked_sleep.side_effect = None @@ -33,13 +46,16 @@ resp.headers["X-RateLimit-Reset"] = int(round(time.time(), 0)) + 601 resp.headers["X-RateLimit-Remaining"] = 0 - try: - tap_github.rate_throttling(resp) - except tap_github.RateLimitExceeded as e: - self.assertEqual(str(e), "API rate limit exceeded, please try after 601 seconds.") + # Verify exception is raised with proper message + with self.assertRaises(tap_github.client.RateLimitExceeded) as e: + rate_throttling(resp, DEFAULT_SLEEP_SECONDS) + self.assertEqual(str(e.exception), "API rate limit exceeded, please try after 601 seconds.") def test_rate_limit_not_exceeded(self, mocked_sleep): + """ + Test `rate_throttling` if sleep time does not exceed limit + """ mocked_sleep.side_effect = None @@ -47,6 +63,20 @@ resp.headers["X-RateLimit-Reset"] = int(round(time.time(), 0)) + 10 resp.headers["X-RateLimit-Remaining"] = 5 - tap_github.rate_throttling(resp) + rate_throttling(resp, DEFAULT_SLEEP_SECONDS) + # Verify that `time.sleep` is not called self.assertFalse(mocked_sleep.called) + + def test_rate_limit_header_not_found(self, mocked_sleep): + """ + Test that the `rate_throttling` function raises an exception if `X-RateLimit-Reset` key is not found in the header.
+ """ + resp = api_call() + resp.headers={} + + with self.assertRaises(GithubException) as e: + rate_throttling(resp, DEFAULT_SLEEP_SECONDS) + + # Verifying the message formed for the invalid base URL + self.assertEqual(str(e.exception), "The API call using the specified base url was unsuccessful. Please double-check the provided base URL.") diff --git a/tests/unittests/test_stargazers_full_table.py b/tests/unittests/test_stargazers_full_table.py deleted file mode 100644 index 47cb7089..00000000 --- a/tests/unittests/test_stargazers_full_table.py +++ /dev/null @@ -1,14 +0,0 @@ -import unittest -from unittest import mock -import tap_github.__init__ as tap_github - -@mock.patch("tap_github.__init__.authed_get_all_pages") -class TestStargazersFullTable(unittest.TestCase): - - def test_stargazers_without_query_params(self, mocked_request): - - schemas = {"stargazers": "None"} - - tap_github.get_all_stargazers(schemas, "tap-github", {}, {}, "") - - mocked_request.assert_called_with(mock.ANY, "https://api.github.com/repos/tap-github/stargazers", mock.ANY) diff --git a/tests/unittests/test_start_date_bookmark.py b/tests/unittests/test_start_date_bookmark.py deleted file mode 100644 index 8cfb4b18..00000000 --- a/tests/unittests/test_start_date_bookmark.py +++ /dev/null @@ -1,42 +0,0 @@ -import tap_github -import unittest -from unittest import mock - -@mock.patch("singer.bookmarks.get_bookmark") -class TestBookmarkStartDate(unittest.TestCase): - - def test_no_bookmark_no_start_date(self, mocked_get_bookmark): - # Start date is none and bookmark is not present then None should be return. - mocked_get_bookmark.return_value = None - start_date = None - bookmark_key = 'since' - expected_bookmark_value = None - - self.assertEqual(expected_bookmark_value, tap_github.get_bookmark('', '', '', bookmark_key, start_date)) - - def test_no_bookmark_yes_start_date(self, mocked_get_bookmark): - # Start date is present and bookmark is not present then start date should be return. - mocked_get_bookmark.return_value = None - start_date = '2021-04-01T00:00:00.000000Z' - bookmark_key = 'since' - expected_bookmark_value = '2021-04-01T00:00:00.000000Z' - - self.assertEqual(expected_bookmark_value, tap_github.get_bookmark('', '', '', bookmark_key, start_date)) - - def test_yes_bookmark_yes_start_date(self, mocked_get_bookmark): - # Start date and bookmark both are present then bookmark should be return. - mocked_get_bookmark.return_value = {"since" : "2021-05-01T00:00:00.000000Z"} - start_date = '2021-04-01T00:00:00.000000Z' - bookmark_key = 'since' - expected_bookmark_value = '2021-05-01T00:00:00.000000Z' - - self.assertEqual(expected_bookmark_value, tap_github.get_bookmark('', '', '', bookmark_key, start_date)) - - def test_yes_bookmark_no_start_date(self, mocked_get_bookmark): - # Start date is not present and bookmark is present then bookmark should be return. 
- mocked_get_bookmark.return_value = {"since" : "2021-05-01T00:00:00.000000Z"} - start_date = None - bookmark_key = 'since' - expected_bookmark_value = '2021-05-01T00:00:00.000000Z' - - self.assertEqual(expected_bookmark_value, tap_github.get_bookmark('', '', '', bookmark_key, start_date)) diff --git a/tests/unittests/test_stream.py b/tests/unittests/test_stream.py new file mode 100644 index 00000000..27cf49fa --- /dev/null +++ b/tests/unittests/test_stream.py @@ -0,0 +1,189 @@ +import unittest +from unittest import mock +from tap_github.streams import Comments, ProjectColumns, Projects, Reviews, TeamMemberships, Teams, PullRequests, get_schema, get_child_full_url, get_bookmark +from parameterized import parameterized + + +class TestGetSchema(unittest.TestCase): + """ + Test `get_schema` method of the stream class + """ + + def test_get_schema(self): + """Verify function returns expected schema""" + catalog = [ + {"tap_stream_id": "projects"}, + {"tap_stream_id": "comments"}, + {"tap_stream_id": "events"}, + ] + expected_schema = {"tap_stream_id": "comments"} + + # Verify returned schema is same as expected schema + self.assertEqual(get_schema(catalog, "comments"), expected_schema) + + +class TestGetBookmark(unittest.TestCase): + """ + Test `get_bookmark` method + """ + + test_stream = Comments() + + def test_with_out_repo_path(self): + """ + Test if the state does not contain a repo path + """ + state = { + "bookmarks": { + "projects": {"since": "2022-01-01T00:00:00Z"} + } + } + returned_bookmark = get_bookmark(state, "org/test-repo", "projects", "since", "2021-01-01T00:00:00Z") + self.assertEqual(returned_bookmark, "2021-01-01T00:00:00Z") + + def test_with_repo_path(self): + """ + Test if the state does contain a repo path + """ + state = { + "bookmarks": { + "org/test-repo": { + "projects": {"since": "2022-01-01T00:00:00Z"} + } + } + } + returned_bookmark = get_bookmark(state, "org/test-repo", "projects", "since", "2021-01-01T00:00:00Z") + self.assertEqual(returned_bookmark, "2022-01-01T00:00:00Z") + +class TestBuildUrl(unittest.TestCase): + """ + Test the `build_url` method of the stream class + """ + + @parameterized.expand([ + ["test_stream_with_filter_params", "org/test-repo", "https://api.github.com/repos/org/test-repo/issues/comments?sort=updated&direction=desc?since=2022-01-01T00:00:00Z", Comments], + ["test_stream_with_organization", "org", "https://api.github.com/orgs/org/teams", Teams] + ]) + def test_build_url(self, name, param, expected_url, stream_class): + """ + Test the `build_url` method for filter param or organization name only.
+ """ + test_streams = stream_class() + full_url = test_streams.build_url("https://api.github.com", param, "2022-01-01T00:00:00Z") + + # verify returned url is expected + self.assertEqual(expected_url, full_url) + + +class GetMinBookmark(unittest.TestCase): + """ + Test `get_min_bookmark` method of the stream class + """ + + start_date = "2020-04-01T00:00:00Z" + state = { + "bookmarks": { + "org/test-repo": { + "projects": {"since": "2022-03-29T00:00:00Z"}, + "project_columns": {"since": "2022-03-01T00:00:00Z"}, + "project_cards": {"since": "2022-03-14T00:00:00Z"}, + "pull_requests": {"since": "2022-04-01T00:00:00Z"}, + "review_comments": {"since": "2022-03-01T00:00:00Z"}, + "pr_commits": {"since": "2022-02-01T00:00:00Z"}, + "reviews": {"since": "2022-05-01T00:00:00Z"} + } + } + } + + @parameterized.expand([ + ["test_multiple_children", PullRequests, "pull_requests", ["pull_requests","review_comments", "pr_commits"], "2022-04-01T00:00:00Z", "2022-02-01T00:00:00Z"], + ["test_children_with_only_parent_selected", PullRequests, "pull_requests", ["pull_requests"], "2022-04-01T00:00:00Z", "2022-04-01T00:00:00Z"], + ["test_for_mid_child_in_stream", Projects, "projects", ["projects", "project_columns"], "2022-03-29T00:00:00Z", "2022-03-01T00:00:00Z"], + ["test_nested_child_bookmark", Projects, "projects", ["projects", "project_cards"], "2022-03-29T00:00:00Z", "2022-03-14T00:00:00Z"] + ]) + def test_multiple_children(self, name, stream_class, stream_name, stream_to_sync, current_date, expected_bookmark): + """ + Test that `get_min_bookmark` method returns the minimum bookmark from the parent and its corresponding child bookmarks. + """ + test_stream = stream_class() + bookmark = test_stream.get_min_bookmark(stream_name, stream_to_sync, + current_date, "org/test-repo", self.start_date, self.state) + + # Verify returned bookmark is expected + self.assertEqual(bookmark, expected_bookmark) + + +@mock.patch("singer.write_bookmark") +class TestWriteBookmark(unittest.TestCase): + """ + Test the `write_bookmarks` method of the stream class + """ + + state = { + "bookmarks": { + "org/test-repo": { + "projects": {"since": "2021-03-29T00:00:00Z"}, + "project_columns": {"since": "2021-03-01T00:00:00Z"}, + "project_cards": {"since": "2021-03-14T00:00:00Z"}, + "pull_requests": {"since": "2021-04-01T00:00:00Z"}, + "review_comments": {"since": "2021-03-01T00:00:00Z"}, + "pr_commits": {"since": "2021-02-01T00:00:00Z"}, + "reviews": {"since": "2021-05-01T00:00:00Z"} + } + } + } + + def test_multiple_child(self, mock_write_bookmark): + """ + Test for a stream with multiple children is selected + """ + test_stream = PullRequests() + test_stream.write_bookmarks("pull_requests", ["pull_requests","review_comments", "pr_commits"], + "2022-04-01T00:00:00Z", "org/test-repo", self.state) + + expected_calls = [ + mock.call(mock.ANY, mock.ANY, "pull_requests", {"since": "2022-04-01T00:00:00Z"}), + mock.call(mock.ANY, mock.ANY, "pr_commits", {"since": "2022-04-01T00:00:00Z"}), + mock.call(mock.ANY, mock.ANY, "review_comments", {"since": "2022-04-01T00:00:00Z"}), + ] + + # Verify `write_bookmark` is called for all selected streams + self.assertEqual(mock_write_bookmark.call_count, 3) + + self.assertIn(mock_write_bookmark.mock_calls[0], expected_calls) + self.assertIn(mock_write_bookmark.mock_calls[1], expected_calls) + self.assertIn(mock_write_bookmark.mock_calls[2], expected_calls) + + def test_nested_child(self, mock_write_bookmark): + """ + Test for the stream if the nested child is selected + """ + test_stream = Projects() + 
test_stream.write_bookmarks("projects", ["project_cards"], + "2022-04-01T00:00:00Z", "org/test-repo", self.state) + + # Verify `write_bookmark` is called for all selected streams + self.assertEqual(mock_write_bookmark.call_count, 1) + mock_write_bookmark.assert_called_with(mock.ANY, mock.ANY, + "project_cards", {"since": "2022-04-01T00:00:00Z"}) + + +class TestGetChildUrl(unittest.TestCase): + """ + Test `get_child_full_url` method of stream class + """ + domain = 'https://api.github.com' + + @parameterized.expand([ + ["test_child_stream", ProjectColumns, "org1/test-repo", "https://api.github.com/projects/1309875/columns", None, (1309875,)], + ["test_child_is_repository", Reviews, "org1/test-repo", "https://api.github.com/repos/org1/test-repo/pulls/11/reviews", (11,), None], + ["test_child_is_organization", TeamMemberships, "org1", "https://api.github.com/orgs/org1/teams/dev-team/memberships/demo-user-1", ("dev-team",), ("demo-user-1",)] + ]) + + def test_child_stream(self, name, stream_class, param, expected_url, parent_id, grand_parent_id): + """ + Test for a stream with one child + """ + child_stream = stream_class() + full_url = get_child_full_url(self.domain, child_stream, param, parent_id, grand_parent_id) + self.assertEqual(expected_url, full_url) diff --git a/tests/unittests/test_sub_streams_selection.py b/tests/unittests/test_sub_streams_selection.py deleted file mode 100644 index 8dd16ff9..00000000 --- a/tests/unittests/test_sub_streams_selection.py +++ /dev/null @@ -1,48 +0,0 @@ -import unittest -import tap_github.__init__ as tap_github - -class TestSubStreamSelection(unittest.TestCase): - - def test_pull_request_sub_streams_selected(self): - selected_streams = ["reviews", "pull_requests"] - self.assertIsNone(tap_github.validate_dependencies(selected_streams)) - - def test_pull_request_sub_streams_not_selected(self): - selected_streams = ["reviews", "pr_commits"] - try: - tap_github.validate_dependencies(selected_streams) - except tap_github.DependencyException as e: - self.assertEqual(str(e), "Unable to extract 'reviews' data, to receive 'reviews' data, you also need to select 'pull_requests'. 
Unable to extract 'pr_commits' data, to receive 'pr_commits' data, you also need to select 'pull_requests'.") - - def test_teams_sub_streams_selected(self): - selected_streams = ["teams", "team_members"] - self.assertIsNone(tap_github.validate_dependencies(selected_streams)) - - def test_teams_sub_streams_not_selected(self): - selected_streams = ["team_members"] - try: - tap_github.validate_dependencies(selected_streams) - except tap_github.DependencyException as e: - self.assertEqual(str(e), "Unable to extract 'team_members' data, to receive 'team_members' data, you also need to select 'teams'.") - - def test_projects_sub_streams_selected(self): - selected_streams = ["projects", "project_cards"] - self.assertIsNone(tap_github.validate_dependencies(selected_streams)) - - def test_projects_sub_streams_not_selected(self): - selected_streams = ["project_columns"] - try: - tap_github.validate_dependencies(selected_streams) - except tap_github.DependencyException as e: - self.assertEqual(str(e), "Unable to extract 'project_columns' data, to receive 'project_columns' data, you also need to select 'projects'.") - - def test_mixed_streams_positive(self): - selected_streams = ["pull_requests", "reviews", "collaborators", "team_members", "stargazers", "projects", "teams", "project_cards"] - self.assertIsNone(tap_github.validate_dependencies(selected_streams)) - - def test_mixed_streams_negative(self): - selected_streams = ["project_columns", "issues", "teams", "team_memberships", "projects", "releases", "review_comments"] - try: - tap_github.validate_dependencies(selected_streams) - except tap_github.DependencyException as e: - self.assertEqual(str(e), "Unable to extract 'review_comments' data, to receive 'review_comments' data, you also need to select 'pull_requests'.") diff --git a/tests/unittests/test_sync.py b/tests/unittests/test_sync.py new file mode 100644 index 00000000..ef22b7f7 --- /dev/null +++ b/tests/unittests/test_sync.py @@ -0,0 +1,168 @@ +import unittest +from unittest import mock +from tap_github.sync import sync, write_schemas + + + +def get_stream_catalog(stream_name, is_selected = False): + """Return catalog for stream""" + return { + "schema":{}, + "tap_stream_id": stream_name, + "metadata": [ + { + "breadcrumb": [], + "metadata":{ + "selected": is_selected + } + } + ], + "key_properties": [] + } + + +@mock.patch("singer.write_state") +@mock.patch("tap_github.sync.write_schemas") +@mock.patch("tap_github.streams.IncrementalStream.sync_endpoint") +class TestSyncFunctions(unittest.TestCase): + """ + Test `sync` function + """ + + @mock.patch("tap_github.streams.IncrementalOrderedStream.sync_endpoint") + def test_sync_all_parents(self, mock_inc_ordered, mock_incremental, mock_write_schemas, mock_write_state): + """ + Test the sync function with only parent streams selected + """ + + mock_catalog = {"streams": [ + get_stream_catalog("projects", True), + get_stream_catalog("pull_requests", True) + ]} + + client = mock.Mock() + client.extract_repos_from_config.return_value = (["test-repo"], set()) + client.authed_get_all_pages.return_value = [] + client.not_accessible_repos = {} + + sync(client, {'start_date': ""}, {}, mock_catalog) + + # Verify write schema is called for selected streams + self.assertEqual(mock_write_schemas.call_count, 2) + + self.assertEqual(mock_write_schemas.mock_calls[0], mock.call("projects", mock.ANY, mock.ANY)) + self.assertEqual(mock_write_schemas.mock_calls[1], mock.call("pull_requests", mock.ANY, mock.ANY)) + 
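For orientation, the parent/child assertions in these sync tests follow one rule: a sync always starts from a top-level parent stream, even when only a child (or nested child) is selected. Below is a self-contained sketch of that rule with an assumed child-to-parent table; the names are illustrative, not the tap's real data structures:

```python
# Hypothetical child -> parent relationships mirroring the streams used in
# these tests; the real tap derives this from its stream classes.
PARENT_OF = {
    "project_columns": "projects",
    "project_cards": "project_columns",
    "team_members": "teams",
    "team_memberships": "team_members",
    "review_comments": "pull_requests",
    "pr_commits": "pull_requests",
}

def top_level_parent(stream: str) -> str:
    """Walk up the child->parent table until a top-level stream is reached."""
    while stream in PARENT_OF:
        stream = PARENT_OF[stream]
    return stream

def streams_to_start_sync(selected_streams):
    """Return the de-duplicated top-level parents that kick off the sync."""
    parents = []
    for stream in selected_streams:
        parent = top_level_parent(stream)
        if parent not in parents:
            parents.append(parent)
    return parents

# Selecting only nested children still starts the sync from their parents,
# which is why these tests assert write_schemas is called for the parents:
assert streams_to_start_sync(["project_cards", "review_comments"]) == ["projects", "pull_requests"]
```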
@mock.patch("tap_github.streams.IncrementalOrderedStream.sync_endpoint") + def test_sync_only_child(self, mock_inc_ordered, mock_incremental, mock_write_schemas, mock_write_state): + """ + Test sync function with only all children selected + """ + + mock_catalog = {"streams": [ + get_stream_catalog("projects"), + get_stream_catalog("project_columns"), + get_stream_catalog("project_cards", True), + get_stream_catalog("pull_requests"), + get_stream_catalog("review_comments", True) + ]} + + client = mock.Mock() + client.extract_repos_from_config.return_value = (["test-repo"], {"org"}) + client.authed_get_all_pages.return_value = [] + client.not_accessible_repos = {} + + sync(client, {'start_date': "2019-01-01T00:00:00Z"}, {}, mock_catalog) + + # Verify write schema is called for selected streams + self.assertEqual(mock_write_schemas.call_count, 2) + + self.assertEqual(mock_write_schemas.mock_calls[0], mock.call("projects", mock.ANY, mock.ANY)) + self.assertEqual(mock_write_schemas.mock_calls[1], mock.call("pull_requests", mock.ANY, mock.ANY)) + + @mock.patch("tap_github.streams.FullTableStream.sync_endpoint") + def test_sync_only_mid_child(self, mock_full_table, mock_incremental, mock_write_schemas, mock_write_state): + """ + Test sync function with only all mid child selected + """ + + mock_catalog = {"streams": [ + get_stream_catalog("projects"), + get_stream_catalog("project_columns", True), + get_stream_catalog("project_cards"), + get_stream_catalog("teams"), + get_stream_catalog("team_members", True), + get_stream_catalog("team_memberships") + ]} + + client = mock.Mock() + client.extract_repos_from_config.return_value = (["test-repo"], {"org"}) + client.authed_get_all_pages.return_value = [] + client.not_accessible_repos = {} + + sync(client, {'start_date': ""}, {}, mock_catalog) + + # Verify write schema is called for selected streams + self.assertEqual(mock_write_schemas.call_count, 2) + + self.assertEqual(mock_write_schemas.mock_calls[0], mock.call("teams", mock.ANY, mock.ANY)) + self.assertEqual(mock_write_schemas.mock_calls[1], mock.call("projects", mock.ANY, mock.ANY)) + + @mock.patch("tap_github.sync.get_stream_to_sync", return_value = []) + @mock.patch("tap_github.sync.get_selected_streams", return_value = []) + @mock.patch("tap_github.sync.update_currently_syncing_repo") + def test_no_streams_selected(self, mock_update_curr_sync, mock_selected_streams, mock_sync_streams, + mock_incremental, mock_write_schemas, mock_write_state): + """ + Test if no streams are selected then the state does not update, + and `update_currently_syncing_repo` function is not called. 
+ """ + + state = { + "currently_syncing_repo": "singer-io/test-repo", + "bookmarks": {}, + "currently_syncing": "teams" + } + mock_catalog = {"streams": [ + get_stream_catalog("projects"), + get_stream_catalog("project_columns", True), + get_stream_catalog("teams"), + get_stream_catalog("team_members", True) + ]} + + expected_state = { + "currently_syncing_repo": "singer-io/test-repo", + "bookmarks": {}, + "currently_syncing": "teams" + } + client = mock.Mock() + client.extract_repos_from_config.return_value = ["test-repo"], ["org1"] + sync(client, {'start_date': ""}, state, mock_catalog) + + # Verify state is not changed + self.assertEqual(state, expected_state) + + # Verify updated_currently_syncing_repo was not called + self.assertFalse(mock_update_curr_sync.called) + + +@mock.patch("singer.write_schema") +class TestWriteSchemas(unittest.TestCase): + + mock_catalog = {"streams": [ + get_stream_catalog("projects"), + get_stream_catalog("project_columns"), + get_stream_catalog("project_cards") + ]} + + def test_parents_selected(self, mock_write_schema): + write_schemas("projects", self.mock_catalog, ["projects"]) + mock_write_schema.assert_called_with("projects", mock.ANY, mock.ANY) + + def test_mid_child_selected(self, mock_write_schema): + write_schemas("project_columns", self.mock_catalog, ["project_columns"]) + mock_write_schema.assert_called_with("project_columns", mock.ANY, mock.ANY) + + def test_nested_child_selected(self, mock_write_schema): + write_schemas("project_cards", self.mock_catalog, ["project_cards"]) + mock_write_schema.assert_called_with("project_cards", mock.ANY, mock.ANY) diff --git a/tests/unittests/test_sync_endpoint.py b/tests/unittests/test_sync_endpoint.py new file mode 100644 index 00000000..338d9ea4 --- /dev/null +++ b/tests/unittests/test_sync_endpoint.py @@ -0,0 +1,289 @@ +import unittest +from unittest import mock +from tap_github.client import GithubClient +from tap_github.streams import Commits, Events, Projects, PullRequests, StarGazers, Teams + +class MockResponse(): + """Mock response object class.""" + def __init__(self, json_data): + self.json_data = json_data + + def json(self): + return self.json_data + +@mock.patch("tap_github.streams.get_schema") +@mock.patch("tap_github.client.GithubClient.verify_access_for_repo", return_value = None) +@mock.patch("tap_github.client.GithubClient.authed_get_all_pages") +class TestSyncEndpoints(unittest.TestCase): + + config = {"access_token": "", "repository": "singer-io/tap-github"} + catalog = {'schema': {}, "metadata": {}} + + @mock.patch("singer.write_record") + def test_sync_without_state(self, mock_write_records, mock_authed_all_pages, mock_verify_access, mock_get_schema): + """Verify that `write_records` is called for syncing stream endpoint.""" + + test_stream = Events() + mock_get_schema.return_value = self.catalog + mock_authed_all_pages.return_value = [MockResponse([{"id": 1, "created_at": "2019-01-01T00:00:00Z"}, + {"id": 2, "created_at": "2019-01-04T00:00:00Z"}]), + MockResponse([{"id": 3, "created_at": "2019-01-03T00:00:00Z"}, + {"id": 4, "created_at": "2019-01-02T00:00:00Z"}])] + expected_state = {'bookmarks': {'tap-github': {'events': {'since': '2019-01-04T00:00:00Z'}}}} + test_client = GithubClient(self.config) + final_state = test_stream.sync_endpoint(test_client, {}, self.catalog, "tap-github", "2018-01-02T00:00:00Z", ["events"], ['events']) + + # Verify returned state deom `sync_endpoint` + self.assertEqual(final_state, expected_state) + + # Verify `get_auth_all_pages` called with expected 
url + mock_authed_all_pages.assert_called_with(mock.ANY, 'https://api.github.com/repos/tap-github/events', mock.ANY, stream='events') + + # Verify `write_records` call count + self.assertEqual(mock_write_records.call_count, 4) + + @mock.patch("singer.write_record") + def test_sync_with_state(self, mock_write_records, mock_authed_all_pages, mock_verify_access, mock_get_schema): + """Verify that `write_records` is called for records with a replication value greater than the bookmark.""" + + test_stream = Events() + mock_get_schema.return_value = self.catalog + mock_authed_all_pages.return_value = [MockResponse([{"id": 1, "created_at": "2019-01-01T00:00:00Z"}, + {"id": 2, "created_at": "2019-01-04T00:00:00Z"}]), + MockResponse([{"id": 3, "created_at": "2019-01-03T00:00:00Z"}, + {"id": 4, "created_at": "2019-01-02T00:00:00Z"}])] + mock_state = {'bookmarks': {'tap-github': {'events': {'since': '2019-01-02T00:00:00Z'}}}} + + expected_state = {'bookmarks': {'tap-github': {'events': {'since': '2019-01-04T00:00:00Z'}}}} + test_client = GithubClient(self.config) + final_state = test_stream.sync_endpoint(test_client, mock_state, self.catalog, "tap-github", "2018-01-02T00:00:00Z", ["events"], ['events']) + + # Verify returned state from `sync_endpoint` + self.assertEqual(final_state, expected_state) + + # Verify `write_records` call count + self.assertEqual(mock_write_records.call_count, 3) + + # Verify `authed_get_all_pages` called with the expected url + mock_authed_all_pages.assert_called_with(mock.ANY, 'https://api.github.com/repos/tap-github/events', mock.ANY, stream='events') + mock_write_records.assert_called_with(mock.ANY, {'id': 4, 'created_at': '2019-01-02T00:00:00Z', '_sdc_repository': 'tap-github'}, time_extracted=mock.ANY) + + +@mock.patch("tap_github.streams.get_schema") +@mock.patch("tap_github.client.GithubClient.verify_access_for_repo", return_value = None) +@mock.patch("tap_github.client.GithubClient.authed_get_all_pages") +class TestFullTable(unittest.TestCase): + """ + Test `sync_endpoint` for full table streams. 
+ """ + config = {"access_token": "", "repository": "singer-io/tap-github"} + catalog = {"schema": {}, "metadata": {}} + + @mock.patch("tap_github.streams.Stream.get_child_records") + def test_without_child_stream(self, mock_get_child_records, mock_authed_get_all_pages, mock_verify_access, mock_get_schema): + """Verify that get_child_records() is not called for streams which do not have child streams""" + + test_client = GithubClient(self.config) + test_stream = StarGazers() + mock_get_schema.return_value = self.catalog + mock_authed_get_all_pages.return_value = [MockResponse([{"user": {"id": 1}}, {"user": {"id": 2}}]), + MockResponse([{"user": {"id": 4}}, {"user": {"id": 3}}])] + test_stream.sync_endpoint(test_client, {}, self.catalog, "tap-github", "", ["stargazers"], ["stargazers"]) + + # Verify that the authed_get_all_pages() is called with the expected url + mock_authed_get_all_pages.assert_called_with(mock.ANY, "https://api.github.com/repos/tap-github/stargazers", mock.ANY, stream='stargazers') + + # Verify that the get_child_records() is not called as Stargazers doesn't have a child stream + self.assertFalse(mock_get_child_records.called) + + @mock.patch("tap_github.streams.Stream.get_child_records") + def test_with_child_streams(self, mock_get_child_records, mock_authed_get_all_pages, mock_verify_access, mock_get_schema): + """Verify that get_child_records() is called for streams with child streams""" + + test_client = GithubClient(self.config) + test_stream = Teams() + mock_get_schema.return_value = self.catalog + + mock_authed_get_all_pages.return_value = [MockResponse([{"id": 1, "slug": "s1"}, {"id": 2, "slug": "s2"}]), + MockResponse([{"id": 3, "slug": "s3"}, {"id": 4, "slug": "s4"}])] + + test_stream.sync_endpoint(test_client, {}, self.catalog, "tap-github", "", ["teams", "team_members"], ["teams","team_members"]) + + # Verify that the authed_get_all_pages() is called with the expected url + mock_authed_get_all_pages.assert_called_with(mock.ANY, "https://api.github.com/orgs/tap-github/teams", mock.ANY, stream='teams') + + # Verify that the get_child_records() is called + self.assertTrue(mock_get_child_records.called) + + def test_with_nested_child_streams(self, mock_authed_get_all_pages, mock_verify_access, mock_get_schema): + """Verify that get_child_records() is called for streams with child streams and calls authed_get_all_pages() is called as expected""" + + test_client = GithubClient(self.config) + test_stream = Teams() + mock_get_schema.return_value = self.catalog + + mock_authed_get_all_pages.side_effect = [ + [MockResponse([{"id": 1, "slug": "stitch-dev"}])], + [MockResponse([{"login": "log1"}, {"login": "log2"}])], + [MockResponse({"url": "u1"})], + [MockResponse({"url": "u3"})], + [], [] + ] + + test_stream.sync_endpoint(test_client, {}, self.catalog, "tap-github", "", ["teams", "team_members", "team_memberships"], ["teams","team_members", "team_memberships"]) + + # Verify that the authed_get_all_pages() is called expected number of times + self.assertEqual(mock_authed_get_all_pages.call_count, 4) + + # Verify that the authed_get_all_pages() is called with the expected url + exp_call_1 = mock.call(mock.ANY, "https://api.github.com/orgs/tap-github/teams", mock.ANY, stream='teams') + exp_call_2 = mock.call(mock.ANY, "https://api.github.com/orgs/tap-github/teams/stitch-dev/members", stream='team_members') + exp_call_3 = mock.call(mock.ANY, "https://api.github.com/orgs/tap-github/teams/stitch-dev/memberships/log1", stream='team_memberships') + + 
self.assertEqual(mock_authed_get_all_pages.mock_calls[0], exp_call_1) + self.assertEqual(mock_authed_get_all_pages.mock_calls[1], exp_call_2) + self.assertEqual(mock_authed_get_all_pages.mock_calls[2], exp_call_3) + +@mock.patch("tap_github.streams.get_schema") +@mock.patch("tap_github.client.GithubClient.verify_access_for_repo", return_value = None) +@mock.patch("tap_github.client.GithubClient.authed_get_all_pages") +class TestIncrementalStream(unittest.TestCase): + """ + Test `sync_endpoint` for incremental streams. + """ + + config = {"access_token": "", "repository": "singer-io/tap-github"} + catalog = {"schema": {}, "metadata": {}} + + @mock.patch("tap_github.streams.Stream.get_child_records") + def test_without_child_stream(self, mock_get_child_records, mock_authed_get_all_pages, mock_verify_access, mock_get_schema): + """Verify that get_child_records() is not called for streams which do not have child streams""" + test_client = GithubClient(self.config) + test_stream = Commits() + mock_get_schema.return_value = self.catalog + mock_authed_get_all_pages.return_value = [MockResponse([{"commit": {"committer": {"date": "2022-07-05T09:42:14.000000Z"}}}, {"commit": {"committer": {"date": "2022-07-06T09:42:14.000000Z"}}}]), + MockResponse([{"commit": {"committer": {"date": "2022-07-07T09:42:14.000000Z"}}}, {"commit": {"committer": {"date": "2022-07-08T09:42:14.000000Z"}}}])] + test_stream.sync_endpoint(test_client, {}, self.catalog, "tap-github", "", ["commits"], ["commits"]) + + # Verify that the authed_get_all_pages() is called with the expected url + mock_authed_get_all_pages.assert_called_with(mock.ANY, "https://api.github.com/repos/tap-github/commits?since=", mock.ANY, stream='commits') + + # Verify that the get_child_records() is not called as Commits does not contain any child stream. 
+ self.assertFalse(mock_get_child_records.called) + + @mock.patch("tap_github.streams.Stream.get_child_records") + def test_with_child_streams(self, mock_get_child_records, mock_authed_get_all_pages, mock_verify_access, mock_get_schema): + """Verify that get_child_records() is called for streams with child streams""" + test_client = GithubClient(self.config) + test_stream = Projects() + mock_get_schema.return_value = self.catalog + + mock_authed_get_all_pages.return_value = [MockResponse([{"id": 1, "updated_at": "2022-07-05T09:42:14.000000Z"}, {"id": 1, "updated_at": "2022-07-06T09:42:14.000000Z"}]), + MockResponse([{"id": 1, "updated_at": "2022-07-07T09:42:14.000000Z"}, {"id": 1, "updated_at": "2022-07-08T09:42:14.000000Z"}])] + + test_stream.sync_endpoint(test_client, {}, self.catalog, "tap-github", "", ["projects", "project_columns"], ["projects","project_columns"]) + + # Verify that the authed_get_all_pages() is called with the expected url + mock_authed_get_all_pages.assert_called_with(mock.ANY, "https://api.github.com/repos/tap-github/projects?state=all", mock.ANY, stream='projects') + + # Verify that the get_child_records() is called as the Projects stream has a child stream + self.assertTrue(mock_get_child_records.called) + + def test_with_nested_child_streams(self, mock_authed_get_all_pages, mock_verify_access, mock_get_schema): + """Verify that authed_get_all_pages() is called as expected for a stream with nested child streams""" + test_client = GithubClient(self.config) + test_stream = Projects() + mock_get_schema.return_value = self.catalog + + mock_authed_get_all_pages.side_effect = [ + [MockResponse([{"id": 1, "updated_at": "2022-07-05T09:42:14.000000Z"}])], + [MockResponse([{"id": 1}, {"id": 2}])], + [MockResponse({"id": 1})], + [MockResponse({"id": 2})], + [], [] + ] + + test_stream.sync_endpoint(test_client, {}, self.catalog, "tap-github", "", ["projects", "project_columns", "project_cards"], ["projects","project_columns", "project_cards"]) + + # Verify that the authed_get_all_pages() is called the expected number of times + self.assertEqual(mock_authed_get_all_pages.call_count, 4) + + exp_call_1 = mock.call(mock.ANY, "https://api.github.com/repos/tap-github/projects?state=all", mock.ANY, stream='projects') + exp_call_2 = mock.call(mock.ANY, "https://api.github.com/projects/1/columns", stream='project_columns') + exp_call_3 = mock.call(mock.ANY, "https://api.github.com/projects/columns/1/cards", stream='project_cards') + + # Verify that the API calls are done as expected with the correct url + self.assertEqual(mock_authed_get_all_pages.mock_calls[0], exp_call_1) + self.assertEqual(mock_authed_get_all_pages.mock_calls[1], exp_call_2) + self.assertEqual(mock_authed_get_all_pages.mock_calls[2], exp_call_3) + +@mock.patch("tap_github.streams.get_schema") +@mock.patch("tap_github.client.GithubClient.verify_access_for_repo", return_value = None) +@mock.patch("tap_github.client.GithubClient.authed_get_all_pages") +@mock.patch("tap_github.streams.singer.utils.strptime_to_utc") +class TestIncrementalOrderedStream(unittest.TestCase): + """ + Test `sync_endpoint` for incremental ordered streams. 
+ """ + config = {"access_token": "", "repository": "singer-io/tap-github"} + catalog = {"schema": {}, "metadata": {}} + + @mock.patch("tap_github.streams.Stream.get_child_records") + def test_without_child_stream(self, mock_get_child_records, mock_strptime_to_utc, mock_authed_get_all_pages, mock_verify_access, mock_get_schema): + """Verify that get_child_records() is not called when child stream is not selected""" + test_client = GithubClient(self.config) + test_stream = PullRequests() + mock_strptime_to_utc.side_effect = ["2022-07-05 09:42:14", "2022-07-04 09:42:14"] + mock_get_schema.return_value = self.catalog + mock_authed_get_all_pages.return_value = [MockResponse([{"id": 1, "updated_at": "2022-07-05 09:42:14"}, {"id": 2, "updated_at": "2022-07-06 09:42:14"}]), + MockResponse([{"id": 3, "updated_at": "2022-07-07 09:42:14"}, {"id": 4, "updated_at": "2022-07-08 09:42:14"}])] + test_stream.sync_endpoint(test_client, {}, self.catalog, "tap-github", "", ["pull_requests"], ["pull_requests"]) + + # Verify that the authed_get_all_pages() is called with the expected url + mock_authed_get_all_pages.assert_called_with(mock.ANY, "https://api.github.com/repos/tap-github/pulls?state=all&sort=updated&direction=desc", stream='pull_requests') + + + @mock.patch("tap_github.streams.Stream.get_child_records") + def test_with_child_streams(self, mock_get_child_records, mock_strptime_to_utc, mock_authed_get_all_pages, mock_verify_access, mock_get_schema): + """Verify that get_child_records() is called for streams with child streams""" + test_client = GithubClient(self.config) + test_stream = PullRequests() + mock_strptime_to_utc.side_effect = ["2022-07-05T09:42:14.000000Z", "2022-07-06T09:42:14.000000Z", "2022-07-05T09:42:14.000000Z", "2022-07-05T09:42:14.000000Z", "2022-07-05T09:42:14.000000Z"] + mock_get_schema.return_value = self.catalog + + mock_authed_get_all_pages.return_value = [MockResponse([{"id": 1, "number": 1, "updated_at": "2022-07-05T09:42:14.000000Z"}, {"id": 1, "number": 1, "updated_at": "2022-07-06T09:42:14.000000Z"}]), + MockResponse([{"id": 1, "number": 1, "updated_at": "2022-07-07T09:42:14.000000Z"}, {"id": 1, "number": 1, "updated_at": "2022-07-08T09:42:14.000000Z"}])] + + test_stream.sync_endpoint(test_client, {}, self.catalog, "tap-github", "", ["pull_requests", "review_comments"], ["pull_requests","review_comments"]) + + # Verify that the authed_get_all_pages() is called with the expected url + mock_authed_get_all_pages.assert_called_with(mock.ANY, "https://api.github.com/repos/tap-github/pulls?state=all&sort=updated&direction=desc", stream='pull_requests') + + # Verify that the get_child_records() is called as the PullRequests stream has a child stream + self.assertTrue(mock_get_child_records.called) + + def test_with_nested_child_streams(self, mock_strptime_to_utc, mock_authed_get_all_pages, mock_verify_access, mock_get_schema): + """Verify that get_child_records() is called for streams with child streams and calls authed_get_all_pages() is called as expected""" + test_client = GithubClient(self.config) + test_stream = PullRequests() + mock_get_schema.return_value = self.catalog + mock_strptime_to_utc.side_effect = ["2022-07-05T09:42:14.000000Z", "2022-07-06T09:42:14.000000Z", "2022-07-06T09:42:14.000000Z"] + + mock_authed_get_all_pages.side_effect = [ + [MockResponse([{"id": 1, "number": 1, "updated_at": "2022-07-05T09:42:14.000000Z"}])], + [MockResponse([{"id": 1, "updated_at": "2022-07-06T09:42:14.000000Z"}, {"id": 2, "updated_at": "2022-07-06T09:42:14.000000Z"}])], + [], [] + 
] + + test_stream.sync_endpoint(test_client, {}, self.catalog, "tap-github", "", ["pull_requests", "review_comments"], ["pull_requests","review_comments"]) + + # Verify that the authed_get_all_pages() is called the expected number of times + self.assertEqual(mock_authed_get_all_pages.call_count, 2) + + exp_call_1 = mock.call(mock.ANY, "https://api.github.com/repos/tap-github/pulls?state=all&sort=updated&direction=desc", stream='pull_requests') + exp_call_2 = mock.call(mock.ANY, "https://api.github.com/repos/tap-github/pulls/1/comments?sort=updated_at&direction=desc", stream='review_comments') + + # Verify that the API calls are done as expected with the correct url + self.assertEqual(mock_authed_get_all_pages.mock_calls[0], exp_call_1) + self.assertEqual(mock_authed_get_all_pages.mock_calls[1], exp_call_2) diff --git a/tests/unittests/test_timeout.py b/tests/unittests/test_timeout.py index ce9a4769..a3f6ca53 100644 --- a/tests/unittests/test_timeout.py +++ b/tests/unittests/test_timeout.py @@ -1,9 +1,13 @@ import unittest from unittest import mock -import tap_github.__init__ as tap_github +import tap_github +from tap_github.client import GithubClient, REQUEST_TIMEOUT import requests +from parameterized import parameterized class Mockresponse: + """ Mock response object class.""" + def __init__(self, status_code, json, raise_error, headers={'X-RateLimit-Remaining': 1}, text=None, content=None): self.status_code = status_code self.raise_error = raise_error @@ -18,19 +22,24 @@ def raise_for_status(self): raise requests.HTTPError("Sample message") def json(self): + """ Response JSON method.""" return self.text class MockParseArgs: + """Mock args object class""" config = {} def __init__(self, config): self.config = config def get_args(config): + """ Returns required args response. """ return MockParseArgs(config) def get_response(status_code, json={}, raise_error=False, content=None): + """ Returns required mock response. 
""" return Mockresponse(status_code, json, raise_error, content=content) +@mock.patch("tap_github.client.GithubClient.verify_access_for_repo", return_value = None) @mock.patch("time.sleep") @mock.patch("requests.Session.request") @mock.patch("singer.utils.parse_args") @@ -38,121 +47,41 @@ class TestTimeoutValue(unittest.TestCase): """ Test case to verify the timeout value is set as expected """ - - def test_timeout_value_in_config(self, mocked_parse_args, mocked_request, mocked_sleep): - json = {"key": "value"} - # mock response - mocked_request.return_value = get_response(200, json) - - mock_config = {"request_timeout": 100} - # mock parse args - mocked_parse_args.return_value = get_args(mock_config) - - # get the timeout value for assertion - timeout = tap_github.get_request_timeout() - # function call - tap_github.authed_get("test_source", "") - - # verify that we got expected timeout value - self.assertEquals(100.0, timeout) - # verify that the request was called with expected timeout value - mocked_request.assert_called_with(method='get', url='', timeout=100.0) - - def test_timeout_value_not_in_config(self, mocked_parse_args, mocked_request, mocked_sleep): - json = {"key": "value"} - # mock response - mocked_request.return_value = get_response(200, json) - - mock_config = {} - # mock parse args - mocked_parse_args.return_value = get_args(mock_config) - - # get the timeout value for assertion - timeout = tap_github.get_request_timeout() - # function call - tap_github.authed_get("test_source", "") - - # verify that we got expected timeout value - self.assertEquals(300.0, timeout) - # verify that the request was called with expected timeout value - mocked_request.assert_called_with(method='get', url='', timeout=300.0) - - def test_timeout_string_value_in_config(self, mocked_parse_args, mocked_request, mocked_sleep): - json = {"key": "value"} - # mock response - mocked_request.return_value = get_response(200, json) - - mock_config = {"request_timeout": "100"} - # mock parse args - mocked_parse_args.return_value = get_args(mock_config) - - # get the timeout value for assertion - timeout = tap_github.get_request_timeout() - # function call - tap_github.authed_get("test_source", "") - - # verify that we got expected timeout value - self.assertEquals(100.0, timeout) - # verify that the request was called with expected timeout value - mocked_request.assert_called_with(method='get', url='', timeout=100.0) - - def test_timeout_empty_value_in_config(self, mocked_parse_args, mocked_request, mocked_sleep): - json = {"key": "value"} - # mock response - mocked_request.return_value = get_response(200, json) - - mock_config = {"request_timeout": ""} - # mock parse args - mocked_parse_args.return_value = get_args(mock_config) - - # get the timeout value for assertion - timeout = tap_github.get_request_timeout() - # function call - tap_github.authed_get("test_source", "") - - # verify that we got expected timeout value - self.assertEquals(300.0, timeout) - # verify that the request was called with expected timeout value - mocked_request.assert_called_with(method='get', url='', timeout=300.0) - - def test_timeout_0_value_in_config(self, mocked_parse_args, mocked_request, mocked_sleep): - json = {"key": "value"} + json = {"key": "value"} + + @parameterized.expand([ + ["test_int_value", {"request_timeout": 100, "access_token": "access_token"}, 100.0], + ["test_str_value", {"request_timeout": "100", "access_token": "access_token"}, 100.0], + ["test_empty_value", {"request_timeout": "", "access_token": 
"access_token"}, 300.0], + ["test_int_zero_value", {"request_timeout": 0, "access_token": "access_token"}, 300.0], + ["test_str_zero_value", {"request_timeout": "0", "access_token": "access_token"}, 300.0], + ["test_no_value", {"request_timeout": "0", "access_token": "access_token"}, REQUEST_TIMEOUT] + + ]) + def test_timeout_value_in_config(self, mocked_parse_args, mocked_request, mocked_sleep, mock_verify_access, name, config, expected_value): + """ + Test if timeout value given in config + """ # mock response - mocked_request.return_value = get_response(200, json) + mocked_request.return_value = get_response(200, self.json) - mock_config = {"request_timeout": 0.0} + mock_config = config # mock parse args mocked_parse_args.return_value = get_args(mock_config) + test_client = GithubClient(mock_config) # get the timeout value for assertion - timeout = tap_github.get_request_timeout() + timeout = test_client.get_request_timeout() # function call - tap_github.authed_get("test_source", "") + test_client.authed_get("test_source", "") # verify that we got expected timeout value - self.assertEquals(300.0, timeout) + self.assertEqual(expected_value, timeout) # verify that the request was called with expected timeout value - mocked_request.assert_called_with(method='get', url='', timeout=300.0) + mocked_request.assert_called_with(method='get', url='', timeout=expected_value) - def test_timeout_string_0_value_in_config(self, mocked_parse_args, mocked_request, mocked_sleep): - json = {"key": "value"} - # mock response - mocked_request.return_value = get_response(200, json) - - mock_config = {"request_timeout": "0.0"} - # mock parse args - mocked_parse_args.return_value = get_args(mock_config) - - # get the timeout value for assertion - timeout = tap_github.get_request_timeout() - # function call - tap_github.authed_get("test_source", "") - - # verify that we got expected timeout value - self.assertEquals(300.0, timeout) - # verify that the request was called with expected timeout value - mocked_request.assert_called_with(method='get', url='', timeout=300.0) +@mock.patch("tap_github.client.GithubClient.verify_access_for_repo", return_value = None) @mock.patch("time.sleep") @mock.patch("requests.Session.request") @mock.patch("singer.utils.parse_args") @@ -161,36 +90,26 @@ class TestTimeoutAndConnnectionErrorBackoff(unittest.TestCase): Test case to verify that we backoff for 5 times for Connection and Timeout error """ - def test_timeout_backoff(self, mocked_parse_args, mocked_request, mocked_sleep): - # mock request and raise 'Timeout' error - mocked_request.side_effect = requests.Timeout - - mock_config = {} + @parameterized.expand([ + ["test_timeout_backoff", requests.Timeout], + ["test_connection_error_backoff", requests.ConnectionError] + ]) + def test_backoff(self, mocked_parse_args, mocked_request, mocked_sleep, mock_verify_access, name, error_class): + """ + Test that tap retry timeout or connection error 5 times. 
+ """ + # mock request and raise error + mocked_request.side_effect = error_class + + mock_config = {"access_token": "access_token"} # mock parse args mocked_parse_args.return_value = get_args(mock_config) + test_client = GithubClient(mock_config) - try: - # function call - tap_github.authed_get("test_source", "") - except requests.Timeout: - pass + with self.assertRaises(error_class): + test_client.authed_get("test_source", "") # verify that we backoff 5 times - self.assertEquals(5, mocked_request.call_count) + self.assertEqual(5, mocked_request.call_count) - def test_connection_error_backoff(self, mocked_parse_args, mocked_request, mocked_sleep): - # mock request and raise 'Connection' error - mocked_request.side_effect = requests.ConnectionError - mock_config = {} - # mock parse args - mocked_parse_args.return_value = get_args(mock_config) - - try: - # function call - tap_github.authed_get("test_source", "") - except requests.ConnectionError: - pass - - # verify that we backoff 5 times - self.assertEquals(5, mocked_request.call_count) diff --git a/tests/unittests/test_verify_access.py b/tests/unittests/test_verify_access.py index 1e00df32..bdd93209 100644 --- a/tests/unittests/test_verify_access.py +++ b/tests/unittests/test_verify_access.py @@ -1,9 +1,12 @@ from unittest import mock import tap_github +from tap_github.client import GithubClient import unittest import requests class Mockresponse: + """ Mock response object class.""" + def __init__(self, status_code, json, raise_error, headers={'X-RateLimit-Remaining': 1}, text=None): self.status_code = status_code self.raise_error = raise_error @@ -18,110 +21,42 @@ def raise_for_status(self): raise requests.HTTPError("Sample message") def json(self): + """ Response JSON method.""" return self.text def get_response(status_code, json={}, raise_error=False): + """ Returns required mock response. 
""" return Mockresponse(status_code, json, raise_error) +@mock.patch("tap_github.client.GithubClient.verify_access_for_repo", return_value = None) @mock.patch("requests.Session.request") @mock.patch("singer.utils.parse_args") class TestCredentials(unittest.TestCase): + """ + Test `verify_repo_access` error handling + """ - def test_repo_not_found(self, mocked_parse_args, mocked_request): - json = {"message": "Not Found", "documentation_url": "https:/docs.github.com/"} - mocked_request.return_value = get_response(404, json, True) - - try: - tap_github.verify_repo_access("", "repo") - except tap_github.NotFoundException as e: - self.assertEqual(str(e), "HTTP-error-code: 404, Error: Please check the repository name 'repo' or you do not have sufficient permissions to access this repository.") + config = {"access_token": "", "repository": "singer-io/tap-github"} - def test_repo_bad_request(self, mocked_parse_args, mocked_request): + def test_repo_bad_request(self, mocked_parse_args, mocked_request, mock_verify_access): + """Verify if 400 error arises""" + test_client = GithubClient(self.config) mocked_request.return_value = get_response(400, raise_error = True) - try: - tap_github.verify_repo_access("", "repo") - except tap_github.BadRequestException as e: - self.assertEqual(str(e), "HTTP-error-code: 400, Error: The request is missing or has a bad parameter.") - - def test_repo_bad_creds(self, mocked_parse_args, mocked_request): - json = {"message": "Bad credentials", "documentation_url": "https://docs.github.com/"} - mocked_request.return_value = get_response(401, json, True) - - try: - tap_github.verify_repo_access("", "repo") - except tap_github.BadCredentialsException as e: - self.assertEqual(str(e), "HTTP-error-code: 401, Error: {}".format(json)) - - @mock.patch("tap_github.get_catalog") - def test_discover_valid_creds(self, mocked_get_catalog, mocked_parse_args, mocked_request): - mocked_request.return_value = get_response(200) - mocked_get_catalog.return_value = {} - - tap_github.do_discover({"access_token": "access_token", "repository": "org/repo"}) - - self.assertTrue(mocked_get_catalog.call_count, 1) - - @mock.patch("tap_github.get_catalog") - def test_discover_not_found(self, mocked_get_catalog, mocked_parse_args, mocked_request): - json = {"message": "Not Found", "documentation_url": "https:/docs.github.com/"} - mocked_request.return_value = get_response(404, json, True) - mocked_get_catalog.return_value = {} + with self.assertRaises(tap_github.client.BadRequestException) as e: + test_client.verify_repo_access("", "repo") - try: - tap_github.do_discover({"access_token": "access_token", "repository": "org/repo"}) - except tap_github.NotFoundException as e: - self.assertEqual(str(e), "HTTP-error-code: 404, Error: Please check the repository name org/repo or you do not have sufficient permissions to access this repository.") - self.assertEqual(mocked_get_catalog.call_count, 1) + # Verify error with proper message + self.assertEqual(str(e.exception), "HTTP-error-code: 400, Error: The request is missing or has a bad parameter.") - @mock.patch("tap_github.get_catalog") - def test_discover_bad_request(self, mocked_get_catalog, mocked_parse_args, mocked_request): - mocked_request.return_value = get_response(400, raise_error = True) - mocked_get_catalog.return_value = {} - - try: - tap_github.do_discover({"access_token": "access_token", "repository": "org/repo"}) - except tap_github.BadRequestException as e: - self.assertEqual(str(e), "HTTP-error-code: 400, Error: The request is missing or 
has a bad parameter.") - self.assertEqual(mocked_get_catalog.call_count, 0) - - @mock.patch("tap_github.get_catalog") - def test_discover_bad_creds(self, mocked_get_catalog, mocked_parse_args, mocked_request): - json = {"message":"Bad credentials","documentation_url":"https://docs.github.com/"} + def test_repo_bad_creds(self, mocked_parse_args, mocked_request, mock_verify_access): + """Verify that a 401 response raises BadCredentialsException""" + test_client = GithubClient(self.config) + json = {"message": "Bad credentials", "documentation_url": "https://docs.github.com/"} mocked_request.return_value = get_response(401, json, True) - mocked_get_catalog.return_value = {} - - try: - tap_github.do_discover({"access_token": "access_token", "repository": "org/repo"}) - except tap_github.BadCredentialsException as e: - self.assertEqual(str(e), "HTTP-error-code: 401, Error: {}".format(json)) - self.assertEqual(mocked_get_catalog.call_count, 0) - - @mock.patch("tap_github.get_catalog") - def test_discover_forbidden(self, mocked_get_catalog, mocked_parse_args, mocked_request): - json = {'message': 'Must have admin rights to Repository.', 'documentation_url': 'https://docs.github.com/'} - mocked_request.return_value = get_response(403, json, True) - mocked_get_catalog.return_value = {} - - try: - tap_github.do_discover({"access_token": "access_token", "repository": "org/repo"}) - except tap_github.AuthException as e: - self.assertEqual(str(e), "HTTP-error-code: 403, Error: {}".format(json)) - self.assertEqual(mocked_get_catalog.call_count, 0) - - -@mock.patch("tap_github.logger.info") -@mock.patch("tap_github.verify_repo_access") -class TestRepoCallCount(unittest.TestCase): - def test_repo_call_count(self, mocked_repo, mocked_logger_info): - """ - Here 3 repos are given, - so tap will check creds for all 3 repos - """ - mocked_repo.return_value = None - config = {"access_token": "access_token", "repository": "org1/repo1 org1/repo2 org2/repo1"} - tap_github.verify_access_for_repo(config) + with self.assertRaises(tap_github.client.BadCredentialsException) as e: + test_client.verify_repo_access("", "repo") - self.assertEqual(mocked_logger_info.call_count, 3) - self.assertEqual(mocked_repo.call_count, 3) + # Verify error with proper message + self.assertEqual(str(e.exception), "HTTP-error-code: 401, Error: {}".format(json))
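
The parameterized cases in `TestTimeoutValue` above pin down how the refactored client resolves `request_timeout`. For reference, here is a minimal sketch consistent with those assertions; it is inferred from the tests, not copied from the shipped `tap_github/client.py`, where the logic lives as a method on `GithubClient`:

```python
# Sketch only: reconstructed from the TestTimeoutValue cases above; the
# actual implementation is a GithubClient method and may differ in detail.
REQUEST_TIMEOUT = 300  # default timeout, in seconds

def get_request_timeout(config):
    raw = config.get('request_timeout')
    # None, "", 0, and "0" all fall back to the default, matching the
    # test_empty_value, test_int_zero_value, and test_str_zero_value cases;
    # any other value is coerced to float, so 100 and "100" resolve to 100.0.
    if raw and float(raw):
        return float(raw)
    return float(REQUEST_TIMEOUT)
```

Coercing through `float()` is what lets the integer and string variants assert the same `100.0` value, and treating zero-like values as unset is why `"0"` still yields the 300-second default.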