Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TDL-5961 Support of custom domain #172

Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,18 @@ This tap:
3. Create the config file
Create a JSON file containing the start date, the access token you just created,
and the path to one or multiple repositories that you want to extract data from. Each repo path should be space delimited. The repo path is relative to
`https://github.com/`. For example the path for this repository is
and the path to one or multiple repositories that you want to extract data from. Each repo path should be space delimited. The repo path is relative to `"base_url"`
(Default: `https://api.github.com`). For example the path for this repository is
`singer-io/tap-github`. You can also set `request_timeout`, an optional parameter that sets the timeout for requests (default: 300 seconds).
```json
{"access_token": "your-access-token",
"repository": "singer-io/tap-github singer-io/getting-started",
"start_date": "2021-01-01T00:00:00Z",
"request_timeout": 300}
{
"access_token": "your-access-token",
"repository": "singer-io/tap-github singer-io/getting-started",
"start_date": "2021-01-01T00:00:00Z",
"request_timeout": 300,
"base_url": "https://api.github.com"
}
```
4. Run the tap in discovery mode to get properties.json file
Expand Down
3 changes: 2 additions & 1 deletion config.sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
"access_token": "abcdefghijklmnopqrstuvwxyz1234567890ABCD",
"repository": "singer-io/target-stitch",
"start_date": "2021-01-01T00:00:00Z",
"request_timeout": 300
"request_timeout": 300,
"base_url": "https://api.github.com"
}
5 changes: 2 additions & 3 deletions tap_github/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

LOGGER = singer.get_logger()
DEFAULT_SLEEP_SECONDS = 600
DEFAULT_DOMAIN = "https://api.github.com"

# Set default timeout of 300 seconds
REQUEST_TIMEOUT = 300
Expand Down Expand Up @@ -152,12 +153,11 @@ class GithubClient:
def __init__(self, config):
self.config = config
self.session = requests.Session()
self.base_url = "https://api.github.com"
self.base_url = config.get('base_url', DEFAULT_DOMAIN)
self.max_sleep_seconds = self.config.get('max_sleep_seconds', DEFAULT_SLEEP_SECONDS)
self.set_auth_in_session()
self.not_accessible_repos = set()
somethingmorerelevant marked this conversation as resolved.
Show resolved Hide resolved

# Return the 'timeout'
def get_request_timeout(self):
"""
Get the request timeout from the config, if not present use the default 300 seconds.
Expand All @@ -167,7 +167,6 @@ def get_request_timeout(self):

# Only return the timeout value if it is passed in the config and the value is not 0, "0" or ""
if config_request_timeout and float(config_request_timeout):
# Return the timeout from config
return float(config_request_timeout)

# Return default timeout
Expand Down
23 changes: 11 additions & 12 deletions tap_github/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,30 @@ def get_schema(catalog, stream_id):
stream_catalog = [cat for cat in catalog if cat['tap_stream_id'] == stream_id ][0]
return stream_catalog

def get_child_full_url(child_object, repo_path, parent_id, grand_parent_id):
def get_child_full_url(domain, child_object, repo_path, parent_id, grand_parent_id):
somethingmorerelevant marked this conversation as resolved.
Show resolved Hide resolved
"""
Build the child stream's URL based on the parent and the grandparent's ids.
"""

if child_object.use_repository:
# The `use_repository` represents that the url contains /repos and the repository name.
child_full_url = '{}/repos/{}/{}'.format(
child_object.url,
domain,
repo_path,
child_object.path).format(*parent_id)

elif child_object.use_organization:
# The `use_organization` represents that the url contains the organization name.
org = repo_path.split('/')[0]
child_full_url = '{}/{}'.format(
child_object.url,
domain,
child_object.path).format(org, *parent_id, *grand_parent_id)

else:
# Build and return url that does not contain the repos or the organization name.
# Example: https://base_url/projects/{project_id}/columns
child_full_url = '{}/{}'.format(
child_object.url,
domain,
child_object.path).format(*grand_parent_id)
LOGGER.info("Final url is: %s", child_full_url)

Expand All @@ -69,9 +69,8 @@ class Stream:
use_repository = False
headers = {'Accept': '*/*'}
parent = None
url = "https://api.github.com"

def build_url(self, repo_path, bookmark):
def build_url(self, base_url, repo_path, bookmark):
"""
Build the full url with parameters and attributes.
"""
Expand All @@ -85,12 +84,12 @@ def build_url(self, repo_path, bookmark):
# The `use_organization` represents that the url contains the organization name.
org = repo_path.split('/')[0]
full_url = '{}/{}'.format(
self.url,
base_url,
self.path).format(org)
else:
# The url that contains /repos and the repository name.
full_url = '{}/repos/{}/{}{}'.format(
self.url,
base_url,
repo_path,
self.path,
query_string)
Expand Down Expand Up @@ -152,7 +151,7 @@ def get_child_records(self,
if not parent_id:
parent_id = grand_parent_id

child_full_url = get_child_full_url(child_object, repo_path, parent_id, grand_parent_id)
child_full_url = get_child_full_url(client.base_url, child_object, repo_path, parent_id, grand_parent_id)
stream_catalog = get_schema(catalog, child_object.tap_stream_id)

with metrics.record_counter(child_object.tap_stream_id) as counter:
Expand Down Expand Up @@ -221,7 +220,7 @@ def sync_endpoint(self,
"""

# build full url
full_url = self.build_url(repo_path, None)
full_url = self.build_url(client.base_url, repo_path, None)

stream_catalog = get_schema(catalog, self.tap_stream_id)

Expand Down Expand Up @@ -292,7 +291,7 @@ def sync_endpoint(self,
max_bookmark_value = min_bookmark_value

# build full url
full_url = self.build_url(repo_path, min_bookmark_value)
full_url = self.build_url(client.base_url, repo_path, min_bookmark_value)

stream_catalog = get_schema(catalog, self.tap_stream_id)

Expand Down Expand Up @@ -377,7 +376,7 @@ def sync_endpoint(self,
bookmark_time = singer.utils.strptime_to_utc(min_bookmark_value)

# Build full url
full_url = self.build_url(repo_path, bookmark_value)
full_url = self.build_url(client.base_url, repo_path, bookmark_value)
synced_all_records = False
stream_catalog = get_schema(catalog, self.tap_stream_id)

Expand Down
8 changes: 8 additions & 0 deletions tests/test_github_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ class TestGithubSync(TestGithubBase):
def name():
return "tap_tester_github_sync_test"

def get_properties(self):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add code comment here


return {
'start_date' : '2021-10-01T00:00:00Z',
'base_url': 'https://api.github.com',
'repository': 'singer-io/test-repo'
}

def test_run(self):
"""
Testing that sync creates the appropriate catalog with valid metadata.
Expand Down
29 changes: 29 additions & 0 deletions tests/unittests/test_custom_domain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import unittest
from unittest import mock
from tap_github.client import GithubClient, DEFAULT_DOMAIN

@mock.patch('tap_github.GithubClient.verify_access_for_repo', return_value = None)
class TestCustomDomain(unittest.TestCase):
    """
    Check which base URL `GithubClient` ends up with, depending on whether
    the config carries a `base_url` entry.

    `verify_access_for_repo` is patched out for every test in this class, so
    each test method receives the resulting mock as an extra argument.
    """

    def test_config_without_domain(self, mock_verify_access):
        """
        A config that has no `base_url` key should leave the client on `DEFAULT_DOMAIN`.
        """
        config = {'repository': 'singer-io/test-repo', "access_token": ""}
        client = GithubClient(config)

        # No override was supplied, so the client must use the default domain
        self.assertEqual(client.base_url, DEFAULT_DOMAIN)

    def test_config_with_domain(self, mock_verify_access):
        """
        A config that has a `base_url` key should make the client use that value.
        """
        config = {
            'repository': 'singer-io/test-repo',
            "base_url": "http://CUSTOM-git.com",
            "access_token": "",
        }
        client = GithubClient(config)

        # The client's domain must be exactly the one given in the config
        self.assertEqual(client.base_url, config["base_url"])
13 changes: 7 additions & 6 deletions tests/unittests/test_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_get_schema(self):
{"tap_stream_id": "events"},
]
expected_schema = {"tap_stream_id": "comments"}

# Verify returned schema is the same as the expected schema
self.assertEqual(get_schema(catalog, "comments"), expected_schema)

Expand All @@ -28,7 +28,7 @@ class TestGetBookmark(unittest.TestCase):
"""

test_stream = Comments()

def test_with_out_repo_path(self):
"""
Test if the state does not contain a repo path
Expand All @@ -40,7 +40,7 @@ def test_with_out_repo_path(self):
}
returned_bookmark = get_bookmark(state, "org/test-repo", "projects", "since", "2021-01-01T00:00:00Z")
self.assertEqual(returned_bookmark, "2021-01-01T00:00:00Z")

def test_with_repo_path(self):
"""
Test if the state contains a repo path
Expand Down Expand Up @@ -69,15 +69,15 @@ def test_build_url(self, name, expected_url, stream_class):
Test the `build_url` method for filter param or organization name only.
"""
test_streams = stream_class()
full_url = test_streams.build_url("org/test-repo", "2022-01-01T00:00:00Z")
full_url = test_streams.build_url("https://api.github.com", "org/test-repo", "2022-01-01T00:00:00Z")

# verify returned url is expected
self.assertEqual(expected_url, full_url)


class GetMinBookmark(unittest.TestCase):
"""
Test `get_min_bookmark` method of stream class
Test `get_min_bookmark` method of the stream class
"""

start_date = "2020-04-01T00:00:00Z"
Expand Down Expand Up @@ -172,6 +172,7 @@ class TestGetChildUrl(unittest.TestCase):
"""
Test `get_child_full_url` method of stream class
"""
domain = 'https://api.github.com'

@parameterized.expand([
["test_child_stream", ProjectColumns, "https://api.github.com/projects/1309875/columns", None, (1309875,)],
Expand All @@ -184,5 +185,5 @@ def test_child_stream(self, name, stream_class, expected_url, parent_id, grand_p
Test for a stream with one child
"""
child_stream = stream_class()
full_url = get_child_full_url(child_stream, "org1/test-repo", parent_id, grand_parent_id)
full_url = get_child_full_url(self.domain, child_stream, "org1/test-repo", parent_id, grand_parent_id)
self.assertEqual(expected_url, full_url)