diff --git a/meltano.yml b/meltano.yml index db67f39..e55a75a 100644 --- a/meltano.yml +++ b/meltano.yml @@ -14,28 +14,29 @@ plugins: - name: key kind: password label: API Key - description: StackExchange API Key documentation: https://api.stackexchange.com/docs/authentication + description: StackExchange API Key + sensitive: true - name: filter kind: string label: Filter - description: Custom API filter to apply to all requests documentation: https://api.stackexchange.com/docs/filters + description: Custom API filter to apply to all requests - name: site kind: string label: Site - description: StackExchange site to extract data from documentation: https://stackexchange.com/sites + description: StackExchange site to extract data from - name: tags kind: array label: Tags - description: Tags to extract data from documentation: https://stackoverflow.com/tags + description: Tags to extract data from - name: start_date kind: integer label: Start Date - description: Start date to extract data from documentation: https://api.stackexchange.com/docs/dates + description: Start date to extract data from - name: metrics_log_level value: debug config: @@ -44,8 +45,14 @@ plugins: - singer-io site: stackoverflow.com start_date: 1651381200 + select_filter: + - questions loaders: - name: target-sqlite + variant: meltanolabs + pip_url: git+https://github.com/MeltanoLabs/target-sqlite.git config: database: ${MELTANO_PROJECT_ROOT}/output/${MELTANO_EXTRACT__LOAD_SCHEMA} - name: target-jsonl + variant: andyh1203 + pip_url: target-jsonl diff --git a/plugins/loaders/target-jsonl--andyh1203.lock b/plugins/loaders/target-jsonl--andyh1203.lock new file mode 100644 index 0000000..11fa0ba --- /dev/null +++ b/plugins/loaders/target-jsonl--andyh1203.lock @@ -0,0 +1,34 @@ +{ + "plugin_type": "loaders", + "name": "target-jsonl", + "namespace": "target_jsonl", + "variant": "andyh1203", + "label": "JSON Lines (JSONL)", + "docs": "https://hub.meltano.com/loaders/target-jsonl--andyh1203", + "repo": "https://github.com/andyh1203/target-jsonl", + "pip_url": "target-jsonl", + "description": "JSONL loader", + "logo_url": "https://hub.meltano.com/assets/logos/loaders/jsonl.png", + "settings": [ + { + "name": "destination_path", + "kind": "string", + "value": "output", + "label": "Destination Path", + "description": "Sets the destination path the JSONL files are written to, relative\nto the project root.\n\nThe directory needs to exist already, it will not be created\nautomatically.\n\nTo write JSONL files to the project root, set an empty string (`\"\"`).\n" + }, + { + "name": "do_timestamp_file", + "kind": "boolean", + "value": false, + "label": "Include Timestamp in File Names", + "description": "Specifies if the files should get timestamped.\n\nBy default, the resulting file will not have a timestamp in the file name (i.e. `exchange_rate.jsonl`).\n\nIf this option gets set to `true`, the resulting file will have a timestamp associated with it (i.e. `exchange_rate-{timestamp}.jsonl`).\n" + }, + { + "name": "custom_name", + "kind": "string", + "label": "Custom File Name Override", + "description": "Specifies a custom name for the filename, instead of the stream name.\n\nThe file name will be `{custom_name}-{timestamp}.jsonl`, if `do_timestamp_file` is `true`.\nOtherwise the file name will be `{custom_name}.jsonl`.\n\nIf custom name is not provided, the stream name will be used.\n" + } + ] +} diff --git a/plugins/loaders/target-sqlite--meltanolabs.lock b/plugins/loaders/target-sqlite--meltanolabs.lock new file mode 100644 index 0000000..0cfbe0b --- /dev/null +++ b/plugins/loaders/target-sqlite--meltanolabs.lock @@ -0,0 +1,39 @@ +{ + "plugin_type": "loaders", + "name": "target-sqlite", + "namespace": "target_sqlite", + "variant": "meltanolabs", + "label": "SQLite", + "docs": "https://hub.meltano.com/loaders/target-sqlite--meltanolabs", + "repo": "https://github.com/MeltanoLabs/target-sqlite", + "pip_url": "git+https://github.com/MeltanoLabs/target-sqlite.git", + "description": "SQLite database loader", + "logo_url": "https://hub.meltano.com/assets/logos/loaders/sqlite.png", + "settings_group_validation": [ + [ + "batch_size" + ] + ], + "settings": [ + { + "name": "database", + "value": "warehouse", + "label": "Database Name", + "description": "Name of the SQLite database file to be used or created, relative to the project root.\n\nThe `.db` extension is optional and will be added automatically when omitted.\n" + }, + { + "name": "batch_size", + "kind": "integer", + "value": 50, + "label": "Batch Size", + "description": "How many records are sent to SQLite at a time." + }, + { + "name": "timestamp_column", + "value": "__loaded_at", + "label": "Timestamp Column", + "description": "Name of the column used for recording the timestamp when data are loaded to SQLite." + } + ], + "dialect": "sqlite" +} diff --git a/poetry.lock b/poetry.lock index 6d18ca4..4fcac56 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "appdirs" @@ -1588,4 +1588,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.8" -content-hash = "bf79f39b5d1125927dc8386610ba70ef3ab2ba101f30990dc5a7e60e52ace724" +content-hash = "bd8e958026f7358cc181cf6860fed1758cf2c68f796e6d88622c8cccca3cdb9d" diff --git a/pyproject.toml b/pyproject.toml index 7e1eb61..5654cf2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ readme = "README.md" python = ">=3.8" pyrate-limiter = { version = "==3.*", python = "<4" } requests-cache = { version = "==1.*", python = "<4" } -singer-sdk = ">=0.34,<0.37" +singer-sdk = "==0.36.*" [tool.poetry.group.dev.dependencies] pytest-httpserver = { version = "^1.0.8", python = "<4" } diff --git a/tap_stackexchange/client.py b/tap_stackexchange/client.py index 01fb260..f2b358b 100644 --- a/tap_stackexchange/client.py +++ b/tap_stackexchange/client.py @@ -47,21 +47,6 @@ def has_backoff(response: requests.Response) -> bool: limiter = Limiter(rate, max_delay=100_000) -class StackExchangePaginator(BasePageNumberPaginator): - """StackExchange paginator class.""" - - def has_more(self, response: requests.Response) -> bool: - """Check if there are more pages to retrieve. - - Args: - response: HTTP response. - - Returns: - True if there are more pages to retrieve. - """ - return response.json()["has_more"] - - class StackExchangeStream(RESTStream): """StackExchange stream class.""" @@ -73,6 +58,7 @@ class StackExchangeStream(RESTStream): ] records_jsonpath = "$.items[*]" + is_sorted = True rate_limit_response_codes: t.ClassVar[list[int]] = [] @@ -195,13 +181,13 @@ def get_url_params( return params - def get_new_paginator(self) -> StackExchangePaginator: + def get_new_paginator(self) -> BasePageNumberPaginator: """Return a new paginator instance. Returns: Paginator instance. """ - return StackExchangePaginator(start_value=1) + return BasePageNumberPaginator(start_value=1) class TagPartitionedStream(StackExchangeStream):