Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Source Salesforce: Handling 400 error when using BULK API #8206

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@
- name: Salesforce
sourceDefinitionId: b117307c-14b6-41aa-9422-947e34922962
dockerRepository: airbyte/source-salesforce
dockerImageTag: 0.1.6
dockerImageTag: 0.1.7
documentationUrl: https://docs.airbyte.io/integrations/sources/salesforce
icon: salesforce.svg
sourceType: api
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5339,7 +5339,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-salesforce:0.1.6"
- dockerImage: "airbyte/source-salesforce:0.1.7"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/sources/salesforce"
connectionSpecification:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ COPY source_salesforce ./source_salesforce
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.1.6
LABEL io.airbyte.version=0.1.7
LABEL io.airbyte.name=airbyte/source-salesforce
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ tests:
basic_read:
- config_path: "secrets/config.json"
configured_catalog_path: "integration_tests/configured_catalog_rest.json"
timeout_seconds: 600
- config_path: "secrets/config_bulk.json"
configured_catalog_path: "integration_tests/configured_catalog_bulk.json"
timeout_seconds: 600
incremental:
- config_path: "secrets/config.json"
configured_catalog_path: "integration_tests/configured_catalog_rest.json"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,10 @@ def create_stream_job(self, query: str, url: str) -> Optional[str]:
except exceptions.HTTPError as error:
if error.response.status_code in [codes.FORBIDDEN, codes.BAD_REQUEST]:
error_data = error.response.json()[0]
if error_data.get("message", "") == "Selecting compound data not supported in Bulk Query":
if (error_data.get("message", "") == "Selecting compound data not supported in Bulk Query") or (
error_data.get("errorCode", "") == "INVALIDENTITY"
and "is not supported by the Bulk API" in error_data.get("message", "")
):
self.logger.error(
f"Cannot receive data for stream '{self.name}' using BULK API, error message: '{error_data.get('message')}'"
)
Expand Down Expand Up @@ -184,7 +187,7 @@ def wait_for_job(self, url: str) -> str:
self.logger.warning(f"Not wait the {self.name} data for {self._wait_timeout} minutes, data: {job_info}!!")
return job_status

def execute_job(self, query: Mapping[str, Any], url: str) -> str:
def execute_job(self, query: str, url: str) -> str:
job_status = "Failed"
for i in range(0, self.MAX_RETRY_NUMBER):
job_id = self.create_stream_job(query=query, url=url)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,28 @@ def test_bulk_sync_creation_failed(stream_bulk_config, stream_bulk_api):
assert err.value.response.json()[0]["message"] == "test_error"


def test_bulk_sync_unsupported_stream(stream_bulk_config, stream_bulk_api, caplog):
    """A 400 INVALIDENTITY response on BULK job creation must not crash the sync:
    the stream yields nothing and an ERROR record is written to the log instead."""
    stream_name = "AcceptedEventRelation"
    stream: BulkIncrementalSalesforceStream = _generate_stream(stream_name, stream_bulk_config, stream_bulk_api)
    error_payload = [{"errorCode": "INVALIDENTITY", "message": f"Entity '{stream_name}' is not supported by the Bulk API."}]
    with requests_mock.Mocker() as mocked:
        # Simulate Salesforce rejecting the BULK job creation for this entity.
        mocked.register_uri("POST", stream.path(), status_code=400, json=error_payload)
        list(stream.read_records(sync_mode=SyncMode.full_refresh))

    captured = caplog.records

    assert captured
    # The second record is the stream-level error emitted by create_stream_job.
    error_record = captured[1]
    assert error_record.levelname == "ERROR"
    expected_message = (
        f"Cannot receive data for stream '{stream_name}' using BULK API, "
        f"error message: 'Entity '{stream_name}' is not supported by the Bulk API.'"
    )
    assert error_record.msg == expected_message


@pytest.mark.parametrize("item_number", [0, 15, 2000, 2324, 193434])
def test_bulk_sync_pagination(item_number, stream_bulk_config, stream_bulk_api):
stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_bulk_config, stream_bulk_api)
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/salesforce.md
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ List of available streams:
| Version | Date | Pull Request | Subject |
| :--- | :--- | :--- | :--- |
| 0.1.7 | 2021-11-24 | [8206](https://github.com/airbytehq/airbyte/pull/8206) | Handling 400 error when trying to create a job for sync using Bulk API. |
| 0.1.6 | 2021-11-16 | [8009](https://github.com/airbytehq/airbyte/pull/8009) | Fix retrying of BULK jobs |
| 0.1.5 | 2021-11-15 | [7885](https://github.com/airbytehq/airbyte/pull/7885) | Add `Transform` for output records |
| 0.1.4 | 2021-11-09 | [7778](https://github.com/airbytehq/airbyte/pull/7778) | Fix types for `anyType` fields |
Expand Down