From c2b64ee6a945334aa4691e3df0ba93e3809807f0 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Wed, 1 May 2024 17:08:16 +0300 Subject: [PATCH 01/27] migrated product_variants to BULK --- .../integration_tests/state.json | 35 +- .../connectors/source-shopify/metadata.yaml | 2 +- .../connectors/source-shopify/pyproject.toml | 2 +- .../schemas/product_variants.json | 10 +- .../shopify_graphql/bulk/query.py | 359 +++++++++++++++++- .../source_shopify/streams/streams.py | 16 +- 6 files changed, 369 insertions(+), 55 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/state.json b/airbyte-integrations/connectors/source-shopify/integration_tests/state.json index c29ae675c6c51..3142eac4e37fd 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/state.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/state.json @@ -144,9 +144,6 @@ } }, "metafield_customers": { - "customers": { - "updated_at": "2023-04-24T06:53:48-07:00" - }, "updated_at": "2023-04-13T04:50:10-07:00" }, "metafield_orders": { @@ -165,48 +162,18 @@ "updated_at": "2023-04-24T07:18:06-07:00" }, "metafield_products": { - "products": { - "updated_at": "2023-04-20T04:12:59-07:00", - "deleted": { - "deleted_at": "" - } - }, "updated_at": "2023-04-14T04:04:46-07:00" }, "product_images": { - "products": { - "updated_at": "2023-04-24T11:05:13-07:00", - "deleted": { - "deleted_at": "2023-09-05T13:32:22-07:00" - } - }, "updated_at": "2023-04-24T10:27:15-07:00" }, "metafield_product_images": { - "products": { - "updated_at": "", - "deleted": { - "deleted_at": "2023-09-05T13:32:22-07:00" - } - }, "updated_at": "2023-04-24T10:32:19-07:00" }, "product_variants": { - "id": 42778150305981, - "products": { - "updated_at": "", - "deleted": { - "deleted_at": "2023-09-05T13:32:22-07:00" - } - } + "updated_at": "2023-12-11T10:37:41+00:00" }, "metafield_product_variants": { - "products": { - "updated_at": "", - "deleted": { - "deleted_at": "2023-09-05T13:32:22-07:00" - } - }, "updated_at": "2023-04-14T03:29:27-07:00" }, "collections": { diff --git a/airbyte-integrations/connectors/source-shopify/metadata.yaml b/airbyte-integrations/connectors/source-shopify/metadata.yaml index f09ebaea243d4..18f8028e76f18 100644 --- a/airbyte-integrations/connectors/source-shopify/metadata.yaml +++ b/airbyte-integrations/connectors/source-shopify/metadata.yaml @@ -11,7 +11,7 @@ data: connectorSubtype: api connectorType: source definitionId: 9da77001-af33-4bcd-be46-6252bf9342b9 - dockerImageTag: 2.0.6 + dockerImageTag: 2.0.7 dockerRepository: airbyte/source-shopify documentationUrl: https://docs.airbyte.com/integrations/sources/shopify githubIssueLabel: source-shopify diff --git a/airbyte-integrations/connectors/source-shopify/pyproject.toml b/airbyte-integrations/connectors/source-shopify/pyproject.toml index d72d3a5277bcb..bc689ba801893 100644 --- a/airbyte-integrations/connectors/source-shopify/pyproject.toml +++ b/airbyte-integrations/connectors/source-shopify/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "2.0.6" +version = "2.0.7" name = "source-shopify" description = "Source CDK implementation for Shopify." authors = [ "Airbyte ",] diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/product_variants.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/product_variants.json index 9471121a99888..d731c212ac578 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/product_variants.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/product_variants.json @@ -93,7 +93,15 @@ } }, "compare_at_price": { - "type": ["null", "number"] + "type": ["null", "object"], + "properties": { + "amount": { + "type": ["null", "number"] + }, + "currency_code": { + "type": ["null", "string"] + } + } } } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py index 0b01e4e787d83..25b7ac564d318 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py @@ -179,7 +179,7 @@ def resolve(self, query: Query) -> str: # return the constructed query operation return Operation(type="", queries=[query]).render() - def record_process_components(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + def record_process_components(self, record: MutableMapping[str, Any]) -> Iterable[MutableMapping[str, Any]]: """ Defines how to process collected components, default `as is`. """ @@ -281,7 +281,7 @@ def query_nodes(self) -> List[Field]: elif isinstance(self.type.value, str): return ["__typename", "id", metafield_node] - def record_process_components(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + def record_process_components(self, record: MutableMapping[str, Any]) -> Iterable[MutableMapping[str, Any]]: # resolve parent id from `str` to `int` record["owner_id"] = self.tools.resolve_str_id(record.get(BULK_PARENT_KEY)) # add `owner_resource` field @@ -670,7 +670,7 @@ class DiscountCode(ShopifyBulkQuery): "record_components": ["DiscountRedeemCode"], } - def record_process_components(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + def record_process_components(self, record: MutableMapping[str, Any]) -> Optional[Iterable[MutableMapping[str, Any]]]: """ Defines how to process collected components. """ @@ -751,7 +751,7 @@ class Collection(ShopifyBulkQuery): "record_components": ["CollectionPublication"], } - def record_process_components(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + def record_process_components(self, record: MutableMapping[str, Any]) -> Iterable[MutableMapping[str, Any]]: """ Defines how to process collected components. """ @@ -834,7 +834,9 @@ class CustomerAddresses(ShopifyBulkQuery): "new_record": "Customer", } - def set_default_address(self, record: MutableMapping[str, Any], address_record: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + def set_default_address( + self, record: MutableMapping[str, Any], address_record: MutableMapping[str, Any] + ) -> Iterable[MutableMapping[str, Any]]: default_address = record.get("defaultAddress", {}) # the default_address could be literal `None`, additional check is required if default_address: @@ -924,7 +926,7 @@ class InventoryItem(ShopifyBulkQuery): "new_record": "InventoryItem", } - def record_process_components(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + def record_process_components(self, record: MutableMapping[str, Any]) -> Iterable[MutableMapping[str, Any]]: """ Defines how to process collected components. """ @@ -1006,7 +1008,7 @@ def query(self, filter_query: Optional[str] = None) -> Query: additional_query_args=self.locations_query_args, ) - def record_process_components(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + def record_process_components(self, record: MutableMapping[str, Any]) -> Iterable[MutableMapping[str, Any]]: """ Defines how to process collected components. """ @@ -1310,7 +1312,7 @@ def process_merchant_request(self, record: MutableMapping[str, Any]) -> MutableM record = self.tools.fields_names_to_snake_case(record) return record - def record_process_components(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + def record_process_components(self, record: MutableMapping[str, Any]) -> Iterable[MutableMapping[str, Any]]: """ Defines how to process collected components. """ @@ -1502,3 +1504,344 @@ def record_process_components(self, record: MutableMapping[str, Any]) -> Optiona transaction["order_id"] = record.get("id") transaction["currency"] = record.get("currency") yield self.process_transaction(transaction) + + +class ProductImage(ShopifyBulkQuery): + """ + { + products( + query: "updated_at:>='2019-04-13T00:00:00+00:00' AND updated_at:<='2024-04-30T12:16:17.273363+00:00'" + sortKey: UPDATED_AT + ) { + edges { + node { + __typename + id + # THE MEDIA NODE IS NEEDED TO PROVIDE THE CURSORS + media { + edges { + node { + ... on MediaImage { + __typename + createdAt + updatedAt + image { + url + } + } + } + } + } + # THIS IS THE MAIN NODE WE WANT TO GET + images { + edges { + node { + __typename + id + height + alt: altText + src + url + width + } + } + } + } + } + } + } + """ + + query_name = "products" + sort_key = "UPDATED_AT" + + # images property fields + images_fields: List[Field] = [ + Field( + name="edges", + fields=[ + Field( + name="node", + fields=[ + "__typename", + "id", + "height", + Field(name="altText", alias="alt"), + "src", + "url", + "width", + ], + ) + ], + ) + ] + + # media fragment, contains the info about when the Image was created or updated. + media_fragment: List[InlineFragment] = [ + InlineFragment( + type="MediaImage", + fields=[ + "__typename", + "createdAt", + "updatedAt", + # fetch the `url` as the key for the later join + Field(name="image", fields=["url"]), + ], + ), + ] + + # media property fields + media_fields: List[Field] = [Field(name="edges", fields=[Field(name="node", fields=media_fragment)])] + + # main query + query_nodes: List[Field] = [ + "__typename", + "id", + Field(name="media", fields=media_fields), + Field(name="images", fields=images_fields), + ] + + record_composition = { + "new_record": "Product", + # each product could have `MediaImage` associated with the product, + # each product could have `Image` assiciated with the product and the related `MediaImage`, + # there could be multiple `MediaImage` and `Image` assigned to the product. + "record_components": ["MediaImage", "Image"], + } + + def _process_component(self, entity: List[dict]) -> List[dict]: + for item in entity: + # remove the `__parentId` from the object + if BULK_PARENT_KEY in item: + item.pop(BULK_PARENT_KEY) + # resolve the id from string + item["admin_graphql_api_id"] = item.get("id") + item["id"] = self.tools.resolve_str_id(item.get("id")) + return entity + + def _add_product_id(self, options: List[dict], product_id: Optional[int] = None) -> List[dict]: + for option in options: + # add product_id to each option + option["product_id"] = product_id if product_id else None + return options + + def _merge_with_media(self, record_components: List[dict]) -> Optional[Iterable[MutableMapping[str, Any]]]: + media = record_components.get("MediaImage", []) + images = record_components.get("Image", []) + + # Create a dictionary to map the 'url' key in images + url_map = {item["url"]: item for item in images} + + # Merge images with data from media when 'image.url' matches 'url' + for item in media: + # remove the `__parentId` from Media + if BULK_PARENT_KEY in item: + item.pop(BULK_PARENT_KEY) + + image_url = item.get("image", {}).get("url") + if image_url in url_map: + # Merge images into media + item.update(url_map.get(image_url)) + # remove lefovers + item.pop("image", None) + item.pop("url", None) + # make the `alt` None, if it's an empty str, since server sends the "" instead of Null + alt = item.get("alt") + item["alt"] = None if not alt else alt + + # return merged list of images + return media + + def _convert_datetime_to_rfc3339(self, images: List[dict]) -> MutableMapping[str, Any]: + for image in images: + image["createdAt"] = self.tools.from_iso8601_to_rfc3339(image, "createdAt") + image["updatedAt"] = self.tools.from_iso8601_to_rfc3339(image, "updatedAt") + return images + + def record_process_components(self, record: MutableMapping[str, Any]) -> Iterable[MutableMapping[str, Any]]: + """ + Defines how to process collected components. + """ + # get the joined record components collected for the record + record_components = record.get("record_components", {}) + + # process record components + if record_components: + record["images"] = self._process_component(record_components.get("Image", [])) + # add the product_id to each `Image` + record["images"] = self._add_product_id(record.get("images", []), record.get("id")) + record["images"] = self._merge_with_media(record_components) + record.pop("record_components") + # produce images records + if len(record.get("images", [])) > 0: + # convert dates from ISO-8601 to RFC-3339 + record["images"] = self._convert_datetime_to_rfc3339(record.get("images", [])) + yield from record.get("images", []) + + +class ProductVariant(ShopifyBulkQuery): + """ + { + productVariants( + query: "updated_at:>='2019-04-13T00:00:00+00:00' AND updated_at:<='2024-04-30T12:16:17.273363+00:00'" + sortKey: UPDATED_AT + ) { + edges { + node { + __typename + id + product { + product_id: id + } + title + price + sku + position + inventoryPolicy + compareAtPrice + fulfillmentService { + fulfillment_service: handle + } + inventoryManagement + createdAt + updatedAt + taxable + barcode + grams: weight + weight + weightUnit + inventoryItem { + inventory_item_id: id + } + inventoryQuantity + old_inventory_quantity: inventoryQuantity + presentmentPrices { + edges { + node { + __typename + price { + amount + currencyCode + } + compareAtPrice { + amount + currencyCode + } + } + } + } + requiresShipping + image { + image_id: id + } + } + } + } + } + """ + + query_name = "productVariants" + sort_key = "ID" + + prices_fields: List[str] = ["amount", "currencyCode"] + presentment_prices_fields: List[Field] = [ + Field( + name="edges", + fields=[ + Field( + name="node", + fields=["__typename", Field(name="price", fields=prices_fields), Field(name="compareAtPrice", fields=prices_fields)], + ) + ], + ) + ] + + # main query + query_nodes: List[Field] = [ + "__typename", + "id", + "title", + "price", + "sku", + "position", + "inventoryPolicy", + "compareAtPrice", + "inventoryManagement", + "createdAt", + "updatedAt", + "taxable", + "barcode", + "weight", + "weightUnit", + "inventoryQuantity", + "requiresShipping", + Field(name="weight", alias="grams"), + Field(name="image", fields=[Field(name="id", alias="image_id")]), + Field(name="inventoryQuantity", alias="old_inventory_quantity"), + Field(name="product", fields=[Field(name="id", alias="product_id")]), + Field(name="fulfillmentService", fields=[Field(name="handle", alias="fulfillment_service")]), + Field(name="inventoryItem", fields=[Field(name="id", alias="inventory_item_id")]), + Field(name="presentmentPrices", fields=presentment_prices_fields), + ] + + record_composition = { + "new_record": "ProductVariant", + # each `ProductVariant` could have `ProductVariantPricePair` associated with the product variant. + "record_components": ["ProductVariantPricePair"], + } + + def _process_presentment_prices(self, entity: List[dict]) -> List[dict]: + for item in entity: + # remove the `__parentId` from the object + if BULK_PARENT_KEY in item: + item.pop(BULK_PARENT_KEY) + + # these objects could be literally `Null/None` from the response, + # this is treated like a real value, so we need to assigne the correct values instead + price: Optional[Mapping[str, Any]] = item.get("price", {}) + if not price: + price = {} + # get the amount values + price_amount = price.get("amount") if price else None + # make the nested object's values up to the schema, (cast the `str` > `float`) + item["price"]["amount"] = float(price_amount) if price_amount else None + # convert field names to snake case + item["price"] = self.tools.fields_names_to_snake_case(item.get("price")) + + compare_at_price: Optional[Mapping[str, Any]] = item.get("compareAtPrice", {}) + if not compare_at_price: + compare_at_price = {} + # assign the correct value, if there is no object from response + item["compareAtPrice"] = compare_at_price + compare_at_price_amount = compare_at_price.get("amount") if compare_at_price else None + item["compareAtPrice"]["amount"] = float(compare_at_price_amount) if compare_at_price_amount else None + item["compare_at_price"] = self.tools.fields_names_to_snake_case(item["compareAtPrice"]) + # remove leftovers + item.pop("compareAtPrice", None) + + return entity + + def record_process_components(self, record: MutableMapping[str, Any]) -> Iterable[MutableMapping[str, Any]]: + """ + Defines how to process collected components. + """ + + # get the joined record components collected for the record + record_components = record.get("record_components", {}) + # process record components + if record_components: + record["presentment_prices"] = self._process_presentment_prices(record_components.get("ProductVariantPricePair", [])) + record.pop("record_components") + # unnest mandatory fields from their placeholders + record["product_id"] = self.tools.resolve_str_id(record.get("product", {}).get("product_id")) + record["fulfillment_service"] = record.get("fulfillmentService", {}).get("fulfillment_service") + record["inventory_item_id"] = self.tools.resolve_str_id(record.get("inventoryItem", {}).get("inventory_item_id")) + record["grams"] = int(record.get("grams", 0)) + # convert date-time cursors + record["createdAt"] = self.tools.from_iso8601_to_rfc3339(record, "createdAt") + record["updatedAt"] = self.tools.from_iso8601_to_rfc3339(record, "updatedAt") + # clean up the leftovers + record.pop("product", None) + record.pop("inventoryItem", None) + + yield record diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/streams.py b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/streams.py index 17643e9f774cb..e3e765c2d2717 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/streams.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/streams.py @@ -24,6 +24,8 @@ MetafieldProduct, MetafieldProductImage, MetafieldProductVariant, + ProductImage, + ProductVariant, Transaction, ) from source_shopify.shopify_graphql.graphql import get_query_products @@ -167,22 +169,16 @@ class MetafieldProducts(IncrementalShopifyGraphQlBulkStream): bulk_query: MetafieldProduct = MetafieldProduct -class ProductImages(IncrementalShopifyNestedStream): - parent_stream_class = Products - nested_entity = "images" - # add `product_id` to each nested subrecord - mutation_map = {"product_id": "id"} +class ProductImages(IncrementalShopifyGraphQlBulkStream): + bulk_query: ProductImage = ProductImage class MetafieldProductImages(IncrementalShopifyGraphQlBulkStream): bulk_query: MetafieldProductImage = MetafieldProductImage -class ProductVariants(IncrementalShopifyNestedStream): - parent_stream_class = Products - nested_entity = "variants" - # add `product_id` to each nested subrecord - mutation_map = {"product_id": "id"} +class ProductVariants(IncrementalShopifyGraphQlBulkStream): + bulk_query: ProductVariant = ProductVariant class MetafieldProductVariants(IncrementalShopifyGraphQlBulkStream): From b2f44ab168e2d7f8ca29b2eee504a49ffb78b59f Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Thu, 2 May 2024 14:09:11 +0300 Subject: [PATCH 02/27] added tests for product_images/variants --- .../source-shopify/unit_tests/conftest.py | 129 ++++++++++++++++++ .../unit_tests/graphql_bulk/test_job.py | 6 + .../source-shopify/unit_tests/test_source.py | 29 ++-- 3 files changed, 150 insertions(+), 14 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py b/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py index b235659f65d98..ca1987d9d7137 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py @@ -510,6 +510,24 @@ def filfillment_order_jsonl_content_example(): {"__typename":"FulfillmentOrderMerchantRequest","id":"gid:\/\/shopify\/FulfillmentOrderMerchantRequest\/333","message":null,"kind":"FULFILLMENT_REQUEST","requestOptions":{"notify_customer":true},"__parentId":"gid:\/\/shopify\/FulfillmentOrder\/2"}\n""" +@pytest.fixture +def product_images_jsonl_content_example(): + return """{"__typename":"Product","id":"gid:\/\/shopify\/Product\/123"} +{"__typename":"MediaImage","createdAt":"2023-01-06T18:29:17Z","updatedAt":"2023-01-06T18:29:19Z","image":{"url":"https:\/\/cdn.shopify.com\/s\/files\/1\/0580\/3317\/6765\/products\/white-t-shirt.jpg?v=1673029759"},"__parentId":"gid:\/\/shopify\/Product\/123"} +{"__typename":"Image","id":"gid:\/\/shopify\/ProductImage\/111","height":280,"alt":"","src":"https:\/\/cdn.shopify.com\/s\/files\/1\/0580\/3317\/6765\/products\/white-t-shirt.jpg?v=1673029759","url":"https:\/\/cdn.shopify.com\/s\/files\/1\/0580\/3317\/6765\/products\/white-t-shirt.jpg?v=1673029759","width":265,"__parentId":"gid:\/\/shopify\/Product\/123"} +{"__typename":"Product","id":"gid:\/\/shopify\/Product\/456"} +{"__typename":"MediaImage","createdAt":"2021-06-23T01:09:47Z","updatedAt":"2023-04-24T17:27:15Z","image":{"url":"https:\/\/cdn.shopify.com\/s\/files\/1\/0580\/3317\/6765\/products\/4-ounce-soy-candle.jpg?v=1624410587"},"__parentId":"gid:\/\/shopify\/Product\/456"} +{"__typename":"Image","id":"gid:\/\/shopify\/ProductImage\/222","height":1467,"alt":"updated_mon_24.04.2023","src":"https:\/\/cdn.shopify.com\/s\/files\/1\/0580\/3317\/6765\/products\/4-ounce-soy-candle.jpg?v=1624410587","url":"https:\/\/cdn.shopify.com\/s\/files\/1\/0580\/3317\/6765\/products\/4-ounce-soy-candle.jpg?v=1624410587","width":2200,"__parentId":"gid:\/\/shopify\/Product\/456"}\n""" + + +@pytest.fixture +def product_variants_jsonl_content_example(): + return """{"__typename":"ProductVariant","id":"gid:\/\/shopify\/ProductVariant\/123","title":"Test 234","price":"59.00","sku":"","position":3,"inventoryPolicy":"DENY","compareAtPrice":null,"inventoryManagement":"SHOPIFY","createdAt":"2023-04-14T10:29:27Z","updatedAt":"2023-10-27T16:56:39Z","taxable":true,"barcode":"","weight":0.0,"weightUnit":"GRAMS","inventoryQuantity":0,"requiresShipping":false,"grams":0.0,"image":null,"old_inventory_quantity":0,"product":{"product_id":"gid:\/\/shopify\/Product\/111"},"fulfillmentService":{"fulfillment_service":"manual"},"inventoryItem":{"inventory_item_id":"gid:\/\/shopify\/InventoryItem\/222"}} +{"__typename":"ProductVariantPricePair","price":{"amount":"59.0","currencyCode":"USD"},"compareAtPrice":null,"__parentId":"gid:\/\/shopify\/ProductVariant\/123"} +{"__typename":"ProductVariant","id":"gid:\/\/shopify\/ProductVariant\/456","title":"Test Variant","price":"113.00","sku":"123","position":4,"inventoryPolicy":"CONTINUE","compareAtPrice":"1.00","inventoryManagement":"SHOPIFY","createdAt":"2023-12-11T10:37:41Z","updatedAt":"2023-12-11T10:37:41Z","taxable":true,"barcode":"123","weight":127.0,"weightUnit":"GRAMS","inventoryQuantity":1,"requiresShipping":true,"grams":127.0,"image":null,"old_inventory_quantity":1,"product":{"product_id":"gid:\/\/shopify\/Product\/222"},"fulfillmentService":{"fulfillment_service":"manual"},"inventoryItem":{"inventory_item_id":"gid:\/\/shopify\/InventoryItem\/333"}} +{"__typename":"ProductVariantPricePair","price":{"amount":"113.0","currencyCode":"USD"},"compareAtPrice":{"amount":"1.0","currencyCode":"USD"},"__parentId":"gid:\/\/shopify\/ProductVariant\/456"}\n""" + + @pytest.fixture def inventory_items_jsonl_content_example(): return """{"__typename":"InventoryItem","id":"gid:\/\/shopify\/InventoryItem\/44871665713341","unitCost":null,"countryCodeOfOrigin":null,"harmonizedSystemCode":null,"provinceCodeOfOrigin":null,"updatedAt":"2023-04-14T10:29:27Z","createdAt":"2023-04-14T10:29:27Z","sku":"","tracked":true,"requiresShipping":false} @@ -661,6 +679,117 @@ def fulfillment_orders_response_expected_result(): } +@pytest.fixture +def product_images_response_expected_result(): + return [ + { + "created_at": "2023-01-06T18:29:17+00:00", + "updated_at": "2023-01-06T18:29:19+00:00", + "id": 111, + "height": 280, + "alt": None, + "src": "https://cdn.shopify.com/s/files/1/0580/3317/6765/products/white-t-shirt.jpg?v=1673029759", + "width": 265, + "admin_graphql_api_id": "gid://shopify/ProductImage/111", + "product_id": 123, + "shop_url": "test_shop" + }, + { + "created_at": "2021-06-23T01:09:47+00:00", + "updated_at": "2023-04-24T17:27:15+00:00", + "id": 222, + "height": 1467, + "alt": "updated_mon_24.04.2023", + "src": "https://cdn.shopify.com/s/files/1/0580/3317/6765/products/4-ounce-soy-candle.jpg?v=1624410587", + "width": 2200, + "admin_graphql_api_id": "gid://shopify/ProductImage/222", + "product_id": 456, + "shop_url": "test_shop" + } + ] + + +@pytest.fixture +def product_variants_response_expected_result(): + return [ + { + "id": 123, + "title": "Test 234", + "price": "59.00", + "sku": "", + "position": 3, + "inventory_policy": "DENY", + "compare_at_price": None, + "inventory_management": "SHOPIFY", + "created_at": "2023-04-14T10:29:27+00:00", + "updated_at": "2023-10-27T16:56:39+00:00", + "taxable": True, + "barcode": "", + "weight": 0.0, + "weight_unit": "GRAMS", + "inventory_quantity": 0, + "requires_shipping": False, + "grams": 0, + "image": None, + "old_inventory_quantity": 0, + "fulfillment_service": "manual", + "admin_graphql_api_id": "gid://shopify/ProductVariant/123", + "presentment_prices": [ + { + "price": { + "amount": 59.0, + "currency_code": "USD" + }, + "compare_at_price": { + "amount": None + } + } + ], + "product_id": 111, + "inventory_item_id": 222, + "shop_url": "test_shop" + }, + { + "id": 456, + "title": "Test Variant", + "price": "113.00", + "sku": "123", + "position": 4, + "inventory_policy": "CONTINUE", + "compare_at_price": "1.00", + "inventory_management": "SHOPIFY", + "created_at": "2023-12-11T10:37:41+00:00", + "updated_at": "2023-12-11T10:37:41+00:00", + "taxable": True, + "barcode": "123", + "weight": 127.0, + "weight_unit": "GRAMS", + "inventory_quantity": 1, + "requires_shipping": True, + "grams": 127, + "image": None, + "old_inventory_quantity": 1, + "fulfillment_service": "manual", + "admin_graphql_api_id": "gid://shopify/ProductVariant/456", + "presentment_prices": [ + { + "price": { + "amount": 113.0, + "currency_code": "USD" + }, + "compare_at_price": { + "amount": 1.0, + "currency_code": "USD" + } + } + ], + "product_id": 222, + "inventory_item_id": 333, + "shop_url": "test_shop" + } + ] + + @pytest.fixture def inventory_items_response_expected_result(): return [ diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py index 402f49469e679..eb6e898f7ed9a 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py @@ -15,6 +15,8 @@ InventoryItems, InventoryLevels, MetafieldOrders, + ProductImages, + ProductVariants, TransactionsGraphql, ) @@ -251,6 +253,8 @@ def test_job_read_file_invalid_filename(mocker, auth_config) -> None: (TransactionsGraphql, "transactions_jsonl_content_example", "transactions_response_expected_result"), (InventoryItems, "inventory_items_jsonl_content_example", "inventory_items_response_expected_result"), (InventoryLevels, "inventory_levels_jsonl_content_example", "inventory_levels_response_expected_result"), + (ProductImages, "product_images_jsonl_content_example", "product_images_response_expected_result"), + (ProductVariants, "product_variants_jsonl_content_example", "product_variants_response_expected_result"), ], ids=[ "CustomerAddress", @@ -261,6 +265,8 @@ def test_job_read_file_invalid_filename(mocker, auth_config) -> None: "TransactionsGraphql", "InventoryItems", "InventoryLevels", + "ProductImages", + "ProductVariants", ], ) def test_bulk_stream_parse_response( diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/test_source.py b/airbyte-integrations/connectors/source-shopify/unit_tests/test_source.py index 277c969517dee..18d7360bcd83e 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/test_source.py @@ -73,13 +73,14 @@ def config(basic_config) -> dict: (MetafieldProductVariants, None, "graphql.json"), (MetafieldLocations, None, "graphql.json"), (MetafieldCollections, None, "graphql.json"), + (ProductImages, None, "graphql.json"), + (ProductVariants, None, "graphql.json"), # (MetafieldSmartCollections, {"id": 123}, "smart_collections/123/metafields.json"), (MetafieldPages, {"id": 123}, "pages/123/metafields.json"), (MetafieldShops, None, "metafields.json"), # Nested Substreams - (ProductImages, None, ""), - (ProductVariants, None, ""), + (OrderRefunds, None, ""), # (Customers, None, "customers.json"), (Orders, None, "orders.json"), @@ -131,13 +132,13 @@ def test_path_with_stream_slice_param(stream, stream_slice, expected_path, confi "stream, parent_records, state_checkpoint_interval", [ ( - ProductImages, + OrderRefunds, [ - {"id": 1, "images": [{"updated_at": "2021-01-01T00:00:00+00:00"}]}, - {"id": 2, "images": [{"updated_at": "2021-02-01T00:00:00+00:00"}]}, - {"id": 3, "images": [{"updated_at": "2021-03-01T00:00:00+00:00"}]}, - {"id": 4, "images": [{"updated_at": "2021-04-01T00:00:00+00:00"}]}, - {"id": 5, "images": [{"updated_at": "2021-05-01T00:00:00+00:00"}]}, + {"id": 1, "refunds": [{"created_at": "2021-01-01T00:00:00+00:00"}]}, + {"id": 2, "refunds": [{"created_at": "2021-02-01T00:00:00+00:00"}]}, + {"id": 3, "refunds": [{"created_at": "2021-03-01T00:00:00+00:00"}]}, + {"id": 4, "refunds": [{"created_at": "2021-04-01T00:00:00+00:00"}]}, + {"id": 5, "refunds": [{"created_at": "2021-05-01T00:00:00+00:00"}]}, ], 2, ), @@ -211,17 +212,17 @@ def test_request_params(config, stream, expected) -> None: "last_record, current_state, expected", [ # no init state - ({"created_at": "2022-10-10T06:21:53-07:00"}, {}, {"created_at": "2022-10-10T06:21:53-07:00", "orders": None}), + ({"created_at": "2022-10-10T06:21:53-07:00"}, {}, {"created_at": "2022-10-10T06:21:53-07:00", "orders": {"updated_at": "", "deleted": {"deleted_at": ""}}}), # state is empty str - ({"created_at": "2022-10-10T06:21:53-07:00"}, {"created_at": ""}, {"created_at": "2022-10-10T06:21:53-07:00", "orders": None}), + ({"created_at": "2022-10-10T06:21:53-07:00"}, {"created_at": ""}, {"created_at": "2022-10-10T06:21:53-07:00", "orders": {"updated_at": "", "deleted": {"deleted_at": ""}}}), # state is None - ({"created_at": "2022-10-10T06:21:53-07:00"}, {"created_at": None}, {"created_at": "2022-10-10T06:21:53-07:00", "orders": None}), + ({"created_at": "2022-10-10T06:21:53-07:00"}, {"created_at": None}, {"created_at": "2022-10-10T06:21:53-07:00", "orders": {"updated_at": "", "deleted": {"deleted_at": ""}}}), # last rec cursor is None - ({"created_at": None}, {"created_at": None}, {"created_at": "", "orders": None}), + ({"created_at": None}, {"created_at": None}, {"created_at": "", "orders": {"updated_at": "", "deleted": {"deleted_at": ""}}}), # last rec cursor is empty str - ({"created_at": ""}, {"created_at": "null"}, {"created_at": "null", "orders": None}), + ({"created_at": ""}, {"created_at": "null"}, {"created_at": "null", "orders": {"updated_at": "", "deleted": {"deleted_at": ""}}}), # no values at all - ({}, {}, {"created_at": "", "orders": None}), + ({}, {}, {"created_at": "", "orders": {"updated_at": "", "deleted": {"deleted_at": ""}}}), ], ids=[ "no init state", From 5cf76bb8d408f81a7d929c547894b1dfe6573bfc Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Thu, 2 May 2024 14:26:46 +0300 Subject: [PATCH 03/27] updated changelog --- .../connectors/source-shopify/metadata.yaml | 10 +++++++++- docs/integrations/sources/shopify-migrations.md | 11 +++++++++++ docs/integrations/sources/shopify.md | 1 + 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-shopify/metadata.yaml b/airbyte-integrations/connectors/source-shopify/metadata.yaml index 382f93f7fa1d0..684e9d60f8502 100644 --- a/airbyte-integrations/connectors/source-shopify/metadata.yaml +++ b/airbyte-integrations/connectors/source-shopify/metadata.yaml @@ -11,7 +11,7 @@ data: connectorSubtype: api connectorType: source definitionId: 9da77001-af33-4bcd-be46-6252bf9342b9 - dockerImageTag: 2.0.7 + dockerImageTag: 2.1.0 dockerRepository: airbyte/source-shopify documentationUrl: https://docs.airbyte.com/integrations/sources/shopify githubIssueLabel: source-shopify @@ -64,6 +64,14 @@ data: "product_variants", "transactions", ] + 2.1.0: + message: + "This upgrade changes the `Product Images` and `Product Variants` streams to use `Shopify GraphQL BULK`. + More details here: https://github.com/airbytehq/airbyte/pull/37767." + upgradeDeadline: "2024-06-10" + scopedImpact: + - scopeType: stream + impactedScopes: ["product_images", "product_variants"] suggestedStreams: streams: - customers diff --git a/docs/integrations/sources/shopify-migrations.md b/docs/integrations/sources/shopify-migrations.md index 0ecf880c31ae7..1a27bf221b3c1 100644 --- a/docs/integrations/sources/shopify-migrations.md +++ b/docs/integrations/sources/shopify-migrations.md @@ -1,5 +1,16 @@ # Shopify Migration Guide +## Upgrading to 2.1.0 +This version implements `Shopify GraphQL BULK Operations` to speed up the following streams: + - `Product Images` + - `Product Variants` + +* The `Product Variants` stream now has the cursor field `updated_at`, instead of the `id`. + +### Action items required for 2.1.0 +- `Refresh Schema` + `Reset` is required for this stream after the upgrade from previous version. + + ## Upgrading to 2.0.0 This version implements `Shopify GraphQL BULK Operations` to speed up the following streams: - `Collections` diff --git a/docs/integrations/sources/shopify.md b/docs/integrations/sources/shopify.md index 11a6cef4aee14..71e8fd747b9c9 100644 --- a/docs/integrations/sources/shopify.md +++ b/docs/integrations/sources/shopify.md @@ -207,6 +207,7 @@ For all `Shopify GraphQL BULK` api requests these limitations are applied: https | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 2.1.0 | 2024-05-02 | [37767](https://github.com/airbytehq/airbyte/pull/37767) | Migrated `Product Images` and `Product Variants` to `GraphQL BULK` | | 2.0.7 | 2024-04-24 | [36660](https://github.com/airbytehq/airbyte/pull/36660) | Schema descriptions | | 2.0.6 | 2024-04-22 | [37468](https://github.com/airbytehq/airbyte/pull/37468) | Fixed one time retry for `Internal Server Error` for BULK streams | | 2.0.5 | 2024-04-03 | [36788](https://github.com/airbytehq/airbyte/pull/36788) | Added ability to dynamically adjust the size of the `slice` | From 01c7626cf6fea6646b620f1c28ec8c0aa85ea09f Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Thu, 2 May 2024 16:44:45 +0300 Subject: [PATCH 04/27] fixed price numberic value casting for product_variants --- .../source_shopify/shopify_graphql/bulk/query.py | 4 ++++ .../connectors/source-shopify/unit_tests/conftest.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py index 25b7ac564d318..c595db282ad2e 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py @@ -1832,11 +1832,15 @@ def record_process_components(self, record: MutableMapping[str, Any]) -> Iterabl if record_components: record["presentment_prices"] = self._process_presentment_prices(record_components.get("ProductVariantPricePair", [])) record.pop("record_components") + # unnest mandatory fields from their placeholders record["product_id"] = self.tools.resolve_str_id(record.get("product", {}).get("product_id")) record["fulfillment_service"] = record.get("fulfillmentService", {}).get("fulfillment_service") record["inventory_item_id"] = self.tools.resolve_str_id(record.get("inventoryItem", {}).get("inventory_item_id")) record["grams"] = int(record.get("grams", 0)) + # cast the the `price` to number, could be literally `None` + price = record.get("price") + record["price"] = float(price) if price else None # convert date-time cursors record["createdAt"] = self.tools.from_iso8601_to_rfc3339(record, "createdAt") record["updatedAt"] = self.tools.from_iso8601_to_rfc3339(record, "updatedAt") diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py b/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py index ca1987d9d7137..f3ae944e151a9 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py @@ -715,7 +715,7 @@ def product_variants_response_expected_result(): { "id": 123, "title": "Test 234", - "price": "59.00", + "price": 59.00, "sku": "", "position": 3, "inventory_policy": "DENY", @@ -752,7 +752,7 @@ def product_variants_response_expected_result(): { "id": 456, "title": "Test Variant", - "price": "113.00", + "price": 113.00, "sku": "123", "position": 4, "inventory_policy": "CONTINUE", From 953a756c781ad727963d531dea3a7bdd3e9bb371 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Thu, 2 May 2024 17:06:03 +0300 Subject: [PATCH 05/27] Added the Products stream migration --- .../connectors/source-shopify/metadata.yaml | 4 +- .../connectors/source-shopify/poetry.lock | 31 ++--- .../connectors/source-shopify/pyproject.toml | 2 +- .../shopify_graphql/bulk/query.py | 120 ++++++++++++++++++ .../source_shopify/streams/streams.py | 7 +- .../source-shopify/unit_tests/conftest.py | 55 ++++++++ .../unit_tests/test_deleted_events_stream.py | 42 +++--- .../source-shopify/unit_tests/test_source.py | 9 +- .../sources/shopify-migrations.md | 3 +- docs/integrations/sources/shopify.md | 2 +- 10 files changed, 225 insertions(+), 50 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/metadata.yaml b/airbyte-integrations/connectors/source-shopify/metadata.yaml index 684e9d60f8502..caf37588b64ab 100644 --- a/airbyte-integrations/connectors/source-shopify/metadata.yaml +++ b/airbyte-integrations/connectors/source-shopify/metadata.yaml @@ -66,12 +66,12 @@ data: ] 2.1.0: message: - "This upgrade changes the `Product Images` and `Product Variants` streams to use `Shopify GraphQL BULK`. + "This upgrade changes the `Products`, `Product Images` and `Product Variants` streams to use `Shopify GraphQL BULK`. More details here: https://github.com/airbytehq/airbyte/pull/37767." upgradeDeadline: "2024-06-10" scopedImpact: - scopeType: stream - impactedScopes: ["product_images", "product_variants"] + impactedScopes: ["product_variants"] suggestedStreams: streams: - customers diff --git a/airbyte-integrations/connectors/source-shopify/poetry.lock b/airbyte-integrations/connectors/source-shopify/poetry.lock index b33eac57dfb5b..422b97e2a0d89 100644 --- a/airbyte-integrations/connectors/source-shopify/poetry.lock +++ b/airbyte-integrations/connectors/source-shopify/poetry.lock @@ -278,13 +278,13 @@ files = [ [[package]] name = "exceptiongroup" -version = "1.2.0" +version = "1.2.1" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, - {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, ] [package.extras] @@ -532,28 +532,29 @@ pytzdata = ">=2020.1" [[package]] name = "platformdirs" -version = "4.2.0" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +version = "4.2.1" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" files = [ - {file = "platformdirs-4.2.0-py3-none-any.whl", hash = "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068"}, - {file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"}, + {file = "platformdirs-4.2.1-py3-none-any.whl", hash = "sha256:17d5a1161b3fd67b390023cb2d3b026bbd40abde6fdb052dfbd3a29c3ba22ee1"}, + {file = "platformdirs-4.2.1.tar.gz", hash = "sha256:031cd18d4ec63ec53e82dceaac0417d218a6863f7745dfcc9efe7793b7039bdf"}, ] [package.extras] docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] +type = ["mypy (>=1.8)"] [[package]] name = "pluggy" -version = "1.4.0" +version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" files = [ - {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, - {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, ] [package.extras] @@ -670,13 +671,13 @@ files = [ [[package]] name = "pytest" -version = "8.1.1" +version = "8.2.0" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.1.1-py3-none-any.whl", hash = "sha256:2a8386cfc11fa9d2c50ee7b2a57e7d898ef90470a7a34c4b949ff59662bb78b7"}, - {file = "pytest-8.1.1.tar.gz", hash = "sha256:ac978141a75948948817d360297b7aae0fcb9d6ff6bc9ec6d514b85d5a65c044"}, + {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"}, + {file = "pytest-8.2.0.tar.gz", hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"}, ] [package.dependencies] @@ -684,11 +685,11 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" -pluggy = ">=1.4,<2.0" +pluggy = ">=1.5,<2.0" tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] -testing = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-mock" diff --git a/airbyte-integrations/connectors/source-shopify/pyproject.toml b/airbyte-integrations/connectors/source-shopify/pyproject.toml index 5bc45647339f4..bf22a128ceac1 100644 --- a/airbyte-integrations/connectors/source-shopify/pyproject.toml +++ b/airbyte-integrations/connectors/source-shopify/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "2.0.7" +version = "2.1.0" name = "source-shopify" description = "Source CDK implementation for Shopify." authors = [ "Airbyte ",] diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py index c595db282ad2e..6b20535eeda51 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py @@ -1506,6 +1506,126 @@ def record_process_components(self, record: MutableMapping[str, Any]) -> Optiona yield self.process_transaction(transaction) +class Product(ShopifyBulkQuery): + """ + { + products(query: "updated_at:>='2020-01-20T00:00:00+00:00' AND updated_at:<'2024-04-25T00:00:00+00:00'", sortKey:UPDATED_AT) { + edges { + node { + __typename + id + publishedAt + createdAt + status + vendor + updatedAt + bodyHtml + productType + tags + options { + __typename + id + values + position + } + handle + images { + edges { + node { + __typename + id + } + } + + } + templateSuffix + title + variants { + edges { + node { + __typename + id + } + } + } + } + } + } + } + """ + + query_name = "products" + sort_key = "UPDATED_AT" + # images property fields + images_fields: List[Field] = [Field(name="edges", fields=[Field(name="node", fields=["__typename", "id"])])] + # variants property fields, we re-use the same field names as for the `images` property + variants_fields: List[Field] = images_fields + # main query + query_nodes: List[Field] = [ + "__typename", + "id", + "publishedAt", + "createdAt", + "status", + "vendor", + "updatedAt", + "bodyHtml", + "productType", + "tags", + "handle", + "templateSuffix", + "title", + Field(name="options", fields=["id", "name", "values", "position"]), + Field(name="images", fields=images_fields), + Field(name="variants", fields=variants_fields), + ] + + record_composition = { + "new_record": "Product", + # each product could have `Image` and `ProductVariant` associated with the product + "record_components": ["Image", "ProductVariant"], + } + + def _process_component(self, entity: List[dict]) -> List[dict]: + for item in entity: + # remove the `__parentId` from the object + if BULK_PARENT_KEY in item: + item.pop(BULK_PARENT_KEY) + # resolve the id from string + item["id"] = self.tools.resolve_str_id(item.get("id")) + return entity + + def _process_options(self, options: List[dict], product_id: Optional[int] = None) -> List[dict]: + for option in options: + # add product_id to each option + option["product_id"] = product_id if product_id else None + return options + + def record_process_components(self, record: MutableMapping[str, Any]) -> Iterable[MutableMapping[str, Any]]: + """ + Defines how to process collected components. + """ + # get the joined record components collected for the record + record_components = record.get("record_components", {}) + + # process record components + if record_components: + record["images"] = self._process_component(record_components.get("Image", [])) + record["variants"] = self._process_component(record_components.get("ProductVariant", [])) + record["options"] = self._process_component(record.get("options", [])) + # add the product_id to the `options` + product_id = record.get("id") + record["options"] = self._process_options(record.get("options", []), product_id) + record.pop("record_components") + + # convert dates from ISO-8601 to RFC-3339 + record["published_at"] = self.tools.from_iso8601_to_rfc3339(record, "publishedAt") + record["updatedAt"] = self.tools.from_iso8601_to_rfc3339(record, "updatedAt") + record["createdAt"] = self.tools.from_iso8601_to_rfc3339(record, "createdAt") + + yield record + + class ProductImage(ShopifyBulkQuery): """ { diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/streams.py b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/streams.py index e3e765c2d2717..c5854cb3a7c6e 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/streams.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/streams.py @@ -24,6 +24,7 @@ MetafieldProduct, MetafieldProductImage, MetafieldProductVariant, + Product, ProductImage, ProductVariant, Transaction, @@ -110,10 +111,8 @@ class MetafieldDraftOrders(IncrementalShopifyGraphQlBulkStream): bulk_query: MetafieldDraftOrder = MetafieldDraftOrder -class Products(IncrementalShopifyStreamWithDeletedEvents): - use_cache = True - data_field = "products" - deleted_events_api_name = "Product" +class Products(IncrementalShopifyGraphQlBulkStream): + bulk_query: Product = Product class ProductsGraphQl(IncrementalShopifyStream): diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py b/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py index f3ae944e151a9..8f9cce4a25a6f 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py @@ -510,6 +510,14 @@ def filfillment_order_jsonl_content_example(): {"__typename":"FulfillmentOrderMerchantRequest","id":"gid:\/\/shopify\/FulfillmentOrderMerchantRequest\/333","message":null,"kind":"FULFILLMENT_REQUEST","requestOptions":{"notify_customer":true},"__parentId":"gid:\/\/shopify\/FulfillmentOrder\/2"}\n""" +@pytest.fixture +def products_jsonl_content_example(): + return """{"__typename":"Product","id":"gid:\/\/shopify\/Product\/123","publishedAt":"2021-06-23T01:09:29Z","createdAt":"2021-06-23T01:09:29Z","status":"ACTIVE","vendor":"Blanda, O'Kon and Bartell","updatedAt":"2023-04-20T11:12:26Z","bodyHtml":"Gold and silver glitter iPhone 7 cases with geometric line patterns, stacked","productType":"Music","tags":["developer-tools-generator"],"handle":"gold-silver-iphone-7-case","templateSuffix":null,"title":"Gold Silver iPhone 7 Case","options":[{"id":"gid:\/\/shopify\/ProductOption\/444","name":"Title","values":["Plastic","indigo"],"position":1}]} +{"__typename":"Image","id":"gid:\/\/shopify\/ProductImage\/111","__parentId":"gid:\/\/shopify\/Product\/123"} +{"__typename":"ProductVariant","id":"gid:\/\/shopify\/ProductVariant\/111","__parentId":"gid:\/\/shopify\/Product\/123"} +{"__typename":"ProductVariant","id":"gid:\/\/shopify\/ProductVariant\/222","__parentId":"gid:\/\/shopify\/Product\/123"}\n""" + + @pytest.fixture def product_images_jsonl_content_example(): return """{"__typename":"Product","id":"gid:\/\/shopify\/Product\/123"} @@ -679,6 +687,53 @@ def fulfillment_orders_response_expected_result(): } +@pytest.fixture +def products_response_expected_result(): + return { + "id": 123, + "published_at": "2021-06-23T01:09:29+00:00", + "created_at": "2021-06-23T01:09:29+00:00", + "status": "ACTIVE", + "vendor": "Blanda, O'Kon and Bartell", + "updated_at": "2023-04-20T11:12:26+00:00", + "body_html": "Gold and silver glitter iPhone 7 cases with geometric line patterns, stacked", + "product_type": "Music", + "tags": [ + "developer-tools-generator" + ], + "handle": "gold-silver-iphone-7-case", + "template_suffix": None, + "title": "Gold Silver iPhone 7 Case", + "options": [ + { + "id": 444, + "name": "Title", + "values": [ + "Plastic", + "indigo" + ], + "position": 1, + "product_id": 123 + } + ], + "admin_graphql_api_id": "gid://shopify/Product/123", + "images": [ + { + "id": 111 + } + ], + "variants": [ + { + "id": 111 + }, + { + "id": 222 + } + ], + "shop_url": "test_shop" + } + + @pytest.fixture def product_images_response_expected_result(): return [ diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/test_deleted_events_stream.py b/airbyte-integrations/connectors/source-shopify/unit_tests/test_deleted_events_stream.py index 126d28b7e66d3..8046d2aad9286 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/test_deleted_events_stream.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/test_deleted_events_stream.py @@ -6,7 +6,7 @@ import pytest from source_shopify.auth import ShopifyAuthenticator from source_shopify.streams.base_streams import ShopifyDeletedEventsStream -from source_shopify.streams.streams import Products +from source_shopify.streams.streams import CustomCollections @pytest.fixture @@ -19,7 +19,7 @@ def config(basic_config): @pytest.mark.parametrize( "stream,expected_main_path,expected_events_path", [ - (Products, "products.json", "events.json"), + (CustomCollections, "custom_collections.json", "events.json"), ], ) def test_path(stream, expected_main_path, expected_events_path, config) -> None: @@ -33,7 +33,7 @@ def test_path(stream, expected_main_path, expected_events_path, config) -> None: @pytest.mark.parametrize( "stream,expected_events_schema", [ - (Products, {}), + (CustomCollections, {}), ], ) def test_get_json_schema(stream, expected_events_schema, config) -> None: @@ -46,7 +46,7 @@ def test_get_json_schema(stream, expected_events_schema, config) -> None: @pytest.mark.parametrize( "stream,expected_data_field,expected_pk,expected_cursor_field", [ - (Products, "events", "id", "deleted_at"), + (CustomCollections, "events", "id", "deleted_at"), ], ) def test_has_correct_instance_vars(stream, expected_data_field, expected_pk, expected_cursor_field, config) -> None: @@ -59,7 +59,7 @@ def test_has_correct_instance_vars(stream, expected_data_field, expected_pk, exp @pytest.mark.parametrize( "stream,expected", [ - (Products, None), + (CustomCollections, None), ], ) def test_has_no_availability_strategy(stream, expected, config) -> None: @@ -72,13 +72,13 @@ def test_has_no_availability_strategy(stream, expected, config) -> None: "stream,deleted_records_json,expected", [ ( - Products, + CustomCollections, [ { "id": 123, "subject_id": 234, "created_at": "2023-09-05T14:02:00-07:00", - "subject_type": "Product", + "subject_type": "Collection", "verb": "destroy", "arguments": [], "message": "Test Message", @@ -92,7 +92,7 @@ def test_has_no_availability_strategy(stream, expected, config) -> None: "id": 123, "subject_id": 234, "created_at": "2023-09-05T14:02:00-07:00", - "subject_type": "Product", + "subject_type": "Collection", "verb": "destroy", "arguments": [], "message": "Test Message", @@ -116,13 +116,13 @@ def test_read_deleted_records(stream, requests_mock, deleted_records_json, expec "stream,input,expected", [ ( - Products, + CustomCollections, [ { "id": 123, "subject_id": 234, "created_at": "2023-09-05T14:02:00-07:00", - "subject_type": "Product", + "subject_type": "Collection", "verb": "destroy", "arguments": [], "message": "Test Message", @@ -155,23 +155,23 @@ def test_produce_deleted_records_from_events(stream, input, expected, config) -> [ # params with NO STATE ( - Products, + CustomCollections, {}, None, {"limit": 250, "order": "updated_at asc", "updated_at_min": "2020-11-01"}, - {"filter": "Product", "verb": "destroy"}, + {"filter": "Collection", "verb": "destroy"}, ), # params with STATE ( - Products, + CustomCollections, {"updated_at": "2028-01-01", "deleted": {"deleted_at": "2029-01-01"}}, None, {"limit": 250, "order": "updated_at asc", "updated_at_min": "2028-01-01"}, - {"created_at_min": "2029-01-01", "filter": "Product", "verb": "destroy"}, + {"created_at_min": "2029-01-01", "filter": "Collection", "verb": "destroy"}, ), # params with NO STATE but with NEXT_PAGE_TOKEN ( - Products, + CustomCollections, {}, {"page_info": "next_page_token"}, {"limit": 250, "page_info": "next_page_token"}, @@ -188,7 +188,7 @@ def test_request_params(config, stream, stream_state, next_page_token, expected_ @pytest.mark.parametrize( "stream,expected", [ - (Products, ShopifyDeletedEventsStream), + (CustomCollections, ShopifyDeletedEventsStream), ], ) def test_deleted_events_instance(stream, config, expected) -> None: @@ -199,7 +199,7 @@ def test_deleted_events_instance(stream, config, expected) -> None: @pytest.mark.parametrize( "stream,expected", [ - (Products, ""), + (CustomCollections, ""), ], ) def test_default_deleted_state_comparison_value(stream, config, expected) -> None: @@ -212,28 +212,28 @@ def test_default_deleted_state_comparison_value(stream, config, expected) -> Non [ # NO INITIAL STATE ( - Products, + CustomCollections, {"id": 1, "updated_at": "2021-01-01"}, {}, {"updated_at": "2021-01-01", "deleted": {"deleted_at": ""}}, ), # with INITIAL STATE ( - Products, + CustomCollections, {"id": 1, "updated_at": "2022-01-01"}, {"updated_at": "2021-01-01", "deleted": {"deleted_at": ""}}, {"updated_at": "2022-01-01", "deleted": {"deleted_at": ""}}, ), # with NO Last Record value and NO current state value ( - Products, + CustomCollections, {}, {}, {"updated_at": "", "deleted": {"deleted_at": ""}}, ), # with NO Last Record value but with Current state value ( - Products, + CustomCollections, {}, {"updated_at": "2030-01-01", "deleted": {"deleted_at": ""}}, {"updated_at": "2030-01-01", "deleted": {"deleted_at": ""}}, diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/test_source.py b/airbyte-integrations/connectors/source-shopify/unit_tests/test_source.py index 18d7360bcd83e..72f5029897529 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/test_source.py @@ -73,19 +73,18 @@ def config(basic_config) -> dict: (MetafieldProductVariants, None, "graphql.json"), (MetafieldLocations, None, "graphql.json"), (MetafieldCollections, None, "graphql.json"), + (Products, None, "graphql.json"), (ProductImages, None, "graphql.json"), (ProductVariants, None, "graphql.json"), - # + # Nested Substreams + (OrderRefunds, None, ""), + # regular streams (MetafieldSmartCollections, {"id": 123}, "smart_collections/123/metafields.json"), (MetafieldPages, {"id": 123}, "pages/123/metafields.json"), (MetafieldShops, None, "metafields.json"), - # Nested Substreams - (OrderRefunds, None, ""), - # (Customers, None, "customers.json"), (Orders, None, "orders.json"), (DraftOrders, None, "draft_orders.json"), - (Products, None, "products.json"), (AbandonedCheckouts, None, "checkouts.json"), (Collects, None, "collects.json"), (TenderTransactions, None, "tender_transactions.json"), diff --git a/docs/integrations/sources/shopify-migrations.md b/docs/integrations/sources/shopify-migrations.md index 1a27bf221b3c1..cea82229afd42 100644 --- a/docs/integrations/sources/shopify-migrations.md +++ b/docs/integrations/sources/shopify-migrations.md @@ -2,8 +2,9 @@ ## Upgrading to 2.1.0 This version implements `Shopify GraphQL BULK Operations` to speed up the following streams: + - `Products` - `Product Images` - - `Product Variants` + - `Product Variants`* * The `Product Variants` stream now has the cursor field `updated_at`, instead of the `id`. diff --git a/docs/integrations/sources/shopify.md b/docs/integrations/sources/shopify.md index 71e8fd747b9c9..589b471de46f4 100644 --- a/docs/integrations/sources/shopify.md +++ b/docs/integrations/sources/shopify.md @@ -207,7 +207,7 @@ For all `Shopify GraphQL BULK` api requests these limitations are applied: https | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 2.1.0 | 2024-05-02 | [37767](https://github.com/airbytehq/airbyte/pull/37767) | Migrated `Product Images` and `Product Variants` to `GraphQL BULK` | +| 2.1.0 | 2024-05-02 | [37767](https://github.com/airbytehq/airbyte/pull/37767) | Migrated `Products`, `Product Images` and `Product Variants` to `GraphQL BULK` | | 2.0.7 | 2024-04-24 | [36660](https://github.com/airbytehq/airbyte/pull/36660) | Schema descriptions | | 2.0.6 | 2024-04-22 | [37468](https://github.com/airbytehq/airbyte/pull/37468) | Fixed one time retry for `Internal Server Error` for BULK streams | | 2.0.5 | 2024-04-03 | [36788](https://github.com/airbytehq/airbyte/pull/36788) | Added ability to dynamically adjust the size of the `slice` | From bd17a5ad5d3effbf66cf866deba5fbe385e96114 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Thu, 2 May 2024 18:24:38 +0300 Subject: [PATCH 06/27] updated the tags for the products stream --- .../source_shopify/shopify_graphql/bulk/query.py | 9 ++++++++- .../connectors/source-shopify/unit_tests/conftest.py | 4 +--- .../source-shopify/unit_tests/graphql_bulk/test_job.py | 3 +++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py index 6b20535eeda51..a6d5241c3bb46 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/query.py @@ -1601,6 +1601,12 @@ def _process_options(self, options: List[dict], product_id: Optional[int] = None option["product_id"] = product_id if product_id else None return options + def _unnest_tags(self, record: MutableMapping[str, Any]) -> str: + # we keep supporting 1 tag only, as it was for the REST stream, + # to avoid breaking change. + tags = record.get("tags", []) + return tags[0] if len(tags) > 0 else None + def record_process_components(self, record: MutableMapping[str, Any]) -> Iterable[MutableMapping[str, Any]]: """ Defines how to process collected components. @@ -1617,7 +1623,8 @@ def record_process_components(self, record: MutableMapping[str, Any]) -> Iterabl product_id = record.get("id") record["options"] = self._process_options(record.get("options", []), product_id) record.pop("record_components") - + # unnest the `tags` (the list of 1) + record["tags"] = self._unnest_tags(record) # convert dates from ISO-8601 to RFC-3339 record["published_at"] = self.tools.from_iso8601_to_rfc3339(record, "publishedAt") record["updatedAt"] = self.tools.from_iso8601_to_rfc3339(record, "updatedAt") diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py b/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py index 8f9cce4a25a6f..78048756c4a9e 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py @@ -698,9 +698,7 @@ def products_response_expected_result(): "updated_at": "2023-04-20T11:12:26+00:00", "body_html": "Gold and silver glitter iPhone 7 cases with geometric line patterns, stacked", "product_type": "Music", - "tags": [ - "developer-tools-generator" - ], + "tags": "developer-tools-generator", "handle": "gold-silver-iphone-7-case", "template_suffix": None, "title": "Gold Silver iPhone 7 Case", diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py index eb6e898f7ed9a..853fbc136c75e 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py @@ -16,6 +16,7 @@ InventoryLevels, MetafieldOrders, ProductImages, + Products, ProductVariants, TransactionsGraphql, ) @@ -253,6 +254,7 @@ def test_job_read_file_invalid_filename(mocker, auth_config) -> None: (TransactionsGraphql, "transactions_jsonl_content_example", "transactions_response_expected_result"), (InventoryItems, "inventory_items_jsonl_content_example", "inventory_items_response_expected_result"), (InventoryLevels, "inventory_levels_jsonl_content_example", "inventory_levels_response_expected_result"), + (Products, "products_jsonl_content_example", "products_response_expected_result"), (ProductImages, "product_images_jsonl_content_example", "product_images_response_expected_result"), (ProductVariants, "product_variants_jsonl_content_example", "product_variants_response_expected_result"), ], @@ -265,6 +267,7 @@ def test_job_read_file_invalid_filename(mocker, auth_config) -> None: "TransactionsGraphql", "InventoryItems", "InventoryLevels", + "Products", "ProductImages", "ProductVariants", ], From d206b8f6a599f802543f8c2fe04f169ad62751de Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Thu, 2 May 2024 22:14:07 +0300 Subject: [PATCH 07/27] updated abnormal values for incremental CAT, updated expected records for the product_images --- .../integration_tests/abnormal_state.json | 15 +-------------- .../integration_tests/expected_records.jsonl | 5 ++--- ...pected_records_transactions_with_user_id.jsonl | 5 ++--- .../source_shopify/streams/base_streams.py | 2 +- 4 files changed, 6 insertions(+), 21 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json index 00af9172d88dc..82c9d349ade24 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json @@ -420,13 +420,7 @@ "type": "STREAM", "stream": { "stream_state": { - "id": 99999999999999, - "products": { - "updated_at": "2027-07-11T13:07:45-07:00", - "deleted": { - "deleted_at": "2027-07-11T13:07:45-07:00" - } - } + "updated_at": "2027-07-11T13:07:45-07:00" }, "stream_descriptor": { "name": "product_images" @@ -454,13 +448,6 @@ "type": "STREAM", "stream": { "stream_state": { - "id": 99999999999999, - "products": { - "updated_at": "2027-07-11T13:07:45-07:00", - "deleted": { - "deleted_at": "2027-07-11T13:07:45-07:00" - } - }, "updated_at": "2027-07-11T13:07:45-07:00" }, "stream_descriptor": { diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-shopify/integration_tests/expected_records.jsonl index d43ad7bf03692..d4baa9840c1ff 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/expected_records.jsonl @@ -70,9 +70,8 @@ {"stream": "price_rules", "data": {"id": 1112171741373, "value_type": "fixed_amount", "value": "-10.0", "customer_selection": "all", "target_type": "line_item", "target_selection": "all", "allocation_method": "across", "allocation_limit": null, "once_per_customer": false, "usage_limit": null, "starts_at": "2017-01-19T09:59:10-08:00", "ends_at": null, "created_at": "2022-10-14T10:19:39-07:00", "updated_at": "2023-04-14T05:24:53-07:00", "entitled_product_ids": [], "entitled_variant_ids": [], "entitled_collection_ids": [], "entitled_country_ids": [], "prerequisite_product_ids": [], "prerequisite_variant_ids": [], "prerequisite_collection_ids": [], "customer_segment_prerequisite_ids": [], "prerequisite_customer_ids": [], "prerequisite_subtotal_range": null, "prerequisite_quantity_range": null, "prerequisite_shipping_price_range": null, "prerequisite_to_entitlement_quantity_ratio": {"prerequisite_quantity": null, "entitled_quantity": null}, "prerequisite_to_entitlement_purchase": {"prerequisite_amount": null}, "title": "New Title 2023", "admin_graphql_api_id": "gid://shopify/PriceRule/1112171741373", "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953899511} {"stream": "price_rules", "data": {"id": 945000284349, "value_type": "percentage", "value": "-3.0", "customer_selection": "all", "target_type": "line_item", "target_selection": "all", "allocation_method": "across", "allocation_limit": null, "once_per_customer": true, "usage_limit": 10, "starts_at": "2021-07-07T07:22:04-07:00", "ends_at": null, "created_at": "2021-07-07T07:23:11-07:00", "updated_at": "2023-04-24T05:52:22-07:00", "entitled_product_ids": [], "entitled_variant_ids": [], "entitled_collection_ids": [], "entitled_country_ids": [], "prerequisite_product_ids": [], "prerequisite_variant_ids": [], "prerequisite_collection_ids": [], "customer_segment_prerequisite_ids": [], "prerequisite_customer_ids": [], "prerequisite_subtotal_range": null, "prerequisite_quantity_range": null, "prerequisite_shipping_price_range": null, "prerequisite_to_entitlement_quantity_ratio": {"prerequisite_quantity": null, "entitled_quantity": null}, "prerequisite_to_entitlement_purchase": {"prerequisite_amount": null}, "title": "1V8Z165KSH5T", "admin_graphql_api_id": "gid://shopify/PriceRule/945000284349", "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953899512} {"stream": "price_rules", "data": {"id": 945205379261, "value_type": "percentage", "value": "-100.0", "customer_selection": "all", "target_type": "shipping_line", "target_selection": "all", "allocation_method": "each", "allocation_limit": null, "once_per_customer": false, "usage_limit": null, "starts_at": "2021-07-08T05:40:13-07:00", "ends_at": "2024-01-01T23:59:59-08:00", "created_at": "2021-07-08T05:40:37-07:00", "updated_at": "2023-12-07T03:40:44-08:00", "entitled_product_ids": [], "entitled_variant_ids": [], "entitled_collection_ids": [], "entitled_country_ids": [], "prerequisite_product_ids": [], "prerequisite_variant_ids": [], "prerequisite_collection_ids": [], "customer_segment_prerequisite_ids": [], "prerequisite_customer_ids": [], "prerequisite_subtotal_range": {"greater_than_or_equal_to": "1.0"}, "prerequisite_quantity_range": null, "prerequisite_shipping_price_range": null, "prerequisite_to_entitlement_quantity_ratio": {"prerequisite_quantity": null, "entitled_quantity": null}, "prerequisite_to_entitlement_purchase": {"prerequisite_amount": null}, "title": "HZAVNV2487WC", "admin_graphql_api_id": "gid://shopify/PriceRule/945205379261", "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953899513} -{"stream": "product_images", "data": {"id": 29301295481021, "alt": null, "position": 1, "product_id": 6796218138813, "created_at": "2021-06-22T18:09:28-07:00", "updated_at": "2021-06-22T18:09:28-07:00", "admin_graphql_api_id": "gid://shopify/ProductImage/29301295481021", "width": 4393, "height": 2929, "src": "https://cdn.shopify.com/s/files/1/0580/3317/6765/products/tin-of-beard-balm.jpg?v=1624410568", "variant_ids": [], "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953901149} -{"stream": "product_images", "data": {"id": 29301295513789, "alt": null, "position": 1, "product_id": 6796218269885, "created_at": "2021-06-22T18:09:29-07:00", "updated_at": "2021-06-22T18:09:29-07:00", "admin_graphql_api_id": "gid://shopify/ProductImage/29301295513789", "width": 3840, "height": 2560, "src": "https://cdn.shopify.com/s/files/1/0580/3317/6765/products/pair-of-all-black-sneakers.jpg?v=1624410569", "variant_ids": [], "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953901154} -{"stream": "product_images", "data": {"id": 29301295546557, "alt": null, "position": 1, "product_id": 6796218302653, "created_at": "2021-06-22T18:09:29-07:00", "updated_at": "2021-06-22T18:09:29-07:00", "admin_graphql_api_id": "gid://shopify/ProductImage/29301295546557", "width": 3960, "height": 2640, "src": "https://cdn.shopify.com/s/files/1/0580/3317/6765/products/red-silver-fishing-lure.jpg?v=1624410569", "variant_ids": [], "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953901155} +{"stream": "product_images", "data": {"created_at": "2023-04-14T10:34:46+00:00", "updated_at": "2023-04-14T11:05:13+00:00", "id": 33290489659581, "height": 64, "alt": "Test", "src": "https://cdn.shopify.com/s/files/1/0580/3317/6765/products/Airbytelogo64x64.png?v=1681468487", "width": 64, "admin_graphql_api_id": "gid://shopify/ProductImage/33290489659581", "product_id": 6796229574845, "shop_url": "airbyte-integration-test"}, "emitted_at": 1714673982582} +{"stream": "product_images", "data": {"created_at": "2021-06-23T01:09:47+00:00", "updated_at": "2023-04-24T17:27:15+00:00", "id": 29301297316029, "height": 1467, "alt": "updated_mon_24.04.2023", "src": "https://cdn.shopify.com/s/files/1/0580/3317/6765/products/4-ounce-soy-candle.jpg?v=1624410587", "width": 2200, "admin_graphql_api_id": "gid://shopify/ProductImage/29301297316029", "product_id": 6796220989629, "shop_url": "airbyte-integration-test"}, "emitted_at": 1714673982587} {"stream": "products", "data": {"id": 6796217909437, "title": "Red And Navy Tee Sleeve", "body_html": "Zoom in on the sleeve of a red t-shirt with navy blue trim along the sleeve. Looks like a great tennis outfit.", "vendor": "Little Group", "product_type": "Movies", "created_at": "2021-06-22T18:09:27-07:00", "handle": "red-and-navy-tee-sleeve", "updated_at": "2023-04-20T04:12:25-07:00", "published_at": "2021-06-22T18:09:27-07:00", "template_suffix": null, "published_scope": "web", "tags": "developer-tools-generator", "status": "active", "admin_graphql_api_id": "gid://shopify/Product/6796217909437", "variants": [{"id": 40090579992765, "product_id": 6796217909437, "title": "Plastic", "price": 23.0, "sku": "", "position": 1, "inventory_policy": "deny", "compare_at_price": null, "fulfillment_service": "manual", "inventory_management": "shopify", "option1": "Plastic", "option2": null, "option3": null, "created_at": "2021-06-22T18:09:27-07:00", "updated_at": "2023-10-27T09:55:54-07:00", "taxable": true, "barcode": null, "grams": 39, "weight": 39.0, "weight_unit": "g", "inventory_item_id": 42185194700989, "inventory_quantity": 3, "old_inventory_quantity": 3, "requires_shipping": true, "admin_graphql_api_id": "gid://shopify/ProductVariant/40090579992765", "image_id": null}], "options": [{"id": 8720175235261, "product_id": 6796217909437, "name": "Title", "position": 1, "values": ["Plastic"]}], "images": [], "image": null, "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953903012} {"stream": "products", "data": {"id": 6796217942205, "title": "Grey T-Shirt", "body_html": "A grey t-shirt on a hanger. Simple. Classic. Grey.", "vendor": "Lang - Bogisich", "product_type": "Home", "created_at": "2021-06-22T18:09:27-07:00", "handle": "grey-t-shirt", "updated_at": "2023-04-20T04:12:25-07:00", "published_at": "2021-06-22T18:09:27-07:00", "template_suffix": null, "published_scope": "web", "tags": "developer-tools-generator", "status": "active", "admin_graphql_api_id": "gid://shopify/Product/6796217942205", "variants": [{"id": 40090580025533, "product_id": 6796217942205, "title": "Granite", "price": 70.0, "sku": "", "position": 1, "inventory_policy": "deny", "compare_at_price": null, "fulfillment_service": "manual", "inventory_management": "shopify", "option1": "Granite", "option2": null, "option3": null, "created_at": "2021-06-22T18:09:27-07:00", "updated_at": "2023-10-27T09:55:54-07:00", "taxable": true, "barcode": null, "grams": 0, "weight": 0.0, "weight_unit": "g", "inventory_item_id": 42185194733757, "inventory_quantity": 38, "old_inventory_quantity": 38, "requires_shipping": false, "admin_graphql_api_id": "gid://shopify/ProductVariant/40090580025533", "image_id": null}], "options": [{"id": 8720175268029, "product_id": 6796217942205, "name": "Title", "position": 1, "values": ["Granite"]}], "images": [], "image": null, "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953903015} {"stream": "products", "data": {"id": 6796217974973, "title": "Pool Floaty Icecream", "body_html": "Inflatable pink ice cream pool toy.", "vendor": "Fritsch - Ferry", "product_type": "Grocery", "created_at": "2021-06-22T18:09:27-07:00", "handle": "pool-floaty-icecream", "updated_at": "2023-04-20T04:12:25-07:00", "published_at": "2021-06-22T18:09:27-07:00", "template_suffix": null, "published_scope": "web", "tags": "developer-tools-generator", "status": "active", "admin_graphql_api_id": "gid://shopify/Product/6796217974973", "variants": [{"id": 40090580091069, "product_id": 6796217974973, "title": "magenta", "price": 57.0, "sku": "", "position": 1, "inventory_policy": "deny", "compare_at_price": null, "fulfillment_service": "manual", "inventory_management": "shopify", "option1": "magenta", "option2": null, "option3": null, "created_at": "2021-06-22T18:09:27-07:00", "updated_at": "2023-10-27T09:55:54-07:00", "taxable": true, "barcode": null, "grams": 499, "weight": 499.0, "weight_unit": "g", "inventory_item_id": 42185194766525, "inventory_quantity": 1, "old_inventory_quantity": 1, "requires_shipping": true, "admin_graphql_api_id": "gid://shopify/ProductVariant/40090580091069", "image_id": null}], "options": [{"id": 8720175300797, "product_id": 6796217974973, "name": "Title", "position": 1, "values": ["magenta"]}], "images": [], "image": null, "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953903015} diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/expected_records_transactions_with_user_id.jsonl b/airbyte-integrations/connectors/source-shopify/integration_tests/expected_records_transactions_with_user_id.jsonl index 95af9f2580ce3..5b9e041c9bd3e 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/expected_records_transactions_with_user_id.jsonl +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/expected_records_transactions_with_user_id.jsonl @@ -70,9 +70,8 @@ {"stream": "price_rules", "data": {"id": 1112171741373, "value_type": "fixed_amount", "value": "-10.0", "customer_selection": "all", "target_type": "line_item", "target_selection": "all", "allocation_method": "across", "allocation_limit": null, "once_per_customer": false, "usage_limit": null, "starts_at": "2017-01-19T09:59:10-08:00", "ends_at": null, "created_at": "2022-10-14T10:19:39-07:00", "updated_at": "2023-04-14T05:24:53-07:00", "entitled_product_ids": [], "entitled_variant_ids": [], "entitled_collection_ids": [], "entitled_country_ids": [], "prerequisite_product_ids": [], "prerequisite_variant_ids": [], "prerequisite_collection_ids": [], "customer_segment_prerequisite_ids": [], "prerequisite_customer_ids": [], "prerequisite_subtotal_range": null, "prerequisite_quantity_range": null, "prerequisite_shipping_price_range": null, "prerequisite_to_entitlement_quantity_ratio": {"prerequisite_quantity": null, "entitled_quantity": null}, "prerequisite_to_entitlement_purchase": {"prerequisite_amount": null}, "title": "New Title 2023", "admin_graphql_api_id": "gid://shopify/PriceRule/1112171741373", "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953671890} {"stream": "price_rules", "data": {"id": 945000284349, "value_type": "percentage", "value": "-3.0", "customer_selection": "all", "target_type": "line_item", "target_selection": "all", "allocation_method": "across", "allocation_limit": null, "once_per_customer": true, "usage_limit": 10, "starts_at": "2021-07-07T07:22:04-07:00", "ends_at": null, "created_at": "2021-07-07T07:23:11-07:00", "updated_at": "2023-04-24T05:52:22-07:00", "entitled_product_ids": [], "entitled_variant_ids": [], "entitled_collection_ids": [], "entitled_country_ids": [], "prerequisite_product_ids": [], "prerequisite_variant_ids": [], "prerequisite_collection_ids": [], "customer_segment_prerequisite_ids": [], "prerequisite_customer_ids": [], "prerequisite_subtotal_range": null, "prerequisite_quantity_range": null, "prerequisite_shipping_price_range": null, "prerequisite_to_entitlement_quantity_ratio": {"prerequisite_quantity": null, "entitled_quantity": null}, "prerequisite_to_entitlement_purchase": {"prerequisite_amount": null}, "title": "1V8Z165KSH5T", "admin_graphql_api_id": "gid://shopify/PriceRule/945000284349", "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953671890} {"stream": "price_rules", "data": {"id": 945205379261, "value_type": "percentage", "value": "-100.0", "customer_selection": "all", "target_type": "shipping_line", "target_selection": "all", "allocation_method": "each", "allocation_limit": null, "once_per_customer": false, "usage_limit": null, "starts_at": "2021-07-08T05:40:13-07:00", "ends_at": "2024-01-01T23:59:59-08:00", "created_at": "2021-07-08T05:40:37-07:00", "updated_at": "2023-12-07T03:40:44-08:00", "entitled_product_ids": [], "entitled_variant_ids": [], "entitled_collection_ids": [], "entitled_country_ids": [], "prerequisite_product_ids": [], "prerequisite_variant_ids": [], "prerequisite_collection_ids": [], "customer_segment_prerequisite_ids": [], "prerequisite_customer_ids": [], "prerequisite_subtotal_range": {"greater_than_or_equal_to": "1.0"}, "prerequisite_quantity_range": null, "prerequisite_shipping_price_range": null, "prerequisite_to_entitlement_quantity_ratio": {"prerequisite_quantity": null, "entitled_quantity": null}, "prerequisite_to_entitlement_purchase": {"prerequisite_amount": null}, "title": "HZAVNV2487WC", "admin_graphql_api_id": "gid://shopify/PriceRule/945205379261", "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953671891} -{"stream": "product_images", "data": {"id": 29301295481021, "alt": null, "position": 1, "product_id": 6796218138813, "created_at": "2021-06-22T18:09:28-07:00", "updated_at": "2021-06-22T18:09:28-07:00", "admin_graphql_api_id": "gid://shopify/ProductImage/29301295481021", "width": 4393, "height": 2929, "src": "https://cdn.shopify.com/s/files/1/0580/3317/6765/products/tin-of-beard-balm.jpg?v=1624410568", "variant_ids": [], "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953673537} -{"stream": "product_images", "data": {"id": 29301295513789, "alt": null, "position": 1, "product_id": 6796218269885, "created_at": "2021-06-22T18:09:29-07:00", "updated_at": "2021-06-22T18:09:29-07:00", "admin_graphql_api_id": "gid://shopify/ProductImage/29301295513789", "width": 3840, "height": 2560, "src": "https://cdn.shopify.com/s/files/1/0580/3317/6765/products/pair-of-all-black-sneakers.jpg?v=1624410569", "variant_ids": [], "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953673539} -{"stream": "product_images", "data": {"id": 29301295546557, "alt": null, "position": 1, "product_id": 6796218302653, "created_at": "2021-06-22T18:09:29-07:00", "updated_at": "2021-06-22T18:09:29-07:00", "admin_graphql_api_id": "gid://shopify/ProductImage/29301295546557", "width": 3960, "height": 2640, "src": "https://cdn.shopify.com/s/files/1/0580/3317/6765/products/red-silver-fishing-lure.jpg?v=1624410569", "variant_ids": [], "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953673539} +{"stream": "product_images", "data": {"created_at": "2023-04-14T10:34:46+00:00", "updated_at": "2023-04-14T11:05:13+00:00", "id": 33290489659581, "height": 64, "alt": "Test", "src": "https://cdn.shopify.com/s/files/1/0580/3317/6765/products/Airbytelogo64x64.png?v=1681468487", "width": 64, "admin_graphql_api_id": "gid://shopify/ProductImage/33290489659581", "product_id": 6796229574845, "shop_url": "airbyte-integration-test"}, "emitted_at": 1714673982582} +{"stream": "product_images", "data": {"created_at": "2021-06-23T01:09:47+00:00", "updated_at": "2023-04-24T17:27:15+00:00", "id": 29301297316029, "height": 1467, "alt": "updated_mon_24.04.2023", "src": "https://cdn.shopify.com/s/files/1/0580/3317/6765/products/4-ounce-soy-candle.jpg?v=1624410587", "width": 2200, "admin_graphql_api_id": "gid://shopify/ProductImage/29301297316029", "product_id": 6796220989629, "shop_url": "airbyte-integration-test"}, "emitted_at": 1714673982587} {"stream": "products", "data": {"id": 6796217909437, "title": "Red And Navy Tee Sleeve", "body_html": "Zoom in on the sleeve of a red t-shirt with navy blue trim along the sleeve. Looks like a great tennis outfit.", "vendor": "Little Group", "product_type": "Movies", "created_at": "2021-06-22T18:09:27-07:00", "handle": "red-and-navy-tee-sleeve", "updated_at": "2023-04-20T04:12:25-07:00", "published_at": "2021-06-22T18:09:27-07:00", "template_suffix": null, "published_scope": "web", "tags": "developer-tools-generator", "status": "active", "admin_graphql_api_id": "gid://shopify/Product/6796217909437", "variants": [{"id": 40090579992765, "product_id": 6796217909437, "title": "Plastic", "price": 23.0, "sku": "", "position": 1, "inventory_policy": "deny", "compare_at_price": null, "fulfillment_service": "manual", "inventory_management": "shopify", "option1": "Plastic", "option2": null, "option3": null, "created_at": "2021-06-22T18:09:27-07:00", "updated_at": "2023-10-27T09:55:54-07:00", "taxable": true, "barcode": null, "grams": 39, "weight": 39.0, "weight_unit": "g", "inventory_item_id": 42185194700989, "inventory_quantity": 3, "old_inventory_quantity": 3, "requires_shipping": true, "admin_graphql_api_id": "gid://shopify/ProductVariant/40090579992765", "image_id": null}], "options": [{"id": 8720175235261, "product_id": 6796217909437, "name": "Title", "position": 1, "values": ["Plastic"]}], "images": [], "image": null, "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953675398} {"stream": "products", "data": {"id": 6796217942205, "title": "Grey T-Shirt", "body_html": "A grey t-shirt on a hanger. Simple. Classic. Grey.", "vendor": "Lang - Bogisich", "product_type": "Home", "created_at": "2021-06-22T18:09:27-07:00", "handle": "grey-t-shirt", "updated_at": "2023-04-20T04:12:25-07:00", "published_at": "2021-06-22T18:09:27-07:00", "template_suffix": null, "published_scope": "web", "tags": "developer-tools-generator", "status": "active", "admin_graphql_api_id": "gid://shopify/Product/6796217942205", "variants": [{"id": 40090580025533, "product_id": 6796217942205, "title": "Granite", "price": 70.0, "sku": "", "position": 1, "inventory_policy": "deny", "compare_at_price": null, "fulfillment_service": "manual", "inventory_management": "shopify", "option1": "Granite", "option2": null, "option3": null, "created_at": "2021-06-22T18:09:27-07:00", "updated_at": "2023-10-27T09:55:54-07:00", "taxable": true, "barcode": null, "grams": 0, "weight": 0.0, "weight_unit": "g", "inventory_item_id": 42185194733757, "inventory_quantity": 38, "old_inventory_quantity": 38, "requires_shipping": false, "admin_graphql_api_id": "gid://shopify/ProductVariant/40090580025533", "image_id": null}], "options": [{"id": 8720175268029, "product_id": 6796217942205, "name": "Title", "position": 1, "values": ["Granite"]}], "images": [], "image": null, "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953675399} {"stream": "products", "data": {"id": 6796217974973, "title": "Pool Floaty Icecream", "body_html": "Inflatable pink ice cream pool toy.", "vendor": "Fritsch - Ferry", "product_type": "Grocery", "created_at": "2021-06-22T18:09:27-07:00", "handle": "pool-floaty-icecream", "updated_at": "2023-04-20T04:12:25-07:00", "published_at": "2021-06-22T18:09:27-07:00", "template_suffix": null, "published_scope": "web", "tags": "developer-tools-generator", "status": "active", "admin_graphql_api_id": "gid://shopify/Product/6796217974973", "variants": [{"id": 40090580091069, "product_id": 6796217974973, "title": "magenta", "price": 57.0, "sku": "", "position": 1, "inventory_policy": "deny", "compare_at_price": null, "fulfillment_service": "manual", "inventory_management": "shopify", "option1": "magenta", "option2": null, "option3": null, "created_at": "2021-06-22T18:09:27-07:00", "updated_at": "2023-10-27T09:55:54-07:00", "taxable": true, "barcode": null, "grams": 499, "weight": 499.0, "weight_unit": "g", "inventory_item_id": 42185194766525, "inventory_quantity": 1, "old_inventory_quantity": 1, "requires_shipping": true, "admin_graphql_api_id": "gid://shopify/ProductVariant/40090580091069", "image_id": null}], "options": [{"id": 8720175300797, "product_id": 6796217974973, "name": "Title", "position": 1, "values": ["magenta"]}], "images": [], "image": null, "shop_url": "airbyte-integration-test"}, "emitted_at": 1708953675399} diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py index f33545e3c4493..f6f5d99724daa 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py @@ -648,7 +648,7 @@ def __init__(self, config: Dict) -> None: # overide the default job slice size, if provided (it's auto-adjusted, later on) self.bulk_window_in_days = config.get("bulk_window_in_days") if self.bulk_window_in_days: - self.job_manager.job_size = self.bulk_window_in_days + self.job_manager._job_size = self.bulk_window_in_days # define Record Producer instance self.record_producer: ShopifyBulkRecord = ShopifyBulkRecord(self.query) From 202048d261d009c2dfad2ed2502c4c4059aed147 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Fri, 3 May 2024 15:47:46 +0300 Subject: [PATCH 08/27] updated after review --- .../shopify_graphql/bulk/__init__.py | 0 .../shopify_graphql/bulk/exceptions.py | 6 +- .../shopify_graphql/bulk/job.py | 86 +++++++----- .../shopify_graphql/bulk/retry.py | 2 +- .../source_shopify/streams/base_streams.py | 6 +- .../source-shopify/unit_tests/__init__.py | 0 .../unit_tests/graphql_bulk/__init__.py | 0 .../unit_tests/graphql_bulk/test_job.py | 126 +++++++++--------- .../source-shopify/unit_tests/test_source.py | 3 +- 9 files changed, 126 insertions(+), 103 deletions(-) create mode 100644 airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/__init__.py create mode 100644 airbyte-integrations/connectors/source-shopify/unit_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/__init__.py diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/__init__.py b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/exceptions.py b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/exceptions.py index 59aaec8641c8c..1177d0fbdcf17 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/exceptions.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/exceptions.py @@ -19,8 +19,10 @@ def __init__(self, message: str, **kwargs) -> None: class BulkJobError(BaseBulkException): """Raised when there are BULK Job Errors in response""" - class BulkJobUnknownError(BaseBulkException): - """Raised when BULK Job has FAILED with Unknown status""" + class BulkJobNonHandableError(BaseBulkException): + """Raised when there are non-actionable BULK Job Errors in response""" + + failure_type: FailureType = FailureType.system_error class BulkJobBadResponse(BaseBulkException): """Raised when the requests.Response object could not be parsed""" diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/job.py b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/job.py index fdbccbca9735a..e6cf79b7d9d52 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/job.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/job.py @@ -67,7 +67,7 @@ class ShopifyBulkManager: # P365D, upper boundary for slice size _job_size_max: Final[float] = 365.0 # dynamically adjusted slice interval - _job_size: float = field(init=False, default=0.0) + job_size: float = field(init=False, default=0.0) # expand slice factor _job_size_expand_factor: int = field(init=False, default=2) # reduce slice factor @@ -138,10 +138,10 @@ def _is_long_running_job(self) -> bool: return False def _expand_job_size(self) -> None: - self._job_size += self._job_size_adjusted_expand_factor + self.job_size += self._job_size_adjusted_expand_factor def _reduce_job_size(self) -> None: - self._job_size /= self._job_size_adjusted_reduce_factor + self.job_size /= self._job_size_adjusted_reduce_factor def _save_latest_request(self, response: requests.Response) -> None: self._request = response.request @@ -162,7 +162,7 @@ def __adjust_job_size(self, job_current_elapsed_time: float) -> None: # set the last job time self._job_last_elapsed_time = job_current_elapsed_time # check the job size slice interval are acceptable - self._job_size = max(self._job_size_min, min(self._job_size, self._job_size_max)) + self.job_size = max(self._job_size_min, min(self.job_size, self._job_size_max)) def __reset_state(self) -> None: # reset the job state to default @@ -282,19 +282,47 @@ def _on_access_denied_job(self, **kwagrs) -> AirbyteTracedException: ) def _on_job_with_errors(self, errors: List[Mapping[str, Any]]) -> AirbyteTracedException: - raise ShopifyBulkExceptions.BulkJobUnknownError( - f"Could not validate the status of the BULK Job `{self._job_id}`. Errors: {errors}." - ) + raise ShopifyBulkExceptions.BulkJobError(f"Could not validate the status of the BULK Job `{self._job_id}`. Errors: {errors}.") - def _job_check_for_errors(self, response: requests.Response) -> Optional[Iterable[Mapping[str, Any]]]: - try: + def _on_non_handable_job_error(self, errors: List[Mapping[str, Any]]) -> AirbyteTracedException: + raise ShopifyBulkExceptions.BulkJobNonHandableError(f"The Stream: `{self.stream_name}`, Non-handable error occured: {errors}") - return response.json().get("errors") or response.json().get("data", {}).get("bulkOperationRunQuery", {}).get("userErrors", []) + def _collect_bulk_errors(self, response: requests.Response) -> List[Optional[dict]]: + try: + server_errors = response.json().get("errors", []) + user_errors = response.json().get("data", {}).get("bulkOperationRunQuery", {}).get("userErrors", []) + errors = server_errors + user_errors + return errors except (Exception, JSONDecodeError) as e: raise ShopifyBulkExceptions.BulkJobBadResponse( f"Couldn't check the `response` for `errors`, status: {response.status_code}, response: `{response.text}`. Trace: {repr(e)}." ) + def _job_healthcheck(self, response: requests.Response) -> Optional[Exception]: + try: + # save the latest request to retry + self._save_latest_request(response) + + # get the errors, if occured + errors = self._collect_bulk_errors(response) + + # when the concurrent job takes place, + # another job could not be created + # we typically need to wait and retry, but no longer than 10 min. + if self._has_running_concurrent_job(errors): + return self._job_retry_on_concurrency() + + # when the job was already created and the error appears in the middle + if self._job_state and errors: + self._on_job_with_errors(errors) + + # when the job was not created because of some errors + if not self._job_state and errors: + self._on_non_handable_job_error(errors) + + except (ShopifyBulkExceptions.BulkJobBadResponse, ShopifyBulkExceptions.BulkJobError) as e: + raise e + def _job_send_state_request(self) -> requests.Response: with self.session as job_state_request: status_args = self._job_get_request_args(ShopifyBulkTemplates.status) @@ -303,11 +331,7 @@ def _job_send_state_request(self) -> requests.Response: def _job_track_running(self) -> None: job_state_response = self._job_send_state_request() - errors = self._job_check_for_errors(job_state_response) - if errors: - # the exception raised when there are job-related errors, and the Job cannot be run futher. - self._on_job_with_errors(errors) - + self._job_healthcheck(job_state_response) self._job_update_state(job_state_response) self._job_state_to_fn_map.get(self._job_state)(response=job_state_response) @@ -348,7 +372,7 @@ def _job_retry_concurrent(self) -> Optional[requests.Response]: ) sleep(self._concurrent_interval) retried_response = self._job_retry_request() - return self._job_healthcheck(retried_response) + return self.job_process_created(retried_response) def _job_retry_on_concurrency(self) -> Optional[requests.Response]: if self._has_reached_max_concurrency(): @@ -360,17 +384,6 @@ def _job_retry_on_concurrency(self) -> Optional[requests.Response]: else: return self._job_retry_concurrent() - def _job_healthcheck(self, response: requests.Response) -> Optional[requests.Response]: - # save the latest request to retry - self._save_latest_request(response) - # check for query errors - errors = self._job_check_for_errors(response) - # when the concurrent job takes place, we typically need to wait and retry, but no longer than 10 min. - if self._has_running_concurrent_job(errors): - return self._job_retry_on_concurrency() - - return response if not errors else None - @bulk_retry_on_exception(logger) def _job_check_state(self) -> Optional[str]: while not self._job_completed(): @@ -381,12 +394,13 @@ def _job_check_state(self) -> Optional[str]: # external method to be used within other components + @bulk_retry_on_exception(logger) def job_process_created(self, response: requests.Response) -> None: """ The Bulk Job with CREATED status, should be processed, before we move forward with Job Status Checks. """ - response = self._job_healthcheck(response) - bulk_response = response.json().get("data", {}).get("bulkOperationRunQuery", {}).get("bulkOperation", {}) + self._job_healthcheck(response) + bulk_response = response.json().get("data", {}).get("bulkOperationRunQuery", {}).get("bulkOperation", {}) if response else None if bulk_response and bulk_response.get("status") == ShopifyBulkJobStatus.CREATED.value: self._job_id = bulk_response.get("id") self._job_created_at = bulk_response.get("createdAt") @@ -396,10 +410,10 @@ def job_size_normalize(self, start: datetime, end: datetime) -> datetime: # adjust slice size when it's bigger than the loop point when it should end, # to preserve correct job size adjustments when this is the only job we need to run, based on STATE provided requested_slice_size = (end - start).total_days() - self._job_size = requested_slice_size if requested_slice_size < self._job_size else self._job_size + self.job_size = requested_slice_size if requested_slice_size < self.job_size else self.job_size def get_adjusted_job_start(self, slice_start: datetime) -> datetime: - step = self._job_size if self._job_size else self._job_size_min + step = self.job_size if self.job_size else self._job_size_min return slice_start.add(days=step) def get_adjusted_job_end(self, slice_start: datetime, slice_end: datetime) -> datetime: @@ -415,19 +429,19 @@ def job_check_for_completion(self) -> Optional[str]: This method checks the status for the `CREATED` Shopify BULK Job, using it's `ID`. The time spent for the Job execution is tracked to understand the effort. """ - # track created job until it's COMPLETED + job_started = time() try: + # track created job until it's COMPLETED self._job_check_state() return self._job_result_filename except ( - ShopifyBulkExceptions.BulkJobCanceled, ShopifyBulkExceptions.BulkJobFailed, ShopifyBulkExceptions.BulkJobTimout, ShopifyBulkExceptions.BulkJobAccessDenied, - # this one is retryable, but stil needs to be raised, - # if the max attempts value is reached. - ShopifyBulkExceptions.BulkJobUnknownError, + # when the job is canceled by non-source actions, + # we should raise the system_error + ShopifyBulkExceptions.BulkJobCanceled, ) as bulk_job_error: raise bulk_job_error finally: diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/retry.py b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/retry.py index d3550a0826ffa..c081db0e917cf 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/retry.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/retry.py @@ -10,7 +10,7 @@ BULK_RETRY_ERRORS: Final[Tuple] = ( ShopifyBulkExceptions.BulkJobBadResponse, - ShopifyBulkExceptions.BulkJobUnknownError, + ShopifyBulkExceptions.BulkJobError, ) diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py index f6f5d99724daa..d5b85a9913691 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py @@ -648,7 +648,7 @@ def __init__(self, config: Dict) -> None: # overide the default job slice size, if provided (it's auto-adjusted, later on) self.bulk_window_in_days = config.get("bulk_window_in_days") if self.bulk_window_in_days: - self.job_manager._job_size = self.bulk_window_in_days + self.job_manager.job_size = self.bulk_window_in_days # define Record Producer instance self.record_producer: ShopifyBulkRecord = ShopifyBulkRecord(self.query) @@ -748,7 +748,7 @@ def get_state_value(self, stream_state: Mapping[str, Any] = None) -> Optional[Un return self.config.get("start_date") def emit_slice_message(self, slice_start: datetime, slice_end: datetime) -> None: - slice_size_message = f"Slice size: `P{round(self.job_manager._job_size, 1)}D`" + slice_size_message = f"Slice size: `P{round(self.job_manager.job_size, 1)}D`" self.logger.info(f"Stream: `{self.name}` requesting BULK Job for period: {slice_start} -- {slice_end}. {slice_size_message}") @stream_state_cache.cache_stream_state @@ -772,7 +772,7 @@ def process_bulk_results( self, response: requests.Response, stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Mapping[str, Any]]: + ) -> Optional[Iterable[Mapping[str, Any]]]: # process the CREATED Job prior to other actions self.job_manager.job_process_created(response) # get results fetched from COMPLETED BULK Job diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/__init__.py b/airbyte-integrations/connectors/source-shopify/unit_tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/__init__.py b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py index 7c9f50bdc61da..015242ac065f5 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py @@ -22,22 +22,6 @@ ) -@pytest.mark.parametrize( - "bulk_job_response, expected_len", - [ - ("bulk_error", 1), - ("bulk_unknown_error", 1), - ("bulk_no_errors", 0), - ], -) -def test_check_for_errors(request, requests_mock, bulk_job_response, expected_len, auth_config) -> None: - stream = MetafieldOrders(auth_config) - requests_mock.get(stream.job_manager.base_url, json=request.getfixturevalue(bulk_job_response)) - test_response = requests.get(stream.job_manager.base_url) - test_errors = stream.job_manager._job_check_for_errors(test_response) - assert len(test_errors) == expected_len - - def test_get_errors_from_response_invalid_response(auth_config) -> None: expected = "Couldn't check the `response` for `errors`" stream = MetafieldOrders(auth_config) @@ -45,46 +29,29 @@ def test_get_errors_from_response_invalid_response(auth_config) -> None: response.status_code = 404 response.url = "https://example.com/invalid" with pytest.raises(ShopifyBulkExceptions.BulkJobBadResponse) as error: - stream.job_manager._job_check_for_errors(response) + stream.job_manager._job_healthcheck(response) assert expected in repr(error.value) -@pytest.mark.parametrize( - "bulk_job_response, expected", - [ - ("bulk_error_with_concurrent_job", True), - ("bulk_successful_response", False), - ("bulk_error", False), - ], -) -def test_has_running_concurrent_job(request, requests_mock, bulk_job_response, auth_config, expected) -> None: +def test_retry_on_concurrent_job(request, requests_mock, auth_config) -> None: stream = MetafieldOrders(auth_config) - requests_mock.get(stream.job_manager.base_url, json=request.getfixturevalue(bulk_job_response)) - test_response = requests.get(stream.job_manager.base_url) - test_errors = stream.job_manager._job_check_for_errors(test_response) - assert stream.job_manager._has_running_concurrent_job(test_errors) == expected - - -@pytest.mark.parametrize( - "bulk_job_response, expected", - [ - ("bulk_successful_response", "gid://shopify/BulkOperation/4046733967549"), - ("bulk_successful_response_with_no_id", None), - ], -) -def test_job_process_created(request, requests_mock, bulk_job_response, auth_config, expected) -> None: - stream = MetafieldOrders(auth_config) - requests_mock.get(stream.job_manager.base_url, json=request.getfixturevalue(bulk_job_response)) + stream.job_manager._concurrent_interval = 0 + # mocking responses + requests_mock.get( + stream.job_manager.base_url, + [ + # concurrent request is running (3 - retries) + {"json": request.getfixturevalue("bulk_error_with_concurrent_job")}, + {"json": request.getfixturevalue("bulk_error_with_concurrent_job")}, + {"json": request.getfixturevalue("bulk_error_with_concurrent_job")}, + # concurrent request has finished + {"json": request.getfixturevalue("bulk_successful_response")}, + ]) + test_response = requests.get(stream.job_manager.base_url) - # process the job with id (typically CREATED one) - stream.job_manager.job_process_created(test_response) - assert stream.job_manager._job_id == expected - - -def test_job_state_completed(auth_config) -> None: - stream = MetafieldOrders(auth_config) - stream.job_manager._job_state = ShopifyBulkJobStatus.COMPLETED.value - assert stream.job_manager._job_completed() == True + stream.job_manager._job_healthcheck(test_response) + # call count should be 4 (3 retries, 1 - succeeded) + assert requests_mock.call_count == 4 @pytest.mark.parametrize( @@ -125,6 +92,28 @@ def test_job_retry_on_concurrency(request, requests_mock, bulk_job_response, con assert requests_mock.call_count == 2 +@pytest.mark.parametrize( + "bulk_job_response, expected", + [ + ("bulk_successful_response", "gid://shopify/BulkOperation/4046733967549"), + ("bulk_successful_response_with_no_id", None), + ], +) +def test_job_process_created(request, requests_mock, bulk_job_response, auth_config, expected) -> None: + stream = MetafieldOrders(auth_config) + requests_mock.get(stream.job_manager.base_url, json=request.getfixturevalue(bulk_job_response)) + test_response = requests.get(stream.job_manager.base_url) + # process the job with id (typically CREATED one) + stream.job_manager.job_process_created(test_response) + assert stream.job_manager._job_id == expected + + +def test_job_state_completed(auth_config) -> None: + stream = MetafieldOrders(auth_config) + stream.job_manager._job_state = ShopifyBulkJobStatus.COMPLETED.value + assert stream.job_manager._job_completed() == True + + @pytest.mark.parametrize( "job_response, error_type, expected", [ @@ -163,17 +152,30 @@ def test_job_check_for_completion(mocker, request, requests_mock, job_response, @pytest.mark.parametrize( - "job_response, error_type, max_retry, expected_msg, call_count_expected", + "job_response, job_state, error_type, max_retry, expected_msg, call_count_expected", [ + # No retry - dead end + ( + "bulk_successful_response_with_errors", + False, + ShopifyBulkExceptions.BulkJobNonHandableError, + 2, + "Non-handable error occured", + 1, + ), + # Should be retried ( - "bulk_successful_response_with_errors", - ShopifyBulkExceptions.BulkJobUnknownError, + "bulk_successful_response_with_errors", + True, + ShopifyBulkExceptions.BulkJobError, 2, "Could not validate the status of the BULK Job", 3, ), + # Should be retried ( None, + False, ShopifyBulkExceptions.BulkJobBadResponse, 1, "Couldn't check the `response` for `errors`", @@ -181,17 +183,22 @@ def test_job_check_for_completion(mocker, request, requests_mock, job_response, ), ], ids=[ - "BulkJobUnknownError", + "BulkJobNonHandableError", + "BulkJobError", "BulkJobBadResponse", ], ) -def test_retry_on_job_exception(mocker, request, requests_mock, job_response, auth_config, error_type, max_retry, call_count_expected, expected_msg) -> None: +def test_retry_on_job_exception(mocker, request, requests_mock, job_response, auth_config, job_state, error_type, max_retry, call_count_expected, expected_msg) -> None: stream = MetafieldOrders(auth_config) stream.job_manager._job_backoff_time = 0 stream.job_manager._job_max_retries = max_retry # patching the method to get the right ID checks if job_response: stream.job_manager._job_id = request.getfixturevalue(job_response).get("data", {}).get("node", {}).get("id") + + if job_state: + # setting job_state to simulate the error-in-the-middle + stream.job_manager._job_state = request.getfixturevalue(job_response).get("data", {}).get("node", {}).get("status") # mocking the response for STATUS CHECKS json_mock_response = request.getfixturevalue(job_response) if job_response else None @@ -240,7 +247,6 @@ def test_job_check_with_running_scenario(request, requests_mock, job_response, a assert stream.job_manager._job_state == expected - def test_job_read_file_invalid_filename(mocker, auth_config) -> None: stream = MetafieldOrders(auth_config) expected = "An error occured while producing records from BULK Job result" @@ -336,7 +342,7 @@ def test_stream_slices( auth_config["start_date"] = "2020-01-01" stream = stream(auth_config) - stream.job_manager._job_size = 1000 + stream.job_manager.job_size = 1000 test_result = list(stream.stream_slices(stream_state=stream_state)) test_query_from_slice = test_result[0].get("query") assert expected in test_query_from_slice @@ -375,11 +381,11 @@ def test_expand_stream_slices_job_size( # for the sake of simplicity we fake some parts to simulate the `current_job_time_elapsed` # fake current slice interval value - stream.job_manager._job_size = previous_slice_size + stream.job_manager.job_size = previous_slice_size # fake `last job elapsed time` if last_job_elapsed_time: stream.job_manager._job_last_elapsed_time = last_job_elapsed_time # parsing result from completed job list(stream.parse_response(test_bulk_response)) # check the next slice - assert stream.job_manager._job_size == adjusted_slice_size + assert stream.job_manager.job_size == adjusted_slice_size diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/test_source.py b/airbyte-integrations/connectors/source-shopify/unit_tests/test_source.py index 72f5029897529..e042f57b60e43 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/test_source.py @@ -8,7 +8,6 @@ import pytest from airbyte_cdk.utils import AirbyteTracedException -from conftest import records_per_slice from source_shopify.auth import ShopifyAuthenticator from source_shopify.source import ConnectionCheckTest, SourceShopify from source_shopify.streams.streams import ( @@ -50,6 +49,8 @@ TransactionsGraphql, ) +from .conftest import records_per_slice + @pytest.fixture def config(basic_config) -> dict: From 6088f7c44d528d12df37e0e00cf35ed56f4b9e7a Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Fri, 3 May 2024 16:37:40 +0300 Subject: [PATCH 09/27] updated migration guide --- docs/integrations/sources/shopify-migrations.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/docs/integrations/sources/shopify-migrations.md b/docs/integrations/sources/shopify-migrations.md index cea82229afd42..2ea036ecb1912 100644 --- a/docs/integrations/sources/shopify-migrations.md +++ b/docs/integrations/sources/shopify-migrations.md @@ -4,10 +4,19 @@ This version implements `Shopify GraphQL BULK Operations` to speed up the following streams: - `Products` - `Product Images` - - `Product Variants`* + - `Product Variants` + +* In the `Products` stream, the `published_scope` property is no longer available. +* In the `Products` stream, the `images` property now contains only the `id` of the image. Refer to the `Product Images` stream instead. +* In the `Products` stream, the `variants` property now contains only the `id` of the variant. Refer to the `Product Variants` stream instead. +* In the `Products` stream, the `position` property is no longer available. +* The `Product Variants` stream now has the cursor field `updated_at` instead of `id`. +* In the `Product Variants` stream, the date-time fields, such as `created_at` and `updated_at`, now use `UTC` format without a timezone component. +* In the `Product Variants` stream, the `presentment_prices.compare_at_price` property has changed from a `number` to an `object of strings`. This field was not populated in the `REST API` stream version, but it is correctly covered in the GraphQL stream version. +* The `Product Variants` stream's `inventory_policy` and `inventory_management` properties now contain `uppercase string` values, instead of `lowercase`. +* In the `Product Images` stream, the date-time fields, such as `created_at` and `updated_at`, now use `UTC` format without a timezone component. +* In the `Product Images` stream, the `variant_ids` property is no longer available. Refer to the `Product variants` stream instead. -* The `Product Variants` stream now has the cursor field `updated_at`, instead of the `id`. - ### Action items required for 2.1.0 - `Refresh Schema` + `Reset` is required for this stream after the upgrade from previous version. From 03820036cd4cf8c2bbfe65c8e159bff81032f49d Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Fri, 3 May 2024 18:14:57 +0300 Subject: [PATCH 10/27] updated CAT config --- .../connectors/source-shopify/acceptance-test-config.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/acceptance-test-config.yml b/airbyte-integrations/connectors/source-shopify/acceptance-test-config.yml index 1c8a3d2f58d52..fa685bce30eab 100644 --- a/airbyte-integrations/connectors/source-shopify/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-shopify/acceptance-test-config.yml @@ -25,8 +25,9 @@ acceptance_tests: tests: - config_path: "secrets/config.json" backward_compatibility_tests_config: - # The cursor field for `fulfillments` stream has changed from `id` to `updated_at` - disable_for_version: "1.1.8" + # the `product_variants` steam schema has changed, mainly: + # see this PR: https://github.com/airbytehq/airbyte/pull/37767 + disable_for_version: "2.0.8" basic_read: tests: - config_path: "secrets/config_transactions_with_user_id.json" From 28662357b7b08f2cb6060846145442fadcf95f3c Mon Sep 17 00:00:00 2001 From: Erick Corona Date: Sun, 5 May 2024 17:01:31 -0600 Subject: [PATCH 11/27] Add preliminary test to read records from entrypoint in Bulk GraphQL job --- .../unit_tests/graphql_bulk/test_job.py | 154 ++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py index 69e2838fc16ea..6af5f403d5485 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py @@ -5,6 +5,8 @@ import pytest import requests + +from source_shopify import SourceShopify from source_shopify.shopify_graphql.bulk.exceptions import ShopifyBulkExceptions from source_shopify.shopify_graphql.bulk.status import ShopifyBulkJobStatus from source_shopify.streams.streams import ( @@ -17,6 +19,31 @@ MetafieldOrders, TransactionsGraphql, ) +from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read +from airbyte_cdk.test.mock_http import HttpMocker +from airbyte_cdk.test.mock_http import HttpResponse +from airbyte_cdk.test.mock_http.response_builder import ( + FieldPath, + HttpResponseBuilder, + NestedPath, + RecordBuilder, + create_record_builder, + create_response_builder, + find_template, +) +from airbyte_cdk.test.state_builder import StateBuilder +from airbyte_protocol.models import ( + AirbyteStateBlob, + AirbyteStateMessage, + AirbyteStreamState, + ConfiguredAirbyteCatalog, + FailureType, + StreamDescriptor, + SyncMode, +) +from typing import Any, Dict, List, Optional +from airbyte_cdk.test.catalog_builder import CatalogBuilder +from airbyte_cdk.test.mock_http.request import ANY_QUERY_PARAMS, HttpRequest @pytest.mark.parametrize( @@ -35,6 +62,133 @@ def test_check_for_errors(request, requests_mock, bulk_job_response, expected_le assert len(test_errors) == expected_len +def _catalog(stream_name: str, sync_mode: SyncMode) -> ConfiguredAirbyteCatalog: + return CatalogBuilder().with_stream(stream_name, sync_mode).build() + + +def _read( + catalog: ConfiguredAirbyteCatalog, + state: Optional[List[AirbyteStateMessage]] = None, + expecting_exception: bool = False +) -> EntrypointOutput: + config = { + "start_date": "2023-04-13", + "shop": "airbyte-integration-test", + "credentials": { + "auth_method": "api_password", + "api_password": "api_password", + }, + "bulk_window_in_days": 1000 + } + + return read(SourceShopify(), config, catalog, state, expecting_exception) + + +def get_http_records_request() -> HttpRequest: + url = "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/shop.json" + return HttpRequest( + url=url, + query_params=ANY_QUERY_PARAMS, + ) + + +def get_http_scopes_request() -> HttpRequest: + url = "https://airbyte-integration-test.myshopify.com/admin/oauth/access_scopes.json" + return HttpRequest( + url=url, + query_params=ANY_QUERY_PARAMS, + ) + + +def get_http_shop_request() -> HttpRequest: + url = "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/shop.json" + return HttpRequest( + url=url, + query_params=ANY_QUERY_PARAMS, + ) + + +def get_graphql_request() -> HttpRequest: + return HttpRequest( + url="https://airbyte-integration-test.myshopify.com/admin/api/2023-07/graphql.json", + body=b'{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2023-04-13T00:00:00+00:00\' AND updated_at:<=\'2023-04-13T02:24:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}' + ) + + +def get_graphql_request2() -> HttpRequest: + return HttpRequest( + url="https://airbyte-integration-test.myshopify.com/admin/api/2023-07/graphql.json", + body="""query { + node(id: "gid://shopify/BulkOperation/4472588009661") { + ... on BulkOperation { + id + status + errorCode + createdAt + objectCount + fileSize + url + partialDataUrl + } + } + }""" + ) + + +def test_read_graphql_records_successfully(): + with HttpMocker() as http_mocker: + catalog = _catalog("metafield_orders", SyncMode.full_refresh) + stream = catalog.streams[0].stream + + response_scopes = '{"access_scopes":[{"handle":"read_analytics"},{"handle":"read_customers"},{"handle":"read_gdpr_data_request"},{"handle":"read_online_store_navigation"},{"handle":"read_shopify_payments_accounts"},{"handle":"read_shopify_payments_bank_accounts"},{"handle":"read_shopify_payments_disputes"},{"handle":"read_shopify_payments_payouts"},{"handle":"read_assigned_fulfillment_orders"},{"handle":"read_discounts"},{"handle":"read_draft_orders"},{"handle":"read_files"},{"handle":"read_fulfillments"},{"handle":"read_gift_cards"},{"handle":"read_inventory"},{"handle":"read_legal_policies"},{"handle":"read_locations"},{"handle":"read_marketing_events"},{"handle":"read_merchant_managed_fulfillment_orders"},{"handle":"read_online_store_pages"},{"handle":"read_order_edits"},{"handle":"read_orders"},{"handle":"read_price_rules"},{"handle":"read_product_listings"},{"handle":"read_reports"},{"handle":"read_resource_feedbacks"},{"handle":"read_script_tags"},{"handle":"read_shipping"},{"handle":"read_locales"},{"handle":"read_content"},{"handle":"read_themes"},{"handle":"read_third_party_fulfillment_orders"},{"handle":"read_translations"},{"handle":"read_publications"},{"handle":"read_returns"},{"handle":"read_channels"},{"handle":"read_products"},{"handle":"read_markets"},{"handle":"read_shopify_credit"},{"handle":"read_store_credit_account_transactions"},{"handle":"read_all_cart_transforms"},{"handle":"read_cart_transforms"},{"handle":"read_all_checkout_completion_target_customizations"},{"handle":"read_companies"},{"handle":"read_custom_fulfillment_services"},{"handle":"read_customer_data_erasure"},{"handle":"read_customer_merge"},{"handle":"read_dery_customizations"},{"handle":"read_fulfillment_constraint_rules"},{"handle":"read_gates"},{"handle":"read_order_submission_rules"},{"handle":"read_payment_customizations"},{"handle":"read_packing_slip_templates"},{"handle":"read_payment_terms"},{"handle":"read_pixels"},{"handle":"read_product_feeds"},{"handle":"read_purchase_options"},{"handle":"read_shopify_payments_provider_accounts_sensitive"},{"handle":"read_all_orders"}]}' + response_shop = '{"shop":{"id":58033176765,"name":"airbyte integration test","email":"sherif@airbyte.io","domain":"airbyte-integration-test.myshopify.com","province":"California","country":"US","address1":"350 29th Avenue","zip":"94121","city":"San Francisco","source":null,"phone":"8023494963","latitude":37.7827286,"longitude":-122.4889911,"primary_locale":"en","address2":"","created_at":"2021-06-22T18:00:23-07:00","updated_at":"2024-01-30T21:11:05-08:00","country_code":"US","country_name":"United States","currency":"USD","customer_email":"sherif@airbyte.io","timezone":"(GMT-08:00) America\/Los_Angeles","iana_timezone":"America\/Los_Angeles","shop_owner":"Airbyte Airbyte","money_format":"${{amount}}","money_with_currency_format":"${{amount}} USD","weight_unit":"kg","province_code":"CA","taxes_included":true,"auto_configure_tax_inclusivity":null,"tax_shipping":null,"county_taxes":true,"plan_display_name":"Developer Preview","plan_name":"partner_test","has_discounts":true,"has_gift_cards":false,"myshopify_domain":"airbyte-integration-test.myshopify.com","google_apps_domain":null,"google_apps_login_enabled":null,"money_in_emails_format":"${{amount}}","money_with_currency_in_emails_format":"${{amount}} USD","eligible_for_payments":true,"requires_extra_payments_agreement":false,"password_enabled":true,"has_storefront":true,"finances":true,"primary_location_id":63590301885,"checkout_api_supported":true,"multi_location_enabled":true,"setup_required":false,"pre_launch_enabled":false,"enabled_presentment_currencies":["USD"],"transactional_sms_disabled":false,"marketing_sms_consent_enabled_at_checkout":false}}' + response_graphql = '{"data":{"bulkOperationRunQuery":{"bulkOperation":{"id":"gid://shopify/BulkOperation/4472588009661","status":"CREATED","createdAt":"2024-05-05T15:34:08Z"},"userErrors":[]}},"extensions":{"cost":{"requestedQueryCost":10,"actualQueryCost":10,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1990,"restoreRate":100.0}}}}' + response_graphql2 = '{"data":{"bulkOperationRunQuery":{"bulkOperation":{"id":"gid://shopify/BulkOperation/4472588009661","status":"COMPLETED","createdAt":"2024-05-05T15:34:08Z"},"userErrors":[]}},"extensions":{"cost":{"requestedQueryCost":10,"actualQueryCost":10,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1990,"restoreRate":100.0}}}}' + + http_mocker.get( + get_http_shop_request(), + HttpResponse(response_shop, 200) + ) + http_mocker.get( + get_http_scopes_request(), + HttpResponse(response_scopes, 200) + ) + http_mocker.post( + get_graphql_request(), + HttpResponse(response_graphql, 200) + ) + http_mocker.post( + get_graphql_request(), + HttpResponse(response_graphql, 200) + ) + http_mocker.post( + get_graphql_request2(), + HttpResponse(response_graphql2, 200) + ) + http_mocker.post( + get_graphql_request2(), + HttpResponse(response_graphql2, 200) + ) + + output = _read(catalog) + + history = http_mocker.__getattribute__("_mocker").request_history + assert output.records + + +def test_check_for_errors_with_connection_error() -> None: + with HttpMocker() as http_mocker: + inner_mocker = http_mocker.__getattribute__("_mocker") + inner_mocker.register_uri("GET", "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/shop.json", exc=ConnectionAbortedError) + + # TODO: how to check for retries? + output = _read(SyncMode.full_refresh, expecting_exception=False) + inner_mocker + + print(output.errors) + assert output.errors + + def test_get_errors_from_response_invalid_response(auth_config) -> None: expected = "Couldn't check the `response` for `errors`" stream = MetafieldOrders(auth_config) From a37dea4a8360ed022d78b5e8af6e6597a0ebce24 Mon Sep 17 00:00:00 2001 From: Erick Corona Date: Mon, 6 May 2024 20:41:57 -0600 Subject: [PATCH 12/27] Fix test with basic read records job --- .../connectors/source-shopify/pyproject.toml | 1 + .../unit_tests/graphql_bulk/test_job.py | 70 +++++++++---------- 2 files changed, 35 insertions(+), 36 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/pyproject.toml b/airbyte-integrations/connectors/source-shopify/pyproject.toml index 6f5ca760852de..d3c5d5e954aa2 100644 --- a/airbyte-integrations/connectors/source-shopify/pyproject.toml +++ b/airbyte-integrations/connectors/source-shopify/pyproject.toml @@ -28,3 +28,4 @@ source-shopify = "source_shopify.run:run" requests-mock = "^1.11.0" pytest-mock = "^3.12.0" pytest = "^8.0.0" +freezegun = "^1.4.0" diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py index 6af5f403d5485..ceaaf18e95497 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py @@ -22,6 +22,7 @@ from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read from airbyte_cdk.test.mock_http import HttpMocker from airbyte_cdk.test.mock_http import HttpResponse +from freezegun import freeze_time from airbyte_cdk.test.mock_http.response_builder import ( FieldPath, HttpResponseBuilder, @@ -72,7 +73,7 @@ def _read( expecting_exception: bool = False ) -> EntrypointOutput: config = { - "start_date": "2023-04-13", + "start_date": "2024-05-05", "shop": "airbyte-integration-test", "credentials": { "auth_method": "api_password", @@ -84,38 +85,28 @@ def _read( return read(SourceShopify(), config, catalog, state, expecting_exception) -def get_http_records_request() -> HttpRequest: - url = "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/shop.json" +def _scopes_request() -> HttpRequest: return HttpRequest( - url=url, + url="https://airbyte-integration-test.myshopify.com/admin/oauth/access_scopes.json", query_params=ANY_QUERY_PARAMS, ) -def get_http_scopes_request() -> HttpRequest: - url = "https://airbyte-integration-test.myshopify.com/admin/oauth/access_scopes.json" +def _shop_request() -> HttpRequest: return HttpRequest( - url=url, + url="https://airbyte-integration-test.myshopify.com/admin/api/2023-07/shop.json", query_params=ANY_QUERY_PARAMS, ) -def get_http_shop_request() -> HttpRequest: - url = "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/shop.json" - return HttpRequest( - url=url, - query_params=ANY_QUERY_PARAMS, - ) - - -def get_graphql_request() -> HttpRequest: +def data_graphql_request() -> HttpRequest: return HttpRequest( url="https://airbyte-integration-test.myshopify.com/admin/api/2023-07/graphql.json", - body=b'{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2023-04-13T00:00:00+00:00\' AND updated_at:<=\'2023-04-13T02:24:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}' + body='{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2024-05-05T00:00:00+00:00\' AND updated_at:<=\'2024-05-05T02:24:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}' ) -def get_graphql_request2() -> HttpRequest: +def _status_graphql_request() -> HttpRequest: return HttpRequest( url="https://airbyte-integration-test.myshopify.com/admin/api/2023-07/graphql.json", body="""query { @@ -135,44 +126,51 @@ def get_graphql_request2() -> HttpRequest: ) +def _records_file_request() -> HttpRequest: + return HttpRequest( + url="https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final", + query_params=ANY_QUERY_PARAMS, + ) + + +@freeze_time("2024-05-05T01:00:00") def test_read_graphql_records_successfully(): with HttpMocker() as http_mocker: catalog = _catalog("metafield_orders", SyncMode.full_refresh) - stream = catalog.streams[0].stream response_scopes = '{"access_scopes":[{"handle":"read_analytics"},{"handle":"read_customers"},{"handle":"read_gdpr_data_request"},{"handle":"read_online_store_navigation"},{"handle":"read_shopify_payments_accounts"},{"handle":"read_shopify_payments_bank_accounts"},{"handle":"read_shopify_payments_disputes"},{"handle":"read_shopify_payments_payouts"},{"handle":"read_assigned_fulfillment_orders"},{"handle":"read_discounts"},{"handle":"read_draft_orders"},{"handle":"read_files"},{"handle":"read_fulfillments"},{"handle":"read_gift_cards"},{"handle":"read_inventory"},{"handle":"read_legal_policies"},{"handle":"read_locations"},{"handle":"read_marketing_events"},{"handle":"read_merchant_managed_fulfillment_orders"},{"handle":"read_online_store_pages"},{"handle":"read_order_edits"},{"handle":"read_orders"},{"handle":"read_price_rules"},{"handle":"read_product_listings"},{"handle":"read_reports"},{"handle":"read_resource_feedbacks"},{"handle":"read_script_tags"},{"handle":"read_shipping"},{"handle":"read_locales"},{"handle":"read_content"},{"handle":"read_themes"},{"handle":"read_third_party_fulfillment_orders"},{"handle":"read_translations"},{"handle":"read_publications"},{"handle":"read_returns"},{"handle":"read_channels"},{"handle":"read_products"},{"handle":"read_markets"},{"handle":"read_shopify_credit"},{"handle":"read_store_credit_account_transactions"},{"handle":"read_all_cart_transforms"},{"handle":"read_cart_transforms"},{"handle":"read_all_checkout_completion_target_customizations"},{"handle":"read_companies"},{"handle":"read_custom_fulfillment_services"},{"handle":"read_customer_data_erasure"},{"handle":"read_customer_merge"},{"handle":"read_dery_customizations"},{"handle":"read_fulfillment_constraint_rules"},{"handle":"read_gates"},{"handle":"read_order_submission_rules"},{"handle":"read_payment_customizations"},{"handle":"read_packing_slip_templates"},{"handle":"read_payment_terms"},{"handle":"read_pixels"},{"handle":"read_product_feeds"},{"handle":"read_purchase_options"},{"handle":"read_shopify_payments_provider_accounts_sensitive"},{"handle":"read_all_orders"}]}' response_shop = '{"shop":{"id":58033176765,"name":"airbyte integration test","email":"sherif@airbyte.io","domain":"airbyte-integration-test.myshopify.com","province":"California","country":"US","address1":"350 29th Avenue","zip":"94121","city":"San Francisco","source":null,"phone":"8023494963","latitude":37.7827286,"longitude":-122.4889911,"primary_locale":"en","address2":"","created_at":"2021-06-22T18:00:23-07:00","updated_at":"2024-01-30T21:11:05-08:00","country_code":"US","country_name":"United States","currency":"USD","customer_email":"sherif@airbyte.io","timezone":"(GMT-08:00) America\/Los_Angeles","iana_timezone":"America\/Los_Angeles","shop_owner":"Airbyte Airbyte","money_format":"${{amount}}","money_with_currency_format":"${{amount}} USD","weight_unit":"kg","province_code":"CA","taxes_included":true,"auto_configure_tax_inclusivity":null,"tax_shipping":null,"county_taxes":true,"plan_display_name":"Developer Preview","plan_name":"partner_test","has_discounts":true,"has_gift_cards":false,"myshopify_domain":"airbyte-integration-test.myshopify.com","google_apps_domain":null,"google_apps_login_enabled":null,"money_in_emails_format":"${{amount}}","money_with_currency_in_emails_format":"${{amount}} USD","eligible_for_payments":true,"requires_extra_payments_agreement":false,"password_enabled":true,"has_storefront":true,"finances":true,"primary_location_id":63590301885,"checkout_api_supported":true,"multi_location_enabled":true,"setup_required":false,"pre_launch_enabled":false,"enabled_presentment_currencies":["USD"],"transactional_sms_disabled":false,"marketing_sms_consent_enabled_at_checkout":false}}' response_graphql = '{"data":{"bulkOperationRunQuery":{"bulkOperation":{"id":"gid://shopify/BulkOperation/4472588009661","status":"CREATED","createdAt":"2024-05-05T15:34:08Z"},"userErrors":[]}},"extensions":{"cost":{"requestedQueryCost":10,"actualQueryCost":10,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1990,"restoreRate":100.0}}}}' - response_graphql2 = '{"data":{"bulkOperationRunQuery":{"bulkOperation":{"id":"gid://shopify/BulkOperation/4472588009661","status":"COMPLETED","createdAt":"2024-05-05T15:34:08Z"},"userErrors":[]}},"extensions":{"cost":{"requestedQueryCost":10,"actualQueryCost":10,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1990,"restoreRate":100.0}}}}' + response_graphql2 = '{"data":{"node":{"id":"gid://shopify/BulkOperation/4476008693949","status":"COMPLETED","errorCode":null,"createdAt":"2024-05-06T20:45:48Z","objectCount":"4","fileSize":"774","url":"https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final?GoogleAccessId=assets-us-prod%40shopify-tiers.iam.gserviceaccount.com&Expires=1715633149&Signature=oMjQelfAzUW%2FdulC3HbuBapbUriUJ%2Bc9%2FKpIIf954VTxBqKChJAdoTmWT9ymh%2FnCiHdM%2BeM%2FADz5siAC%2BXtHBWkJfvs%2F0cYpse0ueiQsw6R8gW5JpeSbizyGWcBBWkv5j8GncAnZOUVYDxRIgfxcPb8BlFxBfC3wsx%2F00v9D6EHbPpkIMTbCOAhheJdw9GmVa%2BOMqHGHlmiADM34RDeBPrvSo65f%2FakpV2LBQTEV%2BhDt0ndaREQ0MrpNwhKnc3vZPzA%2BliOGM0wyiYr9qVwByynHq8c%2FaJPPgI5eGEfQcyepgWZTRW5S0DbmBIFxZJLN6Nq6bJ2bIZWrVriUhNGx2g%3D%3D&response-content-disposition=attachment%3B+filename%3D%22bulk-4476008693949.jsonl%22%3B+filename%2A%3DUTF-8%27%27bulk-4476008693949.jsonl&response-content-type=application%2Fjsonl","partialDataUrl":null}},"extensions":{"cost":{"requestedQueryCost":1,"actualQueryCost":1,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1999,"restoreRate":100.0}}}}' + response_data = """{"__typename":"Order","id":"gid:\/\/shopify\/Order\/5010584895677"} +{"__typename":"Metafield","id":"gid:\/\/shopify\/Metafield\/22347288150205","namespace":"my_fields","value":"asdfasdf","key":"purchase_order","description":null,"createdAt":"2023-04-13T12:09:50Z","updatedAt":"2024-05-13T00:09:50Z","type":"single_line_text_field","__parentId":"gid:\/\/shopify\/Order\/5010584895677"} +{"__typename":"Order","id":"gid:\/\/shopify\/Order\/5010585911485"} +{"__typename":"Metafield","id":"gid:\/\/shopify\/Metafield\/22347288740029","namespace":"my_fields","value":"asdfasdfasdf","key":"purchase_order","description":null,"createdAt":"2023-04-13T12:11:20Z","updatedAt":"2024-05-05T00:11:20Z","type":"single_line_text_field","__parentId":"gid:\/\/shopify\/Order\/5010585911485"} +""" http_mocker.get( - get_http_shop_request(), - HttpResponse(response_shop, 200) + _shop_request(), + HttpResponse(response_shop) ) http_mocker.get( - get_http_scopes_request(), - HttpResponse(response_scopes, 200) + _scopes_request(), + HttpResponse(response_scopes) ) http_mocker.post( - get_graphql_request(), - HttpResponse(response_graphql, 200) + data_graphql_request(), + HttpResponse(response_graphql) ) http_mocker.post( - get_graphql_request(), - HttpResponse(response_graphql, 200) + _status_graphql_request(), + HttpResponse(response_graphql2) ) - http_mocker.post( - get_graphql_request2(), - HttpResponse(response_graphql2, 200) - ) - http_mocker.post( - get_graphql_request2(), - HttpResponse(response_graphql2, 200) + http_mocker.get( + _records_file_request(), + HttpResponse(response_data) ) output = _read(catalog) - history = http_mocker.__getattribute__("_mocker").request_history assert output.records From ab522ee2b39e56c6ac8e29b4c98f25baa8c965c1 Mon Sep 17 00:00:00 2001 From: Erick Corona Date: Tue, 7 May 2024 11:32:23 -0600 Subject: [PATCH 13/27] Organize test data --- .../unit_tests/graphql_bulk/test_job.py | 95 +++++++++---------- 1 file changed, 46 insertions(+), 49 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py index ceaaf18e95497..414823dabd759 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py @@ -23,23 +23,9 @@ from airbyte_cdk.test.mock_http import HttpMocker from airbyte_cdk.test.mock_http import HttpResponse from freezegun import freeze_time -from airbyte_cdk.test.mock_http.response_builder import ( - FieldPath, - HttpResponseBuilder, - NestedPath, - RecordBuilder, - create_record_builder, - create_response_builder, - find_template, -) -from airbyte_cdk.test.state_builder import StateBuilder from airbyte_protocol.models import ( - AirbyteStateBlob, AirbyteStateMessage, - AirbyteStreamState, ConfiguredAirbyteCatalog, - FailureType, - StreamDescriptor, SyncMode, ) from typing import Any, Dict, List, Optional @@ -99,7 +85,7 @@ def _shop_request() -> HttpRequest: ) -def data_graphql_request() -> HttpRequest: +def _data_graphql_request() -> HttpRequest: return HttpRequest( url="https://airbyte-integration-test.myshopify.com/admin/api/2023-07/graphql.json", body='{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2024-05-05T00:00:00+00:00\' AND updated_at:<=\'2024-05-05T02:24:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}' @@ -133,41 +119,45 @@ def _records_file_request() -> HttpRequest: ) -@freeze_time("2024-05-05T01:00:00") -def test_read_graphql_records_successfully(): - with HttpMocker() as http_mocker: - catalog = _catalog("metafield_orders", SyncMode.full_refresh) - - response_scopes = '{"access_scopes":[{"handle":"read_analytics"},{"handle":"read_customers"},{"handle":"read_gdpr_data_request"},{"handle":"read_online_store_navigation"},{"handle":"read_shopify_payments_accounts"},{"handle":"read_shopify_payments_bank_accounts"},{"handle":"read_shopify_payments_disputes"},{"handle":"read_shopify_payments_payouts"},{"handle":"read_assigned_fulfillment_orders"},{"handle":"read_discounts"},{"handle":"read_draft_orders"},{"handle":"read_files"},{"handle":"read_fulfillments"},{"handle":"read_gift_cards"},{"handle":"read_inventory"},{"handle":"read_legal_policies"},{"handle":"read_locations"},{"handle":"read_marketing_events"},{"handle":"read_merchant_managed_fulfillment_orders"},{"handle":"read_online_store_pages"},{"handle":"read_order_edits"},{"handle":"read_orders"},{"handle":"read_price_rules"},{"handle":"read_product_listings"},{"handle":"read_reports"},{"handle":"read_resource_feedbacks"},{"handle":"read_script_tags"},{"handle":"read_shipping"},{"handle":"read_locales"},{"handle":"read_content"},{"handle":"read_themes"},{"handle":"read_third_party_fulfillment_orders"},{"handle":"read_translations"},{"handle":"read_publications"},{"handle":"read_returns"},{"handle":"read_channels"},{"handle":"read_products"},{"handle":"read_markets"},{"handle":"read_shopify_credit"},{"handle":"read_store_credit_account_transactions"},{"handle":"read_all_cart_transforms"},{"handle":"read_cart_transforms"},{"handle":"read_all_checkout_completion_target_customizations"},{"handle":"read_companies"},{"handle":"read_custom_fulfillment_services"},{"handle":"read_customer_data_erasure"},{"handle":"read_customer_merge"},{"handle":"read_dery_customizations"},{"handle":"read_fulfillment_constraint_rules"},{"handle":"read_gates"},{"handle":"read_order_submission_rules"},{"handle":"read_payment_customizations"},{"handle":"read_packing_slip_templates"},{"handle":"read_payment_terms"},{"handle":"read_pixels"},{"handle":"read_product_feeds"},{"handle":"read_purchase_options"},{"handle":"read_shopify_payments_provider_accounts_sensitive"},{"handle":"read_all_orders"}]}' - response_shop = '{"shop":{"id":58033176765,"name":"airbyte integration test","email":"sherif@airbyte.io","domain":"airbyte-integration-test.myshopify.com","province":"California","country":"US","address1":"350 29th Avenue","zip":"94121","city":"San Francisco","source":null,"phone":"8023494963","latitude":37.7827286,"longitude":-122.4889911,"primary_locale":"en","address2":"","created_at":"2021-06-22T18:00:23-07:00","updated_at":"2024-01-30T21:11:05-08:00","country_code":"US","country_name":"United States","currency":"USD","customer_email":"sherif@airbyte.io","timezone":"(GMT-08:00) America\/Los_Angeles","iana_timezone":"America\/Los_Angeles","shop_owner":"Airbyte Airbyte","money_format":"${{amount}}","money_with_currency_format":"${{amount}} USD","weight_unit":"kg","province_code":"CA","taxes_included":true,"auto_configure_tax_inclusivity":null,"tax_shipping":null,"county_taxes":true,"plan_display_name":"Developer Preview","plan_name":"partner_test","has_discounts":true,"has_gift_cards":false,"myshopify_domain":"airbyte-integration-test.myshopify.com","google_apps_domain":null,"google_apps_login_enabled":null,"money_in_emails_format":"${{amount}}","money_with_currency_in_emails_format":"${{amount}} USD","eligible_for_payments":true,"requires_extra_payments_agreement":false,"password_enabled":true,"has_storefront":true,"finances":true,"primary_location_id":63590301885,"checkout_api_supported":true,"multi_location_enabled":true,"setup_required":false,"pre_launch_enabled":false,"enabled_presentment_currencies":["USD"],"transactional_sms_disabled":false,"marketing_sms_consent_enabled_at_checkout":false}}' - response_graphql = '{"data":{"bulkOperationRunQuery":{"bulkOperation":{"id":"gid://shopify/BulkOperation/4472588009661","status":"CREATED","createdAt":"2024-05-05T15:34:08Z"},"userErrors":[]}},"extensions":{"cost":{"requestedQueryCost":10,"actualQueryCost":10,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1990,"restoreRate":100.0}}}}' - response_graphql2 = '{"data":{"node":{"id":"gid://shopify/BulkOperation/4476008693949","status":"COMPLETED","errorCode":null,"createdAt":"2024-05-06T20:45:48Z","objectCount":"4","fileSize":"774","url":"https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final?GoogleAccessId=assets-us-prod%40shopify-tiers.iam.gserviceaccount.com&Expires=1715633149&Signature=oMjQelfAzUW%2FdulC3HbuBapbUriUJ%2Bc9%2FKpIIf954VTxBqKChJAdoTmWT9ymh%2FnCiHdM%2BeM%2FADz5siAC%2BXtHBWkJfvs%2F0cYpse0ueiQsw6R8gW5JpeSbizyGWcBBWkv5j8GncAnZOUVYDxRIgfxcPb8BlFxBfC3wsx%2F00v9D6EHbPpkIMTbCOAhheJdw9GmVa%2BOMqHGHlmiADM34RDeBPrvSo65f%2FakpV2LBQTEV%2BhDt0ndaREQ0MrpNwhKnc3vZPzA%2BliOGM0wyiYr9qVwByynHq8c%2FaJPPgI5eGEfQcyepgWZTRW5S0DbmBIFxZJLN6Nq6bJ2bIZWrVriUhNGx2g%3D%3D&response-content-disposition=attachment%3B+filename%3D%22bulk-4476008693949.jsonl%22%3B+filename%2A%3DUTF-8%27%27bulk-4476008693949.jsonl&response-content-type=application%2Fjsonl","partialDataUrl":null}},"extensions":{"cost":{"requestedQueryCost":1,"actualQueryCost":1,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1999,"restoreRate":100.0}}}}' - response_data = """{"__typename":"Order","id":"gid:\/\/shopify\/Order\/5010584895677"} +def _register_successful_read_requests(http_mocker: HttpMocker): + response_scopes = '{"access_scopes":[{"handle":"read_analytics"},{"handle":"read_customers"},{"handle":"read_gdpr_data_request"},{"handle":"read_online_store_navigation"},{"handle":"read_shopify_payments_accounts"},{"handle":"read_shopify_payments_bank_accounts"},{"handle":"read_shopify_payments_disputes"},{"handle":"read_shopify_payments_payouts"},{"handle":"read_assigned_fulfillment_orders"},{"handle":"read_discounts"},{"handle":"read_draft_orders"},{"handle":"read_files"},{"handle":"read_fulfillments"},{"handle":"read_gift_cards"},{"handle":"read_inventory"},{"handle":"read_legal_policies"},{"handle":"read_locations"},{"handle":"read_marketing_events"},{"handle":"read_merchant_managed_fulfillment_orders"},{"handle":"read_online_store_pages"},{"handle":"read_order_edits"},{"handle":"read_orders"},{"handle":"read_price_rules"},{"handle":"read_product_listings"},{"handle":"read_reports"},{"handle":"read_resource_feedbacks"},{"handle":"read_script_tags"},{"handle":"read_shipping"},{"handle":"read_locales"},{"handle":"read_content"},{"handle":"read_themes"},{"handle":"read_third_party_fulfillment_orders"},{"handle":"read_translations"},{"handle":"read_publications"},{"handle":"read_returns"},{"handle":"read_channels"},{"handle":"read_products"},{"handle":"read_markets"},{"handle":"read_shopify_credit"},{"handle":"read_store_credit_account_transactions"},{"handle":"read_all_cart_transforms"},{"handle":"read_cart_transforms"},{"handle":"read_all_checkout_completion_target_customizations"},{"handle":"read_companies"},{"handle":"read_custom_fulfillment_services"},{"handle":"read_customer_data_erasure"},{"handle":"read_customer_merge"},{"handle":"read_dery_customizations"},{"handle":"read_fulfillment_constraint_rules"},{"handle":"read_gates"},{"handle":"read_order_submission_rules"},{"handle":"read_payment_customizations"},{"handle":"read_packing_slip_templates"},{"handle":"read_payment_terms"},{"handle":"read_pixels"},{"handle":"read_product_feeds"},{"handle":"read_purchase_options"},{"handle":"read_shopify_payments_provider_accounts_sensitive"},{"handle":"read_all_orders"}]}' + response_shop = '{"shop":{"id":58033176765,"name":"airbyte integration test","email":"sherif@airbyte.io","domain":"airbyte-integration-test.myshopify.com","province":"California","country":"US","address1":"350 29th Avenue","zip":"94121","city":"San Francisco","source":null,"phone":"8023494963","latitude":37.7827286,"longitude":-122.4889911,"primary_locale":"en","address2":"","created_at":"2021-06-22T18:00:23-07:00","updated_at":"2024-01-30T21:11:05-08:00","country_code":"US","country_name":"United States","currency":"USD","customer_email":"sherif@airbyte.io","timezone":"(GMT-08:00) America\/Los_Angeles","iana_timezone":"America\/Los_Angeles","shop_owner":"Airbyte Airbyte","money_format":"${{amount}}","money_with_currency_format":"${{amount}} USD","weight_unit":"kg","province_code":"CA","taxes_included":true,"auto_configure_tax_inclusivity":null,"tax_shipping":null,"county_taxes":true,"plan_display_name":"Developer Preview","plan_name":"partner_test","has_discounts":true,"has_gift_cards":false,"myshopify_domain":"airbyte-integration-test.myshopify.com","google_apps_domain":null,"google_apps_login_enabled":null,"money_in_emails_format":"${{amount}}","money_with_currency_in_emails_format":"${{amount}} USD","eligible_for_payments":true,"requires_extra_payments_agreement":false,"password_enabled":true,"has_storefront":true,"finances":true,"primary_location_id":63590301885,"checkout_api_supported":true,"multi_location_enabled":true,"setup_required":false,"pre_launch_enabled":false,"enabled_presentment_currencies":["USD"],"transactional_sms_disabled":false,"marketing_sms_consent_enabled_at_checkout":false}}' + response_graphql = '{"data":{"bulkOperationRunQuery":{"bulkOperation":{"id":"gid://shopify/BulkOperation/4472588009661","status":"CREATED","createdAt":"2024-05-05T15:34:08Z"},"userErrors":[]}},"extensions":{"cost":{"requestedQueryCost":10,"actualQueryCost":10,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1990,"restoreRate":100.0}}}}' + response_graphql2 = '{"data":{"node":{"id":"gid://shopify/BulkOperation/4476008693949","status":"COMPLETED","errorCode":null,"createdAt":"2024-05-06T20:45:48Z","objectCount":"4","fileSize":"774","url":"https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final?GoogleAccessId=assets-us-prod%40shopify-tiers.iam.gserviceaccount.com&Expires=1715633149&Signature=oMjQelfAzUW%2FdulC3HbuBapbUriUJ%2Bc9%2FKpIIf954VTxBqKChJAdoTmWT9ymh%2FnCiHdM%2BeM%2FADz5siAC%2BXtHBWkJfvs%2F0cYpse0ueiQsw6R8gW5JpeSbizyGWcBBWkv5j8GncAnZOUVYDxRIgfxcPb8BlFxBfC3wsx%2F00v9D6EHbPpkIMTbCOAhheJdw9GmVa%2BOMqHGHlmiADM34RDeBPrvSo65f%2FakpV2LBQTEV%2BhDt0ndaREQ0MrpNwhKnc3vZPzA%2BliOGM0wyiYr9qVwByynHq8c%2FaJPPgI5eGEfQcyepgWZTRW5S0DbmBIFxZJLN6Nq6bJ2bIZWrVriUhNGx2g%3D%3D&response-content-disposition=attachment%3B+filename%3D%22bulk-4476008693949.jsonl%22%3B+filename%2A%3DUTF-8%27%27bulk-4476008693949.jsonl&response-content-type=application%2Fjsonl","partialDataUrl":null}},"extensions":{"cost":{"requestedQueryCost":1,"actualQueryCost":1,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1999,"restoreRate":100.0}}}}' + response_data = """{"__typename":"Order","id":"gid:\/\/shopify\/Order\/5010584895677"} {"__typename":"Metafield","id":"gid:\/\/shopify\/Metafield\/22347288150205","namespace":"my_fields","value":"asdfasdf","key":"purchase_order","description":null,"createdAt":"2023-04-13T12:09:50Z","updatedAt":"2024-05-13T00:09:50Z","type":"single_line_text_field","__parentId":"gid:\/\/shopify\/Order\/5010584895677"} {"__typename":"Order","id":"gid:\/\/shopify\/Order\/5010585911485"} {"__typename":"Metafield","id":"gid:\/\/shopify\/Metafield\/22347288740029","namespace":"my_fields","value":"asdfasdfasdf","key":"purchase_order","description":null,"createdAt":"2023-04-13T12:11:20Z","updatedAt":"2024-05-05T00:11:20Z","type":"single_line_text_field","__parentId":"gid:\/\/shopify\/Order\/5010585911485"} """ - http_mocker.get( - _shop_request(), - HttpResponse(response_shop) - ) - http_mocker.get( - _scopes_request(), - HttpResponse(response_scopes) - ) - http_mocker.post( - data_graphql_request(), - HttpResponse(response_graphql) - ) - http_mocker.post( - _status_graphql_request(), - HttpResponse(response_graphql2) - ) - http_mocker.get( - _records_file_request(), - HttpResponse(response_data) - ) + http_mocker.get( + _shop_request(), + HttpResponse(response_shop) + ) + http_mocker.get( + _scopes_request(), + HttpResponse(response_scopes) + ) + http_mocker.post( + _data_graphql_request(), + HttpResponse(response_graphql) + ) + http_mocker.post( + _status_graphql_request(), + HttpResponse(response_graphql2) + ) + http_mocker.get( + _records_file_request(), + HttpResponse(response_data) + ) + + +# @freeze_time("2024-05-05T01:00:00") +def test_read_graphql_records_successfully(): + with HttpMocker() as http_mocker: + catalog = _catalog("metafield_orders", SyncMode.full_refresh) + + _register_successful_read_requests(http_mocker) output = _read(catalog) @@ -175,16 +165,23 @@ def test_read_graphql_records_successfully(): def test_check_for_errors_with_connection_error() -> None: - with HttpMocker() as http_mocker: + with (HttpMocker() as http_mocker): + catalog = _catalog("metafield_orders", SyncMode.full_refresh) + inner_mocker = http_mocker.__getattribute__("_mocker") inner_mocker.register_uri("GET", "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/shop.json", exc=ConnectionAbortedError) + _register_successful_read_requests(http_mocker) + # TODO: how to check for retries? - output = _read(SyncMode.full_refresh, expecting_exception=False) - inner_mocker + output = _read(catalog) + print(f"Call Count: {inner_mocker.call_count}") + print(f"iscalled: {inner_mocker.called}") + print(f"Mocker history: {inner_mocker.request_history}") print(output.errors) - assert output.errors + # assert "ConnectionAbortedError" in output.errors.__str__() + assert output.records def test_get_errors_from_response_invalid_response(auth_config) -> None: From 66398ba8d71760161041924669d6b497e40134cd Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Tue, 7 May 2024 21:21:27 +0300 Subject: [PATCH 14/27] updated after the live-coding session --- .../source_shopify/streams/base_streams.py | 2 +- .../source-shopify/unit_tests/graphql_bulk/test_job.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py index f33545e3c4493..f6f5d99724daa 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py @@ -648,7 +648,7 @@ def __init__(self, config: Dict) -> None: # overide the default job slice size, if provided (it's auto-adjusted, later on) self.bulk_window_in_days = config.get("bulk_window_in_days") if self.bulk_window_in_days: - self.job_manager.job_size = self.bulk_window_in_days + self.job_manager._job_size = self.bulk_window_in_days # define Record Producer instance self.record_producer: ShopifyBulkRecord = ShopifyBulkRecord(self.query) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py index 414823dabd759..01ac9c7253a3f 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py @@ -31,6 +31,7 @@ from typing import Any, Dict, List, Optional from airbyte_cdk.test.catalog_builder import CatalogBuilder from airbyte_cdk.test.mock_http.request import ANY_QUERY_PARAMS, HttpRequest +import pendulum as pdm @pytest.mark.parametrize( @@ -49,6 +50,8 @@ def test_check_for_errors(request, requests_mock, bulk_job_response, expected_le assert len(test_errors) == expected_len +start_date = "2024-01-01T01:01:01+00:00" + def _catalog(stream_name: str, sync_mode: SyncMode) -> ConfiguredAirbyteCatalog: return CatalogBuilder().with_stream(stream_name, sync_mode).build() @@ -85,10 +88,10 @@ def _shop_request() -> HttpRequest: ) -def _data_graphql_request() -> HttpRequest: +def _data_graphql_request(start: str) -> HttpRequest: return HttpRequest( url="https://airbyte-integration-test.myshopify.com/admin/api/2023-07/graphql.json", - body='{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2024-05-05T00:00:00+00:00\' AND updated_at:<=\'2024-05-05T02:24:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}' + body='{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2024-05-05T00:00:00+00:00\' AND updated_at:<=\'2024-05-05T01:00:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}' ) @@ -152,7 +155,7 @@ def _register_successful_read_requests(http_mocker: HttpMocker): ) -# @freeze_time("2024-05-05T01:00:00") +@freeze_time(pdm.parse(start_date).subtract(days=1)) def test_read_graphql_records_successfully(): with HttpMocker() as http_mocker: catalog = _catalog("metafield_orders", SyncMode.full_refresh) From 14861a93a8dbfa937ebafdf7b4d9b23d9d83c704 Mon Sep 17 00:00:00 2001 From: Erick Corona Date: Wed, 8 May 2024 11:12:50 -0600 Subject: [PATCH 15/27] Test refactor --- .../unit_tests/graphql_bulk/test_job.py | 137 --------------- .../integration/test_bulk_stream.py | 165 ++++++++++++++++++ .../unit_tests/integration/test_data.py | 26 +++ 3 files changed, 191 insertions(+), 137 deletions(-) create mode 100644 airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py create mode 100644 airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_data.py diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py index 01ac9c7253a3f..e9cb636b4fd81 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py @@ -50,143 +50,6 @@ def test_check_for_errors(request, requests_mock, bulk_job_response, expected_le assert len(test_errors) == expected_len -start_date = "2024-01-01T01:01:01+00:00" - -def _catalog(stream_name: str, sync_mode: SyncMode) -> ConfiguredAirbyteCatalog: - return CatalogBuilder().with_stream(stream_name, sync_mode).build() - - -def _read( - catalog: ConfiguredAirbyteCatalog, - state: Optional[List[AirbyteStateMessage]] = None, - expecting_exception: bool = False -) -> EntrypointOutput: - config = { - "start_date": "2024-05-05", - "shop": "airbyte-integration-test", - "credentials": { - "auth_method": "api_password", - "api_password": "api_password", - }, - "bulk_window_in_days": 1000 - } - - return read(SourceShopify(), config, catalog, state, expecting_exception) - - -def _scopes_request() -> HttpRequest: - return HttpRequest( - url="https://airbyte-integration-test.myshopify.com/admin/oauth/access_scopes.json", - query_params=ANY_QUERY_PARAMS, - ) - - -def _shop_request() -> HttpRequest: - return HttpRequest( - url="https://airbyte-integration-test.myshopify.com/admin/api/2023-07/shop.json", - query_params=ANY_QUERY_PARAMS, - ) - - -def _data_graphql_request(start: str) -> HttpRequest: - return HttpRequest( - url="https://airbyte-integration-test.myshopify.com/admin/api/2023-07/graphql.json", - body='{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2024-05-05T00:00:00+00:00\' AND updated_at:<=\'2024-05-05T01:00:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}' - ) - - -def _status_graphql_request() -> HttpRequest: - return HttpRequest( - url="https://airbyte-integration-test.myshopify.com/admin/api/2023-07/graphql.json", - body="""query { - node(id: "gid://shopify/BulkOperation/4472588009661") { - ... on BulkOperation { - id - status - errorCode - createdAt - objectCount - fileSize - url - partialDataUrl - } - } - }""" - ) - - -def _records_file_request() -> HttpRequest: - return HttpRequest( - url="https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final", - query_params=ANY_QUERY_PARAMS, - ) - - -def _register_successful_read_requests(http_mocker: HttpMocker): - response_scopes = '{"access_scopes":[{"handle":"read_analytics"},{"handle":"read_customers"},{"handle":"read_gdpr_data_request"},{"handle":"read_online_store_navigation"},{"handle":"read_shopify_payments_accounts"},{"handle":"read_shopify_payments_bank_accounts"},{"handle":"read_shopify_payments_disputes"},{"handle":"read_shopify_payments_payouts"},{"handle":"read_assigned_fulfillment_orders"},{"handle":"read_discounts"},{"handle":"read_draft_orders"},{"handle":"read_files"},{"handle":"read_fulfillments"},{"handle":"read_gift_cards"},{"handle":"read_inventory"},{"handle":"read_legal_policies"},{"handle":"read_locations"},{"handle":"read_marketing_events"},{"handle":"read_merchant_managed_fulfillment_orders"},{"handle":"read_online_store_pages"},{"handle":"read_order_edits"},{"handle":"read_orders"},{"handle":"read_price_rules"},{"handle":"read_product_listings"},{"handle":"read_reports"},{"handle":"read_resource_feedbacks"},{"handle":"read_script_tags"},{"handle":"read_shipping"},{"handle":"read_locales"},{"handle":"read_content"},{"handle":"read_themes"},{"handle":"read_third_party_fulfillment_orders"},{"handle":"read_translations"},{"handle":"read_publications"},{"handle":"read_returns"},{"handle":"read_channels"},{"handle":"read_products"},{"handle":"read_markets"},{"handle":"read_shopify_credit"},{"handle":"read_store_credit_account_transactions"},{"handle":"read_all_cart_transforms"},{"handle":"read_cart_transforms"},{"handle":"read_all_checkout_completion_target_customizations"},{"handle":"read_companies"},{"handle":"read_custom_fulfillment_services"},{"handle":"read_customer_data_erasure"},{"handle":"read_customer_merge"},{"handle":"read_dery_customizations"},{"handle":"read_fulfillment_constraint_rules"},{"handle":"read_gates"},{"handle":"read_order_submission_rules"},{"handle":"read_payment_customizations"},{"handle":"read_packing_slip_templates"},{"handle":"read_payment_terms"},{"handle":"read_pixels"},{"handle":"read_product_feeds"},{"handle":"read_purchase_options"},{"handle":"read_shopify_payments_provider_accounts_sensitive"},{"handle":"read_all_orders"}]}' - response_shop = '{"shop":{"id":58033176765,"name":"airbyte integration test","email":"sherif@airbyte.io","domain":"airbyte-integration-test.myshopify.com","province":"California","country":"US","address1":"350 29th Avenue","zip":"94121","city":"San Francisco","source":null,"phone":"8023494963","latitude":37.7827286,"longitude":-122.4889911,"primary_locale":"en","address2":"","created_at":"2021-06-22T18:00:23-07:00","updated_at":"2024-01-30T21:11:05-08:00","country_code":"US","country_name":"United States","currency":"USD","customer_email":"sherif@airbyte.io","timezone":"(GMT-08:00) America\/Los_Angeles","iana_timezone":"America\/Los_Angeles","shop_owner":"Airbyte Airbyte","money_format":"${{amount}}","money_with_currency_format":"${{amount}} USD","weight_unit":"kg","province_code":"CA","taxes_included":true,"auto_configure_tax_inclusivity":null,"tax_shipping":null,"county_taxes":true,"plan_display_name":"Developer Preview","plan_name":"partner_test","has_discounts":true,"has_gift_cards":false,"myshopify_domain":"airbyte-integration-test.myshopify.com","google_apps_domain":null,"google_apps_login_enabled":null,"money_in_emails_format":"${{amount}}","money_with_currency_in_emails_format":"${{amount}} USD","eligible_for_payments":true,"requires_extra_payments_agreement":false,"password_enabled":true,"has_storefront":true,"finances":true,"primary_location_id":63590301885,"checkout_api_supported":true,"multi_location_enabled":true,"setup_required":false,"pre_launch_enabled":false,"enabled_presentment_currencies":["USD"],"transactional_sms_disabled":false,"marketing_sms_consent_enabled_at_checkout":false}}' - response_graphql = '{"data":{"bulkOperationRunQuery":{"bulkOperation":{"id":"gid://shopify/BulkOperation/4472588009661","status":"CREATED","createdAt":"2024-05-05T15:34:08Z"},"userErrors":[]}},"extensions":{"cost":{"requestedQueryCost":10,"actualQueryCost":10,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1990,"restoreRate":100.0}}}}' - response_graphql2 = '{"data":{"node":{"id":"gid://shopify/BulkOperation/4476008693949","status":"COMPLETED","errorCode":null,"createdAt":"2024-05-06T20:45:48Z","objectCount":"4","fileSize":"774","url":"https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final?GoogleAccessId=assets-us-prod%40shopify-tiers.iam.gserviceaccount.com&Expires=1715633149&Signature=oMjQelfAzUW%2FdulC3HbuBapbUriUJ%2Bc9%2FKpIIf954VTxBqKChJAdoTmWT9ymh%2FnCiHdM%2BeM%2FADz5siAC%2BXtHBWkJfvs%2F0cYpse0ueiQsw6R8gW5JpeSbizyGWcBBWkv5j8GncAnZOUVYDxRIgfxcPb8BlFxBfC3wsx%2F00v9D6EHbPpkIMTbCOAhheJdw9GmVa%2BOMqHGHlmiADM34RDeBPrvSo65f%2FakpV2LBQTEV%2BhDt0ndaREQ0MrpNwhKnc3vZPzA%2BliOGM0wyiYr9qVwByynHq8c%2FaJPPgI5eGEfQcyepgWZTRW5S0DbmBIFxZJLN6Nq6bJ2bIZWrVriUhNGx2g%3D%3D&response-content-disposition=attachment%3B+filename%3D%22bulk-4476008693949.jsonl%22%3B+filename%2A%3DUTF-8%27%27bulk-4476008693949.jsonl&response-content-type=application%2Fjsonl","partialDataUrl":null}},"extensions":{"cost":{"requestedQueryCost":1,"actualQueryCost":1,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1999,"restoreRate":100.0}}}}' - response_data = """{"__typename":"Order","id":"gid:\/\/shopify\/Order\/5010584895677"} -{"__typename":"Metafield","id":"gid:\/\/shopify\/Metafield\/22347288150205","namespace":"my_fields","value":"asdfasdf","key":"purchase_order","description":null,"createdAt":"2023-04-13T12:09:50Z","updatedAt":"2024-05-13T00:09:50Z","type":"single_line_text_field","__parentId":"gid:\/\/shopify\/Order\/5010584895677"} -{"__typename":"Order","id":"gid:\/\/shopify\/Order\/5010585911485"} -{"__typename":"Metafield","id":"gid:\/\/shopify\/Metafield\/22347288740029","namespace":"my_fields","value":"asdfasdfasdf","key":"purchase_order","description":null,"createdAt":"2023-04-13T12:11:20Z","updatedAt":"2024-05-05T00:11:20Z","type":"single_line_text_field","__parentId":"gid:\/\/shopify\/Order\/5010585911485"} -""" - - http_mocker.get( - _shop_request(), - HttpResponse(response_shop) - ) - http_mocker.get( - _scopes_request(), - HttpResponse(response_scopes) - ) - http_mocker.post( - _data_graphql_request(), - HttpResponse(response_graphql) - ) - http_mocker.post( - _status_graphql_request(), - HttpResponse(response_graphql2) - ) - http_mocker.get( - _records_file_request(), - HttpResponse(response_data) - ) - - -@freeze_time(pdm.parse(start_date).subtract(days=1)) -def test_read_graphql_records_successfully(): - with HttpMocker() as http_mocker: - catalog = _catalog("metafield_orders", SyncMode.full_refresh) - - _register_successful_read_requests(http_mocker) - - output = _read(catalog) - - assert output.records - - -def test_check_for_errors_with_connection_error() -> None: - with (HttpMocker() as http_mocker): - catalog = _catalog("metafield_orders", SyncMode.full_refresh) - - inner_mocker = http_mocker.__getattribute__("_mocker") - inner_mocker.register_uri("GET", "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/shop.json", exc=ConnectionAbortedError) - - _register_successful_read_requests(http_mocker) - - # TODO: how to check for retries? - output = _read(catalog) - - print(f"Call Count: {inner_mocker.call_count}") - print(f"iscalled: {inner_mocker.called}") - print(f"Mocker history: {inner_mocker.request_history}") - print(output.errors) - # assert "ConnectionAbortedError" in output.errors.__str__() - assert output.records - - def test_get_errors_from_response_invalid_response(auth_config) -> None: expected = "Couldn't check the `response` for `errors`" stream = MetafieldOrders(auth_config) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py new file mode 100644 index 0000000000000..8af0a72df2f59 --- /dev/null +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py @@ -0,0 +1,165 @@ +import pytest +import requests + +from source_shopify import SourceShopify +from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read +from airbyte_cdk.test.mock_http import HttpMocker +from airbyte_cdk.test.mock_http import HttpResponse +from freezegun import freeze_time +from airbyte_protocol.models import SyncMode +from typing import Any, Dict +from airbyte_cdk.test.catalog_builder import CatalogBuilder +from airbyte_cdk.test.mock_http.request import ANY_QUERY_PARAMS, HttpRequest +from test_data import get_shop_response, get_scopes_response, get_status_graphql_response, get_data_graphql_response, \ + get_records_file_response +import pendulum as pdm + + +_JOB_START_DATE = pdm.parse("2024-05-05T00:00:00+00:00") +_JOB_END_DATE = _JOB_START_DATE.add(hours=3) +_URL_ACCESS_SCOPES = "https://airbyte-integration-test.myshopify.com/admin/oauth/access_scopes.json" +_URL_SHOP = "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/shop.json" +_URL_GRAPHQL = "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/graphql.json" +_URL_RECORDS_FILE = "https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final" +_BULK_STREAM = "metafield_orders" + + +def _get_config(start_date: str, bulk_window: int = 1) -> Dict[str, Any]: + return { + "start_date": start_date, + "shop": "airbyte-integration-test", + "credentials": { + "auth_method": "api_password", + "api_password": "api_password", + }, + "bulk_window_in_days": bulk_window + } + + +def _get_scopes_request() -> HttpRequest: + return HttpRequest( + url=_URL_ACCESS_SCOPES, + query_params=ANY_QUERY_PARAMS, + ) + + +def _get_shop_request() -> HttpRequest: + return HttpRequest( + url=_URL_SHOP, + query_params=ANY_QUERY_PARAMS, + ) + + +def _get_data_graphql_request() -> HttpRequest: + return HttpRequest( + url=_URL_GRAPHQL, + body='{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2024-05-05T00:00:00+00:00\' AND updated_at:<=\'2024-05-05T03:00:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}' + ) + + +def _get_status_graphql_request() -> HttpRequest: + return HttpRequest( + url=_URL_GRAPHQL, + body="""query { + node(id: "gid://shopify/BulkOperation/4472588009661") { + ... on BulkOperation { + id + status + errorCode + createdAt + objectCount + fileSize + url + partialDataUrl + } + } + }""" + ) + + +def _get_records_file_request() -> HttpRequest: + return HttpRequest( + url=_URL_RECORDS_FILE, + query_params=ANY_QUERY_PARAMS, + ) + + +def _mock_successful_read_requests(http_mocker: HttpMocker): + """Mock the multiple requests needed for a bulk GraphQL read. + """ + http_mocker.get( + _get_shop_request(), + HttpResponse(get_shop_response()) + ) + http_mocker.get( + _get_scopes_request(), + HttpResponse(get_scopes_response()) + ) + http_mocker.post( + _get_data_graphql_request(), + HttpResponse(get_data_graphql_response()) + ) + http_mocker.post( + _get_status_graphql_request(), + HttpResponse(get_status_graphql_response()) + ) + http_mocker.get( + _get_records_file_request(), + HttpResponse(get_records_file_response()) + ) + + +@freeze_time(_JOB_END_DATE) +def test_read_graphql_records_successfully(): + with HttpMocker() as http_mocker: + _mock_successful_read_requests(http_mocker) + + catalog = CatalogBuilder().with_stream(_BULK_STREAM, SyncMode.full_refresh).build() + output = read(SourceShopify(), _get_config(_JOB_START_DATE.to_date_string()), catalog) + + assert output.records + + +def _mock_failing_read_requests(http_mocker: HttpMocker): + """Mock the multiple requests needed for a bulk GraphQL read with a failure and then a successful response. + """ + http_mocker.get( + _get_shop_request(), + HttpResponse(get_shop_response()) + ) + http_mocker.get( + _get_scopes_request(), + HttpResponse(get_scopes_response()) + ) + inner_mocker = http_mocker.__getattribute__("_mocker") + inner_mocker.register_uri( + "POST", + _URL_GRAPHQL, + [{"status_code": 104}, {"text": get_data_graphql_response(), "status_code": 200}], + ) + http_mocker.post( + _get_status_graphql_request(), + HttpResponse(get_status_graphql_response()) + ) + http_mocker.get( + _get_records_file_request(), + HttpResponse(get_records_file_response()) + ) + + +@freeze_time(_JOB_END_DATE) +def test_check_for_errors_with_connection_error() -> None: + with HttpMocker() as http_mocker: + + _mock_failing_read_requests(http_mocker) + + catalog = CatalogBuilder().with_stream(_BULK_STREAM, SyncMode.full_refresh).build() + output = read(SourceShopify(), _get_config(_JOB_START_DATE.to_date_string()), catalog) + + inner_mocker = http_mocker.__getattribute__("_mocker") + print(f"Call Count: {inner_mocker.call_count}") + print(f"IsCalled: {inner_mocker.called}") + print(f"Mocker history: {inner_mocker.request_history}") + print(output.errors) + # assert "ConnectionAbortedError" in output.errors.__str__() + assert output.records diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_data.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_data.py new file mode 100644 index 0000000000000..3468874d2c5b7 --- /dev/null +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_data.py @@ -0,0 +1,26 @@ +import pendulum as pdm +import pytest + + +def get_shop_response(): + return '{"shop":{"id":58033176765,"name":"airbyte integration test","email":"sherif@airbyte.io","domain":"airbyte-integration-test.myshopify.com","province":"California","country":"US","address1":"350 29th Avenue","zip":"94121","city":"San Francisco","source":null,"phone":"8023494963","latitude":37.7827286,"longitude":-122.4889911,"primary_locale":"en","address2":"","created_at":"2021-06-22T18:00:23-07:00","updated_at":"2024-05-05T01:11:05-08:00","country_code":"US","country_name":"United States","currency":"USD","customer_email":"sherif@airbyte.io","timezone":"(GMT-08:00) America\/Los_Angeles","iana_timezone":"America\/Los_Angeles","shop_owner":"Airbyte Airbyte","money_format":"${{amount}}","money_with_currency_format":"${{amount}} USD","weight_unit":"kg","province_code":"CA","taxes_included":true,"auto_configure_tax_inclusivity":null,"tax_shipping":null,"county_taxes":true,"plan_display_name":"Developer Preview","plan_name":"partner_test","has_discounts":true,"has_gift_cards":false,"myshopify_domain":"airbyte-integration-test.myshopify.com","google_apps_domain":null,"google_apps_login_enabled":null,"money_in_emails_format":"${{amount}}","money_with_currency_in_emails_format":"${{amount}} USD","eligible_for_payments":true,"requires_extra_payments_agreement":false,"password_enabled":true,"has_storefront":true,"finances":true,"primary_location_id":63590301885,"checkout_api_supported":true,"multi_location_enabled":true,"setup_required":false,"pre_launch_enabled":false,"enabled_presentment_currencies":["USD"],"transactional_sms_disabled":false,"marketing_sms_consent_enabled_at_checkout":false}}' + + +def get_scopes_response(): + return '{"access_scopes":[{"handle":"read_analytics"},{"handle":"read_customers"},{"handle":"read_gdpr_data_request"},{"handle":"read_online_store_navigation"},{"handle":"read_shopify_payments_accounts"},{"handle":"read_shopify_payments_bank_accounts"},{"handle":"read_shopify_payments_disputes"},{"handle":"read_shopify_payments_payouts"},{"handle":"read_assigned_fulfillment_orders"},{"handle":"read_discounts"},{"handle":"read_draft_orders"},{"handle":"read_files"},{"handle":"read_fulfillments"},{"handle":"read_gift_cards"},{"handle":"read_inventory"},{"handle":"read_legal_policies"},{"handle":"read_locations"},{"handle":"read_marketing_events"},{"handle":"read_merchant_managed_fulfillment_orders"},{"handle":"read_online_store_pages"},{"handle":"read_order_edits"},{"handle":"read_orders"},{"handle":"read_price_rules"},{"handle":"read_product_listings"},{"handle":"read_reports"},{"handle":"read_resource_feedbacks"},{"handle":"read_script_tags"},{"handle":"read_shipping"},{"handle":"read_locales"},{"handle":"read_content"},{"handle":"read_themes"},{"handle":"read_third_party_fulfillment_orders"},{"handle":"read_translations"},{"handle":"read_publications"},{"handle":"read_returns"},{"handle":"read_channels"},{"handle":"read_products"},{"handle":"read_markets"},{"handle":"read_shopify_credit"},{"handle":"read_store_credit_account_transactions"},{"handle":"read_all_cart_transforms"},{"handle":"read_cart_transforms"},{"handle":"read_all_checkout_completion_target_customizations"},{"handle":"read_companies"},{"handle":"read_custom_fulfillment_services"},{"handle":"read_customer_data_erasure"},{"handle":"read_customer_merge"},{"handle":"read_dery_customizations"},{"handle":"read_fulfillment_constraint_rules"},{"handle":"read_gates"},{"handle":"read_order_submission_rules"},{"handle":"read_payment_customizations"},{"handle":"read_packing_slip_templates"},{"handle":"read_payment_terms"},{"handle":"read_pixels"},{"handle":"read_product_feeds"},{"handle":"read_purchase_options"},{"handle":"read_shopify_payments_provider_accounts_sensitive"},{"handle":"read_all_orders"}]}' + + +def get_data_graphql_response(): + return '{"data":{"bulkOperationRunQuery":{"bulkOperation":{"id":"gid://shopify/BulkOperation/4472588009661","status":"CREATED","createdAt":"2024-05-05T02:00:00Z"},"userErrors":[]}},"extensions":{"cost":{"requestedQueryCost":10,"actualQueryCost":10,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1990,"restoreRate":100.0}}}}' + + +def get_status_graphql_response(): + return '{"data":{"node":{"id":"gid://shopify/BulkOperation/4476008693949","status":"COMPLETED","errorCode":null,"createdAt":"2024-05-05T00:45:48Z","objectCount":"4","fileSize":"774","url":"https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final?GoogleAccessId=assets-us-prod%40shopify-tiers.iam.gserviceaccount.com&Expires=1715633149&Signature=oMjQelfAzUW%2FdulC3HbuBapbUriUJ%2Bc9%2FKpIIf954VTxBqKChJAdoTmWT9ymh%2FnCiHdM%2BeM%2FADz5siAC%2BXtHBWkJfvs%2F0cYpse0ueiQsw6R8gW5JpeSbizyGWcBBWkv5j8GncAnZOUVYDxRIgfxcPb8BlFxBfC3wsx%2F00v9D6EHbPpkIMTbCOAhheJdw9GmVa%2BOMqHGHlmiADM34RDeBPrvSo65f%2FakpV2LBQTEV%2BhDt0ndaREQ0MrpNwhKnc3vZPzA%2BliOGM0wyiYr9qVwByynHq8c%2FaJPPgI5eGEfQcyepgWZTRW5S0DbmBIFxZJLN6Nq6bJ2bIZWrVriUhNGx2g%3D%3D&response-content-disposition=attachment%3B+filename%3D%22bulk-4476008693949.jsonl%22%3B+filename%2A%3DUTF-8%27%27bulk-4476008693949.jsonl&response-content-type=application%2Fjsonl","partialDataUrl":null}},"extensions":{"cost":{"requestedQueryCost":1,"actualQueryCost":1,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1999,"restoreRate":100.0}}}}' + + +def get_records_file_response(): + return """{"__typename":"Order","id":"gid:\/\/shopify\/Order\/5010584895677"} +{"__typename":"Metafield","id":"gid:\/\/shopify\/Metafield\/22347288150205","namespace":"my_fields","value":"asdfasdf","key":"purchase_order","description":null,"createdAt":"2023-04-13T12:09:50Z","updatedAt":"2024-05-05T01:09:50Z","type":"single_line_text_field","__parentId":"gid:\/\/shopify\/Order\/5010584895677"} +{"__typename":"Order","id":"gid:\/\/shopify\/Order\/5010585911485"} +{"__typename":"Metafield","id":"gid:\/\/shopify\/Metafield\/22347288740029","namespace":"my_fields","value":"asdfasdfasdf","key":"purchase_order","description":null,"createdAt":"2023-04-13T12:11:20Z","updatedAt":"2024-05-05T01:11:20Z","type":"single_line_text_field","__parentId":"gid:\/\/shopify\/Order\/5010585911485"} +""" From 66898dcbdd2425ae2296f7eb38ca413480fc4b69 Mon Sep 17 00:00:00 2001 From: Erick Corona Date: Wed, 8 May 2024 18:10:27 -0600 Subject: [PATCH 16/27] Fix data graphql end date --- .../integration/test_bulk_stream.py | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py index 8af0a72df2f59..6c357d46eeefb 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py @@ -16,11 +16,13 @@ _JOB_START_DATE = pdm.parse("2024-05-05T00:00:00+00:00") -_JOB_END_DATE = _JOB_START_DATE.add(hours=3) +_JOB_END_DATE = _JOB_START_DATE.add(hours=2, minutes=24) + _URL_ACCESS_SCOPES = "https://airbyte-integration-test.myshopify.com/admin/oauth/access_scopes.json" _URL_SHOP = "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/shop.json" _URL_GRAPHQL = "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/graphql.json" _URL_RECORDS_FILE = "https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final" + _BULK_STREAM = "metafield_orders" @@ -53,7 +55,7 @@ def _get_shop_request() -> HttpRequest: def _get_data_graphql_request() -> HttpRequest: return HttpRequest( url=_URL_GRAPHQL, - body='{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2024-05-05T00:00:00+00:00\' AND updated_at:<=\'2024-05-05T03:00:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}' + body='{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2024-05-05T00:00:00+00:00\' AND updated_at:<=\'2024-05-05T02:24:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}' ) @@ -117,7 +119,8 @@ def test_read_graphql_records_successfully(): catalog = CatalogBuilder().with_stream(_BULK_STREAM, SyncMode.full_refresh).build() output = read(SourceShopify(), _get_config(_JOB_START_DATE.to_date_string()), catalog) - assert output.records + assert output.errors == [] + assert len(output.records) == 2 def _mock_failing_read_requests(http_mocker: HttpMocker): @@ -132,11 +135,17 @@ def _mock_failing_read_requests(http_mocker: HttpMocker): HttpResponse(get_scopes_response()) ) inner_mocker = http_mocker.__getattribute__("_mocker") + + def raise_connectio_error(request, context): + raise ConnectionError("ConnectionError") + + # Mock the first GraphQL request to fail with a ConnectionError, and then succeed in the next call. inner_mocker.register_uri( "POST", _URL_GRAPHQL, - [{"status_code": 104}, {"text": get_data_graphql_response(), "status_code": 200}], + [{"text": raise_connectio_error}, {"text": get_data_graphql_response(), "status_code": 200}], ) + http_mocker.post( _get_status_graphql_request(), HttpResponse(get_status_graphql_response()) @@ -150,16 +159,12 @@ def _mock_failing_read_requests(http_mocker: HttpMocker): @freeze_time(_JOB_END_DATE) def test_check_for_errors_with_connection_error() -> None: with HttpMocker() as http_mocker: - _mock_failing_read_requests(http_mocker) catalog = CatalogBuilder().with_stream(_BULK_STREAM, SyncMode.full_refresh).build() output = read(SourceShopify(), _get_config(_JOB_START_DATE.to_date_string()), catalog) - inner_mocker = http_mocker.__getattribute__("_mocker") - print(f"Call Count: {inner_mocker.call_count}") - print(f"IsCalled: {inner_mocker.called}") - print(f"Mocker history: {inner_mocker.request_history}") - print(output.errors) - # assert "ConnectionAbortedError" in output.errors.__str__() - assert output.records + assert "ConnectionError" in output.errors.__str__() + + # TODO: We should be able to read records once the retry logic is implemented in HTTPClient. + # assert output.records == 2 From 9a93b79136608363c672b6535d55f2f997a048f9 Mon Sep 17 00:00:00 2001 From: Erick Corona Date: Wed, 8 May 2024 20:55:58 -0600 Subject: [PATCH 17/27] Improve format --- .../integration/test_bulk_stream.py | 43 ++++++------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py index 6c357d46eeefb..a659f3bfb7c00 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py @@ -1,6 +1,3 @@ -import pytest -import requests - from source_shopify import SourceShopify from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read from airbyte_cdk.test.mock_http import HttpMocker @@ -55,7 +52,8 @@ def _get_shop_request() -> HttpRequest: def _get_data_graphql_request() -> HttpRequest: return HttpRequest( url=_URL_GRAPHQL, - body='{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2024-05-05T00:00:00+00:00\' AND updated_at:<=\'2024-05-05T02:24:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}' + # TODO: This is a very long string. Split or add a matcher that is not affected by slight body differences. + body='''{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2024-05-05T00:00:00+00:00\' AND updated_at:<=\'2024-05-05T02:24:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}''' ) @@ -89,26 +87,11 @@ def _get_records_file_request() -> HttpRequest: def _mock_successful_read_requests(http_mocker: HttpMocker): """Mock the multiple requests needed for a bulk GraphQL read. """ - http_mocker.get( - _get_shop_request(), - HttpResponse(get_shop_response()) - ) - http_mocker.get( - _get_scopes_request(), - HttpResponse(get_scopes_response()) - ) - http_mocker.post( - _get_data_graphql_request(), - HttpResponse(get_data_graphql_response()) - ) - http_mocker.post( - _get_status_graphql_request(), - HttpResponse(get_status_graphql_response()) - ) - http_mocker.get( - _get_records_file_request(), - HttpResponse(get_records_file_response()) - ) + http_mocker.get(_get_shop_request(), HttpResponse(get_shop_response())) + http_mocker.get(_get_scopes_request(), HttpResponse(get_scopes_response())) + http_mocker.post(_get_data_graphql_request(), HttpResponse(get_data_graphql_response())) + http_mocker.post(_get_status_graphql_request(), HttpResponse(get_status_graphql_response())) + http_mocker.get(_get_records_file_request(), HttpResponse(get_records_file_response())) @freeze_time(_JOB_END_DATE) @@ -123,7 +106,7 @@ def test_read_graphql_records_successfully(): assert len(output.records) == 2 -def _mock_failing_read_requests(http_mocker: HttpMocker): +def _mock_read_requests_with_connection_error(http_mocker: HttpMocker): """Mock the multiple requests needed for a bulk GraphQL read with a failure and then a successful response. """ http_mocker.get( @@ -136,14 +119,14 @@ def _mock_failing_read_requests(http_mocker: HttpMocker): ) inner_mocker = http_mocker.__getattribute__("_mocker") - def raise_connectio_error(request, context): + def raise_connection_error(request, context): raise ConnectionError("ConnectionError") - # Mock the first GraphQL request to fail with a ConnectionError, and then succeed in the next call. + # Use a list of responses to mock the first GraphQL request with a ConnectionError, and then succeed in the next call. inner_mocker.register_uri( "POST", _URL_GRAPHQL, - [{"text": raise_connectio_error}, {"text": get_data_graphql_response(), "status_code": 200}], + [{"text": raise_connection_error}, {"text": get_data_graphql_response(), "status_code": 200}], ) http_mocker.post( @@ -159,11 +142,13 @@ def raise_connectio_error(request, context): @freeze_time(_JOB_END_DATE) def test_check_for_errors_with_connection_error() -> None: with HttpMocker() as http_mocker: - _mock_failing_read_requests(http_mocker) + _mock_read_requests_with_connection_error(http_mocker) catalog = CatalogBuilder().with_stream(_BULK_STREAM, SyncMode.full_refresh).build() output = read(SourceShopify(), _get_config(_JOB_START_DATE.to_date_string()), catalog) + inner_mocker = http_mocker.__getattribute__("_mocker") + print(inner_mocker.request_history) assert "ConnectionError" in output.errors.__str__() # TODO: We should be able to read records once the retry logic is implemented in HTTPClient. From 1d86e03e8af480343ea566445336e2a55294eaee Mon Sep 17 00:00:00 2001 From: maxi297 Date: Tue, 14 May 2024 17:43:07 -0400 Subject: [PATCH 18/27] Make requests and responses re-usable --- .../unit_tests/integration/__init__.py | 0 .../unit_tests/integration/api/__init__.py | 0 .../integration/api/authentication.py | 82 ++++++++ .../unit_tests/integration/api/bulk.py | 175 ++++++++++++++++ .../integration/test_bulk_stream.py | 190 ++++++------------ .../unit_tests/integration/test_data.py | 26 --- .../resource/http/response/shop.json | 60 ++++++ 7 files changed, 379 insertions(+), 154 deletions(-) create mode 100644 airbyte-integrations/connectors/source-shopify/unit_tests/integration/__init__.py create mode 100644 airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/__init__.py create mode 100644 airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py create mode 100644 airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py delete mode 100644 airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_data.py create mode 100644 airbyte-integrations/connectors/source-shopify/unit_tests/resource/http/response/shop.json diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/__init__.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/__init__.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py new file mode 100644 index 0000000000000..dfce957167c28 --- /dev/null +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py @@ -0,0 +1,82 @@ +import json + +from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse +from airbyte_cdk.test.mock_http.request import ANY_QUERY_PARAMS +from airbyte_cdk.test.mock_http.response_builder import find_template + + +_ALL_SCOPES = [ + "read_all_cart_transforms", + "read_all_checkout_completion_target_customizations", + "read_all_orders", + "read_analytics", + "read_assigned_fulfillment_orders", + "read_cart_transforms", + "read_channels", + "read_companies", + "read_content", + "read_custom_fulfillment_services", + "read_customer_data_erasure", + "read_customer_merge", + "read_customers", + "read_dery_customizations", + "read_discounts", + "read_draft_orders", + "read_files", + "read_fulfillment_constraint_rules", + "read_fulfillments", + "read_gates", + "read_gdpr_data_request", + "read_gift_cards", + "read_inventory", + "read_legal_policies", + "read_locales", + "read_locations", + "read_marketing_events", + "read_markets", + "read_merchant_managed_fulfillment_orders", + "read_online_store_navigation", + "read_online_store_pages", + "read_order_edits", + "read_order_submission_rules", + "read_orders", + "read_packing_slip_templates", + "read_payment_customizations", + "read_payment_terms", + "read_pixels", + "read_price_rules", + "read_product_feeds", + "read_product_listings", + "read_products", + "read_publications", + "read_purchase_options", + "read_reports", + "read_resource_feedbacks", + "read_returns", + "read_script_tags", + "read_shipping", + "read_shopify_credit", + "read_shopify_payments_accounts", + "read_shopify_payments_bank_accounts", + "read_shopify_payments_disputes", + "read_shopify_payments_payouts", + "read_shopify_payments_provider_accounts_sensitive", + "read_store_credit_account_transactions", + "read_themes", + "read_third_party_fulfillment_orders", + "read_translations" +] + + +def set_up_shop(http_mocker: HttpMocker, shop_name: str) -> None: + http_mocker.get( + HttpRequest(f"https://{shop_name}.myshopify.com/admin/api/2023-07/shop.json", query_params=ANY_QUERY_PARAMS), + HttpResponse(json.dumps(find_template("shop", __file__)), status_code=200), + ) + + +def grant_all_scopes(http_mocker: HttpMocker, shop_name: str) -> None: + http_mocker.get( + HttpRequest(f"https://{shop_name}.myshopify.com/admin/oauth/access_scopes.json"), + HttpResponse(json.dumps({"access_scopes": [{"handle": scope} for scope in _ALL_SCOPES]}), status_code=200), + ) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py new file mode 100644 index 0000000000000..392056ae81639 --- /dev/null +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py @@ -0,0 +1,175 @@ +import json +from datetime import datetime +from random import randint + +from airbyte_cdk.test.mock_http import HttpRequest, HttpResponse + + +def _create_job_url(shop_name: str) -> str: + return f"https://{shop_name}.myshopify.com/admin/api/2023-07/graphql.json" + + +def create_job_creation_request(shop_name: str, lower_boundary: datetime, upper_boundary: datetime) -> HttpRequest: + outer_query = """mutation { + bulkOperationRunQuery( + query: \"\"\" + %INNER_QUERY_TOKEN% + \"\"\" + ) { + bulkOperation { + id + status + createdAt + } + userErrors { + field + message + } + } + }""" + inner_query = """{ + orders( + query: \"updated_at:>='%LOWER_BOUNDARY_TOKEN%' AND updated_at:<='%UPPER_BOUNDARY_TOKEN%'\" + sortKey: UPDATED_AT + ) { + edges { + node { + __typename + id + metafields { + edges { + node { + __typename + id + namespace + value + key + description + createdAt + updatedAt + type + } + } + } + } + } + } +}""" + inner_query = inner_query.replace("%LOWER_BOUNDARY_TOKEN%", lower_boundary.isoformat()) + inner_query = inner_query.replace("%UPPER_BOUNDARY_TOKEN%", upper_boundary.isoformat()) + outer_query = outer_query.replace("%INNER_QUERY_TOKEN%", inner_query) + + return HttpRequest( + url=_create_job_url(shop_name), + body=json.dumps({"query": outer_query}) + ) + + +def create_job_status_request(shop_name: str, job_id: str) -> HttpRequest: + return HttpRequest( + url=_create_job_url(shop_name), + body=f"""query {{ + node(id: "{job_id}") {{ + ... on BulkOperation {{ + id + status + errorCode + createdAt + objectCount + fileSize + url + partialDataUrl + }} + }} + }}""" + ) + + +class JobCreationResponseBuilder: + def __init__(self) -> None: + self._template = { + "data": { + "bulkOperationRunQuery": { + "bulkOperation": { + "id": "gid://shopify/BulkOperation/0", + "status": "CREATED", + "createdAt": "2024-05-05T02:00:00Z" + }, + "userErrors": [] + } + }, + "extensions": { + "cost": { + "requestedQueryCost": 10, + "actualQueryCost": 10, + "throttleStatus": { + "maximumAvailable": 2000.0, + "currentlyAvailable": 1990, + "restoreRate": 100.0 + } + } + } + } + + def with_bulk_operation_id(self, bulk_operation_id: str) -> "JobCreationResponseBuilder": + self._template["data"]["bulkOperationRunQuery"]["bulkOperation"]["id"] = bulk_operation_id + return self + + def build(self) -> HttpResponse: + return HttpResponse(json.dumps(self._template), status_code=200) + + +class JobStatusResponseBuilder: + def __init__(self) -> None: + self._template = { + "data": { + "node": {}, + "extensions": { + "cost": { + "requestedQueryCost": 1, + "actualQueryCost": 1, + "throttleStatus": { + "maximumAvailable": 2000.0, + "currentlyAvailable": 1999, + "restoreRate": 100.0 + } + } + } + } + } + + def with_completed_status(self, bulk_operation_id: str, job_result_url: str) -> "JobStatusResponseBuilder": + self._template["data"]["node"] = { + "id": bulk_operation_id, + "status": "COMPLETED", + "errorCode": None, + "createdAt": "2024-05-05T00:45:48Z", + "objectCount": "4", + "fileSize": "774", + "url": job_result_url, + "partialDataUrl": None + } + return self + + def build(self) -> HttpResponse: + return HttpResponse(json.dumps(self._template), status_code=200) + + +class MetafieldOrdersJobResponseBuilder: + def __init__(self) -> None: + self._records = [] + + def _any_record(self) -> str: + an_id = str(randint(1000000000000, 9999999999999)) + a_parent_id = str(randint(1000000000000, 9999999999999)) + return f"""{{"__typename":"Order","id":"gid:\/\/shopify\/Order\/{a_parent_id}"}} +{{"__typename":"Metafield","id":"gid:\/\/shopify\/Metafield\/{an_id}","namespace":"my_fields","value":"asdfasdf","key":"purchase_order","description":null,"createdAt":"2023-04-13T12:09:50Z","updatedAt":"2024-05-05T01:09:50Z","type":"single_line_text_field","__parentId":"gid:\/\/shopify\/Order\/{a_parent_id}"}} +""" + + + def with_record(self) -> "MetafieldOrdersJobResponseBuilder": + self._records.append(self._any_record()) + return self + + def build(self) -> HttpResponse: + return HttpResponse("".join(self._records), status_code=200) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py index a659f3bfb7c00..c68357275284b 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py @@ -1,32 +1,32 @@ +from datetime import datetime, timedelta +from unittest import TestCase + from source_shopify import SourceShopify -from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read -from airbyte_cdk.test.mock_http import HttpMocker -from airbyte_cdk.test.mock_http import HttpResponse +from airbyte_cdk.test.entrypoint_wrapper import read +from airbyte_cdk.test.mock_http import HttpMocker, HttpResponse from freezegun import freeze_time -from airbyte_protocol.models import SyncMode +from airbyte_protocol.models import SyncMode, FailureType from typing import Any, Dict from airbyte_cdk.test.catalog_builder import CatalogBuilder -from airbyte_cdk.test.mock_http.request import ANY_QUERY_PARAMS, HttpRequest -from test_data import get_shop_response, get_scopes_response, get_status_graphql_response, get_data_graphql_response, \ - get_records_file_response -import pendulum as pdm - +from airbyte_cdk.test.mock_http.request import HttpRequest +from unit_tests.integration.api.authentication import grant_all_scopes, set_up_shop +from unit_tests.integration.api.bulk import JobCreationResponseBuilder, JobStatusResponseBuilder, MetafieldOrdersJobResponseBuilder, create_job_creation_request, create_job_status_request -_JOB_START_DATE = pdm.parse("2024-05-05T00:00:00+00:00") -_JOB_END_DATE = _JOB_START_DATE.add(hours=2, minutes=24) +_BULK_OPERATION_ID = "gid://shopify/BulkOperation/4472588009661" +_BULK_STREAM = "metafield_orders" +_SHOP_NAME = "airbyte-integration-test" -_URL_ACCESS_SCOPES = "https://airbyte-integration-test.myshopify.com/admin/oauth/access_scopes.json" -_URL_SHOP = "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/shop.json" -_URL_GRAPHQL = "https://airbyte-integration-test.myshopify.com/admin/api/2023-07/graphql.json" -_URL_RECORDS_FILE = "https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final" +_JOB_START_DATE = datetime.fromisoformat("2024-05-05T00:00:00+00:00") +_JOB_END_DATE = _JOB_START_DATE + timedelta(hours=2, minutes=24) -_BULK_STREAM = "metafield_orders" +_URL_GRAPHQL = f"https://{_SHOP_NAME}.myshopify.com/admin/api/2023-07/graphql.json" +_JOB_RESULT_URL = "https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final?GoogleAccessId=assets-us-prod%40shopify-tiers.iam.gserviceaccount.com&Expires=1715633149&Signature=oMjQelfAzUW%2FdulC3HbuBapbUriUJ%2Bc9%2FKpIIf954VTxBqKChJAdoTmWT9ymh%2FnCiHdM%2BeM%2FADz5siAC%2BXtHBWkJfvs%2F0cYpse0ueiQsw6R8gW5JpeSbizyGWcBBWkv5j8GncAnZOUVYDxRIgfxcPb8BlFxBfC3wsx%2F00v9D6EHbPpkIMTbCOAhheJdw9GmVa%2BOMqHGHlmiADM34RDeBPrvSo65f%2FakpV2LBQTEV%2BhDt0ndaREQ0MrpNwhKnc3vZPzA%2BliOGM0wyiYr9qVwByynHq8c%2FaJPPgI5eGEfQcyepgWZTRW5S0DbmBIFxZJLN6Nq6bJ2bIZWrVriUhNGx2g%3D%3D&response-content-disposition=attachment%3B+filename%3D%22bulk-4476008693949.jsonl%22%3B+filename%2A%3DUTF-8%27%27bulk-4476008693949.jsonl&response-content-type=application%2Fjsonl" -def _get_config(start_date: str, bulk_window: int = 1) -> Dict[str, Any]: +def _get_config(start_date: datetime, bulk_window: int = 1) -> Dict[str, Any]: return { - "start_date": start_date, - "shop": "airbyte-integration-test", + "start_date": start_date.strftime("%Y-%m-%d"), + "shop": _SHOP_NAME, "credentials": { "auth_method": "api_password", "api_password": "api_password", @@ -35,121 +35,55 @@ def _get_config(start_date: str, bulk_window: int = 1) -> Dict[str, Any]: } -def _get_scopes_request() -> HttpRequest: - return HttpRequest( - url=_URL_ACCESS_SCOPES, - query_params=ANY_QUERY_PARAMS, - ) - - -def _get_shop_request() -> HttpRequest: - return HttpRequest( - url=_URL_SHOP, - query_params=ANY_QUERY_PARAMS, - ) - - -def _get_data_graphql_request() -> HttpRequest: - return HttpRequest( - url=_URL_GRAPHQL, - # TODO: This is a very long string. Split or add a matcher that is not affected by slight body differences. - body='''{"query": "mutation {\\n bulkOperationRunQuery(\\n query: \\"\\"\\"\\n {\\n orders(\\n query: \\"updated_at:>=\'2024-05-05T00:00:00+00:00\' AND updated_at:<=\'2024-05-05T02:24:00+00:00\'\\"\\n sortKey: UPDATED_AT\\n ) {\\n edges {\\n node {\\n __typename\\n id\\n metafields {\\n edges {\\n node {\\n __typename\\n id\\n namespace\\n value\\n key\\n description\\n createdAt\\n updatedAt\\n type\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n \\"\\"\\"\\n ) {\\n bulkOperation {\\n id\\n status\\n createdAt\\n }\\n userErrors {\\n field\\n message\\n }\\n }\\n }"}''' - ) - - -def _get_status_graphql_request() -> HttpRequest: - return HttpRequest( - url=_URL_GRAPHQL, - body="""query { - node(id: "gid://shopify/BulkOperation/4472588009661") { - ... on BulkOperation { - id - status - errorCode - createdAt - objectCount - fileSize - url - partialDataUrl - } - } - }""" - ) - - -def _get_records_file_request() -> HttpRequest: - return HttpRequest( - url=_URL_RECORDS_FILE, - query_params=ANY_QUERY_PARAMS, - ) - - -def _mock_successful_read_requests(http_mocker: HttpMocker): - """Mock the multiple requests needed for a bulk GraphQL read. - """ - http_mocker.get(_get_shop_request(), HttpResponse(get_shop_response())) - http_mocker.get(_get_scopes_request(), HttpResponse(get_scopes_response())) - http_mocker.post(_get_data_graphql_request(), HttpResponse(get_data_graphql_response())) - http_mocker.post(_get_status_graphql_request(), HttpResponse(get_status_graphql_response())) - http_mocker.get(_get_records_file_request(), HttpResponse(get_records_file_response())) - - @freeze_time(_JOB_END_DATE) -def test_read_graphql_records_successfully(): - with HttpMocker() as http_mocker: - _mock_successful_read_requests(http_mocker) - - catalog = CatalogBuilder().with_stream(_BULK_STREAM, SyncMode.full_refresh).build() - output = read(SourceShopify(), _get_config(_JOB_START_DATE.to_date_string()), catalog) +class GraphQlBulkStreamTest(TestCase): + + def setUp(self) -> None: + self._http_mocker = HttpMocker() + self._http_mocker.__enter__() + + set_up_shop(self._http_mocker, _SHOP_NAME) + grant_all_scopes(self._http_mocker, _SHOP_NAME) + + def tearDown(self) -> None: + self._http_mocker.__exit__(None, None, None) + + def test_when_read_then_extract_records(self) -> None: + self._http_mocker.post( + create_job_creation_request(_SHOP_NAME, _JOB_START_DATE, _JOB_END_DATE), + JobCreationResponseBuilder().with_bulk_operation_id(_BULK_OPERATION_ID).build(), + ) + self._http_mocker.post( + create_job_status_request(_SHOP_NAME, _BULK_OPERATION_ID), + JobStatusResponseBuilder().with_completed_status(_BULK_OPERATION_ID, _JOB_RESULT_URL).build(), + ) + self._http_mocker.get( + HttpRequest(_JOB_RESULT_URL), + MetafieldOrdersJobResponseBuilder().with_record().with_record().build(), + ) + + output = self._read(_get_config(_JOB_START_DATE)) assert output.errors == [] assert len(output.records) == 2 + def test_given_connection_error_when_read_then_fail_to_sync(self) -> None: + """ + We want to fix this behavior in a subsequent release so that instead, we retry and have records being emitted + """ + inner_mocker = self._http_mocker.__getattribute__("_mocker") + inner_mocker.register_uri( # TODO the testing library should have the ability to generate ConnectionError. As this might not be trivial, we will wait for another case before implementing + "POST", + _URL_GRAPHQL, + [{"exc": ConnectionError("ConnectionError")}, {"text": JobCreationResponseBuilder().with_bulk_operation_id(_BULK_OPERATION_ID).build().body, "status_code": 200}], + ) -def _mock_read_requests_with_connection_error(http_mocker: HttpMocker): - """Mock the multiple requests needed for a bulk GraphQL read with a failure and then a successful response. - """ - http_mocker.get( - _get_shop_request(), - HttpResponse(get_shop_response()) - ) - http_mocker.get( - _get_scopes_request(), - HttpResponse(get_scopes_response()) - ) - inner_mocker = http_mocker.__getattribute__("_mocker") - - def raise_connection_error(request, context): - raise ConnectionError("ConnectionError") - - # Use a list of responses to mock the first GraphQL request with a ConnectionError, and then succeed in the next call. - inner_mocker.register_uri( - "POST", - _URL_GRAPHQL, - [{"text": raise_connection_error}, {"text": get_data_graphql_response(), "status_code": 200}], - ) - - http_mocker.post( - _get_status_graphql_request(), - HttpResponse(get_status_graphql_response()) - ) - http_mocker.get( - _get_records_file_request(), - HttpResponse(get_records_file_response()) - ) - + output = self._read(_get_config(_JOB_START_DATE)) -@freeze_time(_JOB_END_DATE) -def test_check_for_errors_with_connection_error() -> None: - with HttpMocker() as http_mocker: - _mock_read_requests_with_connection_error(http_mocker) + assert list(map(lambda error: error.trace.error.failure_type, output.errors)) == [FailureType.system_error, FailureType.config_error] # The actual error followed by the error that crashes the python app + assert "ConnectionError" in output.errors[0].__str__() + def _read(self, config): catalog = CatalogBuilder().with_stream(_BULK_STREAM, SyncMode.full_refresh).build() - output = read(SourceShopify(), _get_config(_JOB_START_DATE.to_date_string()), catalog) - - inner_mocker = http_mocker.__getattribute__("_mocker") - print(inner_mocker.request_history) - assert "ConnectionError" in output.errors.__str__() - - # TODO: We should be able to read records once the retry logic is implemented in HTTPClient. - # assert output.records == 2 + output = read(SourceShopify(), config, catalog) + return output diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_data.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_data.py deleted file mode 100644 index 3468874d2c5b7..0000000000000 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_data.py +++ /dev/null @@ -1,26 +0,0 @@ -import pendulum as pdm -import pytest - - -def get_shop_response(): - return '{"shop":{"id":58033176765,"name":"airbyte integration test","email":"sherif@airbyte.io","domain":"airbyte-integration-test.myshopify.com","province":"California","country":"US","address1":"350 29th Avenue","zip":"94121","city":"San Francisco","source":null,"phone":"8023494963","latitude":37.7827286,"longitude":-122.4889911,"primary_locale":"en","address2":"","created_at":"2021-06-22T18:00:23-07:00","updated_at":"2024-05-05T01:11:05-08:00","country_code":"US","country_name":"United States","currency":"USD","customer_email":"sherif@airbyte.io","timezone":"(GMT-08:00) America\/Los_Angeles","iana_timezone":"America\/Los_Angeles","shop_owner":"Airbyte Airbyte","money_format":"${{amount}}","money_with_currency_format":"${{amount}} USD","weight_unit":"kg","province_code":"CA","taxes_included":true,"auto_configure_tax_inclusivity":null,"tax_shipping":null,"county_taxes":true,"plan_display_name":"Developer Preview","plan_name":"partner_test","has_discounts":true,"has_gift_cards":false,"myshopify_domain":"airbyte-integration-test.myshopify.com","google_apps_domain":null,"google_apps_login_enabled":null,"money_in_emails_format":"${{amount}}","money_with_currency_in_emails_format":"${{amount}} USD","eligible_for_payments":true,"requires_extra_payments_agreement":false,"password_enabled":true,"has_storefront":true,"finances":true,"primary_location_id":63590301885,"checkout_api_supported":true,"multi_location_enabled":true,"setup_required":false,"pre_launch_enabled":false,"enabled_presentment_currencies":["USD"],"transactional_sms_disabled":false,"marketing_sms_consent_enabled_at_checkout":false}}' - - -def get_scopes_response(): - return '{"access_scopes":[{"handle":"read_analytics"},{"handle":"read_customers"},{"handle":"read_gdpr_data_request"},{"handle":"read_online_store_navigation"},{"handle":"read_shopify_payments_accounts"},{"handle":"read_shopify_payments_bank_accounts"},{"handle":"read_shopify_payments_disputes"},{"handle":"read_shopify_payments_payouts"},{"handle":"read_assigned_fulfillment_orders"},{"handle":"read_discounts"},{"handle":"read_draft_orders"},{"handle":"read_files"},{"handle":"read_fulfillments"},{"handle":"read_gift_cards"},{"handle":"read_inventory"},{"handle":"read_legal_policies"},{"handle":"read_locations"},{"handle":"read_marketing_events"},{"handle":"read_merchant_managed_fulfillment_orders"},{"handle":"read_online_store_pages"},{"handle":"read_order_edits"},{"handle":"read_orders"},{"handle":"read_price_rules"},{"handle":"read_product_listings"},{"handle":"read_reports"},{"handle":"read_resource_feedbacks"},{"handle":"read_script_tags"},{"handle":"read_shipping"},{"handle":"read_locales"},{"handle":"read_content"},{"handle":"read_themes"},{"handle":"read_third_party_fulfillment_orders"},{"handle":"read_translations"},{"handle":"read_publications"},{"handle":"read_returns"},{"handle":"read_channels"},{"handle":"read_products"},{"handle":"read_markets"},{"handle":"read_shopify_credit"},{"handle":"read_store_credit_account_transactions"},{"handle":"read_all_cart_transforms"},{"handle":"read_cart_transforms"},{"handle":"read_all_checkout_completion_target_customizations"},{"handle":"read_companies"},{"handle":"read_custom_fulfillment_services"},{"handle":"read_customer_data_erasure"},{"handle":"read_customer_merge"},{"handle":"read_dery_customizations"},{"handle":"read_fulfillment_constraint_rules"},{"handle":"read_gates"},{"handle":"read_order_submission_rules"},{"handle":"read_payment_customizations"},{"handle":"read_packing_slip_templates"},{"handle":"read_payment_terms"},{"handle":"read_pixels"},{"handle":"read_product_feeds"},{"handle":"read_purchase_options"},{"handle":"read_shopify_payments_provider_accounts_sensitive"},{"handle":"read_all_orders"}]}' - - -def get_data_graphql_response(): - return '{"data":{"bulkOperationRunQuery":{"bulkOperation":{"id":"gid://shopify/BulkOperation/4472588009661","status":"CREATED","createdAt":"2024-05-05T02:00:00Z"},"userErrors":[]}},"extensions":{"cost":{"requestedQueryCost":10,"actualQueryCost":10,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1990,"restoreRate":100.0}}}}' - - -def get_status_graphql_response(): - return '{"data":{"node":{"id":"gid://shopify/BulkOperation/4476008693949","status":"COMPLETED","errorCode":null,"createdAt":"2024-05-05T00:45:48Z","objectCount":"4","fileSize":"774","url":"https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final?GoogleAccessId=assets-us-prod%40shopify-tiers.iam.gserviceaccount.com&Expires=1715633149&Signature=oMjQelfAzUW%2FdulC3HbuBapbUriUJ%2Bc9%2FKpIIf954VTxBqKChJAdoTmWT9ymh%2FnCiHdM%2BeM%2FADz5siAC%2BXtHBWkJfvs%2F0cYpse0ueiQsw6R8gW5JpeSbizyGWcBBWkv5j8GncAnZOUVYDxRIgfxcPb8BlFxBfC3wsx%2F00v9D6EHbPpkIMTbCOAhheJdw9GmVa%2BOMqHGHlmiADM34RDeBPrvSo65f%2FakpV2LBQTEV%2BhDt0ndaREQ0MrpNwhKnc3vZPzA%2BliOGM0wyiYr9qVwByynHq8c%2FaJPPgI5eGEfQcyepgWZTRW5S0DbmBIFxZJLN6Nq6bJ2bIZWrVriUhNGx2g%3D%3D&response-content-disposition=attachment%3B+filename%3D%22bulk-4476008693949.jsonl%22%3B+filename%2A%3DUTF-8%27%27bulk-4476008693949.jsonl&response-content-type=application%2Fjsonl","partialDataUrl":null}},"extensions":{"cost":{"requestedQueryCost":1,"actualQueryCost":1,"throttleStatus":{"maximumAvailable":2000.0,"currentlyAvailable":1999,"restoreRate":100.0}}}}' - - -def get_records_file_response(): - return """{"__typename":"Order","id":"gid:\/\/shopify\/Order\/5010584895677"} -{"__typename":"Metafield","id":"gid:\/\/shopify\/Metafield\/22347288150205","namespace":"my_fields","value":"asdfasdf","key":"purchase_order","description":null,"createdAt":"2023-04-13T12:09:50Z","updatedAt":"2024-05-05T01:09:50Z","type":"single_line_text_field","__parentId":"gid:\/\/shopify\/Order\/5010584895677"} -{"__typename":"Order","id":"gid:\/\/shopify\/Order\/5010585911485"} -{"__typename":"Metafield","id":"gid:\/\/shopify\/Metafield\/22347288740029","namespace":"my_fields","value":"asdfasdfasdf","key":"purchase_order","description":null,"createdAt":"2023-04-13T12:11:20Z","updatedAt":"2024-05-05T01:11:20Z","type":"single_line_text_field","__parentId":"gid:\/\/shopify\/Order\/5010585911485"} -""" diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/resource/http/response/shop.json b/airbyte-integrations/connectors/source-shopify/unit_tests/resource/http/response/shop.json new file mode 100644 index 0000000000000..bdbde8f400f3d --- /dev/null +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/resource/http/response/shop.json @@ -0,0 +1,60 @@ +{ + "shop": { + "id": 58033176765, + "name": "airbyte integration test", + "email": "sherif@airbyte.io", + "domain": "a-shop.myshopify.com", + "province": "California", + "country": "US", + "address1": "350 29th Avenue", + "zip": "94121", + "city": "San Francisco", + "source": null, + "phone": "8023494963", + "latitude": 37.7827286, + "longitude": -122.4889911, + "primary_locale": "en", + "address2": "", + "created_at": "2021-06-22T18:00:23-07:00", + "updated_at": "2024-05-05T01:11:05-08:00", + "country_code": "US", + "country_name": "United States", + "currency": "USD", + "customer_email": "sherif@airbyte.io", + "timezone": "(GMT-08:00) America\/Los_Angeles", + "iana_timezone": "America\/Los_Angeles", + "shop_owner": "Airbyte Airbyte", + "money_format": "${{amount}}", + "money_with_currency_format": "${{amount}} USD", + "weight_unit": "kg", + "province_code": "CA", + "taxes_included": true, + "auto_configure_tax_inclusivity": null, + "tax_shipping": null, + "county_taxes": true, + "plan_display_name": "Developer Preview", + "plan_name": "partner_test", + "has_discounts": true, + "has_gift_cards": false, + "myshopify_domain": "a-shop.myshopify.com", + "google_apps_domain": null, + "google_apps_login_enabled": null, + "money_in_emails_format": "${{amount}}", + "money_with_currency_in_emails_format": "${{amount}} USD", + "eligible_for_payments": true, + "requires_extra_payments_agreement": false, + "password_enabled": true, + "has_storefront": true, + "finances": true, + "primary_location_id": 63590301885, + "checkout_api_supported": true, + "multi_location_enabled": true, + "setup_required": false, + "pre_launch_enabled": false, + "enabled_presentment_currencies": [ + "USD" + ], + "transactional_sms_disabled": false, + "marketing_sms_consent_enabled_at_checkout": false + } +} From da0cc1b7ca2b7d6f7b806dd4c67f699642d71b92 Mon Sep 17 00:00:00 2001 From: maxi297 Date: Fri, 24 May 2024 10:39:44 -0400 Subject: [PATCH 19/27] format --- .../unit_tests/graphql_bulk/test_job.py | 1 - .../integration/api/authentication.py | 3 +- .../unit_tests/integration/api/bulk.py | 2 + .../integration/test_bulk_stream.py | 20 ++- .../resource/http/response/shop.json | 114 +++++++++--------- 5 files changed, 74 insertions(+), 66 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py index 88a3ed3a8a693..015242ac065f5 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py @@ -5,7 +5,6 @@ import pytest import requests - from source_shopify.shopify_graphql.bulk.exceptions import ShopifyBulkExceptions from source_shopify.shopify_graphql.bulk.status import ShopifyBulkJobStatus from source_shopify.streams.streams import ( diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py index dfce957167c28..386091c3023cd 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py @@ -1,10 +1,11 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. + import json from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse from airbyte_cdk.test.mock_http.request import ANY_QUERY_PARAMS from airbyte_cdk.test.mock_http.response_builder import find_template - _ALL_SCOPES = [ "read_all_cart_transforms", "read_all_checkout_completion_target_customizations", diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py index 392056ae81639..e66a85741ea40 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py @@ -1,3 +1,5 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. + import json from datetime import datetime from random import randint diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py index c68357275284b..2d67dc031fd71 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py @@ -1,16 +1,24 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. + from datetime import datetime, timedelta +from typing import Any, Dict from unittest import TestCase -from source_shopify import SourceShopify +from airbyte_cdk.test.catalog_builder import CatalogBuilder from airbyte_cdk.test.entrypoint_wrapper import read from airbyte_cdk.test.mock_http import HttpMocker, HttpResponse -from freezegun import freeze_time -from airbyte_protocol.models import SyncMode, FailureType -from typing import Any, Dict -from airbyte_cdk.test.catalog_builder import CatalogBuilder from airbyte_cdk.test.mock_http.request import HttpRequest +from airbyte_protocol.models import FailureType, SyncMode +from freezegun import freeze_time +from source_shopify import SourceShopify from unit_tests.integration.api.authentication import grant_all_scopes, set_up_shop -from unit_tests.integration.api.bulk import JobCreationResponseBuilder, JobStatusResponseBuilder, MetafieldOrdersJobResponseBuilder, create_job_creation_request, create_job_status_request +from unit_tests.integration.api.bulk import ( + JobCreationResponseBuilder, + JobStatusResponseBuilder, + MetafieldOrdersJobResponseBuilder, + create_job_creation_request, + create_job_status_request, +) _BULK_OPERATION_ID = "gid://shopify/BulkOperation/4472588009661" _BULK_STREAM = "metafield_orders" diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/resource/http/response/shop.json b/airbyte-integrations/connectors/source-shopify/unit_tests/resource/http/response/shop.json index bdbde8f400f3d..e4b4eb802a4fa 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/resource/http/response/shop.json +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/resource/http/response/shop.json @@ -1,60 +1,58 @@ { - "shop": { - "id": 58033176765, - "name": "airbyte integration test", - "email": "sherif@airbyte.io", - "domain": "a-shop.myshopify.com", - "province": "California", - "country": "US", - "address1": "350 29th Avenue", - "zip": "94121", - "city": "San Francisco", - "source": null, - "phone": "8023494963", - "latitude": 37.7827286, - "longitude": -122.4889911, - "primary_locale": "en", - "address2": "", - "created_at": "2021-06-22T18:00:23-07:00", - "updated_at": "2024-05-05T01:11:05-08:00", - "country_code": "US", - "country_name": "United States", - "currency": "USD", - "customer_email": "sherif@airbyte.io", - "timezone": "(GMT-08:00) America\/Los_Angeles", - "iana_timezone": "America\/Los_Angeles", - "shop_owner": "Airbyte Airbyte", - "money_format": "${{amount}}", - "money_with_currency_format": "${{amount}} USD", - "weight_unit": "kg", - "province_code": "CA", - "taxes_included": true, - "auto_configure_tax_inclusivity": null, - "tax_shipping": null, - "county_taxes": true, - "plan_display_name": "Developer Preview", - "plan_name": "partner_test", - "has_discounts": true, - "has_gift_cards": false, - "myshopify_domain": "a-shop.myshopify.com", - "google_apps_domain": null, - "google_apps_login_enabled": null, - "money_in_emails_format": "${{amount}}", - "money_with_currency_in_emails_format": "${{amount}} USD", - "eligible_for_payments": true, - "requires_extra_payments_agreement": false, - "password_enabled": true, - "has_storefront": true, - "finances": true, - "primary_location_id": 63590301885, - "checkout_api_supported": true, - "multi_location_enabled": true, - "setup_required": false, - "pre_launch_enabled": false, - "enabled_presentment_currencies": [ - "USD" - ], - "transactional_sms_disabled": false, - "marketing_sms_consent_enabled_at_checkout": false - } + "shop": { + "id": 58033176765, + "name": "airbyte integration test", + "email": "sherif@airbyte.io", + "domain": "a-shop.myshopify.com", + "province": "California", + "country": "US", + "address1": "350 29th Avenue", + "zip": "94121", + "city": "San Francisco", + "source": null, + "phone": "8023494963", + "latitude": 37.7827286, + "longitude": -122.4889911, + "primary_locale": "en", + "address2": "", + "created_at": "2021-06-22T18:00:23-07:00", + "updated_at": "2024-05-05T01:11:05-08:00", + "country_code": "US", + "country_name": "United States", + "currency": "USD", + "customer_email": "sherif@airbyte.io", + "timezone": "(GMT-08:00) America/Los_Angeles", + "iana_timezone": "America/Los_Angeles", + "shop_owner": "Airbyte Airbyte", + "money_format": "${{amount}}", + "money_with_currency_format": "${{amount}} USD", + "weight_unit": "kg", + "province_code": "CA", + "taxes_included": true, + "auto_configure_tax_inclusivity": null, + "tax_shipping": null, + "county_taxes": true, + "plan_display_name": "Developer Preview", + "plan_name": "partner_test", + "has_discounts": true, + "has_gift_cards": false, + "myshopify_domain": "a-shop.myshopify.com", + "google_apps_domain": null, + "google_apps_login_enabled": null, + "money_in_emails_format": "${{amount}}", + "money_with_currency_in_emails_format": "${{amount}} USD", + "eligible_for_payments": true, + "requires_extra_payments_agreement": false, + "password_enabled": true, + "has_storefront": true, + "finances": true, + "primary_location_id": 63590301885, + "checkout_api_supported": true, + "multi_location_enabled": true, + "setup_required": false, + "pre_launch_enabled": false, + "enabled_presentment_currencies": ["USD"], + "transactional_sms_disabled": false, + "marketing_sms_consent_enabled_at_checkout": false + } } From a2f7186428471787b9b265c7593ec7cdff749fb1 Mon Sep 17 00:00:00 2001 From: maxi297 Date: Fri, 24 May 2024 12:42:23 -0400 Subject: [PATCH 20/27] Update release information --- airbyte-integrations/connectors/source-shopify/metadata.yaml | 2 +- airbyte-integrations/connectors/source-shopify/pyproject.toml | 2 +- docs/integrations/sources/shopify.md | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/metadata.yaml b/airbyte-integrations/connectors/source-shopify/metadata.yaml index 508ed170e6dc8..ea4ba46c77850 100644 --- a/airbyte-integrations/connectors/source-shopify/metadata.yaml +++ b/airbyte-integrations/connectors/source-shopify/metadata.yaml @@ -11,7 +11,7 @@ data: connectorSubtype: api connectorType: source definitionId: 9da77001-af33-4bcd-be46-6252bf9342b9 - dockerImageTag: 2.1.4 + dockerImageTag: 2.1.5 dockerRepository: airbyte/source-shopify documentationUrl: https://docs.airbyte.com/integrations/sources/shopify githubIssueLabel: source-shopify diff --git a/airbyte-integrations/connectors/source-shopify/pyproject.toml b/airbyte-integrations/connectors/source-shopify/pyproject.toml index 8512d6d56ed8b..0385c2a126096 100644 --- a/airbyte-integrations/connectors/source-shopify/pyproject.toml +++ b/airbyte-integrations/connectors/source-shopify/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "2.1.4" +version = "2.1.5" name = "source-shopify" description = "Source CDK implementation for Shopify." authors = [ "Airbyte ",] diff --git a/docs/integrations/sources/shopify.md b/docs/integrations/sources/shopify.md index f79451aada3b2..264441de495f8 100644 --- a/docs/integrations/sources/shopify.md +++ b/docs/integrations/sources/shopify.md @@ -208,6 +208,7 @@ For all `Shopify GraphQL BULK` api requests these limitations are applied: https | Version | Date | Pull Request | Subject | | :------ | :--------- | :------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 2.1.5 | 2024-05-27 | [38084](https://github.com/airbytehq/airbyte/pull/38084) | Add some mock server tests | | 2.1.4 | 2024-05-24 | [38610](https://github.com/airbytehq/airbyte/pull/38610) | Updated the source `API Version` to `2024-04` | | 2.1.3 | 2024-05-23 | [38464](https://github.com/airbytehq/airbyte/pull/38464) | Added missing fields to `Products` stream | | 2.1.2 | 2024-05-23 | [38352](https://github.com/airbytehq/airbyte/pull/38352) | Migrated `Order Risks` stream to `GraphQL BULK` | From 0025b620c24c962b8511f91558ec6db4e9f2618b Mon Sep 17 00:00:00 2001 From: maxi297 Date: Fri, 24 May 2024 13:32:28 -0400 Subject: [PATCH 21/27] Update poetry lock for CI --- .../connectors/source-shopify/poetry.lock | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/poetry.lock b/airbyte-integrations/connectors/source-shopify/poetry.lock index df34541cacf03..dc6efc749e2f9 100644 --- a/airbyte-integrations/connectors/source-shopify/poetry.lock +++ b/airbyte-integrations/connectors/source-shopify/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "airbyte-cdk" @@ -290,6 +290,20 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "freezegun" +version = "1.5.1" +description = "Let your Python tests travel through time" +optional = false +python-versions = ">=3.7" +files = [ + {file = "freezegun-1.5.1-py3-none-any.whl", hash = "sha256:bf111d7138a8abe55ab48a71755673dbaa4ab87f4cff5634a4442dfec34c15f1"}, + {file = "freezegun-1.5.1.tar.gz", hash = "sha256:b29dedfcda6d5e8e083ce71b2b542753ad48cfec44037b3fc79702e2980a89e9"}, +] + +[package.dependencies] +python-dateutil = ">=2.7" + [[package]] name = "genson" version = "1.2.2" @@ -1054,4 +1068,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9,<3.12" -content-hash = "a88ba9d29c8cc1a7dd520d152b96c4b43d36bbecafb1a276ef9965650ccc7b2b" +content-hash = "689f75a3ccd29338ce0993262c0a14e0593ac515ba8fe0eedf222558b9ec2f81" From deda781cc0867c21a25f9e85cd6b3f10ececc5b0 Mon Sep 17 00:00:00 2001 From: maxi297 Date: Fri, 24 May 2024 15:57:36 -0400 Subject: [PATCH 22/27] Update following merge master --- .../source-shopify/unit_tests/integration/api/authentication.py | 2 +- .../source-shopify/unit_tests/integration/api/bulk.py | 2 +- .../source-shopify/unit_tests/integration/test_bulk_stream.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py index 386091c3023cd..9b250b54224b8 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py @@ -71,7 +71,7 @@ def set_up_shop(http_mocker: HttpMocker, shop_name: str) -> None: http_mocker.get( - HttpRequest(f"https://{shop_name}.myshopify.com/admin/api/2023-07/shop.json", query_params=ANY_QUERY_PARAMS), + HttpRequest(f"https://{shop_name}.myshopify.com/admin/api/2024-04/shop.json", query_params=ANY_QUERY_PARAMS), HttpResponse(json.dumps(find_template("shop", __file__)), status_code=200), ) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py index e66a85741ea40..6bde2b2c84ecc 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py @@ -8,7 +8,7 @@ def _create_job_url(shop_name: str) -> str: - return f"https://{shop_name}.myshopify.com/admin/api/2023-07/graphql.json" + return f"https://{shop_name}.myshopify.com/admin/api/2024-04/graphql.json" def create_job_creation_request(shop_name: str, lower_boundary: datetime, upper_boundary: datetime) -> HttpRequest: diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py index 2d67dc031fd71..9be0da4bd0613 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py @@ -27,7 +27,7 @@ _JOB_START_DATE = datetime.fromisoformat("2024-05-05T00:00:00+00:00") _JOB_END_DATE = _JOB_START_DATE + timedelta(hours=2, minutes=24) -_URL_GRAPHQL = f"https://{_SHOP_NAME}.myshopify.com/admin/api/2023-07/graphql.json" +_URL_GRAPHQL = f"https://{_SHOP_NAME}.myshopify.com/admin/api/2024-04/graphql.json" _JOB_RESULT_URL = "https://storage.googleapis.com/shopify-tiers-assets-prod-us-east1/bulk-operation-outputs/l6lersgk4i81iqc3n6iisywwtipb-final?GoogleAccessId=assets-us-prod%40shopify-tiers.iam.gserviceaccount.com&Expires=1715633149&Signature=oMjQelfAzUW%2FdulC3HbuBapbUriUJ%2Bc9%2FKpIIf954VTxBqKChJAdoTmWT9ymh%2FnCiHdM%2BeM%2FADz5siAC%2BXtHBWkJfvs%2F0cYpse0ueiQsw6R8gW5JpeSbizyGWcBBWkv5j8GncAnZOUVYDxRIgfxcPb8BlFxBfC3wsx%2F00v9D6EHbPpkIMTbCOAhheJdw9GmVa%2BOMqHGHlmiADM34RDeBPrvSo65f%2FakpV2LBQTEV%2BhDt0ndaREQ0MrpNwhKnc3vZPzA%2BliOGM0wyiYr9qVwByynHq8c%2FaJPPgI5eGEfQcyepgWZTRW5S0DbmBIFxZJLN6Nq6bJ2bIZWrVriUhNGx2g%3D%3D&response-content-disposition=attachment%3B+filename%3D%22bulk-4476008693949.jsonl%22%3B+filename%2A%3DUTF-8%27%27bulk-4476008693949.jsonl&response-content-type=application%2Fjsonl" From 789306baf656279757a4a051f6f7c7f4d31cf84e Mon Sep 17 00:00:00 2001 From: maxi297 Date: Fri, 24 May 2024 22:57:01 -0400 Subject: [PATCH 23/27] Fix stateful class that would break tests --- .../connectors/source-shopify/source_shopify/scopes.py | 6 ++---- .../source-shopify/source_shopify/streams/base_streams.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/scopes.py b/airbyte-integrations/connectors/source-shopify/source_shopify/scopes.py index 805acc1f8d68d..cc09e3a0e5640 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/scopes.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/scopes.py @@ -83,16 +83,14 @@ class ShopifyScopes: logger = logging.getLogger("airbyte") def __init__(self, config: Mapping[str, Any]) -> None: + self.permitted_streams: List[str] = list(ALWAYS_PERMITTED_STREAMS) + self.not_permitted_streams: List[set[str, str]] = [] self.user_scopes = self.get_user_scopes(config) # for each stream check the authenticated user has all scopes required self.get_streams_from_user_scopes() # log if there are streams missing scopes and should be omitted self.emit_missing_scopes() - # the list of validated streams - permitted_streams: List[str] = ALWAYS_PERMITTED_STREAMS - # the list of not permitted streams - not_permitted_streams: List[set[str, str]] = [] # template for the log message missing_scope_message: str = ( "The stream `{stream}` could not be synced without the `{scope}` scope. Please check the `{scope}` is granted." diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py index 0a86b8b2a1e68..bc084f3c482bb 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py @@ -648,7 +648,7 @@ def __init__(self, config: Dict) -> None: # overide the default job slice size, if provided (it's auto-adjusted, later on) self.bulk_window_in_days = config.get("bulk_window_in_days") if self.bulk_window_in_days: - self.job_manager._job_size = self.bulk_window_in_days + self.job_manager.job_size = self.bulk_window_in_days # define Record Producer instance self.record_producer: ShopifyBulkRecord = ShopifyBulkRecord(self.query) From 7c33dd2d8b9e57b8950fa9ea2ff115639539a8b5 Mon Sep 17 00:00:00 2001 From: maxi297 Date: Mon, 27 May 2024 12:06:11 -0400 Subject: [PATCH 24/27] Code review --- .../integration/api/authentication.py | 66 ++----------------- .../unit_tests/integration/api/bulk.py | 3 +- 2 files changed, 6 insertions(+), 63 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py index 9b250b54224b8..df16077abc14b 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/authentication.py @@ -5,73 +5,15 @@ from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse from airbyte_cdk.test.mock_http.request import ANY_QUERY_PARAMS from airbyte_cdk.test.mock_http.response_builder import find_template +from source_shopify.scopes import SCOPES_MAPPING +from source_shopify.streams.base_streams import ShopifyStream -_ALL_SCOPES = [ - "read_all_cart_transforms", - "read_all_checkout_completion_target_customizations", - "read_all_orders", - "read_analytics", - "read_assigned_fulfillment_orders", - "read_cart_transforms", - "read_channels", - "read_companies", - "read_content", - "read_custom_fulfillment_services", - "read_customer_data_erasure", - "read_customer_merge", - "read_customers", - "read_dery_customizations", - "read_discounts", - "read_draft_orders", - "read_files", - "read_fulfillment_constraint_rules", - "read_fulfillments", - "read_gates", - "read_gdpr_data_request", - "read_gift_cards", - "read_inventory", - "read_legal_policies", - "read_locales", - "read_locations", - "read_marketing_events", - "read_markets", - "read_merchant_managed_fulfillment_orders", - "read_online_store_navigation", - "read_online_store_pages", - "read_order_edits", - "read_order_submission_rules", - "read_orders", - "read_packing_slip_templates", - "read_payment_customizations", - "read_payment_terms", - "read_pixels", - "read_price_rules", - "read_product_feeds", - "read_product_listings", - "read_products", - "read_publications", - "read_purchase_options", - "read_reports", - "read_resource_feedbacks", - "read_returns", - "read_script_tags", - "read_shipping", - "read_shopify_credit", - "read_shopify_payments_accounts", - "read_shopify_payments_bank_accounts", - "read_shopify_payments_disputes", - "read_shopify_payments_payouts", - "read_shopify_payments_provider_accounts_sensitive", - "read_store_credit_account_transactions", - "read_themes", - "read_third_party_fulfillment_orders", - "read_translations" -] +_ALL_SCOPES = [scope for stream_scopes in SCOPES_MAPPING.values() for scope in stream_scopes] def set_up_shop(http_mocker: HttpMocker, shop_name: str) -> None: http_mocker.get( - HttpRequest(f"https://{shop_name}.myshopify.com/admin/api/2024-04/shop.json", query_params=ANY_QUERY_PARAMS), + HttpRequest(f"https://{shop_name}.myshopify.com/admin/api/{ShopifyStream.api_version}/shop.json", query_params=ANY_QUERY_PARAMS), HttpResponse(json.dumps(find_template("shop", __file__)), status_code=200), ) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py index 6bde2b2c84ecc..b6598ef155e5d 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py @@ -5,10 +5,11 @@ from random import randint from airbyte_cdk.test.mock_http import HttpRequest, HttpResponse +from source_shopify.streams.base_streams import ShopifyStream def _create_job_url(shop_name: str) -> str: - return f"https://{shop_name}.myshopify.com/admin/api/2024-04/graphql.json" + return f"https://{shop_name}.myshopify.com/admin/api/{ShopifyStream.api_version}/graphql.json" def create_job_creation_request(shop_name: str, lower_boundary: datetime, upper_boundary: datetime) -> HttpRequest: From cfcedc02800ca1185a8fdcebd501a9abbdce5806 Mon Sep 17 00:00:00 2001 From: maxi297 Date: Tue, 28 May 2024 11:07:47 -0400 Subject: [PATCH 25/27] Update query to use 'ShopifyBulkTemplates' in mock server tests --- .../unit_tests/integration/api/bulk.py | 31 +++++-------------- 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py index b6598ef155e5d..3bb81776c5160 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py @@ -5,6 +5,8 @@ from random import randint from airbyte_cdk.test.mock_http import HttpRequest, HttpResponse + +from source_shopify.shopify_graphql.bulk.query import ShopifyBulkTemplates from source_shopify.streams.base_streams import ShopifyStream @@ -13,26 +15,9 @@ def _create_job_url(shop_name: str) -> str: def create_job_creation_request(shop_name: str, lower_boundary: datetime, upper_boundary: datetime) -> HttpRequest: - outer_query = """mutation { - bulkOperationRunQuery( - query: \"\"\" - %INNER_QUERY_TOKEN% - \"\"\" - ) { - bulkOperation { - id - status - createdAt - } - userErrors { - field - message - } - } - }""" - inner_query = """{ + query = """ { orders( - query: \"updated_at:>='%LOWER_BOUNDARY_TOKEN%' AND updated_at:<='%UPPER_BOUNDARY_TOKEN%'\" + query: "updated_at:>='%LOWER_BOUNDARY_TOKEN%' AND updated_at:<='%UPPER_BOUNDARY_TOKEN%'" sortKey: UPDATED_AT ) { edges { @@ -58,13 +43,11 @@ def create_job_creation_request(shop_name: str, lower_boundary: datetime, upper_ } } }""" - inner_query = inner_query.replace("%LOWER_BOUNDARY_TOKEN%", lower_boundary.isoformat()) - inner_query = inner_query.replace("%UPPER_BOUNDARY_TOKEN%", upper_boundary.isoformat()) - outer_query = outer_query.replace("%INNER_QUERY_TOKEN%", inner_query) - + query = query.replace("%LOWER_BOUNDARY_TOKEN%", lower_boundary.isoformat()).replace("%UPPER_BOUNDARY_TOKEN%", upper_boundary.isoformat()) + prepared_query = ShopifyBulkTemplates.prepare(query) return HttpRequest( url=_create_job_url(shop_name), - body=json.dumps({"query": outer_query}) + body=json.dumps({"query": prepared_query}) ) From 6df28d195b3813f923ceb048451ac1caa248728e Mon Sep 17 00:00:00 2001 From: maxi297 Date: Tue, 28 May 2024 17:01:18 -0400 Subject: [PATCH 26/27] format --- .../connectors/source-shopify/unit_tests/integration/api/bulk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py index 3bb81776c5160..d3328a760c807 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py @@ -5,7 +5,6 @@ from random import randint from airbyte_cdk.test.mock_http import HttpRequest, HttpResponse - from source_shopify.shopify_graphql.bulk.query import ShopifyBulkTemplates from source_shopify.streams.base_streams import ShopifyStream From f6757f43fc6acdbbbbfe96cf8264d7d78e7570af Mon Sep 17 00:00:00 2001 From: Maxime Carbonneau-Leclerc <3360483+maxi297@users.noreply.github.com> Date: Wed, 5 Jun 2024 09:20:03 -0400 Subject: [PATCH 27/27] Issue 7935/integrate httpclient to access scopes (#38678) --- .../connectors/source-shopify/poetry.lock | 324 +++++++++++++++++- .../connectors/source-shopify/pyproject.toml | 2 +- .../source_shopify/http_request.py | 49 +++ .../source-shopify/source_shopify/scopes.py | 15 +- .../shopify_graphql/bulk/exceptions.py | 9 +- .../shopify_graphql/bulk/job.py | 168 ++++----- .../shopify_graphql/bulk/retry.py | 11 + .../source_shopify/streams/base_streams.py | 50 +-- .../source-shopify/source_shopify/utils.py | 2 +- .../source-shopify/unit_tests/conftest.py | 7 + .../unit_tests/graphql_bulk/test_job.py | 72 ++-- .../unit_tests/integration/api/bulk.py | 21 +- .../integration/test_bulk_stream.py | 104 +++++- 13 files changed, 625 insertions(+), 209 deletions(-) create mode 100644 airbyte-integrations/connectors/source-shopify/source_shopify/http_request.py diff --git a/airbyte-integrations/connectors/source-shopify/poetry.lock b/airbyte-integrations/connectors/source-shopify/poetry.lock index dc6efc749e2f9..d226410969f82 100644 --- a/airbyte-integrations/connectors/source-shopify/poetry.lock +++ b/airbyte-integrations/connectors/source-shopify/poetry.lock @@ -2,19 +2,20 @@ [[package]] name = "airbyte-cdk" -version = "0.81.4" +version = "0.90.0" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "airbyte_cdk-0.81.4-py3-none-any.whl", hash = "sha256:4ed193da4e8be4867e1d8983172d10afb3c3b10f3e10ec618431deec1f2af4cb"}, - {file = "airbyte_cdk-0.81.4.tar.gz", hash = "sha256:5c63d8c792edf5f24d0ad804b34b3ebcc056ecede6cb4f87ebf9ac07aa987f24"}, + {file = "airbyte_cdk-0.90.0-py3-none-any.whl", hash = "sha256:bd0aa5843cdc4901f2e482f0e86695ca4e6db83b65c5017799255dd20535cf56"}, + {file = "airbyte_cdk-0.90.0.tar.gz", hash = "sha256:25cefc010718bada5cce3f87e7ae93068630732c0d34ce5145f8ddf7457d4d3c"}, ] [package.dependencies] -airbyte-protocol-models = "*" +airbyte-protocol-models = ">=0.9.0,<1.0" backoff = "*" cachetools = "*" +cryptography = ">=42.0.5,<43.0.0" Deprecated = ">=1.2,<1.3" dpath = ">=2.0.1,<2.1.0" genson = "1.2.2" @@ -22,10 +23,13 @@ isodate = ">=0.6.1,<0.7.0" Jinja2 = ">=3.1.2,<3.2.0" jsonref = ">=0.2,<0.3" jsonschema = ">=3.2.0,<3.3.0" +langchain_core = "0.1.42" pendulum = "<3.0.0" pydantic = ">=1.10.8,<2.0.0" +pyjwt = ">=2.8.0,<3.0.0" pyrate-limiter = ">=3.1.0,<3.2.0" python-dateutil = "*" +pytz = "2024.1" PyYAML = ">=6.0.1,<7.0.0" requests = "*" requests_cache = "*" @@ -34,7 +38,7 @@ wcmatch = "8.4" [package.extras] file-based = ["avro (>=1.11.2,<1.12.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] sphinx-docs = ["Sphinx (>=4.2,<4.3)", "sphinx-rtd-theme (>=1.0,<1.1)"] -vector-db-based = ["cohere (==4.21)", "langchain (==0.0.271)", "openai[embeddings] (==0.27.9)", "tiktoken (==0.4.0)"] +vector-db-based = ["cohere (==4.21)", "langchain (==0.1.16)", "openai[embeddings] (==0.27.9)", "tiktoken (==0.4.0)"] [[package]] name = "airbyte-protocol-models" @@ -138,6 +142,70 @@ files = [ {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, ] +[[package]] +name = "cffi" +version = "1.16.0" +description = "Foreign Function Interface for Python calling C code." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, + {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, + {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, + {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, + {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, + {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, + {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, + {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, + {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, + {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, +] + +[package.dependencies] +pycparser = "*" + [[package]] name = "charset-normalizer" version = "3.3.2" @@ -248,6 +316,60 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "cryptography" +version = "42.0.7" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +optional = false +python-versions = ">=3.7" +files = [ + {file = "cryptography-42.0.7-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:a987f840718078212fdf4504d0fd4c6effe34a7e4740378e59d47696e8dfb477"}, + {file = "cryptography-42.0.7-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd13b5e9b543532453de08bcdc3cc7cebec6f9883e886fd20a92f26940fd3e7a"}, + {file = "cryptography-42.0.7-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a79165431551042cc9d1d90e6145d5d0d3ab0f2d66326c201d9b0e7f5bf43604"}, + {file = "cryptography-42.0.7-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a47787a5e3649008a1102d3df55424e86606c9bae6fb77ac59afe06d234605f8"}, + {file = "cryptography-42.0.7-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:02c0eee2d7133bdbbc5e24441258d5d2244beb31da5ed19fbb80315f4bbbff55"}, + {file = "cryptography-42.0.7-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:5e44507bf8d14b36b8389b226665d597bc0f18ea035d75b4e53c7b1ea84583cc"}, + {file = "cryptography-42.0.7-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:7f8b25fa616d8b846aef64b15c606bb0828dbc35faf90566eb139aa9cff67af2"}, + {file = "cryptography-42.0.7-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:93a3209f6bb2b33e725ed08ee0991b92976dfdcf4e8b38646540674fc7508e13"}, + {file = "cryptography-42.0.7-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e6b8f1881dac458c34778d0a424ae5769de30544fc678eac51c1c8bb2183e9da"}, + {file = "cryptography-42.0.7-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3de9a45d3b2b7d8088c3fbf1ed4395dfeff79d07842217b38df14ef09ce1d8d7"}, + {file = "cryptography-42.0.7-cp37-abi3-win32.whl", hash = "sha256:789caea816c6704f63f6241a519bfa347f72fbd67ba28d04636b7c6b7da94b0b"}, + {file = "cryptography-42.0.7-cp37-abi3-win_amd64.whl", hash = "sha256:8cb8ce7c3347fcf9446f201dc30e2d5a3c898d009126010cbd1f443f28b52678"}, + {file = "cryptography-42.0.7-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:a3a5ac8b56fe37f3125e5b72b61dcde43283e5370827f5233893d461b7360cd4"}, + {file = "cryptography-42.0.7-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:779245e13b9a6638df14641d029add5dc17edbef6ec915688f3acb9e720a5858"}, + {file = "cryptography-42.0.7-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d563795db98b4cd57742a78a288cdbdc9daedac29f2239793071fe114f13785"}, + {file = "cryptography-42.0.7-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:31adb7d06fe4383226c3e963471f6837742889b3c4caa55aac20ad951bc8ffda"}, + {file = "cryptography-42.0.7-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:efd0bf5205240182e0f13bcaea41be4fdf5c22c5129fc7ced4a0282ac86998c9"}, + {file = "cryptography-42.0.7-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a9bc127cdc4ecf87a5ea22a2556cab6c7eda2923f84e4f3cc588e8470ce4e42e"}, + {file = "cryptography-42.0.7-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:3577d029bc3f4827dd5bf8bf7710cac13527b470bbf1820a3f394adb38ed7d5f"}, + {file = "cryptography-42.0.7-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2e47577f9b18723fa294b0ea9a17d5e53a227867a0a4904a1a076d1646d45ca1"}, + {file = "cryptography-42.0.7-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1a58839984d9cb34c855197043eaae2c187d930ca6d644612843b4fe8513c886"}, + {file = "cryptography-42.0.7-cp39-abi3-win32.whl", hash = "sha256:e6b79d0adb01aae87e8a44c2b64bc3f3fe59515280e00fb6d57a7267a2583cda"}, + {file = "cryptography-42.0.7-cp39-abi3-win_amd64.whl", hash = "sha256:16268d46086bb8ad5bf0a2b5544d8a9ed87a0e33f5e77dd3c3301e63d941a83b"}, + {file = "cryptography-42.0.7-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:2954fccea107026512b15afb4aa664a5640cd0af630e2ee3962f2602693f0c82"}, + {file = "cryptography-42.0.7-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:362e7197754c231797ec45ee081f3088a27a47c6c01eff2ac83f60f85a50fe60"}, + {file = "cryptography-42.0.7-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4f698edacf9c9e0371112792558d2f705b5645076cc0aaae02f816a0171770fd"}, + {file = "cryptography-42.0.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5482e789294854c28237bba77c4c83be698be740e31a3ae5e879ee5444166582"}, + {file = "cryptography-42.0.7-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e9b2a6309f14c0497f348d08a065d52f3020656f675819fc405fb63bbcd26562"}, + {file = "cryptography-42.0.7-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d8e3098721b84392ee45af2dd554c947c32cc52f862b6a3ae982dbb90f577f14"}, + {file = "cryptography-42.0.7-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c65f96dad14f8528a447414125e1fc8feb2ad5a272b8f68477abbcc1ea7d94b9"}, + {file = "cryptography-42.0.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:36017400817987670037fbb0324d71489b6ead6231c9604f8fc1f7d008087c68"}, + {file = "cryptography-42.0.7.tar.gz", hash = "sha256:ecbfbc00bf55888edda9868a4cf927205de8499e7fabe6c050322298382953f2"}, +] + +[package.dependencies] +cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] +nox = ["nox"] +pep8test = ["check-sdist", "click", "mypy", "ruff"] +sdist = ["build"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] + [[package]] name = "deprecated" version = "1.2.14" @@ -398,6 +520,31 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] + +[package.dependencies] +jsonpointer = ">=1.9" + +[[package]] +name = "jsonpointer" +version = "2.4" +description = "Identify specific nodes in a JSON document (RFC 6901)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, +] + [[package]] name = "jsonref" version = "0.2" @@ -430,6 +577,44 @@ six = ">=1.11.0" format = ["idna", "jsonpointer (>1.13)", "rfc3987", "strict-rfc3339", "webcolors"] format-nongpl = ["idna", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "webcolors"] +[[package]] +name = "langchain-core" +version = "0.1.42" +description = "Building applications with LLMs through composability" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "langchain_core-0.1.42-py3-none-any.whl", hash = "sha256:c5653ffa08a44f740295c157a24c0def4a753333f6a2c41f76bf431cd00be8b5"}, + {file = "langchain_core-0.1.42.tar.gz", hash = "sha256:40751bf60ea5d8e2b2efe65290db434717ee3834870c002e40e2811f09d814e6"}, +] + +[package.dependencies] +jsonpatch = ">=1.33,<2.0" +langsmith = ">=0.1.0,<0.2.0" +packaging = ">=23.2,<24.0" +pydantic = ">=1,<3" +PyYAML = ">=5.3" +tenacity = ">=8.1.0,<9.0.0" + +[package.extras] +extended-testing = ["jinja2 (>=3,<4)"] + +[[package]] +name = "langsmith" +version = "0.1.63" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "langsmith-0.1.63-py3-none-any.whl", hash = "sha256:7810afdf5e3f3b472fc581a29371fb96cd843dde2149e048d1b9610325159d1e"}, + {file = "langsmith-0.1.63.tar.gz", hash = "sha256:a609405b52f6f54df442a142cbf19ab38662d54e532f96028b4c546434d4afdf"}, +] + +[package.dependencies] +orjson = ">=3.9.14,<4.0.0" +pydantic = ">=1,<3" +requests = ">=2,<3" + [[package]] name = "markupsafe" version = "2.1.5" @@ -499,15 +684,70 @@ files = [ {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, ] +[[package]] +name = "orjson" +version = "3.10.3" +description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +optional = false +python-versions = ">=3.8" +files = [ + {file = "orjson-3.10.3-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9fb6c3f9f5490a3eb4ddd46fc1b6eadb0d6fc16fb3f07320149c3286a1409dd8"}, + {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:252124b198662eee80428f1af8c63f7ff077c88723fe206a25df8dc57a57b1fa"}, + {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9f3e87733823089a338ef9bbf363ef4de45e5c599a9bf50a7a9b82e86d0228da"}, + {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8334c0d87103bb9fbbe59b78129f1f40d1d1e8355bbed2ca71853af15fa4ed3"}, + {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1952c03439e4dce23482ac846e7961f9d4ec62086eb98ae76d97bd41d72644d7"}, + {file = "orjson-3.10.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c0403ed9c706dcd2809f1600ed18f4aae50be263bd7112e54b50e2c2bc3ebd6d"}, + {file = "orjson-3.10.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:382e52aa4270a037d41f325e7d1dfa395b7de0c367800b6f337d8157367bf3a7"}, + {file = "orjson-3.10.3-cp310-none-win32.whl", hash = "sha256:be2aab54313752c04f2cbaab4515291ef5af8c2256ce22abc007f89f42f49109"}, + {file = "orjson-3.10.3-cp310-none-win_amd64.whl", hash = "sha256:416b195f78ae461601893f482287cee1e3059ec49b4f99479aedf22a20b1098b"}, + {file = "orjson-3.10.3-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:73100d9abbbe730331f2242c1fc0bcb46a3ea3b4ae3348847e5a141265479700"}, + {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:544a12eee96e3ab828dbfcb4d5a0023aa971b27143a1d35dc214c176fdfb29b3"}, + {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:520de5e2ef0b4ae546bea25129d6c7c74edb43fc6cf5213f511a927f2b28148b"}, + {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ccaa0a401fc02e8828a5bedfd80f8cd389d24f65e5ca3954d72c6582495b4bcf"}, + {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7bc9e8bc11bac40f905640acd41cbeaa87209e7e1f57ade386da658092dc16"}, + {file = "orjson-3.10.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3582b34b70543a1ed6944aca75e219e1192661a63da4d039d088a09c67543b08"}, + {file = "orjson-3.10.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1c23dfa91481de880890d17aa7b91d586a4746a4c2aa9a145bebdbaf233768d5"}, + {file = "orjson-3.10.3-cp311-none-win32.whl", hash = "sha256:1770e2a0eae728b050705206d84eda8b074b65ee835e7f85c919f5705b006c9b"}, + {file = "orjson-3.10.3-cp311-none-win_amd64.whl", hash = "sha256:93433b3c1f852660eb5abdc1f4dd0ced2be031ba30900433223b28ee0140cde5"}, + {file = "orjson-3.10.3-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:a39aa73e53bec8d410875683bfa3a8edf61e5a1c7bb4014f65f81d36467ea098"}, + {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0943a96b3fa09bee1afdfccc2cb236c9c64715afa375b2af296c73d91c23eab2"}, + {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e852baafceff8da3c9defae29414cc8513a1586ad93e45f27b89a639c68e8176"}, + {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18566beb5acd76f3769c1d1a7ec06cdb81edc4d55d2765fb677e3eaa10fa99e0"}, + {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bd2218d5a3aa43060efe649ec564ebedec8ce6ae0a43654b81376216d5ebd42"}, + {file = "orjson-3.10.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cf20465e74c6e17a104ecf01bf8cd3b7b252565b4ccee4548f18b012ff2f8069"}, + {file = "orjson-3.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ba7f67aa7f983c4345eeda16054a4677289011a478ca947cd69c0a86ea45e534"}, + {file = "orjson-3.10.3-cp312-none-win32.whl", hash = "sha256:17e0713fc159abc261eea0f4feda611d32eabc35708b74bef6ad44f6c78d5ea0"}, + {file = "orjson-3.10.3-cp312-none-win_amd64.whl", hash = "sha256:4c895383b1ec42b017dd2c75ae8a5b862fc489006afde06f14afbdd0309b2af0"}, + {file = "orjson-3.10.3-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:be2719e5041e9fb76c8c2c06b9600fe8e8584e6980061ff88dcbc2691a16d20d"}, + {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0175a5798bdc878956099f5c54b9837cb62cfbf5d0b86ba6d77e43861bcec2"}, + {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:978be58a68ade24f1af7758626806e13cff7748a677faf95fbb298359aa1e20d"}, + {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16bda83b5c61586f6f788333d3cf3ed19015e3b9019188c56983b5a299210eb5"}, + {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ad1f26bea425041e0a1adad34630c4825a9e3adec49079b1fb6ac8d36f8b754"}, + {file = "orjson-3.10.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9e253498bee561fe85d6325ba55ff2ff08fb5e7184cd6a4d7754133bd19c9195"}, + {file = "orjson-3.10.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0a62f9968bab8a676a164263e485f30a0b748255ee2f4ae49a0224be95f4532b"}, + {file = "orjson-3.10.3-cp38-none-win32.whl", hash = "sha256:8d0b84403d287d4bfa9bf7d1dc298d5c1c5d9f444f3737929a66f2fe4fb8f134"}, + {file = "orjson-3.10.3-cp38-none-win_amd64.whl", hash = "sha256:8bc7a4df90da5d535e18157220d7915780d07198b54f4de0110eca6b6c11e290"}, + {file = "orjson-3.10.3-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9059d15c30e675a58fdcd6f95465c1522b8426e092de9fff20edebfdc15e1cb0"}, + {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d40c7f7938c9c2b934b297412c067936d0b54e4b8ab916fd1a9eb8f54c02294"}, + {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4a654ec1de8fdaae1d80d55cee65893cb06494e124681ab335218be6a0691e7"}, + {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:831c6ef73f9aa53c5f40ae8f949ff7681b38eaddb6904aab89dca4d85099cb78"}, + {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99b880d7e34542db89f48d14ddecbd26f06838b12427d5a25d71baceb5ba119d"}, + {file = "orjson-3.10.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2e5e176c994ce4bd434d7aafb9ecc893c15f347d3d2bbd8e7ce0b63071c52e25"}, + {file = "orjson-3.10.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b69a58a37dab856491bf2d3bbf259775fdce262b727f96aafbda359cb1d114d8"}, + {file = "orjson-3.10.3-cp39-none-win32.whl", hash = "sha256:b8d4d1a6868cde356f1402c8faeb50d62cee765a1f7ffcfd6de732ab0581e063"}, + {file = "orjson-3.10.3-cp39-none-win_amd64.whl", hash = "sha256:5102f50c5fc46d94f2033fe00d392588564378260d64377aec702f21a7a22912"}, + {file = "orjson-3.10.3.tar.gz", hash = "sha256:2b166507acae7ba2f7c315dcf185a9111ad5e992ac81f2d507aac39193c2c818"}, +] + [[package]] name = "packaging" -version = "24.0" +version = "23.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" files = [ - {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, - {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, ] [[package]] @@ -575,6 +815,17 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "pycparser" +version = "2.22" +description = "C parser in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, +] + [[package]] name = "pydantic" version = "1.10.15" @@ -627,6 +878,23 @@ typing-extensions = ">=4.2.0" dotenv = ["python-dotenv (>=0.10.4)"] email = ["email-validator (>=1.0.3)"] +[[package]] +name = "pyjwt" +version = "2.8.0" +description = "JSON Web Token implementation in Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, + {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, +] + +[package.extras] +crypto = ["cryptography (>=3.4.0)"] +dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] + [[package]] name = "pyrate-limiter" version = "3.1.1" @@ -736,6 +1004,17 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "pytz" +version = "2024.1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, +] + [[package]] name = "pytzdata" version = "2020.1" @@ -809,13 +1088,13 @@ files = [ [[package]] name = "requests" -version = "2.32.1" +version = "2.32.2" description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" files = [ - {file = "requests-2.32.1-py3-none-any.whl", hash = "sha256:21ac9465cdf8c1650fe1ecde8a71669a93d4e6f147550483a2967d08396a56a5"}, - {file = "requests-2.32.1.tar.gz", hash = "sha256:eb97e87e64c79e64e5b8ac75cee9dd1f97f49e289b083ee6be96268930725685"}, + {file = "requests-2.32.2-py3-none-any.whl", hash = "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c"}, + {file = "requests-2.32.2.tar.gz", hash = "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289"}, ] [package.dependencies] @@ -919,6 +1198,21 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "tenacity" +version = "8.3.0" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tenacity-8.3.0-py3-none-any.whl", hash = "sha256:3649f6443dbc0d9b01b9d8020a9c4ec7a1ff5f6f3c6c8a036ef371f573fe9185"}, + {file = "tenacity-8.3.0.tar.gz", hash = "sha256:953d4e6ad24357bceffbc9707bc74349aca9d245f68eb65419cf0c249a1949a2"}, +] + +[package.extras] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] + [[package]] name = "tomli" version = "2.0.1" @@ -932,13 +1226,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.11.0" +version = "4.12.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, - {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, + {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, + {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, ] [[package]] @@ -1068,4 +1362,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9,<3.12" -content-hash = "689f75a3ccd29338ce0993262c0a14e0593ac515ba8fe0eedf222558b9ec2f81" +content-hash = "702ede56bf9c39032986e5709136f2c66da780844d77c54b154001d7244a16c0" diff --git a/airbyte-integrations/connectors/source-shopify/pyproject.toml b/airbyte-integrations/connectors/source-shopify/pyproject.toml index e21d640701c68..a9fca38f32c66 100644 --- a/airbyte-integrations/connectors/source-shopify/pyproject.toml +++ b/airbyte-integrations/connectors/source-shopify/pyproject.toml @@ -17,7 +17,7 @@ include = "source_shopify" [tool.poetry.dependencies] python = "^3.9,<3.12" -airbyte-cdk = "0.81.4" +airbyte-cdk = "0.90.0" sgqlc = "==16.3" graphql-query = "^1.1.1" diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/http_request.py b/airbyte-integrations/connectors/source-shopify/source_shopify/http_request.py new file mode 100644 index 0000000000000..a3e9c73189870 --- /dev/null +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/http_request.py @@ -0,0 +1,49 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. + +from typing import Optional, Union + +import requests +from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, ResponseAction +from airbyte_protocol.models import FailureType +from requests import exceptions + +RESPONSE_CONSUMPTION_EXCEPTIONS = ( + exceptions.ChunkedEncodingError, + exceptions.JSONDecodeError, +) + +TRANSIENT_EXCEPTIONS = ( + exceptions.ConnectTimeout, + exceptions.ConnectionError, + exceptions.HTTPError, + exceptions.ReadTimeout, + # This error was added as part of the migration from REST to bulk (https://github.com/airbytehq/airbyte/commit/f5094041bebb80cd6602a98829c19a7515276ed3) but it is unclear in which case it occurs and why it is transient + exceptions.SSLError, +) + RESPONSE_CONSUMPTION_EXCEPTIONS + +_NO_ERROR_RESOLUTION = ErrorResolution(ResponseAction.SUCCESS, None, None) + + +class ShopifyErrorHandler(ErrorHandler): + def __init__(self, stream_name: str = "") -> None: + self._stream_name = stream_name + + def interpret_response(self, response: Optional[Union[requests.Response, Exception]]) -> ErrorResolution: + if isinstance(response, TRANSIENT_EXCEPTIONS): + return ErrorResolution( + ResponseAction.RETRY, + FailureType.transient_error, + f"Error of type {type(response)} is considered transient. Try again later. (full error message is {response})", + ) + elif isinstance(response, requests.Response): + if response.ok: + return _NO_ERROR_RESOLUTION + + if response.status_code == 429 or response.status_code >= 500: + return ErrorResolution( + ResponseAction.RETRY, + FailureType.transient_error, + f"Status code `{response.status_code}` is considered transient. Try again later. (full error message is {response.content})", + ) + + return _NO_ERROR_RESOLUTION # Not all the error handling is defined here so it assumes the previous code will handle the error if there is one diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/scopes.py b/airbyte-integrations/connectors/source-shopify/source_shopify/scopes.py index cc09e3a0e5640..00fd6b6d7c2fa 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/scopes.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/scopes.py @@ -7,8 +7,10 @@ from typing import Any, Iterable, List, Mapping, Optional import requests +from airbyte_cdk.sources.streams.http import HttpClient from requests.exceptions import ConnectionError, InvalidURL, JSONDecodeError, SSLError +from .http_request import ShopifyErrorHandler from .utils import ShopifyAccessScopesError, ShopifyBadJsonError, ShopifyConnectionError, ShopifyWrongShopNameError SCOPES_MAPPING: Mapping[str, set[str]] = { @@ -85,6 +87,9 @@ class ShopifyScopes: def __init__(self, config: Mapping[str, Any]) -> None: self.permitted_streams: List[str] = list(ALWAYS_PERMITTED_STREAMS) self.not_permitted_streams: List[set[str, str]] = [] + self._error_handler = ShopifyErrorHandler() + self._http_client = HttpClient("ShopifyScopes", self.logger, self._error_handler, session=requests.Session()) + self.user_scopes = self.get_user_scopes(config) # for each stream check the authenticated user has all scopes required self.get_streams_from_user_scopes() @@ -96,20 +101,16 @@ def __init__(self, config: Mapping[str, Any]) -> None: "The stream `{stream}` could not be synced without the `{scope}` scope. Please check the `{scope}` is granted." ) - @staticmethod - def get_user_scopes(config) -> list[Any]: - session = requests.Session() + def get_user_scopes(self, config) -> list[Any]: url = f"https://{config['shop']}.myshopify.com/admin/oauth/access_scopes.json" headers = config["authenticator"].get_auth_header() try: - response = session.get(url, headers=headers).json() - access_scopes = [scope.get("handle") for scope in response.get("access_scopes")] + _, response = self._http_client.send_request("GET", url, headers=headers, request_kwargs={}) + access_scopes = [scope.get("handle") for scope in response.json().get("access_scopes")] except InvalidURL: raise ShopifyWrongShopNameError(url) except JSONDecodeError as json_error: raise ShopifyBadJsonError(json_error) - except (SSLError, ConnectionError) as con_error: - raise ShopifyConnectionError(con_error) if access_scopes: return access_scopes diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/exceptions.py b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/exceptions.py index 1177d0fbdcf17..3dcc00d14e52a 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/exceptions.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/exceptions.py @@ -47,5 +47,12 @@ class BulkJobTimout(BaseBulkException): class BulkJobAccessDenied(BaseBulkException): """Raised when BULK Job has ACCESS_DENIED status""" + class BulkJobCreationFailedConcurrentError(BaseBulkException): + """Raised when an attempt to create a job as failed because of concurrency limits.""" + + failure_type: FailureType = FailureType.transient_error + class BulkJobConcurrentError(BaseBulkException): - """Raised when BULK Job could not be created, since the 1 Bulk job / shop quota is exceeded.""" + """Raised when failing the job after hitting too many BulkJobCreationFailedConcurrentError.""" + + failure_type: FailureType = FailureType.transient_error diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/job.py b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/job.py index ad6fd10a82f93..2414dbe6481ae 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/job.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/job.py @@ -10,12 +10,14 @@ import pendulum as pdm import requests +from airbyte_cdk.sources.streams.http import HttpClient from requests.exceptions import JSONDecodeError from source_shopify.utils import ApiTypeEnum from source_shopify.utils import ShopifyRateLimiter as limiter +from ...http_request import ShopifyErrorHandler from .exceptions import AirbyteTracedException, ShopifyBulkExceptions -from .query import ShopifyBulkTemplates +from .query import ShopifyBulkQuery, ShopifyBulkTemplates from .retry import bulk_retry_on_exception from .status import ShopifyBulkJobStatus from .tools import END_OF_FILE, BulkTools @@ -26,6 +28,7 @@ class ShopifyBulkManager: session: requests.Session base_url: str stream_name: str + query: ShopifyBulkQuery # default logger logger: Final[logging.Logger] = logging.getLogger("airbyte") @@ -34,8 +37,6 @@ class ShopifyBulkManager: _retrieve_chunk_size: Final[int] = 1024 * 1024 * 10 _job_max_retries: Final[int] = 6 _job_backoff_time: int = 5 - # saved latest request - _request: Optional[requests.Request] = None # running job logger constrain, every 100-ish message will be printed _log_job_msg_frequency: Final[int] = 100 @@ -51,7 +52,7 @@ class ShopifyBulkManager: # currents: _job_id, _job_state, _job_created_at, _job_self_canceled _job_id: Optional[str] = field(init=False, default=None) - _job_state: ShopifyBulkJobStatus = field(init=False, default=None) + _job_state: str = field(init=False, default=None) # this string is based on ShopifyBulkJobStatus # completed and saved Bulk Job result filename _job_result_filename: Optional[str] = field(init=False, default=None) # date-time when the Bulk Job was created on the server @@ -81,6 +82,9 @@ class ShopifyBulkManager: # 2 sec is set as default value to cover the case with the empty-fast-completed jobs _job_last_elapsed_time: float = field(init=False, default=2.0) + def __post_init__(self): + self._http_client = HttpClient(self.stream_name, self.logger, ShopifyErrorHandler(), session=self.session) + @property def _tools(self) -> BulkTools: return BulkTools() @@ -142,9 +146,6 @@ def _expand_job_size(self) -> None: def _reduce_job_size(self) -> None: self.job_size /= self._job_size_adjusted_reduce_factor - def _save_latest_request(self, response: requests.Response) -> None: - self._request = response.request - def _job_size_reduce_next(self) -> None: # revert the flag self._job_should_revert_slice = False @@ -180,14 +181,17 @@ def _job_canceled(self) -> bool: return self._job_state == ShopifyBulkJobStatus.CANCELED.value def _job_cancel(self) -> None: - # re-use of `self._session(*, **)` to make BULK Job cancel request - cancel_args = self._job_get_request_args(ShopifyBulkTemplates.cancel) - with self.session as cancel_job: - canceled_response = cancel_job.request(**cancel_args) - # mark the job was self-canceled - self._job_self_canceled = True - # check CANCELED Job health - self._job_healthcheck(canceled_response) + _, canceled_response = self._http_client.send_request( + http_method="POST", + url=self.base_url, + data=ShopifyBulkTemplates.cancel(self._job_id), + headers={"Content-Type": "application/graphql"}, + request_kwargs={}, + ) + # mark the job was self-canceled + self._job_self_canceled = True + # check CANCELED Job health + self._job_healthcheck(canceled_response) # sleep to ensure the cancelation sleep(self._job_check_interval) @@ -209,27 +213,19 @@ def _log_state(self, message: Optional[str] = None) -> None: else: self.logger.info(pattern) - def _job_get_request_args(self, template: ShopifyBulkTemplates) -> Mapping[str, Any]: - return { - "method": "POST", - "url": self.base_url, - "data": template(self._job_id), - "headers": {"Content-Type": "application/graphql"}, - } - def _job_get_result(self, response: Optional[requests.Response] = None) -> Optional[str]: parsed_response = response.json().get("data", {}).get("node", {}) if response else None job_result_url = parsed_response.get("url") if parsed_response and not self._job_self_canceled else None if job_result_url: # save to local file using chunks to avoid OOM filename = self._tools.filename_from_url(job_result_url) - with self.session.get(job_result_url, stream=True) as response: - response.raise_for_status() - with open(filename, "wb") as file: - for chunk in response.iter_content(chunk_size=self._retrieve_chunk_size): - file.write(chunk) - # add `` line to the bottom of the saved data for easy parsing - file.write(END_OF_FILE.encode()) + _, response = self._http_client.send_request(http_method="GET", url=job_result_url, request_kwargs={"stream": True}) + response.raise_for_status() + with open(filename, "wb") as file: + for chunk in response.iter_content(chunk_size=self._retrieve_chunk_size): + file.write(chunk) + # add `` line to the bottom of the saved data for easy parsing + file.write(END_OF_FILE.encode()) return filename def _job_update_state(self, response: Optional[requests.Response] = None) -> None: @@ -298,45 +294,27 @@ def _collect_bulk_errors(self, response: requests.Response) -> List[Optional[dic ) def _job_healthcheck(self, response: requests.Response) -> Optional[Exception]: - try: - # save the latest request to retry - self._save_latest_request(response) - - # get the errors, if occured - errors = self._collect_bulk_errors(response) + errors = self._collect_bulk_errors(response) - # when the concurrent job takes place, - # another job could not be created - # we typically need to wait and retry, but no longer than 10 min. - if self._has_running_concurrent_job(errors): - return self._job_retry_on_concurrency() - - # when the job was already created and the error appears in the middle - if self._job_state and errors: - self._on_job_with_errors(errors) - - # when the job was not created because of some errors - if not self._job_state and errors: - self._on_non_handable_job_error(errors) - - except (ShopifyBulkExceptions.BulkJobBadResponse, ShopifyBulkExceptions.BulkJobError) as e: - raise e - - def _job_send_state_request(self) -> requests.Response: - with self.session as job_state_request: - status_args = self._job_get_request_args(ShopifyBulkTemplates.status) - self._request = requests.Request(**status_args, auth=self.session.auth).prepare() - return job_state_request.send(self._request) + if self._job_state and errors: + self._on_job_with_errors(errors) def _job_track_running(self) -> None: - job_state_response = self._job_send_state_request() - self._job_healthcheck(job_state_response) - self._job_update_state(job_state_response) - self._job_state_to_fn_map.get(self._job_state)(response=job_state_response) + _, response = self._http_client.send_request( + http_method="POST", + url=self.base_url, + data=ShopifyBulkTemplates.status(self._job_id), + headers={"Content-Type": "application/graphql"}, + request_kwargs={}, + ) + self._job_healthcheck(response) + + self._job_update_state(response) + self._job_state_to_fn_map.get(self._job_state)(response=response) def _has_running_concurrent_job(self, errors: Optional[Iterable[Mapping[str, Any]]] = None) -> bool: """ - When concurent BULK Job is already running for the same SHOP we receive: + When concurrent BULK Job is already running for the same SHOP we receive: Error example: [ { @@ -346,63 +324,63 @@ def _has_running_concurrent_job(self, errors: Optional[Iterable[Mapping[str, Any ] """ - concurent_job_pattern = "A bulk query operation for this app and shop is already in progress" + concurrent_job_pattern = "A bulk query operation for this app and shop is already in progress" # the errors are handled in `job_job_check_for_errors` if errors: for error in errors: message = error.get("message", "") if isinstance(error, dict) else "" - if concurent_job_pattern in message: + if concurrent_job_pattern in message: return True - # reset the `_concurrent_attempt` counter, once there is no concurrent job error - self._concurrent_attempt = 0 return False def _has_reached_max_concurrency(self) -> bool: return self._concurrent_attempt == self._concurrent_max_retry - def _job_retry_request(self) -> Optional[requests.Response]: - with self.session.send(self._request) as retried_request: - return retried_request - - def _job_retry_concurrent(self) -> Optional[requests.Response]: - self._concurrent_attempt += 1 - self.logger.warning( - f"Stream: `{self.stream_name}`, the BULK concurrency limit has reached. Waiting {self._concurrent_interval} sec before retry, atttempt: {self._concurrent_attempt}.", - ) - sleep(self._concurrent_interval) - retried_response = self._job_retry_request() - return self.job_process_created(retried_response) - - def _job_retry_on_concurrency(self) -> Optional[requests.Response]: - if self._has_reached_max_concurrency(): - # indicate we're out of attempts to retry with job creation - message = f"The BULK Job couldn't be created at this time, since another job is running." - self.logger.error(message) - # raise AibyteTracebackException with `INCOMPLETE` status - raise ShopifyBulkExceptions.BulkJobConcurrentError(message) - else: - return self._job_retry_concurrent() - @bulk_retry_on_exception(logger) - def _job_check_state(self) -> Optional[str]: + def _job_check_state(self) -> None: while not self._job_completed(): if self._job_canceled(): break else: self._job_track_running() - # external method to be used within other components - @bulk_retry_on_exception(logger) - def job_process_created(self, response: requests.Response) -> None: + def create_job(self, stream_slice: Mapping[str, str], filter_field: str) -> None: + if stream_slice: + query = self.query.get(filter_field, stream_slice["start"], stream_slice["end"]) + else: + query = self.query.get() + + _, response = self._http_client.send_request( + http_method="POST", + url=self.base_url, + json={"query": ShopifyBulkTemplates.prepare(query)}, + request_kwargs={}, + ) + + errors = self._collect_bulk_errors(response) + if self._has_running_concurrent_job(errors): + # when the concurrent job takes place, another job could not be created + # we typically need to wait and retry, but no longer than 10 min. (see retry in `bulk_retry_on_exception`) + raise ShopifyBulkExceptions.BulkJobCreationFailedConcurrentError(f"Failed to create job for stream {self.stream_name}") + else: + # There were no concurrent error for this job so even if there were other errors, we can reset this + self._concurrent_attempt = 0 + + if errors: + self._on_non_handable_job_error(errors) + + self._job_process_created(response) + + def _job_process_created(self, response: requests.Response) -> None: """ The Bulk Job with CREATED status, should be processed, before we move forward with Job Status Checks. """ - self._job_healthcheck(response) bulk_response = response.json().get("data", {}).get("bulkOperationRunQuery", {}).get("bulkOperation", {}) if response else None if bulk_response and bulk_response.get("status") == ShopifyBulkJobStatus.CREATED.value: self._job_id = bulk_response.get("id") self._job_created_at = bulk_response.get("createdAt") + self._job_state = ShopifyBulkJobStatus.CREATED.value self.logger.info(f"Stream: `{self.stream_name}`, the BULK Job: `{self._job_id}` is {ShopifyBulkJobStatus.CREATED.value}") def job_size_normalize(self, start: datetime, end: datetime) -> datetime: diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/retry.py b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/retry.py index 12d5d4d651cd3..140d77e91ad5f 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/retry.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/shopify_graphql/bulk/retry.py @@ -43,6 +43,17 @@ def wrapper(self, *args, **kwargs) -> Any: f"Stream `{stream_name}`: {ex}. Retrying {current_retries}/{max_retries} after {backoff_time} seconds." ) sleep(backoff_time) + except ShopifyBulkExceptions.BulkJobCreationFailedConcurrentError: + if self._concurrent_attempt == self._concurrent_max_retry: + message = f"The BULK Job couldn't be created at this time, since another job is running." + logger.error(message) + raise ShopifyBulkExceptions.BulkJobConcurrentError(message) + + self._concurrent_attempt += 1 + logger.warning( + f"Stream: `{self.stream_name}`, the BULK concurrency limit has reached. Waiting {self._concurrent_interval} sec before retry, attempt: {self._concurrent_attempt}.", + ) + sleep(self._concurrent_interval) return wrapper diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py index bc084f3c482bb..0a10a1714e03f 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/streams/base_streams.py @@ -7,12 +7,14 @@ from abc import ABC, abstractmethod from datetime import datetime from functools import cached_property -from typing import Any, Dict, Iterable, Mapping, MutableMapping, Optional, Union +from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Union from urllib.parse import parse_qsl, urlparse import pendulum as pdm import requests +from airbyte_cdk.sources.streams.core import StreamData from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_protocol.models import SyncMode from requests.exceptions import RequestException from source_shopify.shopify_graphql.bulk.job import ShopifyBulkManager from source_shopify.shopify_graphql.bulk.query import ShopifyBulkQuery, ShopifyBulkTemplates @@ -631,7 +633,6 @@ class IncrementalShopifyGraphQlBulkStream(IncrementalShopifyStream): filter_field = "updated_at" cursor_field = "updated_at" data_field = "graphql" - http_method = "POST" parent_stream_class: Optional[Union[ShopifyStream, IncrementalShopifyStream]] = None @@ -644,6 +645,7 @@ def __init__(self, config: Dict) -> None: session=self._session, base_url=f"{self.url_base}{self.path()}", stream_name=self.name, + query=self.query, ) # overide the default job slice size, if provided (it's auto-adjusted, later on) self.bulk_window_in_days = config.get("bulk_window_in_days") @@ -687,27 +689,6 @@ def availability_strategy(self) -> None: """NOT USED FOR BULK OPERATIONS TO SAVE THE RATE LIMITS AND TIME FOR THE SYNC.""" return None - def request_params(self, **kwargs) -> MutableMapping[str, Any]: - """ - NOT USED FOR SHOPIFY BULK OPERARTIONS. - https://shopify.dev/docs/api/usage/bulk-operations/queries#write-a-bulk-operation - """ - return {} - - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: - """ - NOT USED FOR SHOPIFY BULK OPERATIONS. - https://shopify.dev/docs/api/usage/bulk-operations/queries#write-a-bulk-operation - """ - return None - - def request_body_json(self, stream_slice: Optional[Mapping[str, Any]] = None, **kwargs) -> Mapping[str, Any]: - """ - Override for _send_request CDK method to send HTTP request to Shopify BULK Operatoions. - https://shopify.dev/docs/api/usage/bulk-operations/queries#bulk-query-overview - """ - return {"query": ShopifyBulkTemplates.prepare(stream_slice.get("query"))} - def get_updated_state( self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any] ) -> MutableMapping[str, Any]: @@ -761,21 +742,23 @@ def stream_slices(self, stream_state: Optional[Mapping[str, Any]] = None, **kwar self.job_manager.job_size_normalize(start, end) slice_end = self.job_manager.get_adjusted_job_start(start) self.emit_slice_message(start, slice_end) - yield {"query": self.query.get(self.filter_field, start.to_rfc3339_string(), slice_end.to_rfc3339_string())} + yield {"start": start.to_rfc3339_string(), "end": slice_end.to_rfc3339_string()} # increment the end of the slice or reduce the next slice start = self.job_manager.get_adjusted_job_end(start, slice_end) else: # for the streams that don't support filtering - yield {"query": self.query.get()} + yield {} - def process_bulk_results( + def read_records( self, - response: requests.Response, + sync_mode: SyncMode, + cursor_field: Optional[List[str]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, stream_state: Optional[Mapping[str, Any]] = None, - ) -> Optional[Iterable[Mapping[str, Any]]]: - # process the CREATED Job prior to other actions - self.job_manager.job_process_created(response) - # get results fetched from COMPLETED BULK Job + ) -> Iterable[StreamData]: + self.job_manager.create_job(stream_slice, self.filter_field) + stream_state = stream_state_cache.cached_state.get(self.name, {self.cursor_field: self.default_state_comparison_value}) + filename = self.job_manager.job_check_for_completion() # the `filename` could be `None`, meaning there are no data available for the slice period. if filename: @@ -785,8 +768,3 @@ def process_bulk_results( self.record_producer.read_file(filename) ) yield from self.filter_records_newer_than_state(stream_state, records) - - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: - # get the cached substream state, to avoid state collisions for Incremental Syncs - stream_state = stream_state_cache.cached_state.get(self.name, {self.cursor_field: self.default_state_comparison_value}) - yield from self.process_bulk_results(response, stream_state) diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/utils.py b/airbyte-integrations/connectors/source-shopify/source_shopify/utils.py index 4a34ad1fab31f..69a27c8e5af8a 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/utils.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/utils.py @@ -43,7 +43,7 @@ class ShopifyBadJsonError(AirbyteTracedException): def __init__(self, message, **kwargs) -> None: self.message = f"Reason: Bad JSON Response from the Shopify server. Details: {message}." - super().__init__(internal_message=self.message, failure_type=FailureType.config_error, **kwargs) + super().__init__(internal_message=self.message, failure_type=FailureType.transient_error, **kwargs) class ShopifyConnectionError(AirbyteTracedException): diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py b/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py index a627ced549fdc..79bada364f954 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py @@ -13,6 +13,13 @@ os.environ["REQUEST_CACHE_PATH"] = "REQUEST_CACHE_PATH" + +@pytest.fixture(autouse=True) +def time_sleep_mock(mocker): + time_mock = mocker.patch("time.sleep", lambda x: None) + yield time_mock + + def records_per_slice(parent_records: List[Mapping[str, Any]], state_checkpoint_interval) -> List[int]: num_batches = len(parent_records) // state_checkpoint_interval if len(parent_records) % state_checkpoint_interval != 0: diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py index c59e7cd4e50ee..8ce58ef3eae83 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/graphql_bulk/test_job.py @@ -5,6 +5,7 @@ import pytest import requests +from airbyte_protocol.models import SyncMode from source_shopify.shopify_graphql.bulk.exceptions import ShopifyBulkExceptions from source_shopify.shopify_graphql.bulk.status import ShopifyBulkJobStatus from source_shopify.streams.streams import ( @@ -22,6 +23,9 @@ TransactionsGraphql, ) +_ANY_SLICE = {} +_ANY_FILTER_FIELD = "any_filter_field" + def test_get_errors_from_response_invalid_response(auth_config) -> None: expected = "Couldn't check the `response` for `errors`" @@ -38,7 +42,7 @@ def test_retry_on_concurrent_job(request, requests_mock, auth_config) -> None: stream = MetafieldOrders(auth_config) stream.job_manager._concurrent_interval = 0 # mocking responses - requests_mock.get( + requests_mock.post( stream.job_manager.base_url, [ # concurrent request is running (3 - retries) @@ -49,8 +53,7 @@ def test_retry_on_concurrent_job(request, requests_mock, auth_config) -> None: {"json": request.getfixturevalue("bulk_successful_response")}, ]) - test_response = requests.get(stream.job_manager.base_url) - stream.job_manager._job_healthcheck(test_response) + stream.job_manager.create_job(_ANY_SLICE, _ANY_FILTER_FIELD) # call count should be 4 (3 retries, 1 - succeeded) assert requests_mock.call_count == 4 @@ -58,38 +61,34 @@ def test_retry_on_concurrent_job(request, requests_mock, auth_config) -> None: @pytest.mark.parametrize( "bulk_job_response, concurrent_max_retry, error_type, expected", [ - # method should return this response fixture, once retried. - ("bulk_successful_completed_response", 2, None, "gid://shopify/BulkOperation/4046733967549"), # method should raise AirbyteTracebackException, because the concurrent BULK Job is in progress ( - "bulk_error_with_concurrent_job", - 1, - ShopifyBulkExceptions.BulkJobConcurrentError, + "bulk_error_with_concurrent_job", + 1, + ShopifyBulkExceptions.BulkJobConcurrentError, "The BULK Job couldn't be created at this time, since another job is running", ), ], ids=[ - "regular concurrent request", - "max atttempt reached", + "max attempt reached", ] ) def test_job_retry_on_concurrency(request, requests_mock, bulk_job_response, concurrent_max_retry, error_type, auth_config, expected) -> None: stream = MetafieldOrders(auth_config) - # patching concurent settings + # patching concurrent settings stream.job_manager._concurrent_max_retry = concurrent_max_retry stream.job_manager._concurrent_interval = 1 - requests_mock.get(stream.job_manager.base_url, json=request.getfixturevalue(bulk_job_response)) - stream.job_manager._request = requests.get(stream.job_manager.base_url).request - + requests_mock.post(stream.job_manager.base_url, json=request.getfixturevalue(bulk_job_response)) + if error_type: with pytest.raises(error_type) as error: - stream.job_manager._job_retry_on_concurrency() + stream.job_manager.create_job(_ANY_SLICE, _ANY_FILTER_FIELD) assert expected in repr(error.value) and requests_mock.call_count == 2 else: # simulate the real job_id from created job stream.job_manager._job_id = expected - stream.job_manager._job_retry_on_concurrency() + stream.job_manager.create_job(_ANY_SLICE, _ANY_FILTER_FIELD) assert requests_mock.call_count == 2 @@ -105,7 +104,7 @@ def test_job_process_created(request, requests_mock, bulk_job_response, auth_con requests_mock.get(stream.job_manager.base_url, json=request.getfixturevalue(bulk_job_response)) test_response = requests.get(stream.job_manager.base_url) # process the job with id (typically CREATED one) - stream.job_manager.job_process_created(test_response) + stream.job_manager._job_process_created(test_response) assert stream.job_manager._job_id == expected @@ -165,15 +164,6 @@ def test_job_check_for_completion(mocker, request, requests_mock, job_response, 1, ), # Should be retried - ( - "bulk_successful_response_with_errors", - True, - ShopifyBulkExceptions.BulkJobError, - 2, - "Could not validate the status of the BULK Job", - 3, - ), - # Should be retried ( None, False, @@ -185,11 +175,10 @@ def test_job_check_for_completion(mocker, request, requests_mock, job_response, ], ids=[ "BulkJobNonHandableError", - "BulkJobError", "BulkJobBadResponse", ], ) -def test_retry_on_job_exception(mocker, request, requests_mock, job_response, auth_config, job_state, error_type, max_retry, call_count_expected, expected_msg) -> None: +def test_retry_on_job_creation_exception(request, requests_mock, auth_config, job_response, job_state, error_type, max_retry, call_count_expected, expected_msg) -> None: stream = MetafieldOrders(auth_config) stream.job_manager._job_backoff_time = 0 stream.job_manager._job_max_retries = max_retry @@ -207,7 +196,7 @@ def test_retry_on_job_exception(mocker, request, requests_mock, job_response, au # testing raised exception and backoff with pytest.raises(error_type) as error: - stream.job_manager._job_check_state() + stream.job_manager.create_job(_ANY_SLICE, _ANY_FILTER_FIELD) # we expect different call_count, because we set the different max_retries assert expected_msg in repr(error.value) and requests_mock.call_count == call_count_expected @@ -304,12 +293,10 @@ def test_bulk_stream_parse_response( test_result_url = bulk_job_completed_response.get("data").get("node").get("url") # mocking the result url with jsonl content requests_mock.post(stream.job_manager.base_url, json=bulk_job_completed_response) - # getting mock response - test_bulk_response: requests.Response = requests.post(stream.job_manager.base_url) # mocking nested api call to get data from result url requests_mock.get(test_result_url, text=request.getfixturevalue(json_content_example)) # parsing result from completed job - test_records = list(stream.parse_response(test_bulk_response)) + test_records = list(stream.read_records(SyncMode.full_refresh, stream_slice={})) expected_result = request.getfixturevalue(expected) if isinstance(expected_result, dict): assert test_records == [expected_result] @@ -318,13 +305,13 @@ def test_bulk_stream_parse_response( @pytest.mark.parametrize( - "stream, stream_state, with_start_date, expected", + "stream, stream_state, with_start_date, expected_start", [ - (DiscountCodes, {}, True, "updated_at:>='2023-01-01T00:00:00+00:00'"), + (DiscountCodes, {}, True, "2023-01-01T00:00:00+00:00"), # here the config migration is applied and the value should be "2020-01-01" - (DiscountCodes, {}, False, "updated_at:>='2020-01-01T00:00:00+00:00'"), - (DiscountCodes, {"updated_at": "2022-01-01T00:00:00Z"}, True, "updated_at:>='2022-01-01T00:00:00+00:00'"), - (DiscountCodes, {"updated_at": "2021-01-01T00:00:00Z"}, False, "updated_at:>='2021-01-01T00:00:00+00:00'"), + (DiscountCodes, {}, False, "2020-01-01T00:00:00+00:00"), + (DiscountCodes, {"updated_at": "2022-01-01T00:00:00Z"}, True, "2022-01-01T00:00:00+00:00"), + (DiscountCodes, {"updated_at": "2021-01-01T00:00:00Z"}, False, "2021-01-01T00:00:00+00:00"), ], ids=[ "No State, but Start Date", @@ -338,7 +325,7 @@ def test_stream_slices( stream, stream_state, with_start_date, - expected, + expected_start, ) -> None: # simulating `None` for `start_date` and `config migration` if not with_start_date: @@ -347,8 +334,7 @@ def test_stream_slices( stream = stream(auth_config) stream.job_manager.job_size = 1000 test_result = list(stream.stream_slices(stream_state=stream_state)) - test_query_from_slice = test_result[0].get("query") - assert expected in test_query_from_slice + assert test_result[0].get("start") == expected_start @pytest.mark.parametrize( @@ -377,8 +363,6 @@ def test_expand_stream_slices_job_size( test_result_url = bulk_job_completed_response.get("data").get("node").get("url") # mocking the result url with jsonl content requests_mock.post(stream.job_manager.base_url, json=bulk_job_completed_response) - # getting mock response - test_bulk_response: requests.Response = requests.post(stream.job_manager.base_url) # mocking nested api call to get data from result url requests_mock.get(test_result_url, text=request.getfixturevalue(json_content_example)) @@ -389,6 +373,8 @@ def test_expand_stream_slices_job_size( if last_job_elapsed_time: stream.job_manager._job_last_elapsed_time = last_job_elapsed_time # parsing result from completed job - list(stream.parse_response(test_bulk_response)) + + first_slice = next(stream.stream_slices()) + list(stream.read_records(SyncMode.incremental, stream_slice=first_slice)) # check the next slice assert stream.job_manager.job_size == adjusted_slice_size diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py index d3328a760c807..548e8c6e3e147 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/api/bulk.py @@ -13,7 +13,7 @@ def _create_job_url(shop_name: str) -> str: return f"https://{shop_name}.myshopify.com/admin/api/{ShopifyStream.api_version}/graphql.json" -def create_job_creation_request(shop_name: str, lower_boundary: datetime, upper_boundary: datetime) -> HttpRequest: +def create_job_creation_body(lower_boundary: datetime, upper_boundary: datetime): query = """ { orders( query: "updated_at:>='%LOWER_BOUNDARY_TOKEN%' AND updated_at:<='%UPPER_BOUNDARY_TOKEN%'" @@ -44,9 +44,13 @@ def create_job_creation_request(shop_name: str, lower_boundary: datetime, upper_ }""" query = query.replace("%LOWER_BOUNDARY_TOKEN%", lower_boundary.isoformat()).replace("%UPPER_BOUNDARY_TOKEN%", upper_boundary.isoformat()) prepared_query = ShopifyBulkTemplates.prepare(query) + return json.dumps({"query": prepared_query}) + + +def create_job_creation_request(shop_name: str, lower_boundary: datetime, upper_boundary: datetime) -> HttpRequest: return HttpRequest( url=_create_job_url(shop_name), - body=json.dumps({"query": prepared_query}) + body=create_job_creation_body(lower_boundary, upper_boundary) ) @@ -123,6 +127,19 @@ def __init__(self) -> None: } } + def with_running_status(self, bulk_operation_id: str) -> "JobStatusResponseBuilder": + self._template["data"]["node"] = { + "id": bulk_operation_id, + "status": "RUNNING", + "errorCode": None, + "createdAt": "2024-05-28T18:57:54Z", + "objectCount": "10", + "fileSize": None, + "url": None, + "partialDataUrl": None, + } + return self + def with_completed_status(self, bulk_operation_id: str, job_result_url: str) -> "JobStatusResponseBuilder": self._template["data"]["node"] = { "id": bulk_operation_id, diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py index 9be0da4bd0613..399998756877e 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/integration/test_bulk_stream.py @@ -1,21 +1,24 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. - +import json from datetime import datetime, timedelta from typing import Any, Dict from unittest import TestCase from airbyte_cdk.test.catalog_builder import CatalogBuilder from airbyte_cdk.test.entrypoint_wrapper import read -from airbyte_cdk.test.mock_http import HttpMocker, HttpResponse -from airbyte_cdk.test.mock_http.request import HttpRequest -from airbyte_protocol.models import FailureType, SyncMode +from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse + +_AN_ERROR_RESPONSE = HttpResponse(json.dumps({"errors": ["an error"]})) +from airbyte_protocol.models import SyncMode from freezegun import freeze_time +from requests.exceptions import ConnectionError from source_shopify import SourceShopify from unit_tests.integration.api.authentication import grant_all_scopes, set_up_shop from unit_tests.integration.api.bulk import ( JobCreationResponseBuilder, JobStatusResponseBuilder, MetafieldOrdersJobResponseBuilder, + create_job_creation_body, create_job_creation_request, create_job_status_request, ) @@ -75,21 +78,106 @@ def test_when_read_then_extract_records(self) -> None: assert output.errors == [] assert len(output.records) == 2 - def test_given_connection_error_when_read_then_fail_to_sync(self) -> None: + def test_given_errors_on_job_creation_when_read_then_do_not_retry(self) -> None: """ - We want to fix this behavior in a subsequent release so that instead, we retry and have records being emitted + The purpose of this test is to document the current behavior as I'm not sure we have an example of such errors on the job creation """ + job_creation_request = create_job_creation_request(_SHOP_NAME, _JOB_START_DATE, _JOB_END_DATE) + self._http_mocker.post(job_creation_request, _AN_ERROR_RESPONSE) + + self._read(_get_config(_JOB_START_DATE)) + + self._http_mocker.assert_number_of_calls(job_creation_request, 1) + + def test_given_response_is_not_json_on_job_creation_when_read_then_retry(self) -> None: + job_creation_request = create_job_creation_request(_SHOP_NAME, _JOB_START_DATE, _JOB_END_DATE) + self._http_mocker.post( + job_creation_request, + [ + HttpResponse("This is not json"), + JobCreationResponseBuilder().with_bulk_operation_id(_BULK_OPERATION_ID).build(), # This will never get called (see assertion below) + ] + ) + + self._http_mocker.post( + create_job_status_request(_SHOP_NAME, _BULK_OPERATION_ID), + JobStatusResponseBuilder().with_completed_status(_BULK_OPERATION_ID, _JOB_RESULT_URL).build(), + ) + self._http_mocker.get( + HttpRequest(_JOB_RESULT_URL), + MetafieldOrdersJobResponseBuilder().with_record().with_record().build(), + ) + + output = self._read(_get_config(_JOB_START_DATE)) + + assert output.errors == [] + assert len(output.records) == 2 + + def test_given_connection_error_on_job_creation_when_read_then_retry_job_creation(self) -> None: inner_mocker = self._http_mocker.__getattribute__("_mocker") inner_mocker.register_uri( # TODO the testing library should have the ability to generate ConnectionError. As this might not be trivial, we will wait for another case before implementing "POST", _URL_GRAPHQL, [{"exc": ConnectionError("ConnectionError")}, {"text": JobCreationResponseBuilder().with_bulk_operation_id(_BULK_OPERATION_ID).build().body, "status_code": 200}], + additional_matcher=lambda request: request.text == create_job_creation_body(_JOB_START_DATE, _JOB_END_DATE) + ) + self._http_mocker.post( + create_job_status_request(_SHOP_NAME, _BULK_OPERATION_ID), + JobStatusResponseBuilder().with_completed_status(_BULK_OPERATION_ID, _JOB_RESULT_URL).build(), + ) + self._http_mocker.get( + HttpRequest(_JOB_RESULT_URL), + MetafieldOrdersJobResponseBuilder().with_record().with_record().build(), ) output = self._read(_get_config(_JOB_START_DATE)) - assert list(map(lambda error: error.trace.error.failure_type, output.errors)) == [FailureType.system_error, FailureType.config_error] # The actual error followed by the error that crashes the python app - assert "ConnectionError" in output.errors[0].__str__() + assert output.errors == [] + + def test_given_retryable_error_on_first_get_job_status_when_read_then_retry(self) -> None: + self._http_mocker.post( + create_job_creation_request(_SHOP_NAME, _JOB_START_DATE, _JOB_END_DATE), + JobCreationResponseBuilder().with_bulk_operation_id(_BULK_OPERATION_ID).build(), + ) + self._http_mocker.post( + create_job_status_request(_SHOP_NAME, _BULK_OPERATION_ID), + [ + _AN_ERROR_RESPONSE, + JobStatusResponseBuilder().with_completed_status(_BULK_OPERATION_ID, _JOB_RESULT_URL).build(), + ] + ) + self._http_mocker.get( + HttpRequest(_JOB_RESULT_URL), + MetafieldOrdersJobResponseBuilder().with_record().with_record().build(), + ) + + output = self._read(_get_config(_JOB_START_DATE)) + + assert output.errors == [] + assert len(output.records) == 2 + + def test_given_retryable_error_on_get_job_status_when_read_then_retry(self) -> None: + self._http_mocker.post( + create_job_creation_request(_SHOP_NAME, _JOB_START_DATE, _JOB_END_DATE), + JobCreationResponseBuilder().with_bulk_operation_id(_BULK_OPERATION_ID).build(), + ) + self._http_mocker.post( + create_job_status_request(_SHOP_NAME, _BULK_OPERATION_ID), + [ + JobStatusResponseBuilder().with_running_status(_BULK_OPERATION_ID).build(), + HttpResponse(json.dumps({"errors": ["an error"]})), + JobStatusResponseBuilder().with_completed_status(_BULK_OPERATION_ID, _JOB_RESULT_URL).build(), + ] + ) + self._http_mocker.get( + HttpRequest(_JOB_RESULT_URL), + MetafieldOrdersJobResponseBuilder().with_record().with_record().build(), + ) + + output = self._read(_get_config(_JOB_START_DATE)) + + assert output.errors == [] + assert len(output.records) == 2 def _read(self, config): catalog = CatalogBuilder().with_stream(_BULK_STREAM, SyncMode.full_refresh).build()