Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎉 Source Facebook Marketing: Add the option to fetch thumbnail image data. #8649

Merged
merged 15 commits into from
Dec 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"sourceDefinitionId": "e7778cfc-e97c-4458-9ecb-b4f2bba8946c",
"name": "Facebook Marketing",
"dockerRepository": "airbyte/source-facebook-marketing",
"dockerImageTag": "0.2.27",
"dockerImageTag": "0.2.29",
"documentationUrl": "https://docs.airbyte.io/integrations/sources/facebook-marketing",
"icon": "facebook.svg"
}
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@
- name: Facebook Marketing
sourceDefinitionId: e7778cfc-e97c-4458-9ecb-b4f2bba8946c
dockerRepository: airbyte/source-facebook-marketing
dockerImageTag: 0.2.28
dockerImageTag: 0.2.29
documentationUrl: https://docs.airbyte.io/integrations/sources/facebook-marketing
icon: facebook.svg
sourceType: api
Expand Down
14 changes: 10 additions & 4 deletions airbyte-config/init/src/main/resources/seed/source_specs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1428,7 +1428,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-facebook-marketing:0.2.28"
- dockerImage: "airbyte/source-facebook-marketing:0.2.29"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing"
changelogUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing"
Expand Down Expand Up @@ -1469,9 +1469,15 @@
- "2017-01-26T00:00:00Z"
type: "string"
format: "date-time"
fetch_thumbnail_images:
title: "Fetch Thumbnail Images"
description: "In each Ad Creative, fetch the thumbnail_url and store the\
\ result in thumbnail_data_url"
default: false
type: "boolean"
include_deleted:
title: "Include Deleted"
description: "Include data from deleted campaigns, ads, and adsets."
description: "Include data from deleted campaigns, ads, and adsets"
default: false
type: "boolean"
insights_lookback_window:
Expand All @@ -1483,8 +1489,8 @@
type: "integer"
insights_days_per_job:
title: "Insights Days Per Job"
description: "Number of days to sync in one job. The more data you have\
\ - the smaller you want this parameter to be."
description: "Number of days to sync in one job (the more data you have,\
\ the smaller this parameter should be)"
default: 7
minimum: 1
maximum: 30
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.28
LABEL io.airbyte.version=0.2.29
LABEL io.airbyte.name=airbyte/source-facebook-marketing
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,15 @@
"type": "string",
"format": "date-time"
},
"fetch_thumbnail_images": {
"title": "Fetch Thumbnail Images",
"description": "In each Ad Creative, fetch the thumbnail_url and store the result in thumbnail_data_url",
"default": false,
"type": "boolean"
},
"include_deleted": {
"title": "Include Deleted",
"description": "Include data from deleted campaigns, ads, and adsets.",
"description": "Include data from deleted campaigns, ads, and adsets",
"default": false,
"type": "boolean"
},
Expand All @@ -48,7 +54,7 @@
},
"insights_days_per_job": {
"title": "Insights Days Per Job",
"description": "Number of days to sync in one job. The more data you have - the smaller you want this parameter to be.",
"description": "Number of days to sync in one job (the more data you have, the smaller this parameter should be)",
"default": 7,
"minimum": 1,
"maximum": 30,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,9 @@
}
}
},
"thumbnail_data_url": {
"type": ["null", "string"]
},
"thumbnail_url": {
"type": ["null", "string"]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ class Config:
default_factory=pendulum.now,
)

include_deleted: bool = Field(default=False, description="Include data from deleted campaigns, ads, and adsets.")
fetch_thumbnail_images: bool = Field(
default=False, description="In each Ad Creative, fetch the thumbnail_url and store the result in thumbnail_data_url"
)

include_deleted: bool = Field(default=False, description="Include data from deleted campaigns, ads, and adsets")

insights_lookback_window: int = Field(
default=28,
Expand All @@ -87,7 +91,7 @@ class Config:

insights_days_per_job: int = Field(
default=7,
description="Number of days to sync in one job. The more data you have - the smaller you want this parameter to be.",
description="Number of days to sync in one job (the more data you have, the smaller this parameter should be)",
minimum=1,
maximum=30,
)
Expand Down Expand Up @@ -137,7 +141,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Type[Stream]]:
Campaigns(api=api, start_date=config.start_date, end_date=config.end_date, include_deleted=config.include_deleted),
AdSets(api=api, start_date=config.start_date, end_date=config.end_date, include_deleted=config.include_deleted),
Ads(api=api, start_date=config.start_date, end_date=config.end_date, include_deleted=config.include_deleted),
AdCreatives(api=api),
AdCreatives(api=api, fetch_thumbnail_images=config.fetch_thumbnail_images),
AdsInsights(**insights_args),
AdsInsightsAgeAndGender(**insights_args),
AdsInsightsCountry(**insights_args),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#

import base64
import time
import urllib.parse as urlparse
from abc import ABC
Expand All @@ -12,6 +13,7 @@
import airbyte_cdk.sources.utils.casing as casing
import backoff
import pendulum
import requests
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.sources.streams.core import package_name_from_class
Expand Down Expand Up @@ -43,6 +45,18 @@ def remove_params_from_url(url: str, params: List[str]) -> str:
)


def fetch_thumbnail_data_url(url: str, timeout: float = 30.0) -> "str | None":
    """Download the image at ``url`` and return it as a base64 ``data:`` URL.

    This is a best-effort helper: a failed download must not abort the sync,
    so every failure mode is reported by returning ``None``.

    :param url: Location of the thumbnail image to download.
    :param timeout: Seconds to wait for the server before giving up. Without
        a timeout ``requests`` may block indefinitely on a stalled connection.
    :return: ``data:<content-type>;base64,<payload>`` on an HTTP 200 response,
        otherwise ``None`` (network error, timeout, or non-200 status).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # Deliberately swallowed: the thumbnail is optional enrichment data.
        return None

    if response.status_code != 200:
        return None

    # Fall back to a generic binary type instead of raising KeyError when the
    # server omits the Content-Type header.
    content_type = response.headers.get("content-type", "application/octet-stream")
    encoded = base64.b64encode(response.content).decode("ascii")
    return f"data:{content_type};base64,{encoded}"


class FBMarketingStream(Stream, ABC):
"""Base stream class"""

Expand Down Expand Up @@ -198,6 +212,10 @@ class AdCreatives(FBMarketingStream):
entity_prefix = "adcreative"
batch_size = 50

def __init__(self, fetch_thumbnail_images: bool = False, **kwargs):
    """Create the stream.

    :param fetch_thumbnail_images: Stored on the instance as
        ``_fetch_thumbnail_images``; defaults to ``False``.
    :param kwargs: Forwarded unchanged to the parent class constructor.
    """
    super().__init__(**kwargs)
    self._fetch_thumbnail_images = fetch_thumbnail_images

def read_records(
self,
sync_mode: SyncMode,
Expand All @@ -207,17 +225,23 @@ def read_records(
) -> Iterable[Mapping[str, Any]]:
"""Read records using batch API"""
records = self._read_records(params=self.request_params(stream_state=stream_state))
requests = [record.api_get(fields=self.fields, pending=True) for record in records]
# "thumbnail_data_url" is a field in our stream's schema because we
# output it (see fix_thumbnail_urls below), but it's not a field that
# we can request from Facebook
request_fields = [f for f in self.fields if f != "thumbnail_data_url"]
requests = [record.api_get(fields=request_fields, pending=True) for record in records]
for requests_batch in batch(requests, size=self.batch_size):
for record in self.execute_in_batch(requests_batch):
yield self.clear_urls(record)
yield self.fix_thumbnail_urls(record)

@staticmethod
def clear_urls(record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
"""Some URLs has random values, these values doesn't affect validity of URLs, but breaks SAT"""
def fix_thumbnail_urls(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
"""Cleans and, if enabled, fetches thumbnail URLs for each creative."""
# The thumbnail_url contains some extra query parameters that don't affect the validity of the URL, but break SAT
thumbnail_url = record.get("thumbnail_url")
if thumbnail_url:
record["thumbnail_url"] = remove_params_from_url(thumbnail_url, ["_nc_hash", "d"])
if self._fetch_thumbnail_images:
record["thumbnail_data_url"] = fetch_thumbnail_data_url(thumbnail_url)
return record

@backoff_policy
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/facebook-marketing.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ As a summary, custom insights allows to replicate only some fields, resulting in

| Version | Date | Pull Request | Subject |
| :--- | :--- | :--- | :--- |
| 0.2.29 | 2021-12-17 | [8649](https://github.com/airbytehq/airbyte/pull/8649) | Retrieve ad_creatives thumbnail image as an encoded data URL |
| 0.2.28 | 2021-12-13 | [8742](https://github.com/airbytehq/airbyte/pull/8742) | Fix for schema generation related to "breakdown" fields |
| 0.2.27 | 2021-11-29 | [8257](https://github.com/airbytehq/airbyte/pull/8257) | Add fields to Campaign stream |
| 0.2.26 | 2021-11-19 | [7855](https://github.com/airbytehq/airbyte/pull/7855) | Add Video stream |
Expand Down