forked from fleetio/dbt-segment
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsegment_web_page_views.sql
121 lines (91 loc) · 2.93 KB
/
segment_web_page_views.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
with
{#
The if statement below checks to see if segment_page_views_table is a string or a list, and then builds the model accordingly
#}
{% if var('segment_page_views_table') is string %}
unioned_sources AS (
select cast('segment_page_views_table' as text) as source_name, * from {{var('segment_page_views_table')}}
),
{% elif var('segment_page_views_table') is iterable %}
{#
The section below takes each of the items listed for the segment_page_views_table variable, creates CTEs for them,
and then adds a field to note the name of the source table that the records are related to.
#}
unioned_sources as (
{% for table_ref in var('segment_page_views_table', default=[]) %}
SELECT
'{{ table_ref }}' as source_name
, *
FROM
{{ ref(table_ref) }}
{%- if not loop.last %}
UNION ALL
{%- endif %}
{% endfor %}
),
{% endif %}
row_numbering as (
select
*,
row_number() over (partition by source_name, id order by received_at asc) as row_num
from unioned_sources
),
deduped as (
select
*
from row_numbering
where row_num = 1
),
renamed as (
select
source_name,
id as page_view_id,
anonymous_id,
user_id,
received_at as received_at_tstamp,
sent_at as sent_at_tstamp,
timestamp as tstamp,
url as page_url,
{{ dbt_utils.get_url_host('url') }} as page_url_host,
path as page_url_path,
title as page_title,
search as page_url_query,
referrer,
replace(
{{ dbt_utils.get_url_host('referrer') }},
'www.',
''
) as referrer_host,
context_campaign_source as utm_source,
context_campaign_medium as utm_medium,
context_campaign_name as utm_campaign,
context_campaign_term as utm_term,
context_campaign_content as utm_content,
{{ dbt_utils.get_url_parameter('url', 'gclid') }} as gclid,
context_ip as ip,
context_user_agent as user_agent,
case
when lower(context_user_agent) like '%android%' then 'Android'
else replace(
{{ dbt.split_part(dbt.split_part('context_user_agent', "'('", 2), "' '", 1) }},
';', '')
end as device
{% if var('segment_pass_through_columns') != [] %}
,
{{ var('segment_pass_through_columns') | join (", ")}}
{% endif %}
from deduped
),
final as (
select
*,
case
when device = 'iPhone' then 'iPhone'
when device = 'Android' then 'Android'
when device in ('iPad', 'iPod') then 'Tablet'
when device in ('Windows', 'Macintosh', 'X11') then 'Desktop'
else 'Uncategorized'
end as device_category
from renamed
)
select * from final