Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow Excluding Multiple Events + events in unordered funnels #5150

Merged
merged 17 commits into from
Jul 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions ee/clickhouse/queries/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,21 +128,21 @@ def _get_partition_cols(self, level_index: int, max_steps: int):
cols.append(f"step_{i}")
if i < level_index:
cols.append(f"latest_{i}")
for exclusion in self._filter.exclusions:
for exclusion_id, exclusion in enumerate(self._filter.exclusions):
if exclusion.funnel_from_step + 1 == i:
cols.append(f"exclusion_latest_{exclusion.funnel_from_step}")
cols.append(f"exclusion_{exclusion_id}_latest_{exclusion.funnel_from_step}")
else:
duplicate_event = 0
if i > 0 and self._filter.entities[i].equals(self._filter.entities[i - 1]):
duplicate_event = 1
cols.append(
f"min(latest_{i}) over (PARTITION by person_id {self._get_breakdown_prop()} ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND {duplicate_event} PRECEDING) latest_{i}"
)
for exclusion in self._filter.exclusions:
for exclusion_id, exclusion in enumerate(self._filter.exclusions):
# exclusion starting at step i follows semantics of step i+1 in the query (since we're looking for exclusions after step i)
if exclusion.funnel_from_step + 1 == i:
cols.append(
f"min(exclusion_latest_{exclusion.funnel_from_step}) over (PARTITION by person_id {self._get_breakdown_prop()} ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) exclusion_latest_{exclusion.funnel_from_step}"
f"min(exclusion_{exclusion_id}_latest_{exclusion.funnel_from_step}) over (PARTITION by person_id {self._get_breakdown_prop()} ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) exclusion_{exclusion_id}_latest_{exclusion.funnel_from_step}"
)
return ", ".join(cols)

Expand All @@ -151,10 +151,10 @@ def _get_exclusion_condition(self):
return ""

conditions = []
for exclusion in self._filter.exclusions:
for exclusion_id, exclusion in enumerate(self._filter.exclusions):
from_time = f"latest_{exclusion.funnel_from_step}"
to_time = f"latest_{exclusion.funnel_to_step}"
exclusion_time = f"exclusion_latest_{exclusion.funnel_from_step}"
exclusion_time = f"exclusion_{exclusion_id}_latest_{exclusion.funnel_from_step}"
condition = (
f"if( {exclusion_time} > {from_time} AND {exclusion_time} < "
f"if(isNull({to_time}), {from_time} + INTERVAL {self._filter.funnel_window_days} DAY, {to_time}), 1, 0)"
Expand Down Expand Up @@ -199,9 +199,9 @@ def _get_inner_event_query(
step_cols = self._get_step_col(entity, index, entity_name)
all_step_cols.extend(step_cols)

for entity in self._filter.exclusions:
step_cols = self._get_step_col(entity, entity.funnel_from_step, entity_name, "exclusion_")
# every exclusion entity has the form: exclusion_step_i & timestamp exclusion_latest_i
for exclusion_id, entity in enumerate(self._filter.exclusions):
step_cols = self._get_step_col(entity, entity.funnel_from_step, entity_name, f"exclusion_{exclusion_id}_")
# every exclusion entity has the form: exclusion_<id>_step_i & timestamp exclusion_<id>_latest_i
# where i is the starting step for exclusion on that entity
all_step_cols.extend(step_cols)

Expand Down Expand Up @@ -235,8 +235,8 @@ def _get_steps_conditions(self, length: int) -> str:
for index in range(length):
step_conditions.append(f"step_{index} = 1")

for entity in self._filter.exclusions:
step_conditions.append(f"exclusion_step_{entity.funnel_from_step} = 1")
for exclusion_id, entity in enumerate(self._filter.exclusions):
step_conditions.append(f"exclusion_{exclusion_id}_step_{entity.funnel_from_step} = 1")

return " OR ".join(step_conditions)

Expand Down
9 changes: 5 additions & 4 deletions ee/clickhouse/queries/funnels/funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,17 +132,18 @@ def get_comparison_cols(self, level_index: int, max_steps: int):
cols.append(f"step_{i}")
if i < level_index:
cols.append(f"latest_{i}")
for exclusion in self._filter.exclusions:
for exclusion_id, exclusion in enumerate(self._filter.exclusions):
if exclusion.funnel_from_step + 1 == i:
cols.append(f"exclusion_latest_{exclusion.funnel_from_step}")
cols.append(f"exclusion_{exclusion_id}_latest_{exclusion.funnel_from_step}")
else:
comparison = self._get_comparison_at_step(i, level_index)
cols.append(f"if({comparison}, NULL, latest_{i}) as latest_{i}")

for exclusion in self._filter.exclusions:
for exclusion_id, exclusion in enumerate(self._filter.exclusions):
if exclusion.funnel_from_step + 1 == i:
exclusion_identifier = f"exclusion_{exclusion_id}_latest_{exclusion.funnel_from_step}"
cols.append(
f"if(exclusion_latest_{exclusion.funnel_from_step} < latest_{exclusion.funnel_from_step}, NULL, exclusion_latest_{exclusion.funnel_from_step}) as exclusion_latest_{exclusion.funnel_from_step}"
f"if({exclusion_identifier} < latest_{exclusion.funnel_from_step}, NULL, {exclusion_identifier}) as {exclusion_identifier}"
)

return ", ".join(cols)
Expand Down
54 changes: 50 additions & 4 deletions ee/clickhouse/queries/funnels/funnel_unordered.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import List, Union
from typing import List

from rest_framework.exceptions import ValidationError

from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase

Expand All @@ -21,12 +23,22 @@ class ClickhouseFunnelUnordered(ClickhouseFunnelBase):

The result format is the same as the basic funnel, i.e. [step, count].
Here, `step_i` (0 indexed) signifies the number of people that did at least `i+1` steps.

## Exclusion Semantics
For unordered funnels, exclusion is a bit weird. It means: across all orderings of the steps,
how far can you go without seeing an exclusion event?
If you see an exclusion event => you're discarded.
See test_advanced_funnel_multiple_exclusions_between_steps for details.
"""

def get_query(self):

max_steps = len(self._filter.entities)

for exclusion in self._filter.exclusions:
if exclusion.funnel_from_step != 0 or exclusion.funnel_to_step != max_steps - 1:
raise ValidationError("Partial Exclusions not allowed in unordered funnels")

breakdown_clause = self._get_breakdown_prop()

return f"""
Expand Down Expand Up @@ -59,6 +71,7 @@ def get_step_counts_without_aggregation_query(self):
partition_select = self._get_partition_cols(1, max_steps)
sorting_condition = self.get_sorting_condition(max_steps)
breakdown_clause = self._get_breakdown_prop()
exclusion_clause = self._get_exclusion_condition()

for i in range(max_steps):
inner_query = f"""
Expand All @@ -71,9 +84,11 @@ def get_step_counts_without_aggregation_query(self):
"""

formatted_query = f"""
SELECT *, {sorting_condition} AS steps {self._get_step_times(max_steps)} FROM (
SELECT *, {sorting_condition} AS steps {exclusion_clause} {self._get_step_times(max_steps)} FROM (
{inner_query}
) WHERE step_0 = 1"""
) WHERE step_0 = 1
{'AND exclusion = 0' if exclusion_clause else ''}
"""

# rotate entities by 1 to get new first event
entities_to_use.append(entities_to_use.pop(0))
Expand Down Expand Up @@ -109,13 +124,44 @@ def _get_step_times(self, max_steps: int):

def get_sorting_condition(self, max_steps: int):

conditions = []

event_times_elements = []
for i in range(max_steps):
event_times_elements.append(f"latest_{i}")

conditions.append(f"arraySort([{','.join(event_times_elements)}]) as event_times")
# replacement of latest_i for whatever query part requires it, just like conversion_times

basic_conditions: List[str] = []
for i in range(1, max_steps):
basic_conditions.append(
f"if(latest_0 < latest_{i} AND latest_{i} <= latest_0 + INTERVAL {self._filter.funnel_window_days} DAY, 1, 0)"
)

conditions.append(f"arraySum([{','.join(basic_conditions)}, 1])")

if basic_conditions:
return f"arraySum([{','.join(basic_conditions)}, 1])"
return ",".join(conditions)
else:
return "1"

def _get_exclusion_condition(self):
if not self._filter.exclusions:
return ""

conditions = []
for exclusion_id, exclusion in enumerate(self._filter.exclusions):
from_time = f"latest_{exclusion.funnel_from_step}"
to_time = f"event_times[{exclusion.funnel_to_step + 1}]"
exclusion_time = f"exclusion_{exclusion_id}_latest_{exclusion.funnel_from_step}"
condition = (
f"if( {exclusion_time} > {from_time} AND {exclusion_time} < "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the "<" supposed to be trailing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Multiple strings! Adjacent string literals get concatenated automatically when enclosed in parentheses.

Felt clearer to me, about what's happening in the "<" (the next line)

f"if(isNull({to_time}), {from_time} + INTERVAL {self._filter.funnel_window_days} DAY, {to_time}), 1, 0)"
)
conditions.append(condition)

if conditions:
return f", arraySum([{','.join(conditions)}]) as exclusion"
else:
return ""
146 changes: 144 additions & 2 deletions ee/clickhouse/queries/funnels/test/test_funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@ def test_funnel_exclusions_full_window(self):
"funnel_window_days": 14,
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-14 00:00:00",
"exclusions": [{"id": "x", "type": "events", "funnel_from_step": 0, "funnel_to_step": 1},],
"exclusions": [
{"id": "x 1 name with numbers 2", "type": "events", "funnel_from_step": 0, "funnel_to_step": 1},
],
}
filter = Filter(data=filters)
funnel = ClickhouseFunnel(filter, self.team)
Expand All @@ -150,7 +152,9 @@ def test_funnel_exclusions_full_window(self):
# event 2
person2 = _create_person(distinct_ids=["person2"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person2", timestamp="2021-05-01 03:00:00")
_create_event(team=self.team, event="x", distinct_id="person2", timestamp="2021-05-01 03:30:00")
_create_event(
team=self.team, event="x 1 name with numbers 2", distinct_id="person2", timestamp="2021-05-01 03:30:00"
)
_create_event(team=self.team, event="paid", distinct_id="person2", timestamp="2021-05-01 04:00:00")

# event 3
Expand Down Expand Up @@ -1120,3 +1124,141 @@ def test_funnel_with_denormalised_properties(self):

self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 1)

def test_advanced_funnel_multiple_exclusions_between_steps(self):
filters = {
"events": [
{"id": "user signed up", "type": "events", "order": 0},
{"id": "$pageview", "type": "events", "order": 1},
{"id": "insight viewed", "type": "events", "order": 2},
{"id": "invite teammate", "type": "events", "order": 3},
{"id": "pageview2", "type": "events", "order": 4},
],
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-14 00:00:00",
"insight": INSIGHT_FUNNELS,
"exclusions": [
{"id": "x", "type": "events", "funnel_from_step": 0, "funnel_to_step": 1},
{"id": "y", "type": "events", "funnel_from_step": 2, "funnel_to_step": 3},
],
}

person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person1", timestamp="2021-05-01 01:00:00")
_create_event(team=self.team, event="x", distinct_id="person1", timestamp="2021-05-01 02:00:00")
_create_event(team=self.team, event="$pageview", distinct_id="person1", timestamp="2021-05-01 03:00:00")
_create_event(team=self.team, event="insight viewed", distinct_id="person1", timestamp="2021-05-01 04:00:00")
_create_event(team=self.team, event="y", distinct_id="person1", timestamp="2021-05-01 04:30:00")
_create_event(team=self.team, event="invite teammate", distinct_id="person1", timestamp="2021-05-01 05:00:00")
_create_event(team=self.team, event="pageview2", distinct_id="person1", timestamp="2021-05-01 06:00:00")

person2 = _create_person(distinct_ids=["person2"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person2", timestamp="2021-05-01 01:00:00")
_create_event(team=self.team, event="y", distinct_id="person2", timestamp="2021-05-01 01:30:00")
_create_event(team=self.team, event="$pageview", distinct_id="person2", timestamp="2021-05-01 02:00:00")
_create_event(team=self.team, event="insight viewed", distinct_id="person2", timestamp="2021-05-01 04:00:00")
_create_event(team=self.team, event="y", distinct_id="person2", timestamp="2021-05-01 04:30:00")
_create_event(team=self.team, event="invite teammate", distinct_id="person2", timestamp="2021-05-01 05:00:00")
_create_event(team=self.team, event="x", distinct_id="person2", timestamp="2021-05-01 05:30:00")
_create_event(team=self.team, event="pageview2", distinct_id="person2", timestamp="2021-05-01 06:00:00")

person3 = _create_person(distinct_ids=["person3"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person3", timestamp="2021-05-01 01:00:00")
_create_event(team=self.team, event="x", distinct_id="person3", timestamp="2021-05-01 01:30:00")
_create_event(team=self.team, event="$pageview", distinct_id="person3", timestamp="2021-05-01 02:00:00")
_create_event(team=self.team, event="insight viewed", distinct_id="person3", timestamp="2021-05-01 04:00:00")
_create_event(team=self.team, event="invite teammate", distinct_id="person3", timestamp="2021-05-01 05:00:00")
_create_event(team=self.team, event="x", distinct_id="person3", timestamp="2021-05-01 05:30:00")
_create_event(team=self.team, event="pageview2", distinct_id="person3", timestamp="2021-05-01 06:00:00")

person4 = _create_person(distinct_ids=["person4"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person4", timestamp="2021-05-01 01:00:00")
_create_event(team=self.team, event="$pageview", distinct_id="person4", timestamp="2021-05-01 02:00:00")
_create_event(team=self.team, event="insight viewed", distinct_id="person4", timestamp="2021-05-01 04:00:00")
_create_event(team=self.team, event="invite teammate", distinct_id="person4", timestamp="2021-05-01 05:00:00")
_create_event(team=self.team, event="pageview2", distinct_id="person4", timestamp="2021-05-01 06:00:00")

filter = Filter(data=filters)
funnel = ClickhouseFunnel(filter, self.team)

result = funnel.run()

self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 1)
self.assertEqual(len(result[0]["people"]), 1)

self.assertEqual(result[4]["count"], 1)
self.assertEqual(len(result[4]["people"]), 1)

self.assertCountEqual(
self._get_people_at_step(filter, 1), [person4.uuid],
)

filter = filter.with_data(
{
"exclusions": [
{"id": "x", "type": "events", "funnel_from_step": 0, "funnel_to_step": 1},
{"id": "y", "type": "events", "funnel_from_step": 0, "funnel_to_step": 1},
],
}
)
funnel = ClickhouseFunnel(filter, self.team)

result = funnel.run()

self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 1)
self.assertEqual(len(result[0]["people"]), 1)

self.assertEqual(result[4]["count"], 1)
self.assertEqual(len(result[4]["people"]), 1)

self.assertCountEqual(
self._get_people_at_step(filter, 1), [person4.uuid],
)

filter = filter.with_data(
{
"exclusions": [
{"id": "x", "type": "events", "funnel_from_step": 0, "funnel_to_step": 1},
{"id": "y", "type": "events", "funnel_from_step": 0, "funnel_to_step": 1},
],
}
)
funnel = ClickhouseFunnel(filter, self.team)

result = funnel.run()

self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 1)
self.assertEqual(len(result[0]["people"]), 1)

self.assertEqual(result[4]["count"], 1)
self.assertEqual(len(result[4]["people"]), 1)

self.assertCountEqual(
self._get_people_at_step(filter, 1), [person4.uuid],
)

filter = filter.with_data(
{
"exclusions": [
{"id": "x", "type": "events", "funnel_from_step": 0, "funnel_to_step": 4},
{"id": "y", "type": "events", "funnel_from_step": 0, "funnel_to_step": 4},
],
}
)
funnel = ClickhouseFunnel(filter, self.team)

result = funnel.run()

self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 1)
self.assertEqual(len(result[0]["people"]), 1)

self.assertEqual(result[4]["count"], 1)
self.assertEqual(len(result[4]["people"]), 1)

self.assertCountEqual(
self._get_people_at_step(filter, 1), [person4.uuid],
)
Loading