Skip to content

Commit

Permalink
feat(hogql): run filter based insights via hogql
Browse files Browse the repository at this point in the history
  • Loading branch information
thmsobrmlr committed Sep 26, 2023
1 parent 16a0cf7 commit 258b88a
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 3 deletions.
1 change: 1 addition & 0 deletions frontend/src/queries/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ export function isLifecycleQuery(node?: Node | null): node is LifecycleQuery {
return node?.kind === NodeKind.LifecycleQuery
}

// sync with posthog/hogql_queries/legacy_compatibility/process_insight.py
export function isQueryWithHogQLSupport(node?: Node | null): node is LifecycleQuery {
return isLifecycleQuery(node) || isTrendsQuery(node)
}
Expand Down
4 changes: 4 additions & 0 deletions posthog/api/insight.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
from posthog.decorators import cached_by_filters
from posthog.helpers.multi_property_breakdown import protect_old_clients_from_multi_property_default
from posthog.hogql.errors import HogQLException
from posthog.hogql_queries.legacy_compatibility.process_insight import is_insight_with_hogql_support, process_insight
from posthog.kafka_client.topics import KAFKA_METRICS_TIME_TO_SEE_DATA
from posthog.models import DashboardTile, Filter, Insight, User
from posthog.models.activity_logging.activity_log import (
Expand Down Expand Up @@ -503,6 +504,9 @@ def insight_result(self, insight: Insight) -> InsightResult:
dashboard_tile = self.dashboard_tile_from_context(insight, dashboard)
target = insight if dashboard is None else dashboard_tile

if insight.team.hogql_insights_enabled and is_insight_with_hogql_support(target or insight):
return process_insight(target or insight, insight.team)

is_shared = self.context.get("is_shared", False)
refresh_insight_now, refresh_frequency = should_refresh_insight(
insight, dashboard_tile, request=self.context["request"], is_shared=is_shared
Expand Down
8 changes: 6 additions & 2 deletions posthog/api/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from posthog.queries.time_to_see_data.sessions import get_session_events, get_sessions
from posthog.rate_limit import AIBurstRateThrottle, AISustainedRateThrottle, TeamRateThrottle
from posthog.schema import EventsQuery, HogQLQuery, HogQLMetadata
from posthog.types import InsightQueryNode
from posthog.utils import refresh_requested_by_client


Expand Down Expand Up @@ -198,11 +199,14 @@ def _unwrap_pydantic_dict(response: Any) -> Dict:


def process_query(
team: Team, query_json: Dict, default_limit: Optional[int] = None, request: Optional[Request] = None
team: Team,
query_json: Dict | InsightQueryNode,
default_limit: Optional[int] = None,
request: Optional[Request] = None,
) -> Dict:
# query_json has been parsed by QuerySchemaParser
# it _should_ be impossible to end up in here with a "bad" query
query_kind = query_json.get("kind")
query_kind = query_json.get("kind") if isinstance(query_json, dict) else query_json.kind

tag_queries(query=query_json)

Expand Down
26 changes: 26 additions & 0 deletions posthog/api/test/test_insight.py
Original file line number Diff line number Diff line change
Expand Up @@ -2680,3 +2680,29 @@ def test_insight_retention_hogql(self) -> None:
).json()
self.assertEqual(len(response["result"]), 11)
self.assertEqual(response["result"][0]["values"][0]["count"], 1)

@override_settings(HOGQL_INSIGHTS_OVERRIDE=True)
def test_insight_with_filters_via_hogql(self) -> None:
filter_dict = {"insight": "LIFECYCLE", "events": [{"id": "$pageview"}]}

Insight.objects.create(
filters=Filter(data=filter_dict).to_dict(),
team=self.team,
short_id="xyz123",
)

# fresh response
response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123")
self.assertEqual(response.status_code, status.HTTP_200_OK)

self.assertEqual(len(response.json()["results"]), 1)
self.assertEqual(response.json()["results"][0]["short_id"], "xyz123")
self.assertEqual(response.json()["results"][0]["filters"]["events"][0]["id"], "$pageview")

# cached response
response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123")
self.assertEqual(response.status_code, status.HTTP_200_OK)

self.assertEqual(len(response.json()["results"]), 1)
self.assertEqual(response.json()["results"][0]["short_id"], "xyz123")
self.assertEqual(response.json()["results"][0]["filters"]["events"][0]["id"], "$pageview")
4 changes: 3 additions & 1 deletion posthog/caching/fetch_from_cache.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Optional, Union
from typing import Any, List, Optional, Union

from django.utils.timezone import now
from prometheus_client import Counter
Expand All @@ -9,6 +9,7 @@
from posthog.caching.insight_cache import update_cached_state
from posthog.models import DashboardTile, Insight
from posthog.models.dashboard import Dashboard
from posthog.schema import QueryTiming
from posthog.utils import get_safe_cache

insight_cache_read_counter = Counter(
Expand All @@ -24,6 +25,7 @@ class InsightResult:
is_cached: bool
timezone: Optional[str]
next_allowed_client_refresh: Optional[datetime] = None
timings: Optional[List[QueryTiming]] = None


@dataclass(frozen=True)
Expand Down
48 changes: 48 additions & 0 deletions posthog/hogql_queries/legacy_compatibility/process_insight.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from posthog.caching.fetch_from_cache import InsightResult
from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query
from posthog.hogql_queries.lifecycle_query_runner import LifecycleQueryRunner
from posthog.hogql_queries.query_runner import CachedQueryResponse
from posthog.models.filters.filter import Filter as LegacyFilter
from posthog.models.filters.path_filter import PathFilter as LegacyPathFilter
from posthog.models.filters.retention_filter import RetentionFilter as LegacyRetentionFilter
from posthog.models.filters.stickiness_filter import StickinessFilter as LegacyStickinessFilter
from posthog.models.insight import Insight
from posthog.models.team.team import Team
from posthog.types import InsightQueryNode


# sync with frontend/src/queries/utils.ts
def is_insight_with_hogql_support(insight: Insight):
if insight.filters.get("insight") == "LIFECYCLE":
return True
else:
return False


def _insight_to_query(insight: Insight, team: Team) -> InsightQueryNode:
if insight.filters.get("insight") == "RETENTION":
filter = LegacyRetentionFilter(data=insight.filters, team=team)
elif insight.filters.get("insight") == "PATHS":
filter = LegacyPathFilter(data=insight.filters, team=team)
elif insight.filters.get("insight") == "STICKINESS":
filter = LegacyStickinessFilter(data=insight.filters, team=team)
else:
filter = LegacyFilter(data=insight.filters, team=team)
return filter_to_query(filter)


def _cached_response_to_insight_result(response: CachedQueryResponse) -> InsightResult:
result = InsightResult(**response, cache_key="todo", timezone="UTC") # TODO cache_key, timezone
return result


def process_insight(insight: Insight, team: Team) -> InsightResult:
query = _insight_to_query(insight, team)
# response = process_query(team, query_json=query)
# refresh_requested = refresh_requested_by_client(request) if request else False
lifecycle_query_runner = LifecycleQueryRunner(query=query, team=team)
# return _unwrap_pydantic_dict(lifecycle_query_runner.run(refresh_requested=refresh_requested))
response = lifecycle_query_runner.run(refresh_requested=False)

result = _cached_response_to_insight_result(response)
return result
4 changes: 4 additions & 0 deletions posthog/hogql_queries/query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ class CachedQueryResponse(QueryResponse):
is_cached: bool
last_refresh: str
next_allowed_client_refresh: str
cache_key: str
timezone: str


class QueryRunner(ABC):
Expand Down Expand Up @@ -90,6 +92,8 @@ def run(self, refresh_requested: bool) -> CachedQueryResponse:
fresh_response_dict["next_allowed_client_refresh"] = (datetime.now() + self._refresh_frequency()).strftime(
"%Y-%m-%dT%H:%M:%SZ"
)
fresh_response_dict["cache_key"] = cache_key
fresh_response_dict["timezone"] = self.team.timezone
fresh_response = CachedQueryResponse(**fresh_response_dict)
cache.set(cache_key, fresh_response, settings.CACHED_RESULTS_TTL)
QUERY_CACHE_WRITE_COUNTER.labels(team_id=self.team.pk).inc()
Expand Down
20 changes: 20 additions & 0 deletions posthog/models/team/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,26 @@ def _person_on_events_v2_querying_enabled(self) -> bool:

return get_instance_setting("PERSON_ON_EVENTS_V2_ENABLED")

@property
def hogql_insights_enabled(self) -> bool:
if settings.HOGQL_INSIGHTS_OVERRIDE is not None:
return settings.HOGQL_INSIGHTS_OVERRIDE

# on PostHog Cloud, use the feature flag
if is_cloud():
return posthoganalytics.feature_enabled(
"hogql-insights",
str(self.uuid),
groups={"organization": str(self.organization.id)},
group_properties={
"organization": {"id": str(self.organization.id), "created_at": self.organization.created_at}
},
only_evaluate_locally=True,
send_feature_flag_events=False,
)
else:
return False

@property
def strict_caching_enabled(self) -> bool:
enabled_teams = get_list(get_instance_setting("STRICT_CACHING_TEAMS"))
Expand Down
3 changes: 3 additions & 0 deletions posthog/settings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@
# Only written in specific scripts - do not use outside of them.
PERSON_ON_EVENTS_V2_OVERRIDE = get_from_env("PERSON_ON_EVENTS_V2_OVERRIDE", optional=True, type_cast=str_to_bool)

# Wether to use insight queries converted to HogQL.
HOGQL_INSIGHTS_OVERRIDE = get_from_env("HOGQL_INSIGHTS_OVERRIDE", optional=True, type_cast=str_to_bool)

HOOK_EVENTS: Dict[str, str] = {}

# Support creating multiple organizations in a single instance. Requires a premium license.
Expand Down
6 changes: 6 additions & 0 deletions posthog/settings/dynamic_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
"Whether to use query path using person_id and person_properties on events or the old query",
bool,
),
"HOGQL_INSIGHTS_OVERRIDE": (
get_from_env("HOGQL_INSIGHTS_OVERRIDE", False, type_cast=str_to_bool),
"Whether to use insight queries converted to use hogql internally or the old queries",
bool,
),
"GROUPS_ON_EVENTS_ENABLED": (
get_from_env("GROUPS_ON_EVENTS_ENABLED", False, type_cast=str_to_bool),
"Whether to use query path using group_properties on events or the old query",
Expand Down Expand Up @@ -207,6 +212,7 @@
"ASYNC_MIGRATIONS_OPT_OUT_EMAILS",
"PERSON_ON_EVENTS_ENABLED",
"PERSON_ON_EVENTS_V2_ENABLED",
"HOGQL_INSIGHTS_OVERRIDE",
"GROUPS_ON_EVENTS_ENABLED",
"STRICT_CACHING_TEAMS",
"SLACK_APP_CLIENT_ID",
Expand Down

0 comments on commit 258b88a

Please sign in to comment.