Skip to content

Commit

Permalink
warming and filter
Browse files Browse the repository at this point in the history
  • Loading branch information
aspicer committed Oct 18, 2024
1 parent a9b5a60 commit 92b3538
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 15 deletions.
2 changes: 2 additions & 0 deletions posthog/caching/warming.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from posthog.caching.utils import largest_teams
from posthog.clickhouse.query_tagging import tag_queries
from posthog.errors import CHQueryErrorTooManySimultaneousQueries
from posthog.hogql.constants import LimitContext
from posthog.hogql_queries.query_cache import QueryCacheManager
from posthog.hogql_queries.legacy_compatibility.flagged_conversion_manager import conversion_to_query_based
from posthog.hogql_queries.query_runner import ExecutionMode
Expand Down Expand Up @@ -145,6 +146,7 @@ def warm_insight_cache_task(insight_id: int, dashboard_id: Optional[int]):
# We need an execution mode with recent cache:
# - in case someone refreshed after this task was triggered
# - if insight + dashboard combinations have the same cache key, we prevent needless recalculations
limit_context=LimitContext.QUERY_ASYNC,
execution_mode=ExecutionMode.RECENT_CACHE_CALCULATE_BLOCKING_IF_STALE,
insight_id=insight_id,
dashboard_id=dashboard_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def get_query(self) -> ast.SelectQuery:
'{breakdown_attribution_string}',
'{self.context.funnelsFilter.funnelOrderType}',
{prop_vals},
{udf_event_array_filter(self.context.funnelsFilter.funnelOrderType)}
{udf_event_array_filter()}
)) as af_tuple,
toTimeZone(toDateTime(_toUInt64(af_tuple.1)), '{self.context.team.timezone}') as entrance_period_start,
af_tuple.2 as success_bool,
Expand Down
28 changes: 14 additions & 14 deletions posthog/hogql_queries/insights/funnels/funnel_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,26 @@
from posthog.hogql import ast
from posthog.hogql.parser import parse_select, parse_expr
from posthog.hogql_queries.insights.funnels.base import FunnelBase
from posthog.schema import BreakdownType, BreakdownAttributionType, StepOrderValue
from posthog.schema import BreakdownType, BreakdownAttributionType
from posthog.utils import DATERANGE_MAP

TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"
HUMAN_READABLE_TIMESTAMP_FORMAT = "%-d-%b-%Y"


# This is used to reduce the number of events we look at in strict funnels
# We remove a non-matching event if there was already one before it (that don't have the same timestamp)
# This is a simple heuristic to reduce the number of events we look at in UDF funnels (and therefore the number that are serialized and sent over)
# We remove an event if it matches one or zero steps and the events directly before and after it are of the same type (and don't share its timestamp)
# arrayRotateRight turns [1,2,3] into [3,1,2]
# For some reason, this uses much less memory than using indexing in clickhouse to check the previous element
def udf_event_array_filter(funnelOrderType: StepOrderValue | None):
    """Return the ClickHouse expression that supplies the events array to the funnel UDF.

    For every order type other than ``"strict"`` the array is passed through
    untouched, i.e. the plain identifier ``events_array`` is returned.

    For ``"strict"`` funnels the array is wrapped in an ``arrayFilter`` call
    that walks ``events_array`` in parallel with a copy rotated right by one,
    so the lambda sees each element ``x`` next to its predecessor ``x2``.
    An element is dropped when both ``x.4`` and ``x2.4`` are empty,
    ``x.3 == x2.3`` and ``x.1 > x2.1``.

    NOTE(review): the tuple layout is not visible here; presumably ``.1`` is
    the timestamp, ``.4`` the list of matched steps and ``.3`` a
    breakdown/property value - confirm against the code that builds
    ``events_array``.

    Args:
        funnelOrderType: The funnel's step order setting, or ``None``.

    Returns:
        A SQL fragment (string) to interpolate into the funnel query.
    """
    if funnelOrderType != "strict":
        return "events_array"

    # Plain string literal: nothing is interpolated, so no f-string is needed.
    # Because of the rotation the first element is compared against the last one.
    return (
        "\n"
        "arrayFilter(\n"
        "(x, x2) -> not (empty(x.4) and empty(x2.4) and x.3 == x2.3 and x.1 > x2.1),\n"
        "events_array,\n"
        "arrayRotateRight(events_array, 1))\n"
    )
# arrayRotateLeft turns [1,2,3] into [2,3,1]
# For some reason, using these uses much less memory than using indexing in clickhouse to check the previous and next element
def udf_event_array_filter():
    """Return the ClickHouse expression that thins out ``events_array`` for the funnel UDF.

    The expression is an ``arrayFilter`` over three parallel arrays:
    ``events_array`` itself, the same array rotated right by one (giving each
    element's predecessor, ``x_before``) and rotated left by one (giving its
    successor, ``x_after``). An element ``x`` is dropped only when every one
    of the conditions listed below holds.

    NOTE(review): the tuple layout is not visible here; presumably ``.1`` is
    the timestamp, ``.4`` the list of matched steps and ``.3`` a
    breakdown/property value - confirm against the code that builds
    ``events_array``.

    Returns:
        A SQL fragment (string) to interpolate into the funnel query.
    """
    # All of these must be true for `x` to be considered redundant.
    # The rotations wrap around, so the first element's "before" is the last
    # element and vice versa; the strict timestamp comparisons are what guard
    # those edge pairs (assuming the array is time-ordered - TODO confirm).
    redundancy_checks = (
        "length(x.4) <= 1",
        "x.4 == x_before.4",
        "x.4 == x_after.4",
        "x.3 == x_before.3",
        "x.3 == x_after.3",
        "x.1 > x_before.1",
        "x.1 < x_after.1",
    )
    is_redundant = " and ".join(redundancy_checks)

    # Leading/trailing empty entries reproduce the newline that opens and
    # closes the original multi-line literal.
    sql_lines = (
        "",
        "arrayFilter(",
        f"(x, x_before, x_after) -> not ({is_redundant}),",
        "events_array,",
        "arrayRotateRight(events_array, 1),",
        "arrayRotateLeft(events_array, 1))",
        "",
    )
    return "\n".join(sql_lines)


class FunnelUDF(FunnelBase):
Expand Down Expand Up @@ -115,7 +115,7 @@ def matched_event_arrays_selects():
'{breakdown_attribution_string}',
'{self.context.funnelsFilter.funnelOrderType}',
{prop_vals},
{udf_event_array_filter(self.context.funnelsFilter.funnelOrderType)}
{udf_event_array_filter()}
)) as af_tuple,
af_tuple.1 as step_reached,
af_tuple.1 + 1 as steps, -- Backward compatibility
Expand Down

0 comments on commit 92b3538

Please sign in to comment.