Skip to content

Commit

Permalink
feat(hogql): placeholders and cleanup (#14116)
Browse files Browse the repository at this point in the history
* feat(hogql): placeholders, cleanup

* this wasn't used

* placeholder tests

* Update snapshots

* fix

* use ANSI SQL compatible strings

* Update snapshots

* final tweaks

* visitor pattern

* placeholders via the visitor pattern

* test everything visitor

* must return

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
mariusandra and github-actions[bot] authored Feb 8, 2023
1 parent b68bfcf commit 3940ab5
Show file tree
Hide file tree
Showing 28 changed files with 2,580 additions and 2,183 deletions.
49 changes: 33 additions & 16 deletions posthog/hogql/ast.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,32 @@
from __future__ import annotations

import re
from enum import Enum
from typing import Any, List, Optional
from typing import Any, List, Literal

from pydantic import BaseModel, Extra

# NOTE: when you add new AST fields or nodes, add them to EverythingVisitor as well!

camel_case_pattern = re.compile(r"(?<!^)(?=[A-Z])")


class AST(BaseModel):
    # Common pydantic base for the HogQL AST node classes in this module.

    class Config:
        # Fields that are not declared on a node are rejected instead of being silently accepted.
        extra = Extra.forbid

    def accept(self, visitor):
        """Dispatch this node to the visitor method named after the node's class.

        The class name is converted from CamelCase to snake_case using the
        module-level ``camel_case_pattern`` (e.g. ``BinaryOperation`` becomes
        ``binary_operation``) and prefixed with ``visit_``.

        Returns whatever the visitor method returns. Raises ``AttributeError``
        when the visitor does not define the expected method, because the
        lookup is done without a fallback.
        """
        snake_name = camel_case_pattern.sub("_", self.__class__.__name__).lower()
        handler = getattr(visitor, f"visit_{snake_name}")
        return handler(self)


# Marker base class for expression nodes: it declares no fields of its own and
# exists so that other nodes can annotate their children as `Expr`.
class Expr(AST):
pass


class Column(AST):
class Alias(Expr):
alias: str
expr: Expr
alias: Optional[str] = None


class BinaryOperationType(str, Enum):
Expand All @@ -34,17 +43,18 @@ class BinaryOperation(Expr):
op: BinaryOperationType


class BooleanOperationType(str, Enum):
And = "and"
Or = "or"
class And(Expr):
class Config:
extra = Extra.forbid

exprs: List[Expr]


class BooleanOperation(Expr):
class Or(Expr):
class Config:
extra = Extra.forbid

op: BooleanOperationType
values: List[Expr]
exprs: List[Expr]


class CompareOperationType(str, Enum):
Expand All @@ -58,6 +68,8 @@ class CompareOperationType(str, Enum):
ILike = "ilike"
NotLike = "not like"
NotILike = "not ilike"
In = "in"
NotIn = "not in"


class CompareOperation(Expr):
Expand All @@ -66,20 +78,25 @@ class CompareOperation(Expr):
op: CompareOperationType


class NotOperation(Expr):
class Not(Expr):
expr: Expr


# An expression paired with a sort direction. `order` only accepts "ASC" or
# "DESC" and defaults to "ASC".
# NOTE(review): presumably one entry of an ORDER BY clause — confirm against the parser.
class OrderExpr(Expr):
expr: Expr
order: Literal["ASC", "DESC"] = "ASC"


# Expression node that wraps a single value; the value is typed `Any`, so no
# restriction on its Python type is enforced here.
class Constant(Expr):
value: Any


class FieldAccess(Expr):
field: str
class Field(Expr):
chain: List[str]


class FieldAccessChain(Expr):
chain: List[str]
class Placeholder(Expr):
field: str


class Call(Expr):
Expand Down
126 changes: 126 additions & 0 deletions posthog/hogql/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Fields that may be selected directly in the events query.
EVENT_FIELDS = [
    "id",
    "uuid",
    "event",
    "timestamp",
    "properties",
    "elements_chain",
    "created_at",
    "distinct_id",
    "team_id",
]

# Fields that may be selected under the "person.*" prefix in the events query.
EVENT_PERSON_FIELDS = [
    "id",
    "created_at",
    "properties",
]

# Allow-list of HogQL function names (keys) and the ClickHouse function each one
# is translated to (values). Most entries map to themselves; a few are renamed.
CLICKHOUSE_FUNCTIONS = {
    # --- arithmetic ---
    "abs": "abs",
    "max2": "max2",
    "min2": "min2",
    # --- type conversions ---
    "toInt": "toInt64OrNull",
    "toFloat": "toFloat64OrNull",
    "toDecimal": "toDecimal64OrNull",
    "toDate": "toDateOrNull",
    "toDateTime": "parseDateTimeBestEffort",
    "toIntervalSecond": "toIntervalSecond",
    "toIntervalMinute": "toIntervalMinute",
    "toIntervalHour": "toIntervalHour",
    "toIntervalDay": "toIntervalDay",
    "toIntervalWeek": "toIntervalWeek",
    "toIntervalMonth": "toIntervalMonth",
    "toIntervalQuarter": "toIntervalQuarter",
    "toIntervalYear": "toIntervalYear",
    "toString": "toString",
    # --- date functions ---
    "now": "now",
    "NOW": "now",  # upper-case spelling is accepted and mapped to the same function
    "toMonday": "toMonday",
    "toStartOfYear": "toStartOfYear",
    "toStartOfQuarter": "toStartOfQuarter",
    "toStartOfMonth": "toStartOfMonth",
    "toStartOfWeek": "toStartOfWeek",
    "toStartOfDay": "toStartOfDay",
    "toStartOfHour": "toStartOfHour",
    "toStartOfMinute": "toStartOfMinute",
    "toStartOfSecond": "toStartOfSecond",
    "toStartOfFiveMinutes": "toStartOfFiveMinutes",
    "toStartOfTenMinutes": "toStartOfTenMinutes",
    "toStartOfFifteenMinutes": "toStartOfFifteenMinutes",
    "toTimezone": "toTimezone",
    "age": "age",
    "dateDiff": "dateDiff",
    "dateTrunc": "dateTrunc",
    "formatDateTime": "formatDateTime",
    # --- string functions ---
    "length": "lengthUTF8",
    "empty": "empty",
    "notEmpty": "notEmpty",
    "leftPad": "leftPad",
    "rightPad": "rightPad",
    "lower": "lower",
    "upper": "upper",
    "repeat": "repeat",
    "format": "format",
    "concat": "concat",
    "coalesce": "coalesce",
    "substring": "substringUTF8",
    "appendTrailingCharIfAbsent": "appendTrailingCharIfAbsent",
    "endsWith": "endsWith",
    "startsWith": "startsWith",
    "trim": "trimBoth",
    "trimLeft": "trimLeft",
    "trimRight": "trimRight",
    "extractTextFromHTML": "extractTextFromHTML",
    "like": "like",
    "ilike": "ilike",
    "notLike": "notLike",
    "replace": "replace",
    "replaceOne": "replaceOne",
    # --- array functions ---
    "tuple": "tuple",
    # --- conditional ---
    "ifElse": "if",
    "multiIf": "multiIf",
    # --- rounding ---
    "round": "round",
    "floor": "floor",
    "ceil": "ceil",
    "trunc": "trunc",
}
# Aggregation functions that HogQL permits, keyed by function name.
# NOTE(review): the integer values look like the number of arguments each
# aggregation expects (the "...If" variants are one higher than their base
# function) — confirm against the code that consumes this table.
HOGQL_AGGREGATIONS = {
    "count": 0,
    "countIf": 1,
    "countDistinct": 1,
    "countDistinctIf": 2,
    "min": 1,
    "minIf": 2,
    "max": 1,
    "maxIf": 2,
    "sum": 1,
    "sumIf": 2,
    "avg": 1,
    "avgIf": 2,
    "any": 1,
    "anyIf": 2,
}
# Keywords that are handed to ClickHouse as-is, without any transformation.
KEYWORDS = [
    "true",
    "false",
    "null",
]

# Allow-list of the fields returned by a `select *` from events.
# Person and group fields will be nested later.
SELECT_STAR_FROM_EVENTS_FIELDS = [
    # event columns
    "uuid",
    "event",
    "properties",
    "timestamp",
    "team_id",
    "distinct_id",
    "elements_chain",
    "created_at",
    # person columns, addressed through the "person." prefix
    "person.id",
    "person.created_at",
    "person.properties",
]
23 changes: 23 additions & 0 deletions posthog/hogql/context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from dataclasses import dataclass, field
from typing import Dict, List, Literal, Optional


@dataclass
class HogQLFieldAccess:
    """Record of a single field or property access.

    Instances are collected in ``HogQLContext.field_access_logs``.
    """

    # NOTE(review): presumably the access chain as written in the expression — confirm.
    input: List[str]
    # Category of the access; restricted to the listed literals, or None.
    type: Optional[
        Literal["event", "event.properties", "person", "person.properties"]
    ]
    # NOTE(review): presumably the name of the accessed field or property; may be None.
    field: Optional[str]
    # NOTE(review): presumably the SQL emitted for this access — confirm against the printer.
    sql: str


@dataclass
class HogQLContext:
    """Context given to a HogQL expression printer.

    Every attribute has a default, so ``HogQLContext()`` yields a usable
    context whose ``values`` dict and ``field_access_logs`` list are fresh,
    per-instance objects.
    """

    # If set, will save string constants to this dict. Inlines strings into the query if None.
    # Annotated as Optional because None is a meaningful value here, not just "missing".
    values: Optional[Dict] = field(default_factory=dict)
    # List of field and property accesses found in the expression
    field_access_logs: List[HogQLFieldAccess] = field(default_factory=list)
    # Flags reporting whether the translate_hogql calls made since these were
    # last reset to False contained any of the following.
    found_aggregation: bool = False
    # NOTE(review): this one defaults to True, so it reads more like an input
    # setting than a "found" flag — confirm the intended semantics.
    using_person_on_events: bool = True
1 change: 1 addition & 0 deletions posthog/hogql/grammar/HogQLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ HEXADECIMAL_LITERAL: '0' X HEX_DIGIT+;
// It's important that quote-symbol is a single character.
STRING_LITERAL: QUOTE_SINGLE ( ~([\\']) | ESCAPE_CHAR | (QUOTE_SINGLE QUOTE_SINGLE) )* QUOTE_SINGLE;
PLACEHOLDER: LBRACE ( ~([\\}]) | ESCAPE_CHAR | (LBRACE LBRACE) )* RBRACE;
// Alphabet and allowed symbols
Expand Down
5 changes: 4 additions & 1 deletion posthog/hogql/grammar/HogQLLexer.interp

Large diffs are not rendered by default.

Loading

0 comments on commit 3940ab5

Please sign in to comment.