Skip to content

Commit

Permalink
Deduplicate rows in subquery (#409)
Browse files (browse the repository at this point in the history)
* Deduplicate rows in subquery

* Rename columns

* Use correct feature names

* Select k.* instead of iterating through fields
  • Loading branch information
Chen Zhiling authored and feast-ci-bot committed Jan 6, 2020
1 parent 2b291a3 commit 57b3887
Showing 1 changed file with 10 additions and 2 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,7 +15,7 @@ SELECT
{{ featureSet.entities | join(', ')}},
false AS is_entity_table
FROM `{{projectId}}.{{datasetId}}.{{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', interval {{ featureSet.maxAge }} second)
)
), joined AS (
SELECT
uuid,
event_timestamp,
Expand Down Expand Up @@ -44,4 +44,12 @@ SELECT
{% endfor %}
FROM `{{projectId}}.{{datasetId}}.{{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', interval {{ featureSet.maxAge }} second)
) USING ({{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, created_timestamp, {{ featureSet.entities | join(', ')}})
WHERE is_entity_table
WHERE is_entity_table
)
/*
  Deduplicate the joined result: rows of `joined` are grouped by uuid, one row
  per group is kept as the struct `k`, and `k.*` expands that struct back into
  its columns. ARRAY_AGG has no ORDER BY here, so which of the rows sharing a
  uuid is kept is not specified.
*/
SELECT
  k.*
FROM (
  SELECT
    ARRAY_AGG(joined_row LIMIT 1)[OFFSET(0)] AS k
  FROM joined AS joined_row
  GROUP BY uuid
)

0 comments on commit 57b3887

Please sign in to comment.