diff --git a/docker-compose.yml b/docker-compose.yml index 75eb5bf9..8cbb88e9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,3 @@ -version: "3" services: pgstac: container_name: pgstac diff --git a/docker/pypgstac/Dockerfile b/docker/pypgstac/Dockerfile index b5f7bfb9..5a7fced2 100644 --- a/docker/pypgstac/Dockerfile +++ b/docker/pypgstac/Dockerfile @@ -21,7 +21,7 @@ FROM pyrustbase as pypgstac COPY ./src/pypgstac/pyproject.toml /tmp/pyproject.toml WORKDIR /tmp RUN \ - toml-to-req --include-optional \ + toml-to-req --optional-lists test,build,dev,psycopg,migrations \ && pip install -r /tmp/requirements.txt COPY docker/pypgstac/bin /opt/docker/pypgstac/bin COPY src/pypgstac /opt/src/pypgstac diff --git a/docker/pypgstac/bin/test b/docker/pypgstac/bin/test index 4655510a..b6334d20 100755 --- a/docker/pypgstac/bin/test +++ b/docker/pypgstac/bin/test @@ -198,7 +198,7 @@ while [[ $# -gt 0 ]] ;; --vv) - MESSAGELOG=1 + MESSAGEDEBUG=1 shift ;; @@ -255,8 +255,8 @@ while [[ $# -gt 0 ]] CLIENTMESSAGES='warning' -[ $MESSAGENOTICE -eq 1 ] && CLIENTMESSAGES='notice' -[ $MESSAGELOG -eq 1 ] && CLIENTMESSAGES='log' +[[ $MESSAGENOTICE -eq 1 ]] && CLIENTMESSAGES='notice' +[[ $MESSAGEDEBUG -eq 1 ]] && CLIENTMESSAGES='debug1' echo $CLIENTMESSAGES if [[ ($FORMATTING -eq 0) && ($SETUPDB -eq 0) && ($PGTAP -eq 0) && ($BASICSQL -eq 0) && ($PYPGSTAC -eq 0) && ($MIGRATIONS -eq 0) ]] diff --git a/src/pgstac/migrations/pgstac.0.8.5-unreleased.sql b/src/pgstac/migrations/pgstac.0.8.5-unreleased.sql index c5637359..737a3450 100644 --- a/src/pgstac/migrations/pgstac.0.8.5-unreleased.sql +++ b/src/pgstac/migrations/pgstac.0.8.5-unreleased.sql @@ -196,6 +196,15 @@ $$ LANGUAGE SQL IMMUTABLE STRICT; -- BEGIN migra calculated SQL set check_function_bodies = off; +CREATE OR REPLACE FUNCTION pgstac.search_fromhash(_hash text) + RETURNS searches + LANGUAGE sql + STRICT +AS $function$ + SELECT * FROM search_query((SELECT search FROM searches WHERE hash=_hash LIMIT 1)); +$function$ +; + CREATE OR REPLACE FUNCTION pgstac.collection_search(_search jsonb DEFAULT '{}'::jsonb) RETURNS jsonb LANGUAGE plpgsql @@ -459,6 +468,112 @@ END; $function$ ; +CREATE OR REPLACE FUNCTION pgstac.geometrysearch(geom geometry, queryhash text, fields jsonb DEFAULT NULL::jsonb, _scanlimit integer DEFAULT 10000, _limit integer DEFAULT 100, _timelimit interval DEFAULT '00:00:05'::interval, exitwhenfull boolean DEFAULT true, skipcovered boolean DEFAULT true) + RETURNS jsonb + LANGUAGE plpgsql +AS $function$ +DECLARE + search searches%ROWTYPE; + curs refcursor; + _where text; + query text; + iter_record items%ROWTYPE; + out_records jsonb := '{}'::jsonb[]; + exit_flag boolean := FALSE; + counter int := 1; + scancounter int := 1; + remaining_limit int := _scanlimit; + tilearea float; + unionedgeom geometry; + clippedgeom geometry; + unionedgeom_area float := 0; + prev_area float := 0; + excludes text[]; + includes text[]; + +BEGIN + DROP TABLE IF EXISTS pgstac_results; + CREATE TEMP TABLE pgstac_results (content jsonb) ON COMMIT DROP; + + -- If the passed in geometry is not an area set exitwhenfull and skipcovered to false + IF ST_GeometryType(geom) !~* 'polygon' THEN + RAISE NOTICE 'GEOMETRY IS NOT AN AREA'; + skipcovered = FALSE; + exitwhenfull = FALSE; + END IF; + + -- If skipcovered is true then you will always want to exit when the passed in geometry is full + IF skipcovered THEN + exitwhenfull := TRUE; + END IF; + + search := search_fromhash(queryhash); + + IF search IS NULL THEN + RAISE EXCEPTION 'Search with Query Hash % Not Found', 
queryhash; + END IF; + + tilearea := st_area(geom); + _where := format('%s AND st_intersects(geometry, %L::geometry)', search._where, geom); + + + FOR query IN SELECT * FROM partition_queries(_where, search.orderby) LOOP + query := format('%s LIMIT %L', query, remaining_limit); + RAISE NOTICE '%', query; + OPEN curs FOR EXECUTE query; + LOOP + FETCH curs INTO iter_record; + EXIT WHEN NOT FOUND; + IF exitwhenfull OR skipcovered THEN -- If we are not using exitwhenfull or skipcovered, we do not need to do expensive geometry operations + clippedgeom := st_intersection(geom, iter_record.geometry); + + IF unionedgeom IS NULL THEN + unionedgeom := clippedgeom; + ELSE + unionedgeom := st_union(unionedgeom, clippedgeom); + END IF; + + unionedgeom_area := st_area(unionedgeom); + + IF skipcovered AND prev_area = unionedgeom_area THEN + scancounter := scancounter + 1; + CONTINUE; + END IF; + + prev_area := unionedgeom_area; + + RAISE NOTICE '% % % %', unionedgeom_area/tilearea, counter, scancounter, ftime(); + END IF; + RAISE NOTICE '% %', iter_record, content_hydrate(iter_record, fields); + INSERT INTO pgstac_results (content) VALUES (content_hydrate(iter_record, fields)); + + IF counter >= _limit + OR scancounter > _scanlimit + OR ftime() > _timelimit + OR (exitwhenfull AND unionedgeom_area >= tilearea) + THEN + exit_flag := TRUE; + EXIT; + END IF; + counter := counter + 1; + scancounter := scancounter + 1; + + END LOOP; + CLOSE curs; + EXIT WHEN exit_flag; + remaining_limit := _scanlimit - scancounter; + END LOOP; + + SELECT jsonb_agg(content) INTO out_records FROM pgstac_results WHERE content IS NOT NULL; + + RETURN jsonb_build_object( + 'type', 'FeatureCollection', + 'features', coalesce(out_records, '[]'::jsonb) + ); +END; +$function$ +; + CREATE OR REPLACE FUNCTION pgstac.search(_search jsonb DEFAULT '{}'::jsonb) RETURNS jsonb LANGUAGE plpgsql @@ -502,18 +617,18 @@ BEGIN token_prev := token.prev; token_item := token.item; token_where := get_token_filter(_search->'sortby', token_item, token_prev, FALSE); - RAISE LOG 'TOKEN_WHERE: % (%ms from search start)', token_where, age_ms(timer); + RAISE DEBUG 'TOKEN_WHERE: % (%ms from search start)', token_where, age_ms(timer); IF token_prev THEN -- if we are using a prev token, we know has_next is true - RAISE LOG 'There is a previous token, so automatically setting has_next to true'; + RAISE DEBUG 'There is a previous token, so automatically setting has_next to true'; has_next := TRUE; orderby := sort_sqlorderby(_search, TRUE); ELSE - RAISE LOG 'There is a next token, so automatically setting has_prev to true'; + RAISE DEBUG 'There is a next token, so automatically setting has_prev to true'; has_prev := TRUE; END IF; ELSE -- if there was no token, we know there is no prev - RAISE LOG 'There is no token, so we know there is no prev. setting has_prev to false'; + RAISE DEBUG 'There is no token, so we know there is no prev. 
setting has_prev to false'; has_prev := FALSE; END IF; @@ -547,9 +662,9 @@ BEGIN END IF; RAISE NOTICE 'Query returned % records.', jsonb_array_length(out_records); - RAISE LOG 'TOKEN: % %', token_item.id, token_item.collection; - RAISE LOG 'RECORD_1: % %', out_records->0->>'id', out_records->0->>'collection'; - RAISE LOG 'RECORD-1: % %', out_records->-1->>'id', out_records->-1->>'collection'; + RAISE DEBUG 'TOKEN: % %', token_item.id, token_item.collection; + RAISE DEBUG 'RECORD_1: % %', out_records->0->>'id', out_records->0->>'collection'; + RAISE DEBUG 'RECORD-1: % %', out_records->-1->>'id', out_records->-1->>'collection'; -- REMOVE records that were from our token IF out_records->0->>'id' = token_item.id AND out_records->0->>'collection' = token_item.collection THEN @@ -615,6 +730,330 @@ END; $function$ ; +CREATE OR REPLACE FUNCTION pgstac.search_query(_search jsonb DEFAULT '{}'::jsonb, updatestats boolean DEFAULT false, _metadata jsonb DEFAULT '{}'::jsonb) + RETURNS searches + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +DECLARE + search searches%ROWTYPE; + cached_search searches%ROWTYPE; + pexplain jsonb; + t timestamptz; + i interval; + doupdate boolean := FALSE; + insertfound boolean := FALSE; + ro boolean := pgstac.readonly(); + found_search text; +BEGIN + RAISE NOTICE 'SEARCH: %', _search; + -- Calculate hash, where clause, and order by statement + search.search := _search; + search.metadata := _metadata; + search.hash := search_hash(_search, _metadata); + search._where := stac_search_to_where(_search); + search.orderby := sort_sqlorderby(_search); + search.lastused := now(); + search.usecount := 1; + + -- If we are in read only mode, directly return search + IF ro THEN + RETURN search; + END IF; + + RAISE NOTICE 'Updating Statistics for search: %s', search; + -- Update statistics for times used and and when last used + -- If the entry is locked, rather than waiting, skip updating the stats + INSERT INTO searches (search, lastused, usecount, metadata) + VALUES (search.search, now(), 1, search.metadata) + ON CONFLICT DO NOTHING + RETURNING * INTO cached_search + ; + + IF NOT FOUND OR cached_search IS NULL THEN + UPDATE searches SET + lastused = now(), + usecount = searches.usecount + 1 + WHERE hash = ( + SELECT hash FROM searches WHERE hash=search.hash FOR UPDATE SKIP LOCKED + ) + RETURNING * INTO cached_search + ; + END IF; + + IF cached_search IS NOT NULL THEN + cached_search._where = search._where; + cached_search.orderby = search.orderby; + RETURN cached_search; + END IF; + RETURN search; + +END; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.search_rows(_where text DEFAULT 'TRUE'::text, _orderby text DEFAULT 'datetime DESC, id DESC'::text, partitions text[] DEFAULT NULL::text[], _limit integer DEFAULT 10) + RETURNS SETOF items + LANGUAGE plpgsql + SET search_path TO 'pgstac', 'public' +AS $function$ +DECLARE + base_query text; + query text; + sdate timestamptz; + edate timestamptz; + n int; + records_left int := _limit; + timer timestamptz := clock_timestamp(); + full_timer timestamptz := clock_timestamp(); +BEGIN +IF _where IS NULL OR trim(_where) = '' THEN + _where = ' TRUE '; +END IF; +RAISE NOTICE 'Getting chunks for % %', _where, _orderby; + +base_query := $q$ + SELECT * FROM items + WHERE + datetime >= %L AND datetime < %L + AND (%s) + ORDER BY %s + LIMIT %L +$q$; + +IF _orderby ILIKE 'datetime d%' THEN + FOR sdate, edate IN SELECT * FROM chunker(_where) ORDER BY 1 DESC LOOP + RAISE NOTICE 'Running Query for % to %. 
%', sdate, edate, age_ms(full_timer); + query := format( + base_query, + sdate, + edate, + _where, + _orderby, + records_left + ); + RAISE DEBUG 'QUERY: %', query; + timer := clock_timestamp(); + RETURN QUERY EXECUTE query; + + GET DIAGNOSTICS n = ROW_COUNT; + records_left := records_left - n; + RAISE NOTICE 'Returned %/% Rows From % to %. % to go. Time: %ms', n, _limit, sdate, edate, records_left, age_ms(timer); + timer := clock_timestamp(); + IF records_left <= 0 THEN + RAISE NOTICE 'SEARCH_ROWS TOOK %ms', age_ms(full_timer); + RETURN; + END IF; + END LOOP; +ELSIF _orderby ILIKE 'datetime a%' THEN + FOR sdate, edate IN SELECT * FROM chunker(_where) ORDER BY 1 ASC LOOP + RAISE NOTICE 'Running Query for % to %. %', sdate, edate, age_ms(full_timer); + query := format( + base_query, + sdate, + edate, + _where, + _orderby, + records_left + ); + RAISE DEBUG 'QUERY: %', query; + timer := clock_timestamp(); + RETURN QUERY EXECUTE query; + + GET DIAGNOSTICS n = ROW_COUNT; + records_left := records_left - n; + RAISE NOTICE 'Returned %/% Rows From % to %. % to go. Time: %ms', n, _limit, sdate, edate, records_left, age_ms(timer); + timer := clock_timestamp(); + IF records_left <= 0 THEN + RAISE NOTICE 'SEARCH_ROWS TOOK %ms', age_ms(full_timer); + RETURN; + END IF; + END LOOP; +ELSE + query := format($q$ + SELECT * FROM items + WHERE %s + ORDER BY %s + LIMIT %L + $q$, _where, _orderby, _limit + ); + RAISE DEBUG 'QUERY: %', query; + timer := clock_timestamp(); + RETURN QUERY EXECUTE query; + RAISE NOTICE 'FULL QUERY TOOK %ms', age_ms(timer); +END IF; +RAISE NOTICE 'SEARCH_ROWS TOOK %ms', age_ms(full_timer); +RETURN; +END; +$function$ +; + +CREATE OR REPLACE FUNCTION pgstac.where_stats(inwhere text, updatestats boolean DEFAULT false, conf jsonb DEFAULT NULL::jsonb) + RETURNS search_wheres + LANGUAGE plpgsql + SECURITY DEFINER +AS $function$ +DECLARE + t timestamptz; + i interval; + explain_json jsonb; + partitions text[]; + sw search_wheres%ROWTYPE; + inwhere_hash text := md5(inwhere); + _context text := lower(context(conf)); + _stats_ttl interval := context_stats_ttl(conf); + _estimated_cost_threshold float := context_estimated_cost(conf); + _estimated_count_threshold int := context_estimated_count(conf); + ro bool := pgstac.readonly(conf); +BEGIN + -- If updatestats is true then set ttl to 0 + IF updatestats THEN + RAISE DEBUG 'Updatestats set to TRUE, setting TTL to 0'; + _stats_ttl := '0'::interval; + END IF; + + -- If we don't need to calculate context, just return + IF _context = 'off' THEN + sw._where = inwhere; + RETURN sw; + END IF; + + -- Get any stats that we have. If there is a lock where another process is + -- updating the stats, wait so that we don't end up calculating a bunch of times. + SELECT * INTO sw FROM search_wheres WHERE md5(_where)=inwhere_hash FOR UPDATE; + + -- If there is a cached row, figure out if we need to update + IF + sw IS NOT NULL + AND sw.statslastupdated IS NOT NULL + AND sw.total_count IS NOT NULL + AND now() - sw.statslastupdated <= _stats_ttl + THEN + -- we have a cached row with data that is within our ttl + RAISE DEBUG 'Stats present in table and lastupdated within ttl: %', sw; + IF NOT ro THEN + RAISE DEBUG 'Updating search_wheres only bumping lastused and usecount'; + UPDATE search_wheres SET + lastused = now(), + usecount = search_wheres.usecount + 1 + WHERE md5(_where) = inwhere_hash + RETURNING * INTO sw; + END IF; + RAISE DEBUG 'Returning cached counts. 
%', sw; + RETURN sw; + END IF; + + -- Calculate estimated cost and rows + -- Use explain to get estimated count/cost + IF sw.estimated_count IS NULL OR sw.estimated_cost IS NULL THEN + RAISE DEBUG 'Calculating estimated stats'; + t := clock_timestamp(); + EXECUTE format('EXPLAIN (format json) SELECT 1 FROM items WHERE %s', inwhere) + INTO explain_json; + RAISE DEBUG 'Time for just the explain: %', clock_timestamp() - t; + i := clock_timestamp() - t; + + sw.estimated_count := explain_json->0->'Plan'->'Plan Rows'; + sw.estimated_cost := explain_json->0->'Plan'->'Total Cost'; + sw.time_to_estimate := extract(epoch from i); + END IF; + + RAISE DEBUG 'ESTIMATED_COUNT: %, THRESHOLD %', sw.estimated_count, _estimated_count_threshold; + RAISE DEBUG 'ESTIMATED_COST: %, THRESHOLD %', sw.estimated_cost, _estimated_cost_threshold; + + -- If context is set to auto and the costs are within the threshold return the estimated costs + IF + _context = 'auto' + AND sw.estimated_count >= _estimated_count_threshold + AND sw.estimated_cost >= _estimated_cost_threshold + THEN + IF NOT ro THEN + INSERT INTO search_wheres ( + _where, + lastused, + usecount, + statslastupdated, + estimated_count, + estimated_cost, + time_to_estimate, + total_count, + time_to_count + ) VALUES ( + inwhere, + now(), + 1, + now(), + sw.estimated_count, + sw.estimated_cost, + sw.time_to_estimate, + null, + null + ) ON CONFLICT ((md5(_where))) + DO UPDATE SET + lastused = EXCLUDED.lastused, + usecount = search_wheres.usecount + 1, + statslastupdated = EXCLUDED.statslastupdated, + estimated_count = EXCLUDED.estimated_count, + estimated_cost = EXCLUDED.estimated_cost, + time_to_estimate = EXCLUDED.time_to_estimate, + total_count = EXCLUDED.total_count, + time_to_count = EXCLUDED.time_to_count + RETURNING * INTO sw; + END IF; + RAISE DEBUG 'Estimates are within thresholds, returning estimates. %', sw; + RETURN sw; + END IF; + + -- Calculate Actual Count + t := clock_timestamp(); + RAISE NOTICE 'Calculating actual count...'; + EXECUTE format( + 'SELECT count(*) FROM items WHERE %s', + inwhere + ) INTO sw.total_count; + i := clock_timestamp() - t; + RAISE NOTICE 'Actual Count: % -- %', sw.total_count, i; + sw.time_to_count := extract(epoch FROM i); + + IF NOT ro THEN + INSERT INTO search_wheres ( + _where, + lastused, + usecount, + statslastupdated, + estimated_count, + estimated_cost, + time_to_estimate, + total_count, + time_to_count + ) VALUES ( + inwhere, + now(), + 1, + now(), + sw.estimated_count, + sw.estimated_cost, + sw.time_to_estimate, + sw.total_count, + sw.time_to_count + ) ON CONFLICT ((md5(_where))) + DO UPDATE SET + lastused = EXCLUDED.lastused, + usecount = search_wheres.usecount + 1, + statslastupdated = EXCLUDED.statslastupdated, + estimated_count = EXCLUDED.estimated_count, + estimated_cost = EXCLUDED.estimated_cost, + time_to_estimate = EXCLUDED.time_to_estimate, + total_count = EXCLUDED.total_count, + time_to_count = EXCLUDED.time_to_count + RETURNING * INTO sw; + END IF; + RAISE DEBUG 'Returning with actual count. 
%', sw; + RETURN sw; +END; +$function$ +; + -- END migra calculated SQL DO $$ diff --git a/src/pgstac/migrations/pgstac.unreleased.sql b/src/pgstac/migrations/pgstac.unreleased.sql index d872c251..92a0a084 100644 --- a/src/pgstac/migrations/pgstac.unreleased.sql +++ b/src/pgstac/migrations/pgstac.unreleased.sql @@ -3361,7 +3361,11 @@ CREATE TABLE IF NOT EXISTS search_wheres( CREATE INDEX IF NOT EXISTS search_wheres_partitions ON search_wheres USING GIN (partitions); CREATE UNIQUE INDEX IF NOT EXISTS search_wheres_where ON search_wheres ((md5(_where))); -CREATE OR REPLACE FUNCTION where_stats(inwhere text, updatestats boolean default false, conf jsonb default null) RETURNS search_wheres AS $$ +CREATE OR REPLACE FUNCTION where_stats( + inwhere text, + updatestats boolean default false, + conf jsonb default null +) RETURNS search_wheres AS $$ DECLARE t timestamptz; i interval; @@ -3371,110 +3375,153 @@ DECLARE inwhere_hash text := md5(inwhere); _context text := lower(context(conf)); _stats_ttl interval := context_stats_ttl(conf); - _estimated_cost float := context_estimated_cost(conf); - _estimated_count int := context_estimated_count(conf); + _estimated_cost_threshold float := context_estimated_cost(conf); + _estimated_count_threshold int := context_estimated_count(conf); ro bool := pgstac.readonly(conf); BEGIN - IF ro THEN - updatestats := FALSE; + -- If updatestats is true then set ttl to 0 + IF updatestats THEN + RAISE DEBUG 'Updatestats set to TRUE, setting TTL to 0'; + _stats_ttl := '0'::interval; END IF; + -- If we don't need to calculate context, just return IF _context = 'off' THEN - sw._where := inwhere; - return sw; + sw._where = inwhere; + RETURN sw; END IF; + -- Get any stats that we have. If there is a lock where another process is + -- updating the stats, wait so that we don't end up calculating a bunch of times. SELECT * INTO sw FROM search_wheres WHERE md5(_where)=inwhere_hash FOR UPDATE; - -- Update statistics if explicitly set, if statistics do not exist, or statistics ttl has expired - IF NOT updatestats THEN - RAISE NOTICE 'Checking if update is needed for: % .', inwhere; - RAISE NOTICE 'Stats Last Updated: %', sw.statslastupdated; - RAISE NOTICE 'TTL: %, Age: %', _stats_ttl, now() - sw.statslastupdated; - RAISE NOTICE 'Context: %, Existing Total: %', _context, sw.total_count; - IF - ( - sw.statslastupdated IS NULL - OR (now() - sw.statslastupdated) > _stats_ttl - OR (context(conf) != 'off' AND sw.total_count IS NULL) - ) AND NOT ro - THEN - updatestats := TRUE; + -- If there is a cached row, figure out if we need to update + IF + sw IS NOT NULL + AND sw.statslastupdated IS NOT NULL + AND sw.total_count IS NOT NULL + AND now() - sw.statslastupdated <= _stats_ttl + THEN + -- we have a cached row with data that is within our ttl + RAISE DEBUG 'Stats present in table and lastupdated within ttl: %', sw; + IF NOT ro THEN + RAISE DEBUG 'Updating search_wheres only bumping lastused and usecount'; + UPDATE search_wheres SET + lastused = now(), + usecount = search_wheres.usecount + 1 + WHERE md5(_where) = inwhere_hash + RETURNING * INTO sw; END IF; - END IF; - - sw._where := inwhere; - sw.lastused := now(); - sw.usecount := coalesce(sw.usecount,0) + 1; - - IF NOT updatestats THEN - UPDATE search_wheres SET - lastused = sw.lastused, - usecount = sw.usecount - WHERE md5(_where) = inwhere_hash - RETURNING * INTO sw - ; + RAISE DEBUG 'Returning cached counts. 
%', sw; RETURN sw; END IF; - -- Use explain to get estimated count/cost and a list of the partitions that would be hit by the query - t := clock_timestamp(); - EXECUTE format('EXPLAIN (format json) SELECT 1 FROM items WHERE %s', inwhere) - INTO explain_json; - RAISE NOTICE 'Time for just the explain: %', clock_timestamp() - t; - i := clock_timestamp() - t; + -- Calculate estimated cost and rows + -- Use explain to get estimated count/cost + IF sw.estimated_count IS NULL OR sw.estimated_cost IS NULL THEN + RAISE DEBUG 'Calculating estimated stats'; + t := clock_timestamp(); + EXECUTE format('EXPLAIN (format json) SELECT 1 FROM items WHERE %s', inwhere) + INTO explain_json; + RAISE DEBUG 'Time for just the explain: %', clock_timestamp() - t; + i := clock_timestamp() - t; - sw.statslastupdated := now(); - sw.estimated_count := explain_json->0->'Plan'->'Plan Rows'; - sw.estimated_cost := explain_json->0->'Plan'->'Total Cost'; - sw.time_to_estimate := extract(epoch from i); + sw.estimated_count := explain_json->0->'Plan'->'Plan Rows'; + sw.estimated_cost := explain_json->0->'Plan'->'Total Cost'; + sw.time_to_estimate := extract(epoch from i); + END IF; - RAISE NOTICE 'ESTIMATED_COUNT: % < %', sw.estimated_count, _estimated_count; - RAISE NOTICE 'ESTIMATED_COST: % < %', sw.estimated_cost, _estimated_cost; + RAISE DEBUG 'ESTIMATED_COUNT: %, THRESHOLD %', sw.estimated_count, _estimated_count_threshold; + RAISE DEBUG 'ESTIMATED_COST: %, THRESHOLD %', sw.estimated_cost, _estimated_cost_threshold; - -- Do a full count of rows if context is set to on or if auto is set and estimates are low enough + -- If context is set to auto and the costs are within the threshold return the estimated costs IF - _context = 'on' - OR - ( _context = 'auto' AND - ( - sw.estimated_count < _estimated_count - AND - sw.estimated_cost < _estimated_cost - ) - ) + _context = 'auto' + AND sw.estimated_count >= _estimated_count_threshold + AND sw.estimated_cost >= _estimated_cost_threshold THEN - t := clock_timestamp(); - RAISE NOTICE 'Calculating actual count...'; - EXECUTE format( - 'SELECT count(*) FROM items WHERE %s', - inwhere - ) INTO sw.total_count; - i := clock_timestamp() - t; - RAISE NOTICE 'Actual Count: % -- %', sw.total_count, i; - sw.time_to_count := extract(epoch FROM i); - ELSE - sw.total_count := NULL; - sw.time_to_count := NULL; + IF NOT ro THEN + INSERT INTO search_wheres ( + _where, + lastused, + usecount, + statslastupdated, + estimated_count, + estimated_cost, + time_to_estimate, + total_count, + time_to_count + ) VALUES ( + inwhere, + now(), + 1, + now(), + sw.estimated_count, + sw.estimated_cost, + sw.time_to_estimate, + null, + null + ) ON CONFLICT ((md5(_where))) + DO UPDATE SET + lastused = EXCLUDED.lastused, + usecount = search_wheres.usecount + 1, + statslastupdated = EXCLUDED.statslastupdated, + estimated_count = EXCLUDED.estimated_count, + estimated_cost = EXCLUDED.estimated_cost, + time_to_estimate = EXCLUDED.time_to_estimate, + total_count = EXCLUDED.total_count, + time_to_count = EXCLUDED.time_to_count + RETURNING * INTO sw; + END IF; + RAISE DEBUG 'Estimates are within thresholds, returning estimates. 
%', sw; + RETURN sw; END IF; + -- Calculate Actual Count + t := clock_timestamp(); + RAISE NOTICE 'Calculating actual count...'; + EXECUTE format( + 'SELECT count(*) FROM items WHERE %s', + inwhere + ) INTO sw.total_count; + i := clock_timestamp() - t; + RAISE NOTICE 'Actual Count: % -- %', sw.total_count, i; + sw.time_to_count := extract(epoch FROM i); + IF NOT ro THEN - INSERT INTO search_wheres - (_where, lastused, usecount, statslastupdated, estimated_count, estimated_cost, time_to_estimate, partitions, total_count, time_to_count) - SELECT sw._where, sw.lastused, sw.usecount, sw.statslastupdated, sw.estimated_count, sw.estimated_cost, sw.time_to_estimate, sw.partitions, sw.total_count, sw.time_to_count - ON CONFLICT ((md5(_where))) - DO UPDATE - SET - lastused = sw.lastused, - usecount = sw.usecount, - statslastupdated = sw.statslastupdated, - estimated_count = sw.estimated_count, - estimated_cost = sw.estimated_cost, - time_to_estimate = sw.time_to_estimate, - total_count = sw.total_count, - time_to_count = sw.time_to_count - ; - END IF; + INSERT INTO search_wheres ( + _where, + lastused, + usecount, + statslastupdated, + estimated_count, + estimated_cost, + time_to_estimate, + total_count, + time_to_count + ) VALUES ( + inwhere, + now(), + 1, + now(), + sw.estimated_count, + sw.estimated_cost, + sw.time_to_estimate, + sw.total_count, + sw.time_to_count + ) ON CONFLICT ((md5(_where))) + DO UPDATE SET + lastused = EXCLUDED.lastused, + usecount = search_wheres.usecount + 1, + statslastupdated = EXCLUDED.statslastupdated, + estimated_count = EXCLUDED.estimated_count, + estimated_cost = EXCLUDED.estimated_cost, + time_to_estimate = EXCLUDED.time_to_estimate, + total_count = EXCLUDED.total_count, + time_to_count = EXCLUDED.time_to_count + RETURNING * INTO sw; + END IF; + RAISE DEBUG 'Returning with actual count. 
%', sw; RETURN sw; END; $$ LANGUAGE PLPGSQL SECURITY DEFINER; @@ -3487,68 +3534,66 @@ CREATE OR REPLACE FUNCTION search_query( ) RETURNS searches AS $$ DECLARE search searches%ROWTYPE; + cached_search searches%ROWTYPE; pexplain jsonb; t timestamptz; i interval; - _hash text := search_hash(_search, _metadata); doupdate boolean := FALSE; insertfound boolean := FALSE; ro boolean := pgstac.readonly(); + found_search text; BEGIN + RAISE NOTICE 'SEARCH: %', _search; + -- Calculate hash, where clause, and order by statement + search.search := _search; + search.metadata := _metadata; + search.hash := search_hash(_search, _metadata); + search._where := stac_search_to_where(_search); + search.orderby := sort_sqlorderby(_search); + search.lastused := now(); + search.usecount := 1; + + -- If we are in read only mode, directly return search IF ro THEN - updatestats := FALSE; + RETURN search; END IF; - SELECT * INTO search FROM searches - WHERE hash=_hash; - - search.hash := _hash; - - -- Calculate the where clause if not already calculated - IF search._where IS NULL THEN - search._where := stac_search_to_where(_search); - ELSE - doupdate := TRUE; - END IF; + RAISE NOTICE 'Updating Statistics for search: %s', search; + -- Update statistics for times used and and when last used + -- If the entry is locked, rather than waiting, skip updating the stats + INSERT INTO searches (search, lastused, usecount, metadata) + VALUES (search.search, now(), 1, search.metadata) + ON CONFLICT DO NOTHING + RETURNING * INTO cached_search + ; - -- Calculate the order by clause if not already calculated - IF search.orderby IS NULL THEN - search.orderby := sort_sqlorderby(_search); - ELSE - doupdate := TRUE; + IF NOT FOUND OR cached_search IS NULL THEN + UPDATE searches SET + lastused = now(), + usecount = searches.usecount + 1 + WHERE hash = ( + SELECT hash FROM searches WHERE hash=search.hash FOR UPDATE SKIP LOCKED + ) + RETURNING * INTO cached_search + ; END IF; - PERFORM where_stats(search._where, updatestats, _search->'conf'); - - IF NOT ro THEN - IF NOT doupdate THEN - INSERT INTO searches (search, _where, orderby, lastused, usecount, metadata) - VALUES (_search, search._where, search.orderby, clock_timestamp(), 1, _metadata) - ON CONFLICT (hash) DO NOTHING RETURNING * INTO search; - IF FOUND THEN - RETURN search; - END IF; - END IF; - - UPDATE searches - SET - lastused=clock_timestamp(), - usecount=usecount+1 - WHERE hash=( - SELECT hash FROM searches - WHERE hash=_hash - FOR UPDATE SKIP LOCKED - ); - IF NOT FOUND THEN - RAISE NOTICE 'Did not update stats for % due to lock. 
(This is generally OK)', _search; - END IF; + IF cached_search IS NOT NULL THEN + cached_search._where = search._where; + cached_search.orderby = search.orderby; + RETURN cached_search; END IF; - RETURN search; END; $$ LANGUAGE PLPGSQL SECURITY DEFINER; +CREATE OR REPLACE FUNCTION search_fromhash( + _hash text +) RETURNS searches AS $$ + SELECT * FROM search_query((SELECT search FROM searches WHERE hash=_hash LIMIT 1)); +$$ LANGUAGE SQL STRICT; + CREATE OR REPLACE FUNCTION search_rows( IN _where text DEFAULT 'TRUE', IN _orderby text DEFAULT 'datetime DESC, id DESC', @@ -3590,7 +3635,7 @@ IF _orderby ILIKE 'datetime d%' THEN _orderby, records_left ); - RAISE LOG 'QUERY: %', query; + RAISE DEBUG 'QUERY: %', query; timer := clock_timestamp(); RETURN QUERY EXECUTE query; @@ -3614,7 +3659,7 @@ ELSIF _orderby ILIKE 'datetime a%' THEN _orderby, records_left ); - RAISE LOG 'QUERY: %', query; + RAISE DEBUG 'QUERY: %', query; timer := clock_timestamp(); RETURN QUERY EXECUTE query; @@ -3635,7 +3680,7 @@ ELSE LIMIT %L $q$, _where, _orderby, _limit ); - RAISE LOG 'QUERY: %', query; + RAISE DEBUG 'QUERY: %', query; timer := clock_timestamp(); RETURN QUERY EXECUTE query; RAISE NOTICE 'FULL QUERY TOOK %ms', age_ms(timer); @@ -3730,18 +3775,18 @@ BEGIN token_prev := token.prev; token_item := token.item; token_where := get_token_filter(_search->'sortby', token_item, token_prev, FALSE); - RAISE LOG 'TOKEN_WHERE: % (%ms from search start)', token_where, age_ms(timer); + RAISE DEBUG 'TOKEN_WHERE: % (%ms from search start)', token_where, age_ms(timer); IF token_prev THEN -- if we are using a prev token, we know has_next is true - RAISE LOG 'There is a previous token, so automatically setting has_next to true'; + RAISE DEBUG 'There is a previous token, so automatically setting has_next to true'; has_next := TRUE; orderby := sort_sqlorderby(_search, TRUE); ELSE - RAISE LOG 'There is a next token, so automatically setting has_prev to true'; + RAISE DEBUG 'There is a next token, so automatically setting has_prev to true'; has_prev := TRUE; END IF; ELSE -- if there was no token, we know there is no prev - RAISE LOG 'There is no token, so we know there is no prev. setting has_prev to false'; + RAISE DEBUG 'There is no token, so we know there is no prev. 
setting has_prev to false'; has_prev := FALSE; END IF; @@ -3775,9 +3820,9 @@ BEGIN END IF; RAISE NOTICE 'Query returned % records.', jsonb_array_length(out_records); - RAISE LOG 'TOKEN: % %', token_item.id, token_item.collection; - RAISE LOG 'RECORD_1: % %', out_records->0->>'id', out_records->0->>'collection'; - RAISE LOG 'RECORD-1: % %', out_records->-1->>'id', out_records->-1->>'collection'; + RAISE DEBUG 'TOKEN: % %', token_item.id, token_item.collection; + RAISE DEBUG 'RECORD_1: % %', out_records->0->>'id', out_records->0->>'collection'; + RAISE DEBUG 'RECORD-1: % %', out_records->-1->>'id', out_records->-1->>'collection'; -- REMOVE records that were from our token IF out_records->0->>'id' = token_item.id AND out_records->0->>'collection' = token_item.collection THEN @@ -4098,9 +4143,9 @@ BEGIN exitwhenfull := TRUE; END IF; - SELECT * INTO search FROM searches WHERE hash=queryhash; + search := search_fromhash(queryhash); - IF NOT FOUND THEN + IF search IS NULL THEN RAISE EXCEPTION 'Search with Query Hash % Not Found', queryhash; END IF; diff --git a/src/pgstac/sql/004_search.sql b/src/pgstac/sql/004_search.sql index 41a180db..40e7af04 100644 --- a/src/pgstac/sql/004_search.sql +++ b/src/pgstac/sql/004_search.sql @@ -443,7 +443,11 @@ CREATE TABLE IF NOT EXISTS search_wheres( CREATE INDEX IF NOT EXISTS search_wheres_partitions ON search_wheres USING GIN (partitions); CREATE UNIQUE INDEX IF NOT EXISTS search_wheres_where ON search_wheres ((md5(_where))); -CREATE OR REPLACE FUNCTION where_stats(inwhere text, updatestats boolean default false, conf jsonb default null) RETURNS search_wheres AS $$ +CREATE OR REPLACE FUNCTION where_stats( + inwhere text, + updatestats boolean default false, + conf jsonb default null +) RETURNS search_wheres AS $$ DECLARE t timestamptz; i interval; @@ -453,110 +457,153 @@ DECLARE inwhere_hash text := md5(inwhere); _context text := lower(context(conf)); _stats_ttl interval := context_stats_ttl(conf); - _estimated_cost float := context_estimated_cost(conf); - _estimated_count int := context_estimated_count(conf); + _estimated_cost_threshold float := context_estimated_cost(conf); + _estimated_count_threshold int := context_estimated_count(conf); ro bool := pgstac.readonly(conf); BEGIN - IF ro THEN - updatestats := FALSE; + -- If updatestats is true then set ttl to 0 + IF updatestats THEN + RAISE DEBUG 'Updatestats set to TRUE, setting TTL to 0'; + _stats_ttl := '0'::interval; END IF; + -- If we don't need to calculate context, just return IF _context = 'off' THEN - sw._where := inwhere; - return sw; + sw._where = inwhere; + RETURN sw; END IF; + -- Get any stats that we have. If there is a lock where another process is + -- updating the stats, wait so that we don't end up calculating a bunch of times. 
SELECT * INTO sw FROM search_wheres WHERE md5(_where)=inwhere_hash FOR UPDATE; - -- Update statistics if explicitly set, if statistics do not exist, or statistics ttl has expired - IF NOT updatestats THEN - RAISE NOTICE 'Checking if update is needed for: % .', inwhere; - RAISE NOTICE 'Stats Last Updated: %', sw.statslastupdated; - RAISE NOTICE 'TTL: %, Age: %', _stats_ttl, now() - sw.statslastupdated; - RAISE NOTICE 'Context: %, Existing Total: %', _context, sw.total_count; - IF - ( - sw.statslastupdated IS NULL - OR (now() - sw.statslastupdated) > _stats_ttl - OR (context(conf) != 'off' AND sw.total_count IS NULL) - ) AND NOT ro - THEN - updatestats := TRUE; + -- If there is a cached row, figure out if we need to update + IF + sw IS NOT NULL + AND sw.statslastupdated IS NOT NULL + AND sw.total_count IS NOT NULL + AND now() - sw.statslastupdated <= _stats_ttl + THEN + -- we have a cached row with data that is within our ttl + RAISE DEBUG 'Stats present in table and lastupdated within ttl: %', sw; + IF NOT ro THEN + RAISE DEBUG 'Updating search_wheres only bumping lastused and usecount'; + UPDATE search_wheres SET + lastused = now(), + usecount = search_wheres.usecount + 1 + WHERE md5(_where) = inwhere_hash + RETURNING * INTO sw; END IF; - END IF; - - sw._where := inwhere; - sw.lastused := now(); - sw.usecount := coalesce(sw.usecount,0) + 1; - - IF NOT updatestats THEN - UPDATE search_wheres SET - lastused = sw.lastused, - usecount = sw.usecount - WHERE md5(_where) = inwhere_hash - RETURNING * INTO sw - ; + RAISE DEBUG 'Returning cached counts. %', sw; RETURN sw; END IF; - -- Use explain to get estimated count/cost and a list of the partitions that would be hit by the query - t := clock_timestamp(); - EXECUTE format('EXPLAIN (format json) SELECT 1 FROM items WHERE %s', inwhere) - INTO explain_json; - RAISE NOTICE 'Time for just the explain: %', clock_timestamp() - t; - i := clock_timestamp() - t; + -- Calculate estimated cost and rows + -- Use explain to get estimated count/cost + IF sw.estimated_count IS NULL OR sw.estimated_cost IS NULL THEN + RAISE DEBUG 'Calculating estimated stats'; + t := clock_timestamp(); + EXECUTE format('EXPLAIN (format json) SELECT 1 FROM items WHERE %s', inwhere) + INTO explain_json; + RAISE DEBUG 'Time for just the explain: %', clock_timestamp() - t; + i := clock_timestamp() - t; - sw.statslastupdated := now(); - sw.estimated_count := explain_json->0->'Plan'->'Plan Rows'; - sw.estimated_cost := explain_json->0->'Plan'->'Total Cost'; - sw.time_to_estimate := extract(epoch from i); + sw.estimated_count := explain_json->0->'Plan'->'Plan Rows'; + sw.estimated_cost := explain_json->0->'Plan'->'Total Cost'; + sw.time_to_estimate := extract(epoch from i); + END IF; - RAISE NOTICE 'ESTIMATED_COUNT: % < %', sw.estimated_count, _estimated_count; - RAISE NOTICE 'ESTIMATED_COST: % < %', sw.estimated_cost, _estimated_cost; + RAISE DEBUG 'ESTIMATED_COUNT: %, THRESHOLD %', sw.estimated_count, _estimated_count_threshold; + RAISE DEBUG 'ESTIMATED_COST: %, THRESHOLD %', sw.estimated_cost, _estimated_cost_threshold; - -- Do a full count of rows if context is set to on or if auto is set and estimates are low enough + -- If context is set to auto and the costs are within the threshold return the estimated costs IF - _context = 'on' - OR - ( _context = 'auto' AND - ( - sw.estimated_count < _estimated_count - AND - sw.estimated_cost < _estimated_cost - ) - ) + _context = 'auto' + AND sw.estimated_count >= _estimated_count_threshold + AND sw.estimated_cost >= _estimated_cost_threshold 
THEN - t := clock_timestamp(); - RAISE NOTICE 'Calculating actual count...'; - EXECUTE format( - 'SELECT count(*) FROM items WHERE %s', - inwhere - ) INTO sw.total_count; - i := clock_timestamp() - t; - RAISE NOTICE 'Actual Count: % -- %', sw.total_count, i; - sw.time_to_count := extract(epoch FROM i); - ELSE - sw.total_count := NULL; - sw.time_to_count := NULL; + IF NOT ro THEN + INSERT INTO search_wheres ( + _where, + lastused, + usecount, + statslastupdated, + estimated_count, + estimated_cost, + time_to_estimate, + total_count, + time_to_count + ) VALUES ( + inwhere, + now(), + 1, + now(), + sw.estimated_count, + sw.estimated_cost, + sw.time_to_estimate, + null, + null + ) ON CONFLICT ((md5(_where))) + DO UPDATE SET + lastused = EXCLUDED.lastused, + usecount = search_wheres.usecount + 1, + statslastupdated = EXCLUDED.statslastupdated, + estimated_count = EXCLUDED.estimated_count, + estimated_cost = EXCLUDED.estimated_cost, + time_to_estimate = EXCLUDED.time_to_estimate, + total_count = EXCLUDED.total_count, + time_to_count = EXCLUDED.time_to_count + RETURNING * INTO sw; + END IF; + RAISE DEBUG 'Estimates are within thresholds, returning estimates. %', sw; + RETURN sw; END IF; + -- Calculate Actual Count + t := clock_timestamp(); + RAISE NOTICE 'Calculating actual count...'; + EXECUTE format( + 'SELECT count(*) FROM items WHERE %s', + inwhere + ) INTO sw.total_count; + i := clock_timestamp() - t; + RAISE NOTICE 'Actual Count: % -- %', sw.total_count, i; + sw.time_to_count := extract(epoch FROM i); + IF NOT ro THEN - INSERT INTO search_wheres - (_where, lastused, usecount, statslastupdated, estimated_count, estimated_cost, time_to_estimate, partitions, total_count, time_to_count) - SELECT sw._where, sw.lastused, sw.usecount, sw.statslastupdated, sw.estimated_count, sw.estimated_cost, sw.time_to_estimate, sw.partitions, sw.total_count, sw.time_to_count - ON CONFLICT ((md5(_where))) - DO UPDATE - SET - lastused = sw.lastused, - usecount = sw.usecount, - statslastupdated = sw.statslastupdated, - estimated_count = sw.estimated_count, - estimated_cost = sw.estimated_cost, - time_to_estimate = sw.time_to_estimate, - total_count = sw.total_count, - time_to_count = sw.time_to_count - ; - END IF; + INSERT INTO search_wheres ( + _where, + lastused, + usecount, + statslastupdated, + estimated_count, + estimated_cost, + time_to_estimate, + total_count, + time_to_count + ) VALUES ( + inwhere, + now(), + 1, + now(), + sw.estimated_count, + sw.estimated_cost, + sw.time_to_estimate, + sw.total_count, + sw.time_to_count + ) ON CONFLICT ((md5(_where))) + DO UPDATE SET + lastused = EXCLUDED.lastused, + usecount = search_wheres.usecount + 1, + statslastupdated = EXCLUDED.statslastupdated, + estimated_count = EXCLUDED.estimated_count, + estimated_cost = EXCLUDED.estimated_cost, + time_to_estimate = EXCLUDED.time_to_estimate, + total_count = EXCLUDED.total_count, + time_to_count = EXCLUDED.time_to_count + RETURNING * INTO sw; + END IF; + RAISE DEBUG 'Returning with actual count. 
%', sw; RETURN sw; END; $$ LANGUAGE PLPGSQL SECURITY DEFINER; @@ -569,68 +616,66 @@ CREATE OR REPLACE FUNCTION search_query( ) RETURNS searches AS $$ DECLARE search searches%ROWTYPE; + cached_search searches%ROWTYPE; pexplain jsonb; t timestamptz; i interval; - _hash text := search_hash(_search, _metadata); doupdate boolean := FALSE; insertfound boolean := FALSE; ro boolean := pgstac.readonly(); + found_search text; BEGIN + RAISE NOTICE 'SEARCH: %', _search; + -- Calculate hash, where clause, and order by statement + search.search := _search; + search.metadata := _metadata; + search.hash := search_hash(_search, _metadata); + search._where := stac_search_to_where(_search); + search.orderby := sort_sqlorderby(_search); + search.lastused := now(); + search.usecount := 1; + + -- If we are in read only mode, directly return search IF ro THEN - updatestats := FALSE; - END IF; - - SELECT * INTO search FROM searches - WHERE hash=_hash; - - search.hash := _hash; - - -- Calculate the where clause if not already calculated - IF search._where IS NULL THEN - search._where := stac_search_to_where(_search); - ELSE - doupdate := TRUE; - END IF; - - -- Calculate the order by clause if not already calculated - IF search.orderby IS NULL THEN - search.orderby := sort_sqlorderby(_search); - ELSE - doupdate := TRUE; + RETURN search; + END IF; + + RAISE NOTICE 'Updating Statistics for search: %s', search; + -- Update statistics for times used and and when last used + -- If the entry is locked, rather than waiting, skip updating the stats + INSERT INTO searches (search, lastused, usecount, metadata) + VALUES (search.search, now(), 1, search.metadata) + ON CONFLICT DO NOTHING + RETURNING * INTO cached_search + ; + + IF NOT FOUND OR cached_search IS NULL THEN + UPDATE searches SET + lastused = now(), + usecount = searches.usecount + 1 + WHERE hash = ( + SELECT hash FROM searches WHERE hash=search.hash FOR UPDATE SKIP LOCKED + ) + RETURNING * INTO cached_search + ; END IF; - PERFORM where_stats(search._where, updatestats, _search->'conf'); - - IF NOT ro THEN - IF NOT doupdate THEN - INSERT INTO searches (search, _where, orderby, lastused, usecount, metadata) - VALUES (_search, search._where, search.orderby, clock_timestamp(), 1, _metadata) - ON CONFLICT (hash) DO NOTHING RETURNING * INTO search; - IF FOUND THEN - RETURN search; - END IF; - END IF; - - UPDATE searches - SET - lastused=clock_timestamp(), - usecount=usecount+1 - WHERE hash=( - SELECT hash FROM searches - WHERE hash=_hash - FOR UPDATE SKIP LOCKED - ); - IF NOT FOUND THEN - RAISE NOTICE 'Did not update stats for % due to lock. 
(This is generally OK)', _search; - END IF; + IF cached_search IS NOT NULL THEN + cached_search._where = search._where; + cached_search.orderby = search.orderby; + RETURN cached_search; END IF; - RETURN search; END; $$ LANGUAGE PLPGSQL SECURITY DEFINER; +CREATE OR REPLACE FUNCTION search_fromhash( + _hash text +) RETURNS searches AS $$ + SELECT * FROM search_query((SELECT search FROM searches WHERE hash=_hash LIMIT 1)); +$$ LANGUAGE SQL STRICT; + CREATE OR REPLACE FUNCTION search_rows( IN _where text DEFAULT 'TRUE', IN _orderby text DEFAULT 'datetime DESC, id DESC', @@ -672,7 +717,7 @@ IF _orderby ILIKE 'datetime d%' THEN _orderby, records_left ); - RAISE LOG 'QUERY: %', query; + RAISE DEBUG 'QUERY: %', query; timer := clock_timestamp(); RETURN QUERY EXECUTE query; @@ -696,7 +741,7 @@ ELSIF _orderby ILIKE 'datetime a%' THEN _orderby, records_left ); - RAISE LOG 'QUERY: %', query; + RAISE DEBUG 'QUERY: %', query; timer := clock_timestamp(); RETURN QUERY EXECUTE query; @@ -717,7 +762,7 @@ ELSE LIMIT %L $q$, _where, _orderby, _limit ); - RAISE LOG 'QUERY: %', query; + RAISE DEBUG 'QUERY: %', query; timer := clock_timestamp(); RETURN QUERY EXECUTE query; RAISE NOTICE 'FULL QUERY TOOK %ms', age_ms(timer); @@ -812,18 +857,18 @@ BEGIN token_prev := token.prev; token_item := token.item; token_where := get_token_filter(_search->'sortby', token_item, token_prev, FALSE); - RAISE LOG 'TOKEN_WHERE: % (%ms from search start)', token_where, age_ms(timer); + RAISE DEBUG 'TOKEN_WHERE: % (%ms from search start)', token_where, age_ms(timer); IF token_prev THEN -- if we are using a prev token, we know has_next is true - RAISE LOG 'There is a previous token, so automatically setting has_next to true'; + RAISE DEBUG 'There is a previous token, so automatically setting has_next to true'; has_next := TRUE; orderby := sort_sqlorderby(_search, TRUE); ELSE - RAISE LOG 'There is a next token, so automatically setting has_prev to true'; + RAISE DEBUG 'There is a next token, so automatically setting has_prev to true'; has_prev := TRUE; END IF; ELSE -- if there was no token, we know there is no prev - RAISE LOG 'There is no token, so we know there is no prev. setting has_prev to false'; + RAISE DEBUG 'There is no token, so we know there is no prev. 
setting has_prev to false'; has_prev := FALSE; END IF; @@ -857,9 +902,9 @@ BEGIN END IF; RAISE NOTICE 'Query returned % records.', jsonb_array_length(out_records); - RAISE LOG 'TOKEN: % %', token_item.id, token_item.collection; - RAISE LOG 'RECORD_1: % %', out_records->0->>'id', out_records->0->>'collection'; - RAISE LOG 'RECORD-1: % %', out_records->-1->>'id', out_records->-1->>'collection'; + RAISE DEBUG 'TOKEN: % %', token_item.id, token_item.collection; + RAISE DEBUG 'RECORD_1: % %', out_records->0->>'id', out_records->0->>'collection'; + RAISE DEBUG 'RECORD-1: % %', out_records->-1->>'id', out_records->-1->>'collection'; -- REMOVE records that were from our token IF out_records->0->>'id' = token_item.id AND out_records->0->>'collection' = token_item.collection THEN diff --git a/src/pgstac/sql/006_tilesearch.sql b/src/pgstac/sql/006_tilesearch.sql index d7c0e328..81788a48 100644 --- a/src/pgstac/sql/006_tilesearch.sql +++ b/src/pgstac/sql/006_tilesearch.sql @@ -46,9 +46,9 @@ BEGIN exitwhenfull := TRUE; END IF; - SELECT * INTO search FROM searches WHERE hash=queryhash; + search := search_fromhash(queryhash); - IF NOT FOUND THEN + IF search IS NULL THEN RAISE EXCEPTION 'Search with Query Hash % Not Found', queryhash; END IF; diff --git a/src/pgstac/tests/basic/xyz_searches.sql b/src/pgstac/tests/basic/xyz_searches.sql index cfa677b4..841e194c 100644 --- a/src/pgstac/tests/basic/xyz_searches.sql +++ b/src/pgstac/tests/basic/xyz_searches.sql @@ -2,6 +2,8 @@ SET pgstac."default_filter_lang" TO 'cql-json'; SELECT hash from search_query('{"collections":["pgstac-test-collection"]}'); +SELECT hash, search, metadata FROM search_fromhash('2bbae9a0ef0bbb5ffaca06603ce621d7'); + SELECT xyzsearch(8615, 13418, 15, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb); SELECT xyzsearch(1048, 1682, 12, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb); diff --git a/src/pgstac/tests/basic/xyz_searches.sql.out b/src/pgstac/tests/basic/xyz_searches.sql.out index 22f76aa8..efec8aff 100644 --- a/src/pgstac/tests/basic/xyz_searches.sql.out +++ b/src/pgstac/tests/basic/xyz_searches.sql.out @@ -3,6 +3,9 @@ SET SELECT hash from search_query('{"collections":["pgstac-test-collection"]}'); 2bbae9a0ef0bbb5ffaca06603ce621d7 +SELECT hash, search, metadata FROM search_fromhash('2bbae9a0ef0bbb5ffaca06603ce621d7'); + 2bbae9a0ef0bbb5ffaca06603ce621d7 | {"collections": ["pgstac-test-collection"]} | {} + SELECT xyzsearch(8615, 13418, 15, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb); {"type": "FeatureCollection", "features": [{"id": "pgstac-test-item-0003", "collection": "pgstac-test-collection"}]}
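
Review note: a minimal usage sketch of the search_fromhash() path introduced in this patch, assuming a database loaded with the pgstac test fixtures (the hash below is the one produced for the test collection search in the fixtures; any other saved-search hash behaves the same way). search_query() registers or bumps the saved search and returns its hash, search_fromhash() resolves a hash back into a searches row by re-running search_query() on the stored search body, and the tile/geometry search functions now resolve their queryhash through search_fromhash() instead of reading the searches table directly.

-- 1. Register (or look up) a saved search and get its hash.
SELECT hash FROM search_query('{"collections":["pgstac-test-collection"]}');

-- 2. Resolve the hash back into a searches row; in read/write mode this
--    also bumps lastused/usecount, in pgstac.readonly() mode it does not.
SELECT hash, search, metadata FROM search_fromhash('2bbae9a0ef0bbb5ffaca06603ce621d7');

-- 3. Tile searches take the same hash; an unknown hash now raises
--    'Search with Query Hash % Not Found' via the NULL check added above.
SELECT xyzsearch(8615, 13418, 15, '2bbae9a0ef0bbb5ffaca06603ce621d7', '{"include":["id"]}'::jsonb);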