Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to h3 v4.2.0 #139

Merged
merged 7 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/_extension_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
cd duckdb
git checkout ${{ inputs.duckdb_version }}

- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
with:
name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}}${{startsWith(matrix.duckdb, 'wasm') && '.wasm' || ''}}
path: |
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[![Extension Test](https://github.com/isaacbrodsky/h3-duckdb/actions/workflows/test.yml/badge.svg)](https://github.com/isaacbrodsky/h3-duckdb/actions/workflows/test.yml)
[![DuckDB Version](https://img.shields.io/static/v1?label=duckdb&message=v1.1.3&color=blue)](https://github.com/duckdb/duckdb/releases/tag/v1.1.3)
[![H3 Version](https://img.shields.io/static/v1?label=h3&message=v4.1.0&color=blue)](https://github.com/uber/h3/releases/tag/v4.1.0)
[![H3 Version](https://img.shields.io/static/v1?label=h3&message=v4.2.0&color=blue)](https://github.com/uber/h3/releases/tag/v4.2.0)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)

This is a [DuckDB](https://duckdb.org) extension that adds support for the [H3 discrete global grid system](https://github.com/uber/h3/), so you can index points and geometries to hexagons in SQL.
Expand Down Expand Up @@ -94,6 +94,8 @@ one to use. The unsigned and signed APIs are identical. All functions also suppo
| `h3_cells_to_multi_polygon_wkt` | Convert a set of cells to multipolygon WKT
| `h3_polygon_wkt_to_cells` | Convert polygon WKT to a set of cells
| `h3_polygon_wkt_to_cells_string` | Convert polygon WKT to a set of cells (returns VARCHAR)
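| `h3_polygon_wkt_to_cells_experimental` | Convert polygon WKT to a set of cells using the experimental polygon-to-cells algorithm; takes a containment mode (`CONTAINMENT_CENTER`, `CONTAINMENT_FULL`, `CONTAINMENT_OVERLAPPING`, or `CONTAINMENT_OVERLAPPING_BBOX`) and a resolution
| `h3_polygon_wkt_to_cells_experimental_string` | Convert polygon WKT to a set of cells using the experimental polygon-to-cells algorithm, with the same containment mode and resolution arguments (returns VARCHAR)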

# Alternative download / install

Expand Down
2 changes: 1 addition & 1 deletion h3
Submodule h3 updated 186 files
211 changes: 211 additions & 0 deletions src/h3_regions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,203 @@ static void PolygonWktToCellsVarcharFunction(DataChunk &args,
});
}

static void PolygonWktToCellsExperimentalFunction(DataChunk &args, ExpressionState &state,
                                                  Vector &result) {
  // Per-row inputs: polygon WKT text, containment mode name, H3 resolution.
  // Per-row output: the (offset, length) slice of the result list vector that
  // holds the UBIGINT cells appended for that row.
  //
  // TODO: Note this function is not fully noexcept -- some invalid WKT strings
  // will throw, others will return empty lists.
  TernaryExecutor::Execute<string_t, string_t, int, list_entry_t>(
      args.data[0], args.data[1], args.data[2], result, args.size(),
      [&](string_t wktInput, string_t modeName, int resolution) {
        GeoPolygon poly;
        std::string wkt = wktInput.GetString();

        // Cells for this row are appended starting at the current list size.
        const uint64_t listStart = ListVector::GetListSize(result);

        // TODO: Make flags easier to work with
        // Translate the mode name to the numeric flag handed to H3;
        // -1 marks "name not recognised".
        int32_t mode = -1;
        if (modeName == "CONTAINMENT_CENTER") {
          mode = 0;
        } else if (modeName == "CONTAINMENT_FULL") {
          mode = 1;
        } else if (modeName == "CONTAINMENT_OVERLAPPING") {
          mode = 2;
        } else if (modeName == "CONTAINMENT_OVERLAPPING_BBOX") {
          mode = 3;
        }
        if (mode < 0) {
          // Invalid flags input
          return list_entry_t(listStart, 0);
        }

        // The text has to begin with the POLYGON keyword.
        if (wkt.rfind(POLYGON, 0) != 0) {
          return list_entry_t(listStart, 0);
        }

        size_t pos = POLYGON.length();
        pos = whitespace(wkt, pos);

        // "POLYGON EMPTY" yields no cells.
        if (wkt.rfind(EMPTY, pos) == pos) {
          return list_entry_t(listStart, 0);
        }

        // Anything other than an opening '(' here is treated as "no cells".
        if (wkt[pos] != '(') {
          return list_entry_t(listStart, 0);
        }
        pos++;
        pos = whitespace(wkt, pos);

        // NOTE(review): the vertex vectors are held in this scope, presumably
        // because readGeoLoop makes the GeoLoop point into them -- confirm
        // against readGeoLoop before shortening their lifetime.
        auto shellVerts = duckdb::make_shared_ptr<std::vector<LatLng>>();
        pos = readGeoLoop(wkt, pos, shellVerts, poly.geoloop);

        std::vector<GeoLoop> holeLoops;
        std::vector<duckdb::shared_ptr<std::vector<LatLng>>> holeVertStorage;
        while (pos < wkt.length() && wkt[pos] == ',') {
          pos++;
          pos = whitespace(wkt, pos);
          if (wkt[pos] != '(') {
            throw InvalidInputException(StringUtil::Format(
                "Invalid WKT: expected a hole loop '(' after ',' at pos %lu",
                pos));
          }
          GeoLoop holeLoop;
          auto holeVerts = duckdb::make_shared_ptr<std::vector<LatLng>>();
          pos = readGeoLoop(wkt, pos, holeVerts, holeLoop);
          holeLoops.push_back(holeLoop);
          holeVertStorage.push_back(holeVerts);
        }
        if (wkt[pos] != ')') {
          throw InvalidInputException(StringUtil::Format(
              "Invalid WKT: expected a hole loop ',' or final ')' at pos %lu",
              pos));
        }

        poly.numHoles = holeLoops.size();
        poly.holes = holeLoops.data();

        // Ask H3 for the output buffer size, then fill the buffer. An H3 error
        // at either step results in an empty list for this row.
        int64_t maxCells = 0;
        if (maxPolygonToCellsSizeExperimental(&poly, resolution, mode, &maxCells)) {
          return list_entry_t(listStart, 0);
        }
        std::vector<H3Index> cells(maxCells);
        if (polygonToCellsExperimental(&poly, resolution, mode, maxCells, cells.data())) {
          return list_entry_t(listStart, 0);
        }

        // Only non-null slots of the buffer are emitted.
        uint64_t emitted = 0;
        for (H3Index cell : cells) {
          if (cell == H3_NULL) {
            continue;
          }
          ListVector::PushBack(result, Value::UBIGINT(cell));
          emitted++;
        }
        return list_entry_t(listStart, emitted);
      });
}

static void PolygonWktToCellsExperimentalVarcharFunction(DataChunk &args,
                                                         ExpressionState &state,
                                                         Vector &result) {
  // Per-row inputs: polygon WKT text, containment mode name, H3 resolution.
  // Per-row output: the (offset, length) slice of the result list vector that
  // holds the cells for that row, each formatted as a lowercase hex string.
  //
  // TODO: Note this function is not fully noexcept -- some invalid WKT strings
  // will throw, others will return empty lists.
  TernaryExecutor::Execute<string_t, string_t, int, list_entry_t>(
      args.data[0], args.data[1], args.data[2], result, args.size(),
      [&](string_t input, string_t flagsStr, int res) {
        GeoPolygon polygon;
        int32_t flags = 0;

        std::string str = input.GetString();

        // Cells for this row are appended starting at the current list size.
        uint64_t offset = ListVector::GetListSize(result);

        // TODO: Make flags easier to work with
        if (flagsStr == "CONTAINMENT_CENTER") {
          flags = 0;
        } else if (flagsStr == "CONTAINMENT_FULL") {
          flags = 1;
        } else if (flagsStr == "CONTAINMENT_OVERLAPPING") {
          flags = 2;
        } else if (flagsStr == "CONTAINMENT_OVERLAPPING_BBOX") {
          flags = 3;
        } else {
          // Invalid flags input
          return list_entry_t(offset, 0);
        }

        // The text has to begin with the POLYGON keyword.
        if (str.rfind(POLYGON, 0) != 0) {
          return list_entry_t(offset, 0);
        }

        size_t strIndex = POLYGON.length();
        strIndex = whitespace(str, strIndex);

        // "POLYGON EMPTY" yields no cells.
        if (str.rfind(EMPTY, strIndex) == strIndex) {
          return list_entry_t(offset, 0);
        }

        // Anything other than an opening '(' here is treated as "no cells".
        if (str[strIndex] != '(') {
          return list_entry_t(offset, 0);
        }
        strIndex++;
        strIndex = whitespace(str, strIndex);

        // NOTE(review): the vertex vectors are held in this scope, presumably
        // because readGeoLoop makes the GeoLoop point into them -- confirm
        // against readGeoLoop before shortening their lifetime.
        auto outerVerts = duckdb::make_shared_ptr<std::vector<LatLng>>();
        strIndex = readGeoLoop(str, strIndex, outerVerts, polygon.geoloop);

        std::vector<GeoLoop> holes;
        std::vector<duckdb::shared_ptr<std::vector<LatLng>>> holesVerts;
        while (strIndex < str.length() && str[strIndex] == ',') {
          strIndex++;
          strIndex = whitespace(str, strIndex);
          if (str[strIndex] == '(') {
            GeoLoop hole;
            auto verts = duckdb::make_shared_ptr<std::vector<LatLng>>();
            strIndex = readGeoLoop(str, strIndex, verts, hole);
            holes.push_back(hole);
            holesVerts.push_back(verts);
          } else {
            throw InvalidInputException(StringUtil::Format(
                "Invalid WKT: expected a hole loop '(' after ',' at pos %lu",
                strIndex));
          }
        }
        if (str[strIndex] != ')') {
          throw InvalidInputException(StringUtil::Format(
              "Invalid WKT: expected a hole loop ',' or final ')' at pos %lu",
              strIndex));
        }

        polygon.numHoles = holes.size();
        polygon.holes = holes.data();

        // Ask H3 for the output buffer size, then fill the buffer. An H3 error
        // at either step results in an empty list for this row.
        int64_t numCells = 0;
        H3Error err = maxPolygonToCellsSizeExperimental(&polygon, res, flags, &numCells);
        if (err) {
          return list_entry_t(offset, 0);
        }

        std::vector<H3Index> out(numCells);
        H3Error err2 = polygonToCellsExperimental(&polygon, res, flags, numCells, out.data());
        if (err2) {
          return list_entry_t(offset, 0);
        }

        // Only non-null slots of the buffer are emitted.
        uint64_t actual = 0;
        for (H3Index outCell : out) {
          if (outCell != H3_NULL) {
            // Build the Value straight from the formatted std::string. The
            // previous strdup()-backed string_t was copied by PushBack and the
            // duplicated buffer was never freed, leaking once per cell.
            ListVector::PushBack(result,
                                 Value(StringUtil::Format("%llx", outCell)));
            actual++;
          }
        }
        return list_entry_t(offset, actual);
      });
}

CreateScalarFunctionInfo H3Functions::GetCellsToMultiPolygonWktFunction() {
ScalarFunctionSet funcs("h3_cells_to_multi_polygon_wkt");
funcs.AddFunction(ScalarFunction(
Expand Down Expand Up @@ -393,4 +590,18 @@ CreateScalarFunctionInfo H3Functions::GetPolygonWktToCellsVarcharFunction() {
PolygonWktToCellsVarcharFunction));
}

CreateScalarFunctionInfo H3Functions::GetPolygonWktToCellsExperimentalFunction() {
  // Describes h3_polygon_wkt_to_cells_experimental:
  //   (VARCHAR, VARCHAR, INTEGER) -> LIST(UBIGINT),
  // implemented by PolygonWktToCellsExperimentalFunction.
  ScalarFunction func("h3_polygon_wkt_to_cells_experimental",
                      {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER},
                      LogicalType::LIST(LogicalType::UBIGINT),
                      PolygonWktToCellsExperimentalFunction);
  return CreateScalarFunctionInfo(func);
}

CreateScalarFunctionInfo H3Functions::GetPolygonWktToCellsExperimentalVarcharFunction() {
  // Describes h3_polygon_wkt_to_cells_experimental_string:
  //   (VARCHAR, VARCHAR, INTEGER) -> LIST(VARCHAR),
  // implemented by PolygonWktToCellsExperimentalVarcharFunction.
  ScalarFunction func("h3_polygon_wkt_to_cells_experimental_string",
                      {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER},
                      LogicalType::LIST(LogicalType::VARCHAR),
                      PolygonWktToCellsExperimentalVarcharFunction);
  return CreateScalarFunctionInfo(func);
}

} // namespace duckdb
4 changes: 4 additions & 0 deletions src/include/h3_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ class H3Functions {
functions.push_back(GetCellsToMultiPolygonWktFunction());
functions.push_back(GetPolygonWktToCellsFunction());
functions.push_back(GetPolygonWktToCellsVarcharFunction());
functions.push_back(GetPolygonWktToCellsExperimentalFunction());
functions.push_back(GetPolygonWktToCellsExperimentalVarcharFunction());

return functions;
}
Expand Down Expand Up @@ -168,6 +170,8 @@ class H3Functions {
static CreateScalarFunctionInfo GetCellsToMultiPolygonWktFunction();
static CreateScalarFunctionInfo GetPolygonWktToCellsFunction();
static CreateScalarFunctionInfo GetPolygonWktToCellsVarcharFunction();
static CreateScalarFunctionInfo GetPolygonWktToCellsExperimentalFunction();
static CreateScalarFunctionInfo GetPolygonWktToCellsExperimentalVarcharFunction();

static void AddAliases(vector<string> names, CreateScalarFunctionInfo fun,
vector<CreateScalarFunctionInfo> &functions) {
Expand Down
105 changes: 105 additions & 0 deletions test/sql/h3/h3_functions_regions.test
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,108 @@ query I
select h3_polygon_wkt_to_cells_string('POLYGON EMPTY', 9);
----
[]

query I
select length(h3_polygon_wkt_to_cells_experimental('POLYGON', 'CONTAINMENT_CENTER', 9));
----
0

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'AAA', 9);
----
[]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_CENTER', 5)
----
[]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_FULL', 5)
----
[]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING', 5)
----
[599685771850416127, 599685772924157951, 599685776145383423, 599685777219125247]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING_BBOX', 5)
----
[599685771850416127, 599685772924157951, 599685773997899775, 599685775071641599, 599685776145383423, 599685777219125247, 599685784735318015, 599686100415414271, 599686104710381567]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_CENTER', 5)
----
[]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_FULL', 5)
----
[]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING', 5)
----
[85283083fffffff, 85283087fffffff, 85283093fffffff, 85283097fffffff]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING_BBOX', 5)
----
[85283083fffffff, 85283087fffffff, 8528308bfffffff, 8528308ffffffff, 85283093fffffff, 85283097fffffff, 852830b3fffffff, 8528354bfffffff, 8528355bfffffff]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_FULL', 5)
----
[]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING', 5)
----
[599685771850416127, 599685772924157951, 599685776145383423, 599685777219125247]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING_BBOX', 5)
----
[599685771850416127, 599685772924157951, 599685773997899775, 599685775071641599, 599685776145383423, 599685777219125247, 599685784735318015, 599686100415414271, 599686104710381567]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_CENTER', 6)
----
[604189371075133439, 604189371209351167, 604189372417310719, 604189376175407103, 604189376309624831]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_FULL', 6)
----
[604189371209351167, 604189376309624831]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING', 6)
----
[604189370538262527, 604189370672480255, 604189371075133439, 604189371209351167, 604189372148875263, 604189372417310719, 604189374967447551, 604189375235883007, 604189375906971647, 604189376041189375, 604189376175407103, 604189376309624831]

query I
select h3_polygon_wkt_to_cells_experimental('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING_BBOX', 6)
----
[604189370538262527, 604189370672480255, 604189370940915711, 604189371075133439, 604189371209351167, 604189371343568895, 604189371612004351, 604189372148875263, 604189372283092991, 604189372417310719, 604189374967447551, 604189375235883007, 604189375906971647, 604189376041189375, 604189376175407103, 604189376309624831, 604189376578060287, 604189376712278015]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_CENTER', 6)
----
[862830827ffffff, 86283082fffffff, 862830877ffffff, 862830957ffffff, 86283095fffffff]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_FULL', 6)
----
[86283082fffffff, 86283095fffffff]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING', 6)
----
[862830807ffffff, 86283080fffffff, 862830827ffffff, 86283082fffffff, 862830867ffffff, 862830877ffffff, 86283090fffffff, 86283091fffffff, 862830947ffffff, 86283094fffffff, 862830957ffffff, 86283095fffffff]

query I
select h3_polygon_wkt_to_cells_experimental_string('POLYGON ((-122.53401215374411 37.81666158907579, -122.53401215374411 37.70454536656959, -122.3479361380842 37.70454536656959, -122.3479361380842 37.81666158907579, -122.53401215374411 37.81666158907579))', 'CONTAINMENT_OVERLAPPING_BBOX', 6)
----
[862830807ffffff, 86283080fffffff, 86283081fffffff, 862830827ffffff, 86283082fffffff, 862830837ffffff, 862830847ffffff, 862830867ffffff, 86283086fffffff, 862830877ffffff, 86283090fffffff, 86283091fffffff, 862830947ffffff, 86283094fffffff, 862830957ffffff, 86283095fffffff, 86283096fffffff, 862830977ffffff]
Loading
Loading