Skip to content

Commit

Permalink
add experimental functions
Browse files Browse the repository at this point in the history
  • Loading branch information
isaacbrodsky committed Dec 5, 2024
1 parent e262ed0 commit 264434f
Show file tree
Hide file tree
Showing 3 changed files with 213 additions and 0 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ one to use. The unsigned and signed APIs are identical. All functions also suppo
| `h3_cells_to_multi_polygon_wkt` | Convert a set of cells to multipolygon WKT
| `h3_polygon_wkt_to_cells` | Convert polygon WKT to a set of cells
| `h3_polygon_wkt_to_cells_string` | Convert polygon WKT to a set of cells (returns VARCHAR)
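| `h3_polygon_wkt_to_cells_experimental` | Convert polygon WKT to a set of cells using the experimental polygon-to-cells algorithm; takes a containment mode (`CONTAINMENT_CENTER`, `CONTAINMENT_FULL`, or `CONTAINMENT_OVERLAPPING`) and a resolution
| `h3_polygon_wkt_to_cells_experimental_string` | Convert polygon WKT to a set of cells using the experimental polygon-to-cells algorithm; takes a containment mode and a resolution (returns VARCHAR)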

# Alternative download / install

Expand Down
207 changes: 207 additions & 0 deletions src/h3_regions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,199 @@ static void PolygonWktToCellsVarcharFunction(DataChunk &args,
});
}

static void PolygonWktToCellsExperimentalFunction(DataChunk &args, ExpressionState &state,
                                                  Vector &result) {
	// Scalar function body: (WKT polygon text, containment mode name, resolution)
	// -> list of H3 cell indexes as UBIGINT.
	//
	// TODO: Not fully noexcept. A malformed hole list throws
	// InvalidInputException, while other bad input (unknown mode name, text not
	// starting with POLYGON, empty polygon, H3 errors) produces an empty list.
	TernaryExecutor::Execute<string_t, string_t, int, list_entry_t>(
	    args.data[0], args.data[1], args.data[2], result, args.size(),
	    [&](string_t wktInput, string_t modeName, int res) {
		    // Each row's list starts at the current end of the shared child vector.
		    const uint64_t listStart = ListVector::GetListSize(result);
		    const list_entry_t noCells(listStart, 0);

		    // TODO: Make flags easier to work with.
		    // The position of the name in this table is the integer flag value
		    // handed to the H3 calls below.
		    static const char *const kModeNames[] = {"CONTAINMENT_CENTER", "CONTAINMENT_FULL",
		                                             "CONTAINMENT_OVERLAPPING"};
		    int32_t mode = -1;
		    for (int32_t candidate = 0; candidate < 3; ++candidate) {
			    if (modeName == kModeNames[candidate]) {
				    mode = candidate;
				    break;
			    }
		    }
		    if (mode < 0) {
			    // Invalid flags input
			    return noCells;
		    }

		    std::string wkt = wktInput.GetString();

		    // The text must begin with the POLYGON keyword.
		    if (wkt.rfind(POLYGON, 0) != 0) {
			    return noCells;
		    }

		    size_t pos = POLYGON.length();
		    pos = whitespace(wkt, pos);

		    // "POLYGON EMPTY" yields no cells.
		    if (wkt.rfind(EMPTY, pos) == pos) {
			    return noCells;
		    }

		    // Anything other than an opening parenthesis here is treated as
		    // "no polygon" rather than as an error.
		    if (wkt[pos] != '(') {
			    return noCells;
		    }
		    ++pos;
		    pos = whitespace(wkt, pos);

		    // Outer ring. The shared vector is kept in scope for the rest of the
		    // lambda; presumably it backs the GeoLoop filled in by readGeoLoop.
		    GeoPolygon polygon;
		    auto outerVerts = duckdb::make_shared_ptr<std::vector<LatLng>>();
		    pos = readGeoLoop(wkt, pos, outerVerts, polygon.geoloop);

		    // Zero or more comma-separated hole rings.
		    std::vector<GeoLoop> holeLoops;
		    std::vector<duckdb::shared_ptr<std::vector<LatLng>>> holeStorage;
		    while (pos < wkt.length() && wkt[pos] == ',') {
			    ++pos;
			    pos = whitespace(wkt, pos);
			    if (wkt[pos] != '(') {
				    throw InvalidInputException(StringUtil::Format(
				        "Invalid WKT: expected a hole loop '(' after ',' at pos %lu", pos));
			    }
			    GeoLoop holeLoop;
			    auto holeVerts = duckdb::make_shared_ptr<std::vector<LatLng>>();
			    pos = readGeoLoop(wkt, pos, holeVerts, holeLoop);
			    holeLoops.push_back(holeLoop);
			    holeStorage.push_back(holeVerts);
		    }
		    if (wkt[pos] != ')') {
			    throw InvalidInputException(StringUtil::Format(
			        "Invalid WKT: expected a hole loop ',' or final ')' at pos %lu", pos));
		    }

		    polygon.numHoles = holeLoops.size();
		    polygon.holes = holeLoops.data();

		    // Ask H3 for an upper bound, then fill a buffer of that size.
		    int64_t maxCells = 0;
		    if (maxPolygonToCellsSizeExperimental(&polygon, res, mode, &maxCells)) {
			    return noCells;
		    }
		    std::vector<H3Index> cells(maxCells);
		    if (polygonToCellsExperimental(&polygon, res, mode, maxCells, cells.data())) {
			    return noCells;
		    }

		    // Unused slots in the buffer are H3_NULL and are skipped.
		    uint64_t emitted = 0;
		    for (const H3Index cell : cells) {
			    if (cell == H3_NULL) {
				    continue;
			    }
			    ListVector::PushBack(result, Value::UBIGINT(cell));
			    ++emitted;
		    }
		    return list_entry_t(listStart, emitted);
	    });
}

static void PolygonWktToCellsExperimentalVarcharFunction(DataChunk &args,
                                                         ExpressionState &state,
                                                         Vector &result) {
	// Scalar function body: (WKT polygon text, containment mode name, resolution)
	// -> list of H3 cell indexes formatted as lowercase hexadecimal VARCHAR.
	//
	// TODO: Note this function is not fully noexcept -- some invalid WKT strings
	// will throw, others will return empty lists.
	TernaryExecutor::Execute<string_t, string_t, int, list_entry_t>(
	    args.data[0], args.data[1], args.data[2], result, args.size(),
	    [&](string_t input, string_t flagsStr, int res) {
		    GeoPolygon polygon;
		    int32_t flags = 0;

		    std::string str = input.GetString();

		    // Each row's list starts at the current end of the shared child vector.
		    uint64_t offset = ListVector::GetListSize(result);

		    // TODO: Make flags easier to work with
		    if (flagsStr == "CONTAINMENT_CENTER") {
			    flags = 0;
		    } else if (flagsStr == "CONTAINMENT_FULL") {
			    flags = 1;
		    } else if (flagsStr == "CONTAINMENT_OVERLAPPING") {
			    flags = 2;
		    } else {
			    // Invalid flags input
			    return list_entry_t(offset, 0);
		    }

		    // The text must begin with the POLYGON keyword.
		    if (str.rfind(POLYGON, 0) != 0) {
			    return list_entry_t(offset, 0);
		    }

		    size_t strIndex = POLYGON.length();
		    strIndex = whitespace(str, strIndex);

		    // "POLYGON EMPTY" yields no cells.
		    if (str.rfind(EMPTY, strIndex) == strIndex) {
			    return list_entry_t(offset, 0);
		    }

		    // Anything other than an opening parenthesis here is treated as
		    // "no polygon" rather than as an error.
		    if (str[strIndex] != '(') {
			    return list_entry_t(offset, 0);
		    }
		    strIndex++;
		    strIndex = whitespace(str, strIndex);

		    // Outer ring. The shared vector is kept in scope for the rest of the
		    // lambda; presumably it backs the GeoLoop filled in by readGeoLoop.
		    auto outerVerts = duckdb::make_shared_ptr<std::vector<LatLng>>();
		    strIndex = readGeoLoop(str, strIndex, outerVerts, polygon.geoloop);

		    // Zero or more comma-separated hole rings.
		    std::vector<GeoLoop> holes;
		    std::vector<duckdb::shared_ptr<std::vector<LatLng>>> holesVerts;
		    while (strIndex < str.length() && str[strIndex] == ',') {
			    strIndex++;
			    strIndex = whitespace(str, strIndex);
			    if (str[strIndex] != '(') {
				    throw InvalidInputException(StringUtil::Format(
				        "Invalid WKT: expected a hole loop '(' after ',' at pos %lu",
				        strIndex));
			    }
			    GeoLoop hole;
			    auto verts = duckdb::make_shared_ptr<std::vector<LatLng>>();
			    strIndex = readGeoLoop(str, strIndex, verts, hole);
			    holes.push_back(hole);
			    holesVerts.push_back(verts);
		    }
		    if (str[strIndex] != ')') {
			    throw InvalidInputException(StringUtil::Format(
			        "Invalid WKT: expected a hole loop ',' or final ')' at pos %lu",
			        strIndex));
		    }

		    polygon.numHoles = holes.size();
		    polygon.holes = holes.data();

		    // Ask H3 for an upper bound, then fill a buffer of that size.
		    int64_t numCells = 0;
		    H3Error err = maxPolygonToCellsSizeExperimental(&polygon, res, flags, &numCells);
		    if (err) {
			    return list_entry_t(offset, 0);
		    }
		    std::vector<H3Index> out(numCells);
		    H3Error err2 = polygonToCellsExperimental(&polygon, res, flags, numCells, out.data());
		    if (err2) {
			    return list_entry_t(offset, 0);
		    }

		    // Unused slots in the buffer are H3_NULL and are skipped.
		    uint64_t actual = 0;
		    for (H3Index outCell : out) {
			    if (outCell != H3_NULL) {
				    // Push an owning Value built from the formatted string. The
				    // previous code wrapped a strdup()'d buffer in a string_t and
				    // never freed it, leaking one allocation per emitted cell.
				    std::string cellHex = StringUtil::Format("%llx", outCell);
				    ListVector::PushBack(result, Value(cellHex));
				    actual++;
			    }
		    }
		    return list_entry_t(offset, actual);
	    });
}

CreateScalarFunctionInfo H3Functions::GetCellsToMultiPolygonWktFunction() {
ScalarFunctionSet funcs("h3_cells_to_multi_polygon_wkt");
funcs.AddFunction(ScalarFunction(
Expand Down Expand Up @@ -393,4 +586,18 @@ CreateScalarFunctionInfo H3Functions::GetPolygonWktToCellsVarcharFunction() {
PolygonWktToCellsVarcharFunction));
}

CreateScalarFunctionInfo H3Functions::GetPolygonWktToCellsExperimentalFunction() {
	// Registers h3_polygon_wkt_to_cells_experimental with the signature
	// (VARCHAR, VARCHAR, INTEGER) -> LIST(UBIGINT).
	ScalarFunction func("h3_polygon_wkt_to_cells_experimental",
	                    {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER},
	                    LogicalType::LIST(LogicalType::UBIGINT),
	                    PolygonWktToCellsExperimentalFunction);
	return CreateScalarFunctionInfo(std::move(func));
}

CreateScalarFunctionInfo H3Functions::GetPolygonWktToCellsExperimentalVarcharFunction() {
	// Registers h3_polygon_wkt_to_cells_experimental_string with the signature
	// (VARCHAR, VARCHAR, INTEGER) -> LIST(VARCHAR).
	ScalarFunction func("h3_polygon_wkt_to_cells_experimental_string",
	                    {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER},
	                    LogicalType::LIST(LogicalType::VARCHAR),
	                    PolygonWktToCellsExperimentalVarcharFunction);
	return CreateScalarFunctionInfo(std::move(func));
}

} // namespace duckdb
4 changes: 4 additions & 0 deletions src/include/h3_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ class H3Functions {
functions.push_back(GetCellsToMultiPolygonWktFunction());
functions.push_back(GetPolygonWktToCellsFunction());
functions.push_back(GetPolygonWktToCellsVarcharFunction());
functions.push_back(GetPolygonWktToCellsExperimentalFunction());
functions.push_back(GetPolygonWktToCellsExperimentalVarcharFunction());

return functions;
}
Expand Down Expand Up @@ -168,6 +170,8 @@ class H3Functions {
static CreateScalarFunctionInfo GetCellsToMultiPolygonWktFunction();
static CreateScalarFunctionInfo GetPolygonWktToCellsFunction();
static CreateScalarFunctionInfo GetPolygonWktToCellsVarcharFunction();
static CreateScalarFunctionInfo GetPolygonWktToCellsExperimentalFunction();
static CreateScalarFunctionInfo GetPolygonWktToCellsExperimentalVarcharFunction();

static void AddAliases(vector<string> names, CreateScalarFunctionInfo fun,
vector<CreateScalarFunctionInfo> &functions) {
Expand Down

0 comments on commit 264434f

Please sign in to comment.