Skip to content

Commit

Permalink
[SEDONA-673] Fix issue when loading geoparquet file without bbox meta…
Browse files Browse the repository at this point in the history
…data. (#1681)

* Fix issue when loading geoparquet file.

* Fix issue when loading geoparquet file.
  • Loading branch information
Imbruced authored Nov 15, 2024
1 parent ae3b398 commit 59aa504
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ object GeoParquetSpatialFilter {
def evaluate(columns: Map[String, GeometryFieldMetaData]): Boolean = {
columns.get(columnName).forall { column =>
val bbox = column.bbox
if (bbox.isEmpty) {
return true
}

val columnEnvelope =
queryWindow.getFactory.toGeometry(new Envelope(bbox(0), bbox(2), bbox(1), bbox(3)))
predicateType match {
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
// Path, relative to resourceFolder, of the legacy parquet fixture
// (nested columns, snappy-compressed per its file name).
val legacyparquetdatalocation: String =
resourceFolder + "parquet/legacy-parquet-nested-columns.snappy.parquet"
// Directory under resourceFolder that afterAll() deletes; presumably where the suite
// writes its GeoParquet output — confirm against the write tests.
val geoparquetoutputlocation: String = resourceFolder + "geoparquet/geoparquet_output/"
// Overture sample file loaded by the "loading one file geoparquet and filtering" test.
// NOTE(review): presumably its GeoParquet metadata carries no bbox — confirm against the fixture.
val overtureBBOX: String = resourceFolder + "geoparquet/overture/bbox.geoparquet"

// Suite-level teardown: deletes the GeoParquet output directory.
override def afterAll(): Unit = FileUtils.deleteDirectory(new File(geoparquetoutputlocation))

Expand Down Expand Up @@ -732,6 +733,18 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
}
}

describe("loading one file geoparquet and filtering") {
  // Reads the Overture fixture through the geoparquet source and applies a spatial
  // predicate; the read must complete and yield the expected row count.
  it("should not fail when bbox is not available in geoparquet metadata") {
    val spatialPredicate = "ST_Intersects(geometry, ST_PolygonFromEnvelope(0, 0, 1, 1))"
    val overtureFrame = sparkSession.read.format("geoparquet").load(overtureBBOX)
    val matchingRows = overtureFrame.where(spatialPredicate).count()

    assert(matchingRows == 9)
  }
}

def validateGeoParquetMetadata(path: String)(body: org.json4s.JValue => Unit): Unit = {
val parquetFiles = new File(path).listFiles().filter(_.getName.endsWith(".parquet"))
parquetFiles.foreach { filePath =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
// Path, relative to resourceFolder, of the legacy parquet fixture
// (nested columns, snappy-compressed per its file name).
val legacyparquetdatalocation: String =
resourceFolder + "parquet/legacy-parquet-nested-columns.snappy.parquet"
// Directory under resourceFolder that afterAll() deletes; presumably where the suite
// writes its GeoParquet output — confirm against the write tests.
val geoparquetoutputlocation: String = resourceFolder + "geoparquet/geoparquet_output/"
// Overture sample file loaded by the "loading one file geoparquet and filtering" test.
// NOTE(review): presumably its GeoParquet metadata carries no bbox — confirm against the fixture.
val overtureBBOX: String = resourceFolder + "geoparquet/overture/bbox.geoparquet"

// Suite-level teardown: deletes the GeoParquet output directory.
override def afterAll(): Unit = FileUtils.deleteDirectory(new File(geoparquetoutputlocation))

Expand Down Expand Up @@ -758,6 +759,18 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
}
}

describe("loading one file geoparquet and filtering") {
  // Reads the Overture fixture through the geoparquet source and applies a spatial
  // predicate; the read must complete and yield the expected row count.
  it("should not fail when bbox is not available in geoparquet metadata") {
    val spatialPredicate = "ST_Intersects(geometry, ST_PolygonFromEnvelope(0, 0, 1, 1))"
    val overtureFrame = sparkSession.read.format("geoparquet").load(overtureBBOX)
    val matchingRows = overtureFrame.where(spatialPredicate).count()

    assert(matchingRows == 9)
  }
}

def validateGeoParquetMetadata(path: String)(body: org.json4s.JValue => Unit): Unit = {
val parquetFiles = new File(path).listFiles().filter(_.getName.endsWith(".parquet"))
parquetFiles.foreach { filePath =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
// Path, relative to resourceFolder, of the legacy parquet fixture
// (nested columns, snappy-compressed per its file name).
val legacyparquetdatalocation: String =
resourceFolder + "parquet/legacy-parquet-nested-columns.snappy.parquet"
// Directory under resourceFolder that afterAll() deletes; presumably where the suite
// writes its GeoParquet output — confirm against the write tests.
val geoparquetoutputlocation: String = resourceFolder + "geoparquet/geoparquet_output/"

// Overture sample file loaded by the "loading one file geoparquet and filtering" test.
// NOTE(review): presumably its GeoParquet metadata carries no bbox — confirm against the fixture.
val overtureBBOX: String = resourceFolder + "geoparquet/overture/bbox.geoparquet"
// Suite-level teardown: deletes the GeoParquet output directory.
override def afterAll(): Unit = FileUtils.deleteDirectory(new File(geoparquetoutputlocation))

describe("GeoParquet IO tests") {
Expand Down Expand Up @@ -761,6 +761,18 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
}
}

describe("loading one file geoparquet and filtering") {
  // Reads the Overture fixture through the geoparquet source and applies a spatial
  // predicate; the read must complete and yield the expected row count.
  it("should not fail when bbox is not available in geoparquet metadata") {
    val spatialPredicate = "ST_Intersects(geometry, ST_PolygonFromEnvelope(0, 0, 1, 1))"
    val overtureFrame = sparkSession.read.format("geoparquet").load(overtureBBOX)
    val matchingRows = overtureFrame.where(spatialPredicate).count()

    assert(matchingRows == 9)
  }
}

def validateGeoParquetMetadata(path: String)(body: org.json4s.JValue => Unit): Unit = {
val parquetFiles = new File(path).listFiles().filter(_.getName.endsWith(".parquet"))
parquetFiles.foreach { filePath =>
Expand Down

0 comments on commit 59aa504

Please sign in to comment.