From fcd9a480d16d2cd874f4fefae2f3cc24f75c4768 Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Tue, 8 Aug 2023 01:10:38 -0700 Subject: [PATCH 01/11] Drop rarely used constructors in Java --- .../sedona/core/spatialRDD/LineStringRDD.java | 392 +----------------- .../sedona/core/spatialRDD/PointRDD.java | 379 +---------------- .../sedona/core/spatialRDD/PolygonRDD.java | 384 +---------------- .../sedona/core/spatialRDD/RectangleRDD.java | 388 ----------------- .../sedona/core/io/EarthdataHDFTest.java | 4 +- .../JoinQueryCorrectnessChecker.java | 48 +-- .../core/spatialOperator/JoinTestBase.java | 8 +- .../spatialOperator/LineStringKnnTest.java | 2 +- .../spatialOperator/LineStringRangeTest.java | 6 +- .../core/spatialOperator/PointKnnTest.java | 2 +- .../core/spatialOperator/PointRangeTest.java | 2 +- .../core/spatialOperator/PolygonKnnTest.java | 2 +- .../spatialOperator/PolygonRangeTest.java | 6 +- .../spatialOperator/RectangleKnnTest.java | 2 +- .../spatialOperator/RectangleRangeTest.java | 8 +- .../core/spatialRDD/GeometryOpTest.java | 2 +- .../core/spatialRDD/LineStringRDDTest.java | 12 +- .../sedona/core/spatialRDD/PointRDDTest.java | 10 +- .../core/spatialRDD/PolygonRDDTest.java | 18 +- .../core/spatialRDD/RectangleRDDTest.java | 8 +- .../core/spatialRDD/SpatialRDDWriterTest.java | 16 +- .../core/utils/CRSTransformationTest.java | 148 +------ .../org/apache/sedona/core/scalaTest.scala | 19 +- 23 files changed, 96 insertions(+), 1770 deletions(-) diff --git a/core/src/main/java/org/apache/sedona/core/spatialRDD/LineStringRDD.java b/core/src/main/java/org/apache/sedona/core/spatialRDD/LineStringRDD.java index 48650ad28b..4131569a0d 100644 --- a/core/src/main/java/org/apache/sedona/core/spatialRDD/LineStringRDD.java +++ b/core/src/main/java/org/apache/sedona/core/spatialRDD/LineStringRDD.java @@ -25,8 +25,6 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.storage.StorageLevel; -import org.locationtech.jts.geom.Envelope; import org.locationtech.jts.geom.LineString; // TODO: Auto-generated Javadoc @@ -52,35 +50,6 @@ public LineStringRDD(JavaRDD rawSpatialRDD) this.rawSpatialRDD = rawSpatialRDD; } - /** - * Instantiates a new line string RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public LineStringRDD(JavaRDD rawSpatialRDD, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.rawSpatialRDD = rawSpatialRDD; - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param startOffset the start offset - * @param endOffset the end offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, FileDataSplitter splitter, boolean carryInputData, Integer partitions) - { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, partitions, null, null, null); - } - /** * Instantiates a new line string RDD. * @@ -93,7 +62,7 @@ public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Intege */ public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, FileDataSplitter splitter, boolean carryInputData) { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, null, null, null, null); + this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, null); } /** @@ -107,7 +76,7 @@ public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Intege */ public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Integer partitions) { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, partitions, null, null, null); + this(sparkContext, InputLocation, null, null, splitter, carryInputData, partitions); } /** @@ -120,7 +89,7 @@ public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, FileDa */ public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData) { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, null, null, null, null); + this(sparkContext, InputLocation, null, null, splitter, carryInputData); } /** @@ -148,265 +117,6 @@ public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, FlatMa this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); } - /** - * Instantiates a new line string RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public LineStringRDD(JavaRDD rawSpatialRDD, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.rawSpatialRDD = rawSpatialRDD; - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new line string RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public LineStringRDD(JavaRDD rawSpatialRDD, String sourceEpsgCRSCode, String targetEpsgCode, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.rawSpatialRDD = rawSpatialRDD; - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param startOffset the start offset - * @param endOffset the end offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, FileDataSplitter splitter, boolean carryInputData, Integer partitions, Envelope datasetBoundary, Integer approximateTotalCount) - { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, partitions, null, null, null); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param startOffset the start offset - * @param endOffset the end offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, FileDataSplitter splitter, boolean carryInputData, Envelope datasetBoundary, Integer approximateTotalCount) - { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, null, null, null, null); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Integer partitions, Envelope datasetBoundary, Integer approximateTotalCount) - { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, partitions, null, null, null); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Envelope datasetBoundary, Integer approximateTotalCount) - { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, null, null, null, null); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param partitions the partitions - * @param userSuppliedMapper the user supplied mapper - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer partitions, FlatMapFunction userSuppliedMapper, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(userSuppliedMapper)); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param userSuppliedMapper the user supplied mapper - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunction userSuppliedMapper, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new line string RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param newLevel the new level - */ - public LineStringRDD(JavaRDD rawSpatialRDD, StorageLevel newLevel) - { - this.rawSpatialRDD = rawSpatialRDD; - this.analyze(newLevel); - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param startOffset the start offset - * @param endOffset the end offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, - FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel) - { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, partitions, newLevel, null, null); - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param startOffset the start offset - * @param endOffset the end offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, - FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel) - { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, null, newLevel, null, null); - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel) - { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, partitions, newLevel, null, null); - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, - FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel) - { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, null, newLevel, null, null); - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param partitions the partitions - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer partitions, FlatMapFunction userSuppliedMapper, StorageLevel newLevel) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(userSuppliedMapper)); - this.analyze(newLevel); - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunction userSuppliedMapper, StorageLevel newLevel) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); - this.analyze(newLevel); - } - - /** - * Instantiates a new line string RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public LineStringRDD(JavaRDD rawSpatialRDD, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.rawSpatialRDD = rawSpatialRDD; - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } - /** * Instantiates a new line string RDD. * @@ -417,12 +127,8 @@ public LineStringRDD(JavaRDD rawSpatialRDD, StorageLevel newLevel, S * @param splitter the splitter * @param carryInputData the carry input data * @param partitions the partitions - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, - FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) + public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, FileDataSplitter splitter, boolean carryInputData, Integer partitions) { JavaRDD rawTextRDD = partitions != null ? sparkContext.textFile(InputLocation, partitions) : sparkContext.textFile(InputLocation); if (startOffset != null && endOffset != null) { @@ -431,96 +137,6 @@ public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Intege else { this.setRawSpatialRDD(rawTextRDD.mapPartitions(new LineStringFormatMapper(splitter, carryInputData))); } - if (sourceEpsgCRSCode != null && targetEpsgCode != null) { this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode);} - if (newLevel != null) { this.analyze(newLevel);} if (splitter.equals(FileDataSplitter.GEOJSON)) { this.fieldNames = FormatMapper.readGeoJsonPropertyNames(rawTextRDD.take(1).get(0).toString()); } } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param startOffset the start offset - * @param endOffset the end offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, - FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, null, newLevel, sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, partitions, newLevel, sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, - FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, null, newLevel, sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param partitions the partitions - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer partitions, FlatMapFunction userSuppliedMapper, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(userSuppliedMapper)); - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } - - /** - * Instantiates a new line string RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunction userSuppliedMapper, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } } diff --git a/core/src/main/java/org/apache/sedona/core/spatialRDD/PointRDD.java b/core/src/main/java/org/apache/sedona/core/spatialRDD/PointRDD.java index ca4c560c1d..9e5bc9d693 100644 --- a/core/src/main/java/org/apache/sedona/core/spatialRDD/PointRDD.java +++ b/core/src/main/java/org/apache/sedona/core/spatialRDD/PointRDD.java @@ -54,34 +54,6 @@ public PointRDD(JavaRDD rawSpatialRDD) this.rawSpatialRDD = rawSpatialRDD; } - /** - * Instantiates a new point RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PointRDD(JavaRDD rawSpatialRDD, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.rawSpatialRDD = rawSpatialRDD; - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param Offset the offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter, boolean carryInputData, Integer partitions) - { - this(sparkContext, InputLocation, Offset, splitter, carryInputData, partitions, null, null, null); - } - /** * Instantiates a new point RDD. * @@ -93,7 +65,7 @@ public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Off */ public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter, boolean carryInputData) { - this(sparkContext, InputLocation, Offset, splitter, carryInputData, null, null, null, null); + this(sparkContext, InputLocation, Offset, splitter, carryInputData, null); } /** @@ -107,7 +79,7 @@ public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Off */ public PointRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Integer partitions) { - this(sparkContext, InputLocation, null, splitter, carryInputData, partitions, null, null, null); + this(sparkContext, InputLocation, null, splitter, carryInputData, partitions); } /** @@ -120,7 +92,7 @@ public PointRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSpl */ public PointRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData) { - this(sparkContext, InputLocation, null, splitter, carryInputData, null, null, null, null); + this(sparkContext, InputLocation, null, splitter, carryInputData, null); } /** @@ -148,258 +120,6 @@ public PointRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunc this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); } - /** - * Instantiates a new point RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PointRDD(JavaRDD rawSpatialRDD, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.rawSpatialRDD = rawSpatialRDD; - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new point RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PointRDD(JavaRDD rawSpatialRDD, String sourceEpsgCRSCode, String targetEpsgCode, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.rawSpatialRDD = rawSpatialRDD; - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param Offset the offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter, boolean carryInputData, Integer partitions, Envelope datasetBoundary, Integer approximateTotalCount) - { - this(sparkContext, InputLocation, Offset, splitter, carryInputData, partitions, null, null, null); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param Offset the offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter, boolean carryInputData, Envelope datasetBoundary, Integer approximateTotalCount) - { - this(sparkContext, InputLocation, Offset, splitter, carryInputData, null, null, null, null); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Integer partitions, Envelope datasetBoundary, Integer approximateTotalCount) - { - this(sparkContext, InputLocation, null, splitter, carryInputData, partitions, null, null, null); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Envelope datasetBoundary, Integer approximateTotalCount) - { - this(sparkContext, InputLocation, null, splitter, carryInputData, null, null, null, null); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param partitions the partitions - * @param userSuppliedMapper the user supplied mapper - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer partitions, FlatMapFunction userSuppliedMapper, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(userSuppliedMapper)); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param userSuppliedMapper the user supplied mapper - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunction userSuppliedMapper, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new point RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param newLevel the new level - */ - public PointRDD(JavaRDD rawSpatialRDD, StorageLevel newLevel) - { - this.rawSpatialRDD = rawSpatialRDD; - this.analyze(newLevel); - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param Offset the offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel) - { - this(sparkContext, InputLocation, Offset, splitter, carryInputData, partitions, newLevel, null, null); - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param Offset the offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel) - { - this(sparkContext, InputLocation, Offset, splitter, carryInputData, null, newLevel, null, null); - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel) - { - this(sparkContext, InputLocation, null, splitter, carryInputData, partitions, newLevel, null, null); - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel) - { - this(sparkContext, InputLocation, null, splitter, carryInputData, null, newLevel, null, null); - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param partitions the partitions - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer partitions, FlatMapFunction userSuppliedMapper, StorageLevel newLevel) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(userSuppliedMapper)); - this.analyze(newLevel); - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunction userSuppliedMapper, StorageLevel newLevel) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); - this.analyze(newLevel); - } - - /** - * Instantiates a new point RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PointRDD(JavaRDD rawSpatialRDD, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.rawSpatialRDD = rawSpatialRDD; - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } - /** * Instantiates a new point RDD. * @@ -409,103 +129,12 @@ public PointRDD(JavaRDD rawSpatialRDD, StorageLevel newLevel, String sour * @param splitter the splitter * @param carryInputData the carry input data * @param partitions the partitions - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter, - boolean carryInputData, Integer partitions, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) + public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter, boolean carryInputData, Integer partitions) { JavaRDD rawTextRDD = partitions != null ? sparkContext.textFile(InputLocation, partitions) : sparkContext.textFile(InputLocation); if (Offset != null) {this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PointFormatMapper(Offset, splitter, carryInputData)));} else {this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PointFormatMapper(splitter, carryInputData)));} - if (sourceEpsgCRSCode != null && targetEpsgCode != null) { this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode);} - if (newLevel != null) { this.analyze(newLevel);} if (splitter.equals(FileDataSplitter.GEOJSON)) { this.fieldNames = FormatMapper.readGeoJsonPropertyNames(rawTextRDD.take(1).get(0).toString()); } } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param Offset the offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter, - boolean carryInputData, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this(sparkContext, InputLocation, Offset, splitter, carryInputData, null, newLevel, sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, - Integer partitions, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this(sparkContext, InputLocation, null, splitter, carryInputData, partitions, newLevel, sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, - StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this(sparkContext, InputLocation, null, splitter, carryInputData, null, newLevel, sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param partitions the partitions - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer partitions, FlatMapFunction userSuppliedMapper, - StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this(sparkContext, InputLocation, null, null, false, partitions, newLevel, sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new point RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PointRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunction userSuppliedMapper, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this(sparkContext, InputLocation, null, null, false, null, newLevel, sourceEpsgCRSCode, targetEpsgCode); - } } diff --git a/core/src/main/java/org/apache/sedona/core/spatialRDD/PolygonRDD.java b/core/src/main/java/org/apache/sedona/core/spatialRDD/PolygonRDD.java index c85c510c87..097d7c1ab9 100644 --- a/core/src/main/java/org/apache/sedona/core/spatialRDD/PolygonRDD.java +++ b/core/src/main/java/org/apache/sedona/core/spatialRDD/PolygonRDD.java @@ -62,35 +62,6 @@ public PolygonRDD(JavaRDD rawSpatialRDD) this.rawSpatialRDD = rawSpatialRDD; } - /** - * Instantiates a new polygon RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PolygonRDD(JavaRDD rawSpatialRDD, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.rawSpatialRDD = rawSpatialRDD; - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param startOffset the start offset - * @param endOffset the end offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, FileDataSplitter splitter, boolean carryInputData, Integer partitions) - { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, partitions, null, null, null); - } - /** * Instantiates a new polygon RDD. * @@ -103,7 +74,7 @@ public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer s */ public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, FileDataSplitter splitter, boolean carryInputData) { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, null, null, null, null); + this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, null); } /** @@ -117,7 +88,7 @@ public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer s */ public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Integer partitions) { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, partitions, null, null, null); + this(sparkContext, InputLocation, null, null, splitter, carryInputData, partitions); } /** @@ -130,7 +101,7 @@ public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, FileDataS */ public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData) { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, null, null, null, null); + this(sparkContext, InputLocation, null, null, splitter, carryInputData, null); } /** @@ -158,258 +129,6 @@ public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFu this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); } - /** - * Instantiates a new polygon RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PolygonRDD(JavaRDD rawSpatialRDD, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.rawSpatialRDD = rawSpatialRDD; - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new polygon RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PolygonRDD(JavaRDD rawSpatialRDD, String sourceEpsgCRSCode, String targetEpsgCode, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.rawSpatialRDD = rawSpatialRDD; - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param startOffset the start offset - * @param endOffset the end offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, FileDataSplitter splitter, boolean carryInputData, Integer partitions, Envelope datasetBoundary, Integer approximateTotalCount) - { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, partitions, null, null, null); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param startOffset the start offset - * @param endOffset the end offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, FileDataSplitter splitter, boolean carryInputData, Envelope datasetBoundary, Integer approximateTotalCount) - { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, null, null, null, null); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Integer partitions, Envelope datasetBoundary, Integer approximateTotalCount) - { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, partitions, null, null, null); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Envelope datasetBoundary, Integer approximateTotalCount) - { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, null, null, null, null); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param partitions the partitions - * @param userSuppliedMapper the user supplied mapper - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer partitions, FlatMapFunction userSuppliedMapper, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(userSuppliedMapper)); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param userSuppliedMapper the user supplied mapper - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunction userSuppliedMapper, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new polygon RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param newLevel the new level - */ - public PolygonRDD(JavaRDD rawSpatialRDD, StorageLevel newLevel) - { - this.rawSpatialRDD = rawSpatialRDD; - this.analyze(newLevel); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param startOffset the start offset - * @param endOffset the end offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, - FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel) - { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, partitions, newLevel, null, null); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param startOffset the start offset - * @param endOffset the end offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, - FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel) - { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, null, newLevel, null, null); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, - FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel) - { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, partitions, newLevel, null, null); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, - FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel) - { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, null, newLevel, null, null); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param partitions the partitions - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer partitions, FlatMapFunction userSuppliedMapper, StorageLevel newLevel) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(userSuppliedMapper)); - this.analyze(newLevel); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunction userSuppliedMapper, StorageLevel newLevel) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); - this.analyze(newLevel); - } - - /** - * Instantiates a new polygon RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PolygonRDD(JavaRDD rawSpatialRDD, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.rawSpatialRDD = rawSpatialRDD; - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } - /** * Instantiates a new polygon RDD. * @@ -420,12 +139,8 @@ public PolygonRDD(JavaRDD rawSpatialRDD, StorageLevel newLevel, String * @param splitter the splitter * @param carryInputData the carry input data * @param partitions the partitions - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, - FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) + public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, FileDataSplitter splitter, boolean carryInputData, Integer partitions) { JavaRDD rawTextRDD = partitions != null ? sparkContext.textFile(InputLocation, partitions) : sparkContext.textFile(InputLocation); if (startOffset != null && endOffset != null) { @@ -434,100 +149,9 @@ public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer s else { this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PolygonFormatMapper(splitter, carryInputData))); } - if (sourceEpsgCRSCode != null && targetEpsgCode != null) { this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode);} - if (newLevel != null) { this.analyze(newLevel);} if (splitter.equals(FileDataSplitter.GEOJSON)) { this.fieldNames = FormatMapper.readGeoJsonPropertyNames(rawTextRDD.take(1).get(0).toString()); } } - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param startOffset the start offset - * @param endOffset the end offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset, - FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this(sparkContext, InputLocation, startOffset, endOffset, splitter, carryInputData, null, newLevel, sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, - FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, partitions, newLevel, sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, - FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this(sparkContext, InputLocation, null, null, splitter, carryInputData, null, newLevel, sourceEpsgCRSCode, targetEpsgCode); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param partitions the partitions - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer partitions, FlatMapFunction userSuppliedMapper, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(userSuppliedMapper)); - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } - - /** - * Instantiates a new polygon RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunction userSuppliedMapper, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } - /** * Polygon union. * diff --git a/core/src/main/java/org/apache/sedona/core/spatialRDD/RectangleRDD.java b/core/src/main/java/org/apache/sedona/core/spatialRDD/RectangleRDD.java index 2e247a3710..253e7e6d97 100644 --- a/core/src/main/java/org/apache/sedona/core/spatialRDD/RectangleRDD.java +++ b/core/src/main/java/org/apache/sedona/core/spatialRDD/RectangleRDD.java @@ -52,19 +52,6 @@ public RectangleRDD(JavaRDD rawSpatialRDD) this.rawSpatialRDD = rawSpatialRDD; } - /** - * Instantiates a new rectangle RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public RectangleRDD(JavaRDD rawSpatialRDD, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.rawSpatialRDD = rawSpatialRDD; - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - } - /** * Instantiates a new rectangle RDD. * @@ -145,379 +132,4 @@ public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, FlatMap { this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); } - - /** - * Instantiates a new rectangle RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public RectangleRDD(JavaRDD rawSpatialRDD, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.rawSpatialRDD = rawSpatialRDD; - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new rectangle RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public RectangleRDD(JavaRDD rawSpatialRDD, String sourceEpsgCRSCode, String targetEpsgCode, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.rawSpatialRDD = rawSpatialRDD; - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param Offset the offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter, boolean carryInputData, Integer partitions, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(new RectangleFormatMapper(Offset, splitter, carryInputData))); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param Offset the offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter, boolean carryInputData, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(new RectangleFormatMapper(Offset, splitter, carryInputData))); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Integer partitions, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(new RectangleFormatMapper(splitter, carryInputData))); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, FileDataSplitter splitter, boolean carryInputData, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(new RectangleFormatMapper(splitter, carryInputData))); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param partitions the partitions - * @param userSuppliedMapper the user supplied mapper - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, Integer partitions, FlatMapFunction userSuppliedMapper, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(userSuppliedMapper)); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param userSuppliedMapper the user supplied mapper - * @param datasetBoundary the dataset boundary - * @param approximateTotalCount the approximate total count - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunction userSuppliedMapper, Envelope datasetBoundary, Integer approximateTotalCount) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); - this.boundaryEnvelope = datasetBoundary; - this.approximateTotalCount = approximateTotalCount; - } - - /** - * Instantiates a new rectangle RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param newLevel the new level - */ - public RectangleRDD(JavaRDD rawSpatialRDD, StorageLevel newLevel) - { - this.rawSpatialRDD = rawSpatialRDD; - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param Offset the offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, - FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(new RectangleFormatMapper(Offset, splitter, carryInputData))); - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param Offset the offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, - FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(new RectangleFormatMapper(Offset, splitter, carryInputData))); - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, - FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(new RectangleFormatMapper(splitter, carryInputData))); - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, - FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(new RectangleFormatMapper(splitter, carryInputData))); - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param partitions the partitions - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, Integer partitions, FlatMapFunction userSuppliedMapper, StorageLevel newLevel) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(userSuppliedMapper)); - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunction userSuppliedMapper, StorageLevel newLevel) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param rawSpatialRDD the raw spatial RDD - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public RectangleRDD(JavaRDD rawSpatialRDD, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.rawSpatialRDD = rawSpatialRDD; - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param Offset the offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, - FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(new RectangleFormatMapper(splitter, carryInputData))); - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param Offset the offset - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, - FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(new RectangleFormatMapper(Offset, splitter, carryInputData))); - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param partitions the partitions - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, - FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(new RectangleFormatMapper(splitter, carryInputData))); - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param splitter the splitter - * @param carryInputData the carry input data - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, - FileDataSplitter splitter, boolean carryInputData, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(new RectangleFormatMapper(splitter, carryInputData))); - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param partitions the partitions - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, Integer partitions, FlatMapFunction userSuppliedMapper, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation, partitions).mapPartitions(userSuppliedMapper)); - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } - - /** - * Instantiates a new rectangle RDD. - * - * @param sparkContext the spark context - * @param InputLocation the input location - * @param userSuppliedMapper the user supplied mapper - * @param newLevel the new level - * @param sourceEpsgCRSCode the source epsg CRS code - * @param targetEpsgCode the target epsg code - */ - public RectangleRDD(JavaSparkContext sparkContext, String InputLocation, FlatMapFunction userSuppliedMapper, StorageLevel newLevel, String sourceEpsgCRSCode, String targetEpsgCode) - { - this.setRawSpatialRDD(sparkContext.textFile(InputLocation).mapPartitions(userSuppliedMapper)); - this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode); - this.analyze(newLevel); - } } diff --git a/core/src/test/java/org/apache/sedona/core/io/EarthdataHDFTest.java b/core/src/test/java/org/apache/sedona/core/io/EarthdataHDFTest.java index b0d7c36e25..1030d55a2d 100644 --- a/core/src/test/java/org/apache/sedona/core/io/EarthdataHDFTest.java +++ b/core/src/test/java/org/apache/sedona/core/io/EarthdataHDFTest.java @@ -150,7 +150,7 @@ public void testSpatialRangeQuery() { EarthdataHDFPointMapper earthdataHDFPoint = new EarthdataHDFPointMapper(HDFincrement, HDFoffset, HDFrootGroupName, HDFDataVariableList, HDFDataVariableName, urlPrefix); - PointRDD spatialRDD = new PointRDD(sc, InputLocation, numPartitions, earthdataHDFPoint, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, numPartitions, earthdataHDFPoint); for (int i = 0; i < loopTimes; i++) { long resultSize = RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, false).count(); assert resultSize > -1; @@ -168,7 +168,7 @@ public void testSpatialRangeQueryUsingIndex() { EarthdataHDFPointMapper earthdataHDFPoint = new EarthdataHDFPointMapper(HDFincrement, HDFoffset, HDFrootGroupName, HDFDataVariableList, HDFDataVariableName, urlPrefix); - PointRDD spatialRDD = new PointRDD(sc, InputLocation, numPartitions, earthdataHDFPoint, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, numPartitions, earthdataHDFPoint); spatialRDD.buildIndex(IndexType.RTREE, false); for (int i = 0; i < loopTimes; i++) { long resultSize = RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, true).count(); diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/JoinQueryCorrectnessChecker.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/JoinQueryCorrectnessChecker.java index 9959f31c55..11b5fe5210 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/JoinQueryCorrectnessChecker.java +++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/JoinQueryCorrectnessChecker.java @@ -242,8 +242,8 @@ private void prepareRDDs(SpatialRDD public void testInsidePointJoinCorrectness() throws Exception { - PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet), StorageLevel.MEMORY_ONLY()); - PointRDD objectRDD = new PointRDD(sc.parallelize(testInsidePointSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet)); + PointRDD objectRDD = new PointRDD(sc.parallelize(testInsidePointSet)); prepareRDDs(objectRDD, windowRDD); List>> result = JoinQuery.SpatialJoinQuery(objectRDD, windowRDD, true, false).collect(); @@ -262,8 +262,8 @@ public void testInsidePointJoinCorrectness() public void testOnBoundaryPointJoinCorrectness() throws Exception { - PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet), StorageLevel.MEMORY_ONLY()); - PointRDD objectRDD = new PointRDD(sc.parallelize(testOnBoundaryPointSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet)); + PointRDD objectRDD = new PointRDD(sc.parallelize(testOnBoundaryPointSet)); prepareRDDs(objectRDD, windowRDD); List>> result = JoinQuery.SpatialJoinQuery(objectRDD, windowRDD, true, false).collect(); @@ -282,8 +282,8 @@ public void testOnBoundaryPointJoinCorrectness() public void testOutsidePointJoinCorrectness() throws Exception { - PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet), StorageLevel.MEMORY_ONLY()); - PointRDD objectRDD = new PointRDD(sc.parallelize(testOutsidePointSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet)); + PointRDD objectRDD = new PointRDD(sc.parallelize(testOutsidePointSet)); prepareRDDs(objectRDD, windowRDD); List>> result = JoinQuery.SpatialJoinQuery(objectRDD, windowRDD, true, false).collect(); @@ -302,8 +302,8 @@ public void testOutsidePointJoinCorrectness() public void testInsideLineStringJoinCorrectness() throws Exception { - PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet), StorageLevel.MEMORY_ONLY()); - LineStringRDD objectRDD = new LineStringRDD(sc.parallelize(testInsideLineStringSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet)); + LineStringRDD objectRDD = new LineStringRDD(sc.parallelize(testInsideLineStringSet)); prepareRDDs(objectRDD, windowRDD); List>> result = JoinQuery.SpatialJoinQuery(objectRDD, windowRDD, true, false).collect(); @@ -322,8 +322,8 @@ public void testInsideLineStringJoinCorrectness() public void testOverlappedLineStringJoinCorrectness() throws Exception { - PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet), StorageLevel.MEMORY_ONLY()); - LineStringRDD objectRDD = new LineStringRDD(sc.parallelize(testOverlappedLineStringSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet)); + LineStringRDD objectRDD = new LineStringRDD(sc.parallelize(testOverlappedLineStringSet)); prepareRDDs(objectRDD, windowRDD); List>> result = JoinQuery.SpatialJoinQuery(objectRDD, windowRDD, true, true).collect(); @@ -342,8 +342,8 @@ public void testOverlappedLineStringJoinCorrectness() public void testOutsideLineStringJoinCorrectness() throws Exception { - PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet), StorageLevel.MEMORY_ONLY()); - LineStringRDD objectRDD = new LineStringRDD(sc.parallelize(testOutsideLineStringSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet)); + LineStringRDD objectRDD = new LineStringRDD(sc.parallelize(testOutsideLineStringSet)); prepareRDDs(objectRDD, windowRDD); List>> result = JoinQuery.SpatialJoinQuery(objectRDD, windowRDD, true, false).collect(); @@ -362,8 +362,8 @@ public void testOutsideLineStringJoinCorrectness() public void testInsidePolygonJoinCorrectness() throws Exception { - PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet), StorageLevel.MEMORY_ONLY()); - PolygonRDD objectRDD = new PolygonRDD(sc.parallelize(testInsidePolygonSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet)); + PolygonRDD objectRDD = new PolygonRDD(sc.parallelize(testInsidePolygonSet)); prepareRDDs(objectRDD, windowRDD); List>> result = JoinQuery.SpatialJoinQuery(objectRDD, windowRDD, true, false).collect(); @@ -382,8 +382,8 @@ public void testInsidePolygonJoinCorrectness() public void testOverlappedPolygonJoinCorrectness() throws Exception { - PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet), StorageLevel.MEMORY_ONLY()); - PolygonRDD objectRDD = new PolygonRDD(sc.parallelize(testOverlappedPolygonSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD windowRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet)); + PolygonRDD objectRDD = new PolygonRDD(sc.parallelize(testOverlappedPolygonSet)); prepareRDDs(objectRDD, windowRDD); List>> result = JoinQuery.SpatialJoinQuery(objectRDD, windowRDD, true, true).collect(); @@ -426,8 +426,8 @@ private void verifyJoinResults(List>> result = JoinQuery.SpatialJoinQuery(objectRDD, windowRDD, true, false).collect(); @@ -446,9 +446,9 @@ public void testOutsidePolygonJoinCorrectness() public void testInsidePolygonDistanceJoinCorrectness() throws Exception { - PolygonRDD centerGeometryRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD centerGeometryRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet)); CircleRDD windowRDD = new CircleRDD(centerGeometryRDD, 0.1); - PolygonRDD objectRDD = new PolygonRDD(sc.parallelize(testInsidePolygonSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD objectRDD = new PolygonRDD(sc.parallelize(testInsidePolygonSet)); prepareRDDs(objectRDD, windowRDD); List>> result = JoinQuery.DistanceJoinQuery(objectRDD, windowRDD, true, false).collect(); @@ -467,9 +467,9 @@ public void testInsidePolygonDistanceJoinCorrectness() public void testOverlappedPolygonDistanceJoinCorrectness() throws Exception { - PolygonRDD centerGeometryRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD centerGeometryRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet)); CircleRDD windowRDD = new CircleRDD(centerGeometryRDD, 0.1); - PolygonRDD objectRDD = new PolygonRDD(sc.parallelize(testOverlappedPolygonSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD objectRDD = new PolygonRDD(sc.parallelize(testOverlappedPolygonSet)); prepareRDDs(objectRDD, windowRDD); List>> result = JoinQuery.DistanceJoinQuery(objectRDD, windowRDD, true, true).collect(); @@ -488,9 +488,9 @@ public void testOverlappedPolygonDistanceJoinCorrectness() public void testOutsidePolygonDistanceJoinCorrectness() throws Exception { - PolygonRDD centerGeometryRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD centerGeometryRDD = new PolygonRDD(sc.parallelize(testPolygonWindowSet)); CircleRDD windowRDD = new CircleRDD(centerGeometryRDD, 0.1); - PolygonRDD objectRDD = new PolygonRDD(sc.parallelize(testOutsidePolygonSet), StorageLevel.MEMORY_ONLY()); + PolygonRDD objectRDD = new PolygonRDD(sc.parallelize(testOutsidePolygonSet)); prepareRDDs(objectRDD, windowRDD); List>> result = JoinQuery.DistanceJoinQuery(objectRDD, windowRDD, true, true).collect(); diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/JoinTestBase.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/JoinTestBase.java index 2f9dd551ec..0ee7e0c285 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/JoinTestBase.java +++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/JoinTestBase.java @@ -131,25 +131,25 @@ protected static void initialize(final String testSuiteName, final String proper protected PointRDD createPointRDD(String location) { final PointRDD rdd = new PointRDD(sc, location, 1, splitter, false, numPartitions); - return new PointRDD(rdd.rawSpatialRDD, StorageLevel.MEMORY_ONLY()); + return new PointRDD(rdd.rawSpatialRDD); } protected LineStringRDD createLineStringRDD(String location) { final LineStringRDD rdd = new LineStringRDD(sc, location, splitter, true, numPartitions); - return new LineStringRDD(rdd.rawSpatialRDD, StorageLevel.MEMORY_ONLY()); + return new LineStringRDD(rdd.rawSpatialRDD); } protected PolygonRDD createPolygonRDD(String location) { final PolygonRDD rdd = new PolygonRDD(sc, location, splitter, true, numPartitions); - return new PolygonRDD(rdd.rawSpatialRDD, StorageLevel.MEMORY_ONLY()); + return new PolygonRDD(rdd.rawSpatialRDD); } protected RectangleRDD createRectangleRDD(String location) { final RectangleRDD rdd = new RectangleRDD(sc, location, splitter, true, numPartitions); - return new RectangleRDD(rdd.rawSpatialRDD, StorageLevel.MEMORY_ONLY()); + return new RectangleRDD(rdd.rawSpatialRDD); } protected void partitionRdds(SpatialRDD queryRDD, diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringKnnTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringKnnTest.java index 155d17d6af..72920c57d9 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringKnnTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringKnnTest.java @@ -127,7 +127,7 @@ public static void onceExecutedBeforeAll() splitter = FileDataSplitter.getFileDataSplitter(prop.getProperty("splitter")); indexType = IndexType.getIndexType(prop.getProperty("indexType")); numPartitions = Integer.parseInt(prop.getProperty("numPartitions")); - loopTimes = 5; + loopTimes = 1; } catch (IOException ex) { ex.printStackTrace(); diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringRangeTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringRangeTest.java index 8f74e6c3c1..82d1df5560 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringRangeTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringRangeTest.java @@ -129,7 +129,7 @@ public static void onceExecutedBeforeAll() indexType = IndexType.getIndexType(prop.getProperty("indexType")); numPartitions = Integer.parseInt(prop.getProperty("numPartitions")); queryEnvelope = new Envelope(-85.01, -60.01, 34.01, 50.01); - loopTimes = 5; + loopTimes = 1; } catch (IOException ex) { ex.printStackTrace(); @@ -164,7 +164,7 @@ public static void TearDown() public void testSpatialRangeQuery() throws Exception { - LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, StorageLevel.MEMORY_ONLY()); + LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true); for (int i = 0; i < loopTimes; i++) { long resultSize = RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, false).count(); assertEquals(resultSize, 999); @@ -181,7 +181,7 @@ public void testSpatialRangeQuery() public void testSpatialRangeQueryUsingIndex() throws Exception { - LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, StorageLevel.MEMORY_ONLY()); + LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true); spatialRDD.buildIndex(IndexType.RTREE, false); for (int i = 0; i < loopTimes; i++) { long resultSize = RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, true).count(); diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/PointKnnTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/PointKnnTest.java index 1af99ad37f..41c3ee51fe 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/PointKnnTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/PointKnnTest.java @@ -134,7 +134,7 @@ public static void onceExecutedBeforeAll() splitter = FileDataSplitter.getFileDataSplitter(prop.getProperty("splitter")); indexType = IndexType.getIndexType(prop.getProperty("indexType")); numPartitions = Integer.parseInt(prop.getProperty("numPartitions")); - loopTimes = 5; + loopTimes = 1; } catch (IOException ex) { ex.printStackTrace(); diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/PointRangeTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/PointRangeTest.java index 93aa7de3bf..8fccc90716 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/PointRangeTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/PointRangeTest.java @@ -126,7 +126,7 @@ public static void onceExecutedBeforeAll() indexType = IndexType.getIndexType(prop.getProperty("indexType")); numPartitions = Integer.parseInt(prop.getProperty("numPartitions")); queryEnvelope = new Envelope(-90.01, -80.01, 30.01, 40.01); - loopTimes = 5; + loopTimes = 1; } catch (IOException ex) { ex.printStackTrace(); diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonKnnTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonKnnTest.java index 0d6e7b914d..2631b100d6 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonKnnTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonKnnTest.java @@ -139,7 +139,7 @@ public static void onceExecutedBeforeAll() splitter = FileDataSplitter.getFileDataSplitter(prop.getProperty("splitter")); indexType = IndexType.getIndexType(prop.getProperty("indexType")); numPartitions = Integer.parseInt(prop.getProperty("numPartitions")); - loopTimes = 5; + loopTimes = 1; } catch (IOException ex) { ex.printStackTrace(); diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonRangeTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonRangeTest.java index f526d4173b..233e8d90cf 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonRangeTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonRangeTest.java @@ -127,7 +127,7 @@ public static void onceExecutedBeforeAll() indexType = IndexType.getIndexType(prop.getProperty("indexType")); numPartitions = Integer.parseInt(prop.getProperty("numPartitions")); queryEnvelope = new Envelope(-85.01, -60.01, 34.01, 50.01); - loopTimes = 5; + loopTimes = 1; } catch (IOException ex) { ex.printStackTrace(); @@ -162,7 +162,7 @@ public static void TearDown() public void testSpatialRangeQuery() throws Exception { - PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true); for (int i = 0; i < loopTimes; i++) { long resultSize = RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, false).count(); assertEquals(resultSize, 704); @@ -179,7 +179,7 @@ public void testSpatialRangeQuery() public void testSpatialRangeQueryUsingIndex() throws Exception { - PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true); spatialRDD.buildIndex(IndexType.RTREE, false); for (int i = 0; i < loopTimes; i++) { long resultSize = RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, true).count(); diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleKnnTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleKnnTest.java index 7f5184e2b5..e43fe1c313 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleKnnTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleKnnTest.java @@ -135,7 +135,7 @@ public static void onceExecutedBeforeAll() splitter = FileDataSplitter.getFileDataSplitter(prop.getProperty("splitter")); indexType = IndexType.getIndexType(prop.getProperty("indexType")); numPartitions = Integer.parseInt(prop.getProperty("numPartitions")); - loopTimes = 5; + loopTimes = 1; } catch (IOException ex) { ex.printStackTrace(); diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleRangeTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleRangeTest.java index d5fbe7ac15..03e9f4e3fd 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleRangeTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleRangeTest.java @@ -132,7 +132,7 @@ public static void onceExecutedBeforeAll() indexType = IndexType.getIndexType(prop.getProperty("indexType")); numPartitions = Integer.parseInt(prop.getProperty("numPartitions")); queryEnvelope = new Envelope(-90.01, -80.01, 30.01, 40.01); - loopTimes = 5; + loopTimes = 1; } catch (IOException ex) { ex.printStackTrace(); @@ -167,7 +167,7 @@ public static void TearDown() public void testSpatialRangeQuery() throws Exception { - RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true); for (int i = 0; i < loopTimes; i++) { long resultSize = RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, false).count(); assertEquals(resultSize, 193); @@ -184,7 +184,7 @@ public void testSpatialRangeQuery() public void testSpatialRangeQueryUsingIndex() throws Exception { - RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true); spatialRDD.buildIndex(IndexType.RTREE, false); for (int i = 0; i < loopTimes; i++) { long resultSize = RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, true).count(); @@ -201,7 +201,7 @@ public void testSpatialRangeQueryUsingIndex() @Test public void testSpatialRangeQueryLeftCoveredByRightFalse() throws Exception { - RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true); Coordinate[] coordinates = new Coordinate[5]; coordinates[0] = new Coordinate(queryEnvelope.getMinX(), queryEnvelope.getMinY()); coordinates[1] = new Coordinate(queryEnvelope.getMinX(), queryEnvelope.getMaxY()); diff --git a/core/src/test/java/org/apache/sedona/core/spatialRDD/GeometryOpTest.java b/core/src/test/java/org/apache/sedona/core/spatialRDD/GeometryOpTest.java index 75d82cb46f..171442470b 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialRDD/GeometryOpTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialRDD/GeometryOpTest.java @@ -50,7 +50,7 @@ public static void TearDown() @Test public void testFlipPolygonCoordinates() { - PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions); Polygon oldGeom = spatialRDD.rawSpatialRDD.take(1).get(0); spatialRDD.flipCoordinates(); Polygon newGeom = spatialRDD.rawSpatialRDD.take(1).get(0); diff --git a/core/src/test/java/org/apache/sedona/core/spatialRDD/LineStringRDDTest.java b/core/src/test/java/org/apache/sedona/core/spatialRDD/LineStringRDDTest.java index fe7692ed9b..1020961b93 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialRDD/LineStringRDDTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialRDD/LineStringRDDTest.java @@ -66,7 +66,7 @@ public static void TearDown() public void testConstructor() throws Exception { - LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions); assertEquals(inputCount, spatialRDD.approximateTotalCount); assertEquals(inputBoundary, spatialRDD.boundaryEnvelope); } @@ -75,7 +75,7 @@ public void testConstructor() public void testEmptyConstructor() throws Exception { - LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.RTREE, true); // Create an empty spatialRDD and manually assemble it @@ -94,7 +94,7 @@ public void testEmptyConstructor() public void testBuildIndexWithoutSetGrid() throws Exception { - LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions); spatialRDD.buildIndex(IndexType.RTREE, false); } @@ -107,7 +107,7 @@ public void testBuildIndexWithoutSetGrid() public void testBuildRtreeIndex() throws Exception { - LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.RTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { @@ -127,7 +127,7 @@ public void testBuildRtreeIndex() public void testBuildQuadtreeIndex() throws Exception { - LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.QUADTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { @@ -156,7 +156,7 @@ public void testPolygonUnion() public void testMBR() throws Exception { - LineStringRDD lineStringRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + LineStringRDD lineStringRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions); RectangleRDD rectangleRDD = lineStringRDD.MinimumBoundingRectangle(); List result = rectangleRDD.rawSpatialRDD.collect(); assert result.size() > -1; diff --git a/core/src/test/java/org/apache/sedona/core/spatialRDD/PointRDDTest.java b/core/src/test/java/org/apache/sedona/core/spatialRDD/PointRDDTest.java index bca056387d..c00fdd11a0 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialRDD/PointRDDTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialRDD/PointRDDTest.java @@ -64,7 +64,7 @@ public static void TearDown() @Test public void testConstructor() { - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions); assertEquals(inputCount, spatialRDD.approximateTotalCount); assertEquals(inputBoundary, spatialRDD.boundaryEnvelope); assert spatialRDD.rawSpatialRDD.take(9).get(0).getUserData().equals("testattribute0\ttestattribute1\ttestattribute2"); @@ -77,7 +77,7 @@ public void testConstructor() public void testEmptyConstructor() throws Exception { - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions); spatialRDD.buildIndex(IndexType.RTREE, false); // Create an empty spatialRDD and manually assemble it PointRDD spatialRDDcopy = new PointRDD(); @@ -95,7 +95,7 @@ public void testEmptyConstructor() public void testBuildIndexWithoutSetGrid() throws Exception { - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions); spatialRDD.buildIndex(IndexType.RTREE, false); } @@ -108,7 +108,7 @@ public void testBuildIndexWithoutSetGrid() public void testBuildRtreeIndex() throws Exception { - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.RTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { @@ -128,7 +128,7 @@ public void testBuildRtreeIndex() public void testBuildQuadtreeIndex() throws Exception { - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.QUADTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { diff --git a/core/src/test/java/org/apache/sedona/core/spatialRDD/PolygonRDDTest.java b/core/src/test/java/org/apache/sedona/core/spatialRDD/PolygonRDDTest.java index 49b0d3973d..5edbd789b4 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialRDD/PolygonRDDTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialRDD/PolygonRDDTest.java @@ -75,7 +75,7 @@ public static void TearDown() @Test public void testConstructor() { - PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions); assertEquals(inputCount, spatialRDD.approximateTotalCount); assertEquals(inputBoundary, spatialRDD.boundaryEnvelope); } @@ -84,7 +84,7 @@ public void testConstructor() public void testEmptyConstructor() throws Exception { - PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.RTREE, true); // Create an empty spatialRDD and manually assemble it @@ -97,7 +97,7 @@ public void testEmptyConstructor() @Test public void testGeoJSONConstructor() { - PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocationGeojson, FileDataSplitter.GEOJSON, true, 4, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocationGeojson, FileDataSplitter.GEOJSON, true, 4); assert spatialRDD.approximateTotalCount == 1001; assert spatialRDD.boundaryEnvelope != null; assertEquals(spatialRDD.rawSpatialRDD.take(1).get(0).getUserData(), "01\t077\t011501\t5\t1500000US010770115015\t010770115015\t5\tBG\t6844991\t32636"); @@ -108,7 +108,7 @@ public void testGeoJSONConstructor() @Test public void testWktConstructor() { - PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocationWkt, FileDataSplitter.WKT, true, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocationWkt, FileDataSplitter.WKT, true); assert spatialRDD.approximateTotalCount == 103; assert spatialRDD.boundaryEnvelope != null; assert spatialRDD.rawSpatialRDD.take(1).get(0).getUserData().equals("31\t039\t00835841\t31039\tCuming\tCuming County\t06\tH1\tG4020\t\t\t\tA\t1477895811\t10447360\t+41.9158651\t-096.7885168"); @@ -117,7 +117,7 @@ public void testWktConstructor() @Test public void testWkbConstructor() { - PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocationWkb, FileDataSplitter.WKB, true, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocationWkb, FileDataSplitter.WKB, true); assert spatialRDD.approximateTotalCount == 103; assert spatialRDD.boundaryEnvelope != null; assert spatialRDD.rawSpatialRDD.take(1).get(0).getUserData().equals("31\t039\t00835841\t31039\tCuming\tCuming County\t06\tH1\tG4020\t\t\t\tA\t1477895811\t10447360\t+41.9158651\t-096.7885168"); @@ -132,7 +132,7 @@ public void testWkbConstructor() public void testBuildIndexWithoutSetGrid() throws Exception { - PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions); spatialRDD.buildIndex(IndexType.RTREE, false); } @@ -145,7 +145,7 @@ public void testBuildIndexWithoutSetGrid() public void testBuildRtreeIndex() throws Exception { - PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.RTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { @@ -165,7 +165,7 @@ public void testBuildRtreeIndex() public void testBuildQuadtreeIndex() throws Exception { - PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.QUADTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { @@ -185,7 +185,7 @@ public void testBuildQuadtreeIndex() public void testMBR() throws Exception { - PolygonRDD polygonRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PolygonRDD polygonRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions); RectangleRDD rectangleRDD = polygonRDD.MinimumBoundingRectangle(); List result = rectangleRDD.rawSpatialRDD.collect(); assert result.size() > -1; diff --git a/core/src/test/java/org/apache/sedona/core/spatialRDD/RectangleRDDTest.java b/core/src/test/java/org/apache/sedona/core/spatialRDD/RectangleRDDTest.java index aa63f21763..2541004521 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialRDD/RectangleRDDTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialRDD/RectangleRDDTest.java @@ -66,7 +66,7 @@ public static void TearDown() public void testConstructor() throws Exception { - RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, numPartitions); assertEquals(inputCount, spatialRDD.approximateTotalCount); assertEquals(inputBoundary, spatialRDD.boundaryEnvelope); } @@ -75,7 +75,7 @@ public void testConstructor() public void testEmptyConstructor() throws Exception { - RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, numPartitions); spatialRDD.buildIndex(IndexType.RTREE, false); // Create an empty spatialRDD and manually assemble it RectangleRDD spatialRDDcopy = new RectangleRDD(); @@ -106,7 +106,7 @@ public void testBuildIndexWithoutSetGrid() public void testBuildRtreeIndex() throws Exception { - RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, numPartitions); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.RTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { @@ -126,7 +126,7 @@ public void testBuildRtreeIndex() public void testBuildQuadtreeIndex() throws Exception { - RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, numPartitions); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.QUADTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { diff --git a/core/src/test/java/org/apache/sedona/core/spatialRDD/SpatialRDDWriterTest.java b/core/src/test/java/org/apache/sedona/core/spatialRDD/SpatialRDDWriterTest.java index 123f9e9fab..680f80a935 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialRDD/SpatialRDDWriterTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialRDD/SpatialRDDWriterTest.java @@ -82,11 +82,11 @@ public void testSaveAsWKBWithData() File wkb = new File(testSaveAsWKBWithData); if (wkb.exists()) { FileUtils.deleteDirectory(wkb);} - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions); spatialRDD.saveAsWKB(testSaveAsWKBWithData); // Load the saved rdd and compare them - PointRDD resultWKB = new PointRDD(sc, testSaveAsWKBWithData, 0, FileDataSplitter.WKB, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD resultWKB = new PointRDD(sc, testSaveAsWKBWithData, 0, FileDataSplitter.WKB, true, numPartitions); assertEquals(resultWKB.rawSpatialRDD.count(), spatialRDD.rawSpatialRDD.count()); verifyResult(resultWKB.rawSpatialRDD.takeOrdered(5), spatialRDD.rawSpatialRDD.takeOrdered(5)); @@ -102,11 +102,11 @@ public void testSaveAsWKTWithData() File wkt = new File(testSaveAsWKTWithData); if (wkt.exists()) {FileUtils.deleteDirectory(wkt);} - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions); spatialRDD.saveAsWKT(testSaveAsWKTWithData); // Load the saved rdd and compare them - PointRDD resultWKT = new PointRDD(sc, testSaveAsWKTWithData, 0, FileDataSplitter.WKT, true, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD resultWKT = new PointRDD(sc, testSaveAsWKTWithData, 0, FileDataSplitter.WKT, true, numPartitions); assertEquals(resultWKT.rawSpatialRDD.count(), spatialRDD.rawSpatialRDD.count()); verifyResult(resultWKT.rawSpatialRDD.takeOrdered(5), spatialRDD.rawSpatialRDD.takeOrdered(5)); @@ -122,11 +122,11 @@ public void testSaveAsWKB() File wkb = new File(testSaveAsWKB); if (wkb.exists()) { FileUtils.deleteDirectory(wkb);} - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, false, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, false, numPartitions); spatialRDD.saveAsWKB(testSaveAsWKB); // Load the saved rdd and compare them - PointRDD resultWKB = new PointRDD(sc, testSaveAsWKB, 0, FileDataSplitter.WKB, false, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD resultWKB = new PointRDD(sc, testSaveAsWKB, 0, FileDataSplitter.WKB, false, numPartitions); assertEquals(resultWKB.rawSpatialRDD.count(), spatialRDD.rawSpatialRDD.count()); verifyResult(resultWKB.rawSpatialRDD.takeOrdered(5), spatialRDD.rawSpatialRDD.takeOrdered(5)); @@ -142,11 +142,11 @@ public void testSaveAsWKT() File wkt = new File(testSaveAsWKT); if (wkt.exists()) {FileUtils.deleteDirectory(wkt);} - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, false, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, false, numPartitions); spatialRDD.saveAsWKT(testSaveAsWKT); // Load the saved rdd and compare them - PointRDD resultWKT = new PointRDD(sc, testSaveAsWKT, 0, FileDataSplitter.WKT, false, numPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD resultWKT = new PointRDD(sc, testSaveAsWKT, 0, FileDataSplitter.WKT, false, numPartitions); assertEquals(resultWKT.rawSpatialRDD.count(), spatialRDD.rawSpatialRDD.count()); verifyResult(resultWKT.rawSpatialRDD.takeOrdered(5), spatialRDD.rawSpatialRDD.takeOrdered(5)); diff --git a/core/src/test/java/org/apache/sedona/core/utils/CRSTransformationTest.java b/core/src/test/java/org/apache/sedona/core/utils/CRSTransformationTest.java index 398c73aa47..e051d83378 100644 --- a/core/src/test/java/org/apache/sedona/core/utils/CRSTransformationTest.java +++ b/core/src/test/java/org/apache/sedona/core/utils/CRSTransformationTest.java @@ -161,7 +161,7 @@ public static void setUpBeforeClass() indexType = IndexType.getIndexType(prop.getProperty("indexType")); numPartitions = Integer.parseInt(prop.getProperty("numPartitions")); queryEnvelope = new Envelope(30.01, 40.01, -90.01, -80.01); - loopTimes = 5; + loopTimes = 1; } catch (IOException ex) { ex.printStackTrace(); @@ -202,148 +202,10 @@ public static void tearDown() public void testSpatialRangeQuery() throws Exception { - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005"); - for (int i = 0; i < loopTimes; i++) { - long resultSize = RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, false).count(); - assert resultSize == 3127; - } + PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true); + spatialRDD.CRSTransform( "epsg:4326", "epsg:3005"); + long resultSize = RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, false).count(); + assert resultSize == 3127; assert RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, false).take(10).get(1).getUserData().toString() != null; } - - /** - * Test spatial range query using index. - * - * @throws Exception the exception - */ - @Test - public void testSpatialRangeQueryUsingIndex() - throws Exception - { - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005"); - spatialRDD.buildIndex(IndexType.RTREE, false); - for (int i = 0; i < loopTimes; i++) { - long resultSize = RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, true).count(); - assert resultSize == 3127; - } - assert RangeQuery.SpatialRangeQuery(spatialRDD, queryEnvelope, false, true).take(10).get(1).getUserData().toString() != null; - } - - /** - * Test spatial knn query. - * - * @throws Exception the exception - */ - @Test - public void testSpatialKnnQuery() - throws Exception - { - PointRDD pointRDD = new PointRDD(sc, InputLocation, offset, splitter, true, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005"); - - for (int i = 0; i < loopTimes; i++) { - List result = KNNQuery.SpatialKnnQuery(pointRDD, queryPoint, topK, false); - assert result.size() > 0; - assert result.get(0).getUserData().toString() != null; - //System.out.println(result.get(0).getUserData().toString()); - } - } - - /** - * Test spatial knn query using index. - * - * @throws Exception the exception - */ - @Test - public void testSpatialKnnQueryUsingIndex() - throws Exception - { - PointRDD pointRDD = new PointRDD(sc, InputLocation, offset, splitter, true, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005"); - pointRDD.buildIndex(IndexType.RTREE, false); - for (int i = 0; i < loopTimes; i++) { - List result = KNNQuery.SpatialKnnQuery(pointRDD, queryPoint, topK, true); - assert result.size() > 0; - assert result.get(0).getUserData().toString() != null; - //System.out.println(result.get(0).getUserData().toString()); - } - } - - /** - * Test spatial KNN correctness. - * - * @throws Exception the exception - */ - @Test - public void testSpatialKNNCorrectness() - throws Exception - { - PointRDD pointRDD = new PointRDD(sc, InputLocation, offset, splitter, true, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005"); - List resultNoIndex = KNNQuery.SpatialKnnQuery(pointRDD, queryPoint, topK, false); - pointRDD.buildIndex(IndexType.RTREE, false); - List resultWithIndex = KNNQuery.SpatialKnnQuery(pointRDD, queryPoint, topK, true); - GeometryDistanceComparator geometryDistanceComparator = new GeometryDistanceComparator(queryPoint, true); - List resultNoIndexModifiable = new ArrayList<>(resultNoIndex); - List resultWithIndexModifiable = new ArrayList<>(resultWithIndex); - Collections.sort(resultNoIndexModifiable, geometryDistanceComparator); - Collections.sort(resultWithIndexModifiable, geometryDistanceComparator); - int difference = 0; - for (int i = 0; i < topK; i++) { - if (geometryDistanceComparator.compare(resultNoIndex.get(i), resultWithIndex.get(i)) != 0) { - difference++; - } - } - assert difference == 0; - } - - /** - * Test spatial join query with polygon RDD. - * - * @throws Exception the exception - */ - @Test - public void testSpatialJoinQueryWithPolygonRDD() - throws Exception - { - - PolygonRDD queryRDD = new PolygonRDD(sc, InputLocationQueryPolygon, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005"); - - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005"); - - spatialRDD.spatialPartitioning(gridType); - - queryRDD.spatialPartitioning(spatialRDD.getPartitioner()); - - List>> result = JoinQuery.SpatialJoinQuery(spatialRDD, queryRDD, false, true).collect(); - - assert result.get(1)._1().getUserData() != null; - for (int i = 0; i < result.size(); i++) { - assert result.get(i)._2().size() == 0 || result.get(i)._2().iterator().next().getUserData() != null; - } - } - - /** - * Test spatial join query with polygon RDD using R tree index. - * - * @throws Exception the exception - */ - @Test - public void testSpatialJoinQueryWithPolygonRDDUsingRTreeIndex() - throws Exception - { - - PolygonRDD queryRDD = new PolygonRDD(sc, InputLocationQueryPolygon, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005"); - - PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005"); - - spatialRDD.spatialPartitioning(gridType); - - spatialRDD.buildIndex(IndexType.RTREE, true); - - queryRDD.spatialPartitioning(spatialRDD.getPartitioner()); - - List>> result = JoinQuery.SpatialJoinQuery(spatialRDD, queryRDD, false, true).collect(); - - assert result.get(1)._1().getUserData() != null; - for (int i = 0; i < result.size(); i++) { - assert result.get(i)._2().size() == 0 || result.get(i)._2().iterator().next().getUserData() != null; - } - } } diff --git a/core/src/test/scala/org/apache/sedona/core/scalaTest.scala b/core/src/test/scala/org/apache/sedona/core/scalaTest.scala index 78492bb26c..6cc59c7b4e 100644 --- a/core/src/test/scala/org/apache/sedona/core/scalaTest.scala +++ b/core/src/test/scala/org/apache/sedona/core/scalaTest.scala @@ -23,10 +23,8 @@ import org.apache.sedona.common.enums.FileDataSplitter import org.apache.sedona.core.enums.{GridType, IndexType, JoinBuildSide} import org.apache.sedona.core.formatMapper.EarthdataHDFPointMapper import org.apache.sedona.core.spatialOperator.JoinQuery.JoinParams -import org.apache.sedona.core.spatialOperator.SpatialPredicate -import org.apache.sedona.core.spatialOperator.{JoinQuery, KNNQuery, RangeQuery} +import org.apache.sedona.core.spatialOperator.{JoinQuery, KNNQuery, RangeQuery, SpatialPredicate} import org.apache.sedona.core.spatialRDD.{CircleRDD, PointRDD, PolygonRDD} -import org.apache.spark.storage.StorageLevel import org.locationtech.jts.geom.{Coordinate, Envelope, GeometryFactory} class scalaTest extends SparkUtil { @@ -235,19 +233,4 @@ class scalaTest extends SparkUtil { i = i + 1 } } - - test("should pass CRS transformed spatial range query") { - val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, false, StorageLevel.NONE, "epsg:4326", "epsg:3005") - for (i <- 1 to eachQueryLoopTimes) { - val resultSize = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, false).count - } - } - - test("should pass CRS transformed spatial range query using index") { - val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, false, StorageLevel.NONE, "epsg:4326", "epsg:3005") - objectRDD.buildIndex(PointRDDIndexType, false) - for (i <- 1 to eachQueryLoopTimes) { - val resultSize = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, true).count - } - } } From b17c78f141abd1bec228533f6e4f94924127d7e7 Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Tue, 8 Aug 2023 01:32:04 -0700 Subject: [PATCH 02/11] Drop the constructors in Python --- .../sedona/core/SpatialRDD/linestring_rdd.py | 264 ----------------- python/sedona/core/SpatialRDD/point_rdd.py | 266 ----------------- python/sedona/core/SpatialRDD/polygon_rdd.py | 270 ------------------ .../sedona/core/SpatialRDD/rectangle_rdd.py | 269 ----------------- python/tests/core/test_rdd.py | 37 --- .../tests/core/test_spatial_rdd_from_disc.py | 18 +- .../tests/spatial_operator/test_join_base.py | 9 +- .../test_join_query_correctness.py | 45 ++- .../spatial_operator/test_linestring_range.py | 5 +- .../spatial_operator/test_polygon_range.py | 6 +- .../spatial_operator/test_rectangle_range.py | 6 +- python/tests/spatial_rdd/test_circle_rdd.py | 5 +- .../tests/spatial_rdd/test_linestring_rdd.py | 70 +---- python/tests/spatial_rdd/test_point_rdd.py | 38 +-- python/tests/spatial_rdd/test_polygon_rdd.py | 169 +---------- .../tests/spatial_rdd/test_rectangle_rdd.py | 10 +- python/tests/spatial_rdd/test_spatial_rdd.py | 70 +---- .../spatial_rdd/test_spatial_rdd_writer.py | 7 +- .../test_spatial_rdd_to_spatial_dataframe.py | 4 +- python/tests/test_assign_raw_spatial_rdd.py | 7 +- python/tests/test_scala_example.py | 38 +-- python/tests/utils/test_crs_transformation.py | 120 +------- 22 files changed, 74 insertions(+), 1659 deletions(-) diff --git a/python/sedona/core/SpatialRDD/linestring_rdd.py b/python/sedona/core/SpatialRDD/linestring_rdd.py index dff4e780bf..0bd720f56f 100644 --- a/python/sedona/core/SpatialRDD/linestring_rdd.py +++ b/python/sedona/core/SpatialRDD/linestring_rdd.py @@ -22,7 +22,6 @@ from sedona.core.enums import FileDataSplitter from sedona.core.enums.file_data_splitter import FileSplitterJvm from sedona.core.jvm.translate import PythonRddToJavaRDDAdapter -from sedona.utils.jvm import JvmStorageLevel from sedona.utils.meta import MultipleMeta @@ -36,16 +35,6 @@ def __init__(self, rdd: RDD): srdd = self._jvm_spatial_rdd(spatial_rdd) self._srdd = srdd - def __init__(self, rdd: RDD, newLevel: StorageLevel): - self._sc = rdd.ctx - self._jvm = self._sc._jvm - - spatial_rdd = PythonRddToJavaRDDAdapter(self._jvm).deserialize_to_linestring_raw_rdd(rdd._jrdd) - - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - srdd = self._jvm_spatial_rdd(spatial_rdd, new_level_jvm) - self._srdd = srdd - def __init__(self): self._do_init() self._srdd = self._jvm_spatial_rdd() @@ -59,17 +48,6 @@ def __init__(self, rawSpatialRDD: JvmSpatialRDD): jsrdd = rawSpatialRDD.jsrdd self._srdd = self._jvm_spatial_rdd(jsrdd) - def __init__(self, rawSpatialRDD: JvmSpatialRDD, sourceEpsgCode: str, targetEpsgCode: str): - """ - - :param rawSpatialRDD: RDD - :param sourceEpsgCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - self._srdd = self._jvm_spatial_rdd(jsrdd, sourceEpsgCode, targetEpsgCode) - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, splitter: FileDataSplitter, carryInputData: bool, partitions: int): """ @@ -158,248 +136,6 @@ def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: Fil carryInputData ) - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel): - """ - :param rawSpatialRDD: RDD - :param newLevel: StorageLevel - """ - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel): - - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param startOffset: int, starting offset - :param endOffset: int, ending offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param partitions: int, number of partitions - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - FileSplitterJvm(self._jvm, splitter).jvm_instance, - carryInputData, - partitions, - JvmStorageLevel(self._jvm, newLevel).jvm_instance - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param startOffset: int, starting offset - :param endOffset: int, ending offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter, - carryInputData, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, - carryInputData: bool, partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param partitions: int, number of partitions - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - new_level_jvm - ) - - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param rawSpatialRDD: RDD - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm, sourceEpsgCRSCode, targetEpsgCode) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel, - sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param startOffset: int, starting offset - :param endOffset: int, ending offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param targetEpsgCode: str, epsg code to transform SpatialRDD - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param startOffset: int, starting offset - :param endOffset: int, ending offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - partitions: int, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param targetEpsgCode: str, epsg code to transform SpatialRDD - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - @property def _jvm_spatial_rdd(self): if self._sc is not None: diff --git a/python/sedona/core/SpatialRDD/point_rdd.py b/python/sedona/core/SpatialRDD/point_rdd.py index 6354742ac2..658e59217b 100644 --- a/python/sedona/core/SpatialRDD/point_rdd.py +++ b/python/sedona/core/SpatialRDD/point_rdd.py @@ -21,26 +21,11 @@ from sedona.core.SpatialRDD.spatial_rdd_factory import SpatialRDDFactory from sedona.core.enums.file_data_splitter import FileSplitterJvm, FileDataSplitter from sedona.core.jvm.translate import PythonRddToJavaRDDAdapter -from sedona.utils.jvm import JvmStorageLevel from sedona.utils.meta import MultipleMeta class PointRDD(SpatialRDD, metaclass=MultipleMeta): - def __init__(self, rdd: RDD, newLevel: StorageLevel): - """ - - :param rdd: RDD - :param newLevel: StorageLevel StorageLevel - """ - super().__init__(rdd.ctx) - - spatial_rdd = PythonRddToJavaRDDAdapter(self._jvm).deserialize_to_point_raw_rdd(rdd._jrdd) - - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - srdd = self._jvm_spatial_rdd(spatial_rdd, new_level_jvm) - self._srdd = srdd - def __init__(self, rdd: RDD): """ @@ -66,18 +51,6 @@ def __init__(self, rawSpatialRDD: JvmSpatialRDD): jsrdd = rawSpatialRDD.jsrdd self._srdd = self._jvm_spatial_rdd(jsrdd) - def __init__(self, rawSpatialRDD: JvmSpatialRDD, sourceEpsgCode: str, targetEpsgCode: str): - """ - - :param rawSpatialRDD: JvmSpatialRDD, jvm representation of spatial rdd RDD - :param sourceEpsgCode: str - :param targetEpsgCode: str, epsg code to transform SpatialRDD str - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - self._srdd = self._jvm_spatial_rdd(jsrdd, sourceEpsgCode, targetEpsgCode) - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, splitter: FileDataSplitter, carryInputData: bool, partitions: int): """ @@ -160,245 +133,6 @@ def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: Fil carryInputData ) - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel): - """ - - :param rawSpatialRDD: - :param newLevel: - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, splitter: FileDataSplitter, - carryInputData: bool, partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param Offset: int, point offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param partitions: int, number of partitions - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, splitter: FileDataSplitter, - carryInputData: bool, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param Offset: int, point offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - """ - - super().__init__(sparkContext) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter, - carryInputData, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param partitions: int, number of partitions - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - new_level_jvm - ) - - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param rawSpatialRDD: JvmSpatialRDD, jvm representation of spatial rdd - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm, sourceEpsgCRSCode, targetEpsgCode) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, splitter: FileDataSplitter, - carryInputData: bool, partitions: int, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param Offset: int, point offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param partitions: int, number of partitions - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, splitter: FileDataSplitter, - carryInputData: bool, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param Offset: int, point offset - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - partitions: int, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param partitions: int, number of partitions - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext instance - :param InputLocation: str, location for loaded file - :param splitter: FileDataSplitter, data file splitter - :param carryInputData: bool, if spatial rdd should keep non geometry attributes - :param newLevel: StorageLevel - :param sourceEpsgCRSCode: str, epsg code which loaded files is in, ex. epsg:4326 stands for WGS84 - :param targetEpsgCode: str, epsg code to transform SpatialRDD - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - def MinimumBoundingRectangle(self): raise NotImplementedError("PointRDD has not MinimumBoundingRectangle method.") diff --git a/python/sedona/core/SpatialRDD/polygon_rdd.py b/python/sedona/core/SpatialRDD/polygon_rdd.py index a2758ebf8c..c72ba9c72b 100644 --- a/python/sedona/core/SpatialRDD/polygon_rdd.py +++ b/python/sedona/core/SpatialRDD/polygon_rdd.py @@ -21,21 +21,11 @@ from sedona.core.SpatialRDD.spatial_rdd_factory import SpatialRDDFactory from sedona.core.enums.file_data_splitter import FileSplitterJvm, FileDataSplitter from sedona.core.jvm.translate import PythonRddToJavaRDDAdapter -from sedona.utils.jvm import JvmStorageLevel from sedona.utils.meta import MultipleMeta class PolygonRDD(SpatialRDD, metaclass=MultipleMeta): - def __init__(self, rdd: RDD, newLevel: StorageLevel): - super().__init__(rdd.ctx) - - spatial_rdd = PythonRddToJavaRDDAdapter(self._jvm).deserialize_to_polygon_raw_rdd(rdd._jrdd) - - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - srdd = self._jvm_spatial_rdd(spatial_rdd, new_level_jvm) - self._srdd = srdd - def __init__(self, rdd: RDD): super().__init__(rdd.ctx) spatial_rdd = PythonRddToJavaRDDAdapter(self._jvm).deserialize_to_polygon_raw_rdd(rdd._jrdd) @@ -55,30 +45,6 @@ def __init__(self, rawSpatialRDD: JvmSpatialRDD): jsrdd = rawSpatialRDD.jsrdd self._srdd = self._jvm_spatial_rdd(jsrdd) - def __init__(self, rawSpatialRDD: JvmSpatialRDD, sourceEpsgCode: str, targetEpsgCode: str): - """ - - :param rawSpatialRDD: - :param sourceEpsgCode: - :param targetEpsgCode: - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - self._srdd = self._jvm_spatial_rdd(jsrdd, sourceEpsgCode, targetEpsgCode) - - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel): - """ - :param rawSpatialRDD: - :param sourceEpsgCode: - :param targetEpsgCode: - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm) - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, splitter: FileDataSplitter, carryInputData: bool, partitions: int): """ @@ -169,242 +135,6 @@ def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: Fil carryInputData ) - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param startOffset: - :param endOffset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter.jvm_instance, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param startOffset: - :param endOffset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param newLevel: - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param newLevel: - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm - ) - - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param rawSpatialRDD: - :param newLevel: - :param sourceEpsgCRSCode: - :param targetEpsgCode: - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm, sourceEpsgCRSCode, targetEpsgCode) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel, - sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param startOffset: - :param endOffset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, startOffset: int, endOffset: int, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param startOffset: - :param endOffset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - startOffset, - endOffset, - jvm_splitter, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - partitions: int, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, - carryInputData: bool, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: bool, - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - def MinimumBoundingRectangle(self): from sedona.core.SpatialRDD import RectangleRDD rectangle_rdd = RectangleRDD() diff --git a/python/sedona/core/SpatialRDD/rectangle_rdd.py b/python/sedona/core/SpatialRDD/rectangle_rdd.py index 5851bb0498..48d9a222f5 100644 --- a/python/sedona/core/SpatialRDD/rectangle_rdd.py +++ b/python/sedona/core/SpatialRDD/rectangle_rdd.py @@ -26,16 +26,6 @@ class RectangleRDD(SpatialRDD, metaclass=MultipleMeta): - def __init__(self, rdd: RDD, newLevel: StorageLevel): - self._sc = rdd.ctx - self._jvm = self._sc._jvm - - spatial_rdd = self._jvm.GeoSerializerData.deserializeToPolygonRawRDD(rdd._jrdd) - - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - srdd = self._jvm_spatial_rdd(spatial_rdd, new_level_jvm) - self._srdd = srdd - def __init__(self): self._do_init() self._srdd = self._jvm_spatial_rdd() @@ -49,36 +39,6 @@ def __init__(self, rawSpatialRDD: JvmSpatialRDD): jsrdd = rawSpatialRDD.jsrdd self._srdd = self._jvm_spatial_rdd(jsrdd) - def __init__(self, rawSpatialRDD: JvmSpatialRDD, sourceEpsgCode: str, targetEpsgCode: str): - """ - - :param rawSpatialRDD: - :param sourceEpsgCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - - super().__init__(rawSpatialRDD.sc) - - self._srdd = self._jvm_spatial_rdd( - rawSpatialRDD.jsrdd, - sourceEpsgCode, - targetEpsgCode - ) - - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel): - """ - - :param rawSpatialRDD: - :param newLevel: - """ - super().__init__(rawSpatialRDD.sc) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - rawSpatialRDD.jsrdd, - new_level_jvm - ) - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, splitter: FileDataSplitter, carryInputData: bool, partitions: int): """ @@ -164,235 +124,6 @@ def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: Fil jvm_splitter.jvm_instance, carryInputData ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param Offset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter.jvm_instance, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param Offset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param newLevel: - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - partitions, - new_level_jvm - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param newLevel: - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm - ) - - def __init__(self, rawSpatialRDD: JvmSpatialRDD, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param rawSpatialRDD: - :param newLevel: - :param sourceEpsgCRSCode: - :param targetEpsgCode: - """ - - super().__init__(rawSpatialRDD.sc) - jsrdd = rawSpatialRDD.jsrdd - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - self._srdd = self._jvm_spatial_rdd(jsrdd, new_level_jvm, sourceEpsgCRSCode, targetEpsgCode) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, - splitter: FileDataSplitter, carryInputData: bool, partitions: int, newLevel: StorageLevel, - sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param Offset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter).jvm_instance - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, Offset: int, - splitter: FileDataSplitter, carryInputData: bool, newLevel: StorageLevel, sourceEpsgCRSCode: str, - targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param Offset: - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - Offset, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, carryInputData: bool, - partitions: int, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: - :param partitions: int, the partitions - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - partitions, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - - def __init__(self, sparkContext: SparkContext, InputLocation: str, splitter: FileDataSplitter, - carryInputData: bool, newLevel: StorageLevel, sourceEpsgCRSCode: str, targetEpsgCode: str): - """ - - :param sparkContext: SparkContext, the spark context - :param InputLocation: str, the input location - :param splitter: FileDataSplitter, File data splitter which should be used to split the data - :param carryInputData: bool, - :param newLevel: - :param sourceEpsgCRSCode: str, the source epsg CRS code - :param targetEpsgCode: str, the target epsg code - """ - super().__init__(sparkContext) - jvm_splitter = FileSplitterJvm(self._jvm, splitter) - new_level_jvm = JvmStorageLevel(self._jvm, newLevel).jvm_instance - - self._srdd = self._jvm_spatial_rdd( - self._jsc, - InputLocation, - jvm_splitter.jvm_instance, - carryInputData, - new_level_jvm, - sourceEpsgCRSCode, - targetEpsgCode - ) - @property def _jvm_spatial_rdd(self): spatial_factory = SpatialRDDFactory(self._sc) diff --git a/python/tests/core/test_rdd.py b/python/tests/core/test_rdd.py index 5560f4e0f1..201d442928 100644 --- a/python/tests/core/test_rdd.py +++ b/python/tests/core/test_rdd.py @@ -17,7 +17,6 @@ import logging -from pyspark import StorageLevel from shapely.geometry import Point from sedona.core.SpatialRDD import PointRDD, PolygonRDD, CircleRDD @@ -318,39 +317,3 @@ def test_distance_join_query_using_index(self): True, True ).count - - def test_crs_transformed_spatial_range_query(self): - object_rdd = PointRDD( - sparkContext=self.sc, - InputLocation=point_rdd_input_location, - Offset=point_rdd_offset, - splitter=point_rdd_splitter, - carryInputData=False, - newLevel=StorageLevel.DISK_ONLY, - sourceEpsgCRSCode="epsg:4326", - targetEpsgCode="epsg:3005" - ) - for i in range(each_query_loop_times): - result_size = RangeQuery.SpatialRangeQuery( - object_rdd, range_query_window, False, False - ) - - def test_crs_transformed_spatial_range_query_using_index(self): - object_rdd = PointRDD( - sparkContext=self.sc, - InputLocation=point_rdd_input_location, - Offset=point_rdd_offset, - splitter=point_rdd_splitter, - carryInputData=False, - newLevel=StorageLevel.DISK_ONLY, - sourceEpsgCRSCode="epsg:4326", - targetEpsgCode="epsg:3005" - ) - object_rdd.buildIndex(point_rdd_index_type, False) - for i in range(each_query_loop_times): - result_size = RangeQuery.SpatialRangeQuery( - object_rdd, - range_query_window, - False, - True - ).count diff --git a/python/tests/core/test_spatial_rdd_from_disc.py b/python/tests/core/test_spatial_rdd_from_disc.py index 6f9f645707..b202418125 100644 --- a/python/tests/core/test_spatial_rdd_from_disc.py +++ b/python/tests/core/test_spatial_rdd_from_disc.py @@ -19,7 +19,6 @@ import shutil import pytest -from pyspark import StorageLevel from sedona.core.SpatialRDD import PointRDD, PolygonRDD, LineStringRDD from sedona.core.enums import IndexType, GridType @@ -45,7 +44,7 @@ def test_saving_to_disc_spatial_rdd_point(self): from tests.properties.point_properties import input_location, offset, splitter, num_partitions point_rdd = PointRDD( - self.sc, input_location, offset, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY + self.sc, input_location, offset, splitter, True, num_partitions ) point_rdd.rawJvmSpatialRDD.saveAsObjectFile(os.path.join(disc_location, "point")) @@ -57,8 +56,7 @@ def test_saving_to_disc_spatial_rdd_polygon(self): input_location, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) polygon_rdd.rawJvmSpatialRDD.saveAsObjectFile(os.path.join(disc_location, "polygon")) @@ -69,8 +67,7 @@ def test_saving_to_disc_spatial_rdd_linestring(self): input_location, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) linestring_rdd.rawJvmSpatialRDD.saveAsObjectFile(os.path.join(disc_location, "line_string")) @@ -81,8 +78,7 @@ def test_saving_to_disc_index_linestring(self): input_location, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) linestring_rdd.buildIndex(IndexType.RTREE, False) linestring_rdd.indexedRawRDD.saveAsObjectFile(os.path.join(disc_location, "line_string_index")) @@ -94,8 +90,7 @@ def test_saving_to_disc_index_polygon(self): input_location, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) polygon_rdd.buildIndex(IndexType.RTREE, False) polygon_rdd.indexedRawRDD.saveAsObjectFile(os.path.join(disc_location, "polygon_index")) @@ -103,8 +98,7 @@ def test_saving_to_disc_index_polygon(self): def test_saving_to_disc_index_point(self): from tests.properties.point_properties import input_location, offset, splitter, num_partitions point_rdd = PointRDD( - self.sc, input_location, offset, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY - ) + self.sc, input_location, offset, splitter, True, num_partitions) point_rdd.buildIndex(IndexType.RTREE, False) point_rdd.indexedRawRDD.saveAsObjectFile(os.path.join(disc_location, "point_index")) diff --git a/python/tests/spatial_operator/test_join_base.py b/python/tests/spatial_operator/test_join_base.py index 434e98757e..922141942b 100644 --- a/python/tests/spatial_operator/test_join_base.py +++ b/python/tests/spatial_operator/test_join_base.py @@ -16,7 +16,6 @@ # under the License. import pytest -from pyspark import StorageLevel from sedona.core.SpatialRDD import RectangleRDD, PolygonRDD, LineStringRDD, PointRDD from sedona.core.SpatialRDD.spatial_rdd import SpatialRDD @@ -32,25 +31,25 @@ def create_point_rdd(self, location, splitter, num_partitions): rdd = PointRDD( self.sc, location, 1, splitter, False, num_partitions ) - return PointRDD(rdd.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY) + return PointRDD(rdd.rawJvmSpatialRDD) def create_linestring_rdd(self, location, splitter, num_partitions): rdd = LineStringRDD( self.sc, location, splitter, True, num_partitions ) - return LineStringRDD(rdd.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY) + return LineStringRDD(rdd.rawJvmSpatialRDD) def create_polygon_rdd(self, location, splitter, num_partitions): rdd = PolygonRDD( self.sc, location, splitter, True, num_partitions ) - return PolygonRDD(rdd.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY) + return PolygonRDD(rdd.rawJvmSpatialRDD) def create_rectangle_rdd(self, location, splitter, num_partitions): rdd = RectangleRDD( self.sc, location, splitter, True, num_partitions) return RectangleRDD( - rdd.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY + rdd.rawJvmSpatialRDD ) def partition_rdds(self, query_rdd: SpatialRDD, spatial_rdd: SpatialRDD, grid_type): diff --git a/python/tests/spatial_operator/test_join_query_correctness.py b/python/tests/spatial_operator/test_join_query_correctness.py index 79e7aeb022..e2390b28c9 100644 --- a/python/tests/spatial_operator/test_join_query_correctness.py +++ b/python/tests/spatial_operator/test_join_query_correctness.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -from pyspark import StorageLevel from shapely.geometry import Point, Polygon, LineString from shapely.geometry.base import BaseGeometry @@ -47,8 +46,8 @@ def test_inside_point_join_correctness(self): self.verify_join_result(result_no_index) def test_on_boundary_point_join_correctness(self): - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) - object_rdd = PointRDD(self.sc.parallelize(self.test_on_boundary_point_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) + object_rdd = PointRDD(self.sc.parallelize(self.test_on_boundary_point_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, False).collect() @@ -59,8 +58,8 @@ def test_on_boundary_point_join_correctness(self): def test_outside_point_join_correctness(self): self.once_before_all() - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) - object_rdd = PointRDD(self.sc.parallelize(self.test_outside_point_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) + object_rdd = PointRDD(self.sc.parallelize(self.test_outside_point_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, False).collect() @@ -71,9 +70,9 @@ def test_outside_point_join_correctness(self): def test_inside_linestring_join_correctness(self): window_rdd = PolygonRDD( - self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY + self.sc.parallelize(self.test_polygon_window_set) ) - object_rdd = LineStringRDD(self.sc.parallelize(self.test_inside_linestring_set), StorageLevel.MEMORY_ONLY) + object_rdd = LineStringRDD(self.sc.parallelize(self.test_inside_linestring_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) @@ -84,8 +83,8 @@ def test_inside_linestring_join_correctness(self): self.verify_join_result(result_no_index) def test_overlapped_linestring_join_correctness(self): - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) - object_rdd = LineStringRDD(self.sc.parallelize(self.test_overlapped_linestring_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) + object_rdd = LineStringRDD(self.sc.parallelize(self.test_overlapped_linestring_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, True).collect() @@ -95,8 +94,8 @@ def test_overlapped_linestring_join_correctness(self): self.verify_join_result(result_no_index) def test_outside_line_string_join_correctness(self): - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) - object_rdd = LineStringRDD(self.sc.parallelize(self.test_outside_linestring_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) + object_rdd = LineStringRDD(self.sc.parallelize(self.test_outside_linestring_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, False).collect() @@ -106,9 +105,9 @@ def test_outside_line_string_join_correctness(self): assert 0 == result_no_index.__len__() def test_inside_polygon_join_correctness(self): - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) - object_rdd = PolygonRDD(self.sc.parallelize(self.test_inside_polygon_set), StorageLevel.MEMORY_ONLY) + object_rdd = PolygonRDD(self.sc.parallelize(self.test_inside_polygon_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, False).collect() @@ -118,8 +117,8 @@ def test_inside_polygon_join_correctness(self): self.verify_join_result(result_no_index) def test_overlapped_polygon_join_correctness(self): - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) - object_rdd = PolygonRDD(self.sc.parallelize(self.test_overlapped_polygon_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) + object_rdd = PolygonRDD(self.sc.parallelize(self.test_overlapped_polygon_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, True).collect() @@ -129,8 +128,8 @@ def test_overlapped_polygon_join_correctness(self): self.verify_join_result(result_no_index) def test_outside_polygon_join_correctness(self): - window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) - object_rdd = PolygonRDD(self.sc.parallelize(self.test_outside_polygon_set), StorageLevel.MEMORY_ONLY) + window_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) + object_rdd = PolygonRDD(self.sc.parallelize(self.test_outside_polygon_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, False).collect() @@ -140,9 +139,9 @@ def test_outside_polygon_join_correctness(self): assert 0 == result_no_index.__len__() def test_inside_polygon_distance_join_correctness(self): - center_geometry_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) + center_geometry_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) window_rdd = CircleRDD(center_geometry_rdd, 0.1) - object_rdd = PolygonRDD(self.sc.parallelize(self.test_inside_polygon_set), StorageLevel.MEMORY_ONLY) + object_rdd = PolygonRDD(self.sc.parallelize(self.test_inside_polygon_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd, True, False).collect() @@ -152,9 +151,9 @@ def test_inside_polygon_distance_join_correctness(self): self.verify_join_result(result_no_index) def test_overlapped_polygon_distance_join_correctness(self): - center_geometry_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) + center_geometry_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) window_rdd = CircleRDD(center_geometry_rdd, 0.1) - object_rdd = PolygonRDD(self.sc.parallelize(self.test_overlapped_polygon_set), StorageLevel.MEMORY_ONLY) + object_rdd = PolygonRDD(self.sc.parallelize(self.test_overlapped_polygon_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd, True, True).collect() @@ -164,9 +163,9 @@ def test_overlapped_polygon_distance_join_correctness(self): self.verify_join_result(result_no_index) def test_outside_polygon_distance_join_correctness(self): - center_geometry_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) + center_geometry_rdd = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set)) window_rdd = CircleRDD(center_geometry_rdd, 0.1) - object_rdd = PolygonRDD(self.sc.parallelize(self.test_outside_polygon_set), StorageLevel.MEMORY_ONLY) + object_rdd = PolygonRDD(self.sc.parallelize(self.test_outside_polygon_set)) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd, True, True).collect() diff --git a/python/tests/spatial_operator/test_linestring_range.py b/python/tests/spatial_operator/test_linestring_range.py index e5827bca53..1b60e92482 100644 --- a/python/tests/spatial_operator/test_linestring_range.py +++ b/python/tests/spatial_operator/test_linestring_range.py @@ -17,7 +17,6 @@ import os -from pyspark import StorageLevel from sedona.core.SpatialRDD import LineStringRDD from sedona.core.enums import IndexType, FileDataSplitter @@ -39,7 +38,7 @@ class TestLineStringRange(TestBase): def test_spatial_range_query(self): spatial_rdd = LineStringRDD( - self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY + self.sc, input_location, splitter, True ) for i in range(self.loop_times): result_size = RangeQuery.SpatialRangeQuery(spatial_rdd, self.query_envelope, False, False).count() @@ -50,7 +49,7 @@ def test_spatial_range_query(self): def test_spatial_range_query_using_index(self): spatial_rdd = LineStringRDD( - self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY + self.sc, input_location, splitter, True ) spatial_rdd.buildIndex(IndexType.RTREE, False) diff --git a/python/tests/spatial_operator/test_polygon_range.py b/python/tests/spatial_operator/test_polygon_range.py index 1d83c4e976..58c775c781 100644 --- a/python/tests/spatial_operator/test_polygon_range.py +++ b/python/tests/spatial_operator/test_polygon_range.py @@ -17,8 +17,6 @@ import os -from pyspark import StorageLevel - from sedona.core.SpatialRDD import PolygonRDD from sedona.core.enums import IndexType, FileDataSplitter from sedona.core.geom.envelope import Envelope @@ -38,7 +36,7 @@ class TestPolygonRange(TestBase): def test_spatial_range_query(self): spatial_rdd = PolygonRDD( - self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY + self.sc, input_location, splitter, True ) for i in range(self.loop_times): result_size = RangeQuery.\ @@ -50,7 +48,7 @@ def test_spatial_range_query(self): def test_spatial_range_query_using_index(self): spatial_rdd = PolygonRDD( - self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY + self.sc, input_location, splitter, True ) spatial_rdd.buildIndex(IndexType.RTREE, False) for i in range(self.loop_times): diff --git a/python/tests/spatial_operator/test_rectangle_range.py b/python/tests/spatial_operator/test_rectangle_range.py index 16136305d1..ac92a66354 100644 --- a/python/tests/spatial_operator/test_rectangle_range.py +++ b/python/tests/spatial_operator/test_rectangle_range.py @@ -17,8 +17,6 @@ import os -from pyspark import StorageLevel - from sedona.core.SpatialRDD import RectangleRDD from sedona.core.enums import IndexType, FileDataSplitter from sedona.core.geom.envelope import Envelope @@ -46,7 +44,7 @@ class TestRectangleRange(TestBase): loop_times = 5 def test_spatial_range_query(self): - spatial_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True, StorageLevel.MEMORY_ONLY) + spatial_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True) for i in range(self.loop_times): result_size = RangeQuery.SpatialRangeQuery( @@ -58,7 +56,7 @@ def test_spatial_range_query(self): def test_spatial_range_query_using_index(self): spatial_rdd = RectangleRDD( - self.sc, inputLocation, offset, splitter, True, StorageLevel.MEMORY_ONLY) + self.sc, inputLocation, offset, splitter, True) spatial_rdd.buildIndex(IndexType.RTREE, False) for i in range(self.loop_times): diff --git a/python/tests/spatial_rdd/test_circle_rdd.py b/python/tests/spatial_rdd/test_circle_rdd.py index 9dc8948641..746c5d206a 100644 --- a/python/tests/spatial_rdd/test_circle_rdd.py +++ b/python/tests/spatial_rdd/test_circle_rdd.py @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -from pyspark import StorageLevel - from sedona.core.SpatialRDD import PointRDD, CircleRDD from tests.test_base import TestBase from tests.properties.point_properties import input_location, offset, splitter, num_partitions @@ -31,8 +29,7 @@ def test_circle_rdd(self): offset, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) circle_rdd = CircleRDD(spatial_rdd, 0.5) diff --git a/python/tests/spatial_rdd/test_linestring_rdd.py b/python/tests/spatial_rdd/test_linestring_rdd.py index f693e88204..ea6d4e7342 100644 --- a/python/tests/spatial_rdd/test_linestring_rdd.py +++ b/python/tests/spatial_rdd/test_linestring_rdd.py @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -from pyspark import StorageLevel - from sedona.core.SpatialRDD import LineStringRDD from sedona.core.enums import IndexType, GridType from sedona.core.geom.envelope import Envelope @@ -40,32 +38,17 @@ def test_constructor(self): InputLocation=input_location, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) self.compare_count(spatial_rdd_core, input_boundary, input_count) - spatial_rdd = LineStringRDD() - - spatial_rdd_core = LineStringRDD( - self.sc, - input_location, - splitter, - True, - num_partitions, - StorageLevel.MEMORY_ONLY - ) self.compare_count(spatial_rdd_core, input_boundary, input_count) spatial_rdd = LineStringRDD(spatial_rdd_core.rawJvmSpatialRDD) self.compare_count(spatial_rdd, input_boundary, input_count) - spatial_rdd = LineStringRDD(spatial_rdd_core.rawJvmSpatialRDD, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd, transformed_envelope, input_count) - spatial_rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, num_partitions) self.compare_count(spatial_rdd, input_boundary_2, input_count) @@ -82,52 +65,14 @@ def test_constructor(self): self.compare_count(spatial_rdd, input_boundary, input_count) - spatial_rdd = LineStringRDD(spatial_rdd_core.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY) + spatial_rdd = LineStringRDD(spatial_rdd_core.rawJvmSpatialRDD) self.compare_count(spatial_rdd, input_boundary, input_count) - spatial_rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY) - - self.compare_count(spatial_rdd, input_boundary_2, input_count) - - spatial_rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, - StorageLevel.MEMORY_ONLY) + spatial_rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, num_partitions) self.compare_count(spatial_rdd, input_boundary_2, input_count) - spatial_rdd = LineStringRDD(self.sc, input_location, splitter, True, num_partitions, - StorageLevel.MEMORY_ONLY) - - self.compare_count(spatial_rdd, input_boundary, input_count) - - spatial_rdd = LineStringRDD(self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY) - - self.compare_count(spatial_rdd, input_boundary, input_count) - - spatial_rdd = LineStringRDD(spatial_rdd_core.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd, transformed_envelope, input_count) - - spatial_rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, num_partitions, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd, transformed_envelope_2, input_count) - - spatial_rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd, transformed_envelope_2, input_count) - - spatial_rdd = LineStringRDD(self.sc, input_location, splitter, True, num_partitions, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd, transformed_envelope, input_count) - - spatial_rdd = LineStringRDD(self.sc, input_location, splitter, True, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd, transformed_envelope, input_count) - def test_empty_constructor(self): spatial_rdd = LineStringRDD( @@ -135,8 +80,7 @@ def test_empty_constructor(self): InputLocation=input_location, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) spatial_rdd.analyze() @@ -152,8 +96,7 @@ def test_build_index_without_set_grid(self): InputLocation=input_location, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) spatial_rdd.analyze() @@ -165,8 +108,7 @@ def test_mbr(self): InputLocation=input_location, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) rectangle_rdd = linestring_rdd.MinimumBoundingRectangle() diff --git a/python/tests/spatial_rdd/test_point_rdd.py b/python/tests/spatial_rdd/test_point_rdd.py index 24b7bdf7d2..9cb1545465 100644 --- a/python/tests/spatial_rdd/test_point_rdd.py +++ b/python/tests/spatial_rdd/test_point_rdd.py @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -from pyspark import StorageLevel - from sedona.core.SpatialRDD import PointRDD from sedona.core.SpatialRDD.spatial_rdd import SpatialRDD from sedona.core.enums import IndexType, GridType @@ -52,37 +50,10 @@ def test_constructor(self): self.compare_count(spatial_rdd_copy, input_count, input_boundary) spatial_rdd_copy = PointRDD(spatial_rdd.rawJvmSpatialRDD) self.compare_count(spatial_rdd_copy, input_count, input_boundary) - spatial_rdd_copy = PointRDD(spatial_rdd.rawJvmSpatialRDD, "epsg:4326", "epsg:5070") - self.compare_count(spatial_rdd_copy, input_count, transformed_envelope) spatial_rdd_copy = PointRDD(self.sc, input_location, offset, splitter, True, num_partitions) self.compare_count(spatial_rdd_copy, input_count, input_boundary) spatial_rdd_copy = PointRDD(self.sc, crs_point_test, splitter, True) self.compare_count(spatial_rdd_copy, 20000, crs_envelope) - spatial_rdd_copy = PointRDD(spatial_rdd.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY) - self.compare_count(spatial_rdd_copy, input_count, input_boundary) - spatial_rdd_copy = PointRDD(self.sc, input_location, offset, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY) - self.compare_count(spatial_rdd_copy, input_count, input_boundary) - spatial_rdd_copy = PointRDD(self.sc, input_location, offset, splitter, True, StorageLevel.MEMORY_ONLY) - self.compare_count(spatial_rdd_copy, input_count, input_boundary) - spatial_rdd_copy = PointRDD(self.sc, crs_point_test, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY) - self.compare_count(spatial_rdd_copy, 20000, crs_envelope) - spatial_rdd_copy = PointRDD(self.sc, crs_point_test, splitter, True, StorageLevel.MEMORY_ONLY) - self.compare_count(spatial_rdd_copy, 20000, crs_envelope) - spatial_rdd_copy = PointRDD(spatial_rdd.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - self.compare_count(spatial_rdd_copy, input_count, transformed_envelope) - spatial_rdd_copy = PointRDD(self.sc, input_location, offset, splitter, True, num_partitions, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - self.compare_count(spatial_rdd_copy, input_count, transformed_envelope) - spatial_rdd_copy = PointRDD(self.sc, input_location, offset, splitter, True, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - self.compare_count(spatial_rdd_copy, input_count, transformed_envelope) - spatial_rdd_copy = PointRDD(self.sc, crs_point_test, splitter, True, - num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070") - - self.compare_count(spatial_rdd_copy, 20000, crs_envelope_transformed) - spatial_rdd_copy = PointRDD(self.sc, crs_point_test, splitter, True, StorageLevel.MEMORY_ONLY, - "epsg:4326", "epsg:5070") - self.compare_count(spatial_rdd_copy, 20000, crs_envelope_transformed) def test_empty_constructor(self): spatial_rdd = PointRDD( @@ -91,8 +62,7 @@ def test_empty_constructor(self): Offset=offset, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) spatial_rdd.buildIndex(IndexType.RTREE, False) spatial_rdd_copy = PointRDD() @@ -106,8 +76,7 @@ def test_equal_partitioning(self): Offset=offset, splitter=splitter, carryInputData=False, - partitions=10, - newLevel=StorageLevel.MEMORY_ONLY + partitions=10 ) spatial_rdd.analyze() spatial_rdd.spatialPartitioning(GridType.QUADTREE) @@ -121,7 +90,6 @@ def test_build_index_without_set_grid(self): Offset=offset, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) spatial_rdd.buildIndex(IndexType.RTREE, False) diff --git a/python/tests/spatial_rdd/test_polygon_rdd.py b/python/tests/spatial_rdd/test_polygon_rdd.py index 61b8528b38..3897f78565 100644 --- a/python/tests/spatial_rdd/test_polygon_rdd.py +++ b/python/tests/spatial_rdd/test_polygon_rdd.py @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -from pyspark import StorageLevel - from sedona.core.SpatialRDD import PolygonRDD from sedona.core.SpatialRDD.spatial_rdd import SpatialRDD from sedona.core.enums import IndexType, FileDataSplitter, GridType @@ -44,36 +42,13 @@ def test_constructor(self): InputLocation=input_location, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) self.compare_spatial_rdd(spatial_rdd_core, input_boundary) - spatial_rdd_core = PolygonRDD( - self.sc, - input_location, - splitter, - True, - num_partitions, - StorageLevel.MEMORY_ONLY - ) - self.compare_spatial_rdd(spatial_rdd_core, input_boundary) spatial_rdd = PolygonRDD(rawSpatialRDD=spatial_rdd_core.rawJvmSpatialRDD) self.compare_spatial_rdd(spatial_rdd, input_boundary) - spatial_rdd = PolygonRDD(spatial_rdd_core.rawJvmSpatialRDD, "epsg:4326", "epsg:5070") - self.compare_spatial_rdd(spatial_rdd, query_envelope) - assert spatial_rdd.getSourceEpsgCode() == "epsg:4326" - assert spatial_rdd.getTargetEpsgCode() == "epsg:5070" - spatial_rdd = PolygonRDD(rawSpatialRDD=spatial_rdd_core.rawJvmSpatialRDD, sourceEpsgCode="epsg:4326", targetEpsgCode="epsg:5070") - assert spatial_rdd.getSourceEpsgCode() == "epsg:4326" - assert spatial_rdd.getTargetEpsgCode() == "epsg:5070" - self.compare_spatial_rdd(spatial_rdd, query_envelope) - spatial_rdd = PolygonRDD(rawSpatialRDD=spatial_rdd.rawJvmSpatialRDD, newLevel=StorageLevel.MEMORY_ONLY) - self.compare_spatial_rdd(spatial_rdd, query_envelope) - spatial_rdd = PolygonRDD(spatial_rdd_core.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY) - self.compare_spatial_rdd(spatial_rdd, input_boundary) - spatial_rdd = PolygonRDD() query_window_rdd = PolygonRDD( self.sc, @@ -117,132 +92,13 @@ def test_constructor(self): self.compare_spatial_rdd(spatial_rdd_core, input_boundary) - query_window_rdd = PolygonRDD( - self.sc, - polygon_rdd_input_location, - polygon_rdd_start_offset, - polygon_rdd_end_offset, - polygon_rdd_splitter, - True, - 5, - StorageLevel.MEMORY_ONLY - ) - - assert query_window_rdd.analyze() - assert query_window_rdd.approximateTotalCount == 3000 - - query_window_rdd = PolygonRDD( - self.sc, - polygon_rdd_input_location, - polygon_rdd_start_offset, - polygon_rdd_end_offset, - polygon_rdd_splitter, - True, - StorageLevel.MEMORY_ONLY - ) - - assert query_window_rdd.analyze() - assert query_window_rdd.approximateTotalCount == 3000 - - spatial_rdd_core = PolygonRDD( - self.sc, - input_location, - splitter, - True, - 5, - StorageLevel.MEMORY_ONLY - ) - - self.compare_spatial_rdd(spatial_rdd_core, input_boundary) - - spatial_rdd_core = PolygonRDD( - self.sc, - input_location, - splitter, - True, - StorageLevel.MEMORY_ONLY - ) - - self.compare_spatial_rdd(spatial_rdd_core, input_boundary) - - spatial_rdd = PolygonRDD( - spatial_rdd_core.rawJvmSpatialRDD, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:5070" - ) - self.compare_spatial_rdd(spatial_rdd, query_envelope) - - query_window_rdd = PolygonRDD( - self.sc, - polygon_rdd_input_location, - polygon_rdd_start_offset, - polygon_rdd_end_offset, - polygon_rdd_splitter, - True, - 5, - StorageLevel.MEMORY_ONLY, - "epsg:4326", - "epsg:5070" - ) - - assert query_window_rdd.analyze() - assert query_window_rdd.approximateTotalCount == 3000 - - query_window_rdd = PolygonRDD( - self.sc, - polygon_rdd_input_location, - polygon_rdd_start_offset, - polygon_rdd_end_offset, - polygon_rdd_splitter, - True, - StorageLevel.MEMORY_ONLY, - "epsg:4326", - "epsg:5070" - ) - - assert query_window_rdd.analyze() - assert query_window_rdd.approximateTotalCount == 3000 - - spatial_rdd_core = PolygonRDD( - self.sc, - input_location, - splitter, - True, - 5, - StorageLevel.MEMORY_ONLY, - "epsg:4326", - "epsg:5070" - ) - - self.compare_spatial_rdd(spatial_rdd_core, query_envelope) - spatial_rdd_core = PolygonRDD( - self.sc, - input_location, - splitter, - True, - StorageLevel.MEMORY_ONLY, - "epsg:4326", - "epsg:5070" - ) - - spatial_rdd_core = PolygonRDD( - sparkContext=self.sc, - InputLocation=input_location, - splitter=splitter, - carryInputData=True, - newLevel=StorageLevel.MEMORY_ONLY, - sourceEpsgCRSCode="epsg:4326", - targetEpsgCode="epsg:5070" - ) - - self.compare_spatial_rdd(spatial_rdd_core, query_envelope) - def test_empty_constructor(self): spatial_rdd = PolygonRDD( sparkContext=self.sc, InputLocation=input_location, splitter=splitter, carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=num_partitions ) spatial_rdd.analyze() spatial_rdd.spatialPartitioning(grid_type) @@ -257,8 +113,7 @@ def test_geojson_constructor(self): InputLocation=input_location_geo_json, splitter=FileDataSplitter.GEOJSON, carryInputData=True, - partitions=4, - newLevel=StorageLevel.MEMORY_ONLY + partitions=4 ) spatial_rdd.analyze() assert spatial_rdd.approximateTotalCount == 1001 @@ -272,8 +127,7 @@ def test_wkt_constructor(self): sparkContext=self.sc, InputLocation=input_location_wkt, splitter=FileDataSplitter.WKT, - carryInputData=True, - newLevel=StorageLevel.MEMORY_ONLY + carryInputData=True ) spatial_rdd.analyze() @@ -286,26 +140,13 @@ def test_wkb_constructor(self): sparkContext=self.sc, InputLocation=input_location_wkb, splitter=FileDataSplitter.WKB, - carryInputData=True, - newLevel=StorageLevel.MEMORY_ONLY + carryInputData=True ) spatial_rdd.analyze() assert spatial_rdd.approximateTotalCount == 103 assert spatial_rdd.boundaryEnvelope is not None assert spatial_rdd.rawSpatialRDD.take(1)[0].getUserData() == "31\t039\t00835841\t31039\tCuming\tCuming County\t06\tH1\tG4020\t\t\t\tA\t1477895811\t10447360\t+41.9158651\t-096.7885168" - def test_build_index_without_set_grid(self): - spatial_rdd = PolygonRDD( - self.sc, - input_location, - FileDataSplitter.CSV, - carryInputData=True, - partitions=num_partitions, - newLevel=StorageLevel.MEMORY_ONLY - ) - spatial_rdd.analyze() - spatial_rdd.buildIndex(IndexType.RTREE, False) - def test_mbr(self): polygon_rdd = PolygonRDD( sparkContext=self.sc, diff --git a/python/tests/spatial_rdd/test_rectangle_rdd.py b/python/tests/spatial_rdd/test_rectangle_rdd.py index 308ca7988d..6c598f8c76 100644 --- a/python/tests/spatial_rdd/test_rectangle_rdd.py +++ b/python/tests/spatial_rdd/test_rectangle_rdd.py @@ -18,7 +18,6 @@ import os import pytest -from pyspark import StorageLevel from sedona.core.SpatialRDD import RectangleRDD from sedona.core.enums import IndexType, GridType, FileDataSplitter @@ -50,8 +49,7 @@ def test_constructor(self): Offset=offset, splitter=splitter, carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=numPartitions ) spatial_rdd.analyze() @@ -65,8 +63,7 @@ def test_constructor(self): offset, splitter, True, - numPartitions, - StorageLevel.MEMORY_ONLY + numPartitions ) spatial_rdd.analyze() @@ -81,8 +78,7 @@ def test_empty_constructor(self): Offset=offset, splitter=splitter, carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=numPartitions ) spatial_rdd.analyze() diff --git a/python/tests/spatial_rdd/test_spatial_rdd.py b/python/tests/spatial_rdd/test_spatial_rdd.py index df309c72ca..37fe3819f6 100644 --- a/python/tests/spatial_rdd/test_spatial_rdd.py +++ b/python/tests/spatial_rdd/test_spatial_rdd.py @@ -19,7 +19,7 @@ import pyspark import pytest -from pyspark import StorageLevel, RDD +from pyspark import RDD from shapely.geometry import Point from sedona.core.SpatialRDD import PointRDD @@ -49,8 +49,7 @@ def create_spatial_rdd(self): Offset=offset, splitter=splitter, carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=numPartitions ) return spatial_rdd @@ -58,21 +57,6 @@ def test_analyze(self): spatial_rdd = self.create_spatial_rdd() assert spatial_rdd.analyze() - def test_crs_transform(self): - spatial_rdd = PointRDD( - sparkContext=self.sc, - InputLocation=crs_test_point, - Offset=0, - splitter=splitter, - carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY - ) - - spatial_rdd.CRSTransform("epsg:4326", "epsg:3857") - - assert spatial_rdd.rawSpatialRDD.collect()[0].geom.wkt == "POINT (-9833016.710450118 3805934.914254189)" - def test_minimum_bounding_rectangle(self): spatial_rdd = self.create_spatial_rdd() @@ -127,22 +111,6 @@ def test_field_names(self): except AssertionError: assert geo_json_rdd.fieldNames == ['id', 'zipcode', 'name'] - def test_get_crs_transformation(self): - spatial_rdd = PointRDD( - sparkContext=self.sc, - InputLocation=crs_test_point, - Offset=0, - splitter=splitter, - carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY - ) - - assert not spatial_rdd.getCRStransformation() - spatial_rdd.CRSTransform("epsg:4326", "epsg:3857") - - assert spatial_rdd.getCRStransformation() - def test_get_partitioner(self): spatial_rdd = self.create_spatial_rdd() @@ -171,40 +139,6 @@ def test_get_sample_number(self): spatial_rdd.setSampleNumber(10) assert spatial_rdd.getSampleNumber() == 10 - def test_get_source_epsg_code(self): - spatial_rdd = PointRDD( - sparkContext=self.sc, - InputLocation=crs_test_point, - Offset=0, - splitter=splitter, - carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY - ) - - assert spatial_rdd.getSourceEpsgCode() == "" - - spatial_rdd.CRSTransform("epsg:4326", "epsg:3857") - - assert spatial_rdd.getSourceEpsgCode() == "epsg:4326" - - def test_get_target_epsg_code(self): - spatial_rdd = PointRDD( - sparkContext=self.sc, - InputLocation=crs_test_point, - Offset=0, - splitter=splitter, - carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY - ) - - assert spatial_rdd.getTargetEpsgCode() == "" - - spatial_rdd.CRSTransform("epsg:4326", "epsg:3857") - - assert spatial_rdd.getTargetEpsgCode() == "epsg:3857" - def test_grids(self): for grid_type in GridType: diff --git a/python/tests/spatial_rdd/test_spatial_rdd_writer.py b/python/tests/spatial_rdd/test_spatial_rdd_writer.py index 32d10641b8..1685ded8df 100644 --- a/python/tests/spatial_rdd/test_spatial_rdd_writer.py +++ b/python/tests/spatial_rdd/test_spatial_rdd_writer.py @@ -19,7 +19,6 @@ import shutil import pytest -from pyspark import StorageLevel from sedona.core.SpatialRDD import PointRDD from sedona.core.enums import FileDataSplitter @@ -80,8 +79,7 @@ def test_save_as_geo_json_with_data(self, remove_wkb_directory): Offset=offset, splitter=splitter, carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=numPartitions ) spatial_rdd.saveAsGeoJSON(test_save_as_wkb_with_data) @@ -91,8 +89,7 @@ def test_save_as_geo_json_with_data(self, remove_wkb_directory): InputLocation=test_save_as_wkb_with_data, splitter=FileDataSplitter.GEOJSON, carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=numPartitions ) assert result_wkb.rawSpatialRDD.count() == spatial_rdd.rawSpatialRDD.count() diff --git a/python/tests/sql/test_spatial_rdd_to_spatial_dataframe.py b/python/tests/sql/test_spatial_rdd_to_spatial_dataframe.py index 1b73360cbd..3d9030ec61 100644 --- a/python/tests/sql/test_spatial_rdd_to_spatial_dataframe.py +++ b/python/tests/sql/test_spatial_rdd_to_spatial_dataframe.py @@ -17,7 +17,6 @@ import os -from pyspark import StorageLevel from pyspark.sql.types import StructType, StructField, StringType, IntegerType from sedona.core.SpatialRDD import PointRDD @@ -69,8 +68,7 @@ def test_point_rdd(self): Offset=0, splitter=splitter, carryInputData=True, - partitions=numPartitions, - newLevel=StorageLevel.MEMORY_ONLY + partitions=numPartitions ) raw_spatial_rdd = spatial_rdd.rawSpatialRDD.map( diff --git a/python/tests/test_assign_raw_spatial_rdd.py b/python/tests/test_assign_raw_spatial_rdd.py index 1c28836477..9011e93d1d 100644 --- a/python/tests/test_assign_raw_spatial_rdd.py +++ b/python/tests/test_assign_raw_spatial_rdd.py @@ -18,7 +18,6 @@ from sedona.core.SpatialRDD import PointRDD, CircleRDD from tests.properties.point_properties import input_location, offset, splitter, num_partitions from tests.test_base import TestBase -from pyspark import StorageLevel class TestSpatialRddAssignment(TestBase): @@ -30,8 +29,7 @@ def test_raw_spatial_rdd_assignment(self): offset, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) spatial_rdd.analyze() @@ -51,8 +49,7 @@ def test_raw_circle_rdd_assignment(self): offset, splitter, True, - num_partitions, - StorageLevel.MEMORY_ONLY + num_partitions ) circle_rdd = CircleRDD(point_rdd, 1.0) circle_rdd.analyze() diff --git a/python/tests/test_scala_example.py b/python/tests/test_scala_example.py index 25b1e7b45b..fd2d3a2461 100644 --- a/python/tests/test_scala_example.py +++ b/python/tests/test_scala_example.py @@ -52,14 +52,14 @@ class TestScalaExample(TestBase): def test_spatial_range_query(self): object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True ) object_rdd.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY) for _ in range(each_query_loop_times): result_size = RangeQuery.SpatialRangeQuery(object_rdd, range_query_window, False, False).count() object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True ) object_rdd.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY) for _ in range(each_query_loop_times): @@ -67,7 +67,7 @@ def test_spatial_range_query(self): def test_spatial_range_query_using_index(self): object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY) + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True) object_rdd.buildIndex(point_rdd_index_type, False) object_rdd.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY) assert object_rdd.indexedRawRDD.is_cached @@ -77,7 +77,7 @@ def test_spatial_range_query_using_index(self): def test_spatial_knn_query(self): object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True ) object_rdd.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY) @@ -86,7 +86,7 @@ def test_spatial_knn_query(self): def test_spatial_knn_query_using_index(self): object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True ) object_rdd.buildIndex(point_rdd_index_type, False) object_rdd.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY) @@ -100,7 +100,7 @@ def test_spatial_join_query(self): polygon_rdd_splitter, True ) object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY) + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True) object_rdd.spatialPartitioning(join_query_partitioning_type) query_window_rdd.spatialPartitioning(object_rdd.getPartitioner()) @@ -117,7 +117,7 @@ def test_spatial_join_using_index(self): polygon_rdd_end_offset, polygon_rdd_splitter, True ) object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True ) object_rdd.spatialPartitioning(join_query_partitioning_type) @@ -135,7 +135,7 @@ def test_spatial_join_using_index(self): def test_distance_join_query(self): object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY) + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True) query_window_rdd = CircleRDD(object_rdd, 0.1) object_rdd.spatialPartitioning(GridType.QUADTREE) @@ -151,7 +151,7 @@ def test_distance_join_query(self): def test_distance_join_using_index(self): object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY) + self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True) query_window_rdd = CircleRDD(object_rdd, 0.1) @@ -168,26 +168,6 @@ def test_distance_join_using_index(self): for _ in range(each_query_loop_times): result_size = JoinQuery.DistanceJoinQuery(object_rdd, query_window_rdd, True, True).count() - def test_crs_transformation_spatial_range_query(self): - object_rdd = PointRDD( - self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY, - "epsg:4326", "epsg:3005") - - object_rdd.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY) - assert object_rdd.rawSpatialRDD.is_cached - for _ in range(each_query_loop_times): - result_size = RangeQuery.SpatialRangeQuery(object_rdd, range_query_window, False, False).count() - assert result_size > -1 - - def test_crs_transformation_spatial_range_query_using_index(self): - object_rdd = PointRDD(self.sc, point_rdd_input_location, point_rdd_offset, - point_rdd_splitter, True, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005") - object_rdd.buildIndex(point_rdd_index_type, False) - object_rdd.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY) - for _ in range(each_query_loop_times): - result_size = RangeQuery.SpatialRangeQuery(object_rdd, range_query_window, False, True).count() - assert result_size > -1 - def test_indexed_rdd_assignment(self): object_rdd = PointRDD( self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True) diff --git a/python/tests/utils/test_crs_transformation.py b/python/tests/utils/test_crs_transformation.py index 71b638b3d8..d43cc0efef 100644 --- a/python/tests/utils/test_crs_transformation.py +++ b/python/tests/utils/test_crs_transformation.py @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -from pyspark import StorageLevel - from sedona.core.SpatialRDD import PointRDD, PolygonRDD, CircleRDD from sedona.core.enums import GridType from sedona.core.geom.circle import Circle @@ -35,11 +33,9 @@ def test_spatial_range_query(self): input_location, offset, splitter, - True, - StorageLevel.MEMORY_ONLY, - "epsg:4326", - "epsg:3005" + True ) + spatial_rdd.CRSTransform("epsg:4326", "epsg:3005") for i in range(loop_times): result_size = RangeQuery.SpatialRangeQuery(spatial_rdd, query_envelope, False, False).count() @@ -47,115 +43,3 @@ def test_spatial_range_query(self): assert RangeQuery.SpatialRangeQuery( spatial_rdd, query_envelope, False, False).take(10)[1].getUserData() is not None - - def test_spatial_range_query_using_index(self): - spatial_rdd = PointRDD( - self.sc, - input_location, - offset, - splitter, - True, - StorageLevel.MEMORY_ONLY, - "epsg:4326", - "epsg:3005" - ) - spatial_rdd.buildIndex(IndexType.RTREE, False) - - for i in range(loop_times): - result_size = RangeQuery.SpatialRangeQuery(spatial_rdd, query_envelope, False, False).count() - assert result_size == 3127 - - assert RangeQuery.SpatialRangeQuery( - spatial_rdd, query_envelope, False, False).take(10)[1].getUserData() is not None - - def test_spatial_knn_query(self): - point_rdd = PointRDD( - self.sc, input_location, offset, splitter, True, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - - for i in range(loop_times): - result = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k, False) - assert result.__len__() > 0 - assert result[0].getUserData() is not None - - def test_spatial_knn_query_using_index(self): - point_rdd = PointRDD( - self.sc, input_location, offset, splitter, True, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - point_rdd.buildIndex(IndexType.RTREE, False) - - for i in range(loop_times): - result = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k, False) - assert result.__len__() > 0 - assert result[0].getUserData() is not None - - def test_spatial_knn_correctness(self): - point_rdd = PointRDD( - self.sc, input_location, offset, splitter, True, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - result_no_index = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k, False) - point_rdd.buildIndex(IndexType.RTREE, False) - result_with_index = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k, True) - - sorted_result_no_index = sorted(result_no_index, key=lambda geo_data: distance_sorting_functions( - geo_data, query_point)) - - sorted_result_with_index = sorted(result_with_index, key=lambda geo_data: distance_sorting_functions( - geo_data, query_point)) - - difference = 0 - for x in range(top_k): - difference += sorted_result_no_index[x].geom.distance(sorted_result_with_index[x].geom) - - assert difference == 0 - - def test_spatial_join_query_with_polygon_rdd(self): - query_rdd = PolygonRDD( - self.sc, input_location_query_polygon, splitter, True, - num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - - spatial_rdd = PointRDD( - self.sc, input_location, offset, splitter, True, num_partitions, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - spatial_rdd.spatialPartitioning(grid_type) - query_rdd.spatialPartitioning(spatial_rdd.getPartitioner()) - - result = JoinQuery.SpatialJoinQuery(spatial_rdd, query_rdd, False, True).collect() - assert result[1][0].getUserData() is not None - - for data in result: - if data[1].__len__() != 0: - for right_data in data[1]: - assert right_data.getUserData() is not None - - def test_spatial_join_query_with_polygon_rdd_using_index(self): - query_rdd = PolygonRDD( - self.sc, input_location_query_polygon, splitter, True, - num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - - spatial_rdd = PointRDD( - self.sc, - input_location, offset, splitter, True, num_partitions, - StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005" - ) - - query_rdd.analyze() - spatial_rdd.analyze() - - spatial_rdd.spatialPartitioning(grid_type) - - spatial_rdd.buildIndex(IndexType.RTREE, True) - - query_rdd.spatialPartitioning(spatial_rdd.getPartitioner()) - - result = JoinQuery.SpatialJoinQuery(spatial_rdd, query_rdd, False, True).collect() - - assert result[1][0].getUserData() is not None - - for data in result: - if data[1].__len__() != 0: - for right_data in data[1]: - assert right_data.getUserData() is not None From a192aab8e97aa6ee6973271f3a364818073d7f0e Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Tue, 8 Aug 2023 01:38:32 -0700 Subject: [PATCH 03/11] Reduce loop times to 1 --- python/tests/properties/crs_transform.py | 2 +- python/tests/spatial_operator/test_linestring_knn.py | 2 +- python/tests/spatial_operator/test_linestring_range.py | 2 +- python/tests/spatial_operator/test_point_knn.py | 2 +- python/tests/spatial_operator/test_point_range.py | 2 +- python/tests/spatial_operator/test_polygon_knn.py | 2 +- python/tests/spatial_operator/test_polygon_range.py | 2 +- python/tests/spatial_operator/test_rectangle_knn.py | 2 +- python/tests/spatial_operator/test_rectangle_range.py | 2 +- python/tests/test_scala_example.py | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/tests/properties/crs_transform.py b/python/tests/properties/crs_transform.py index 2cc9c5ec46..548ceed583 100644 --- a/python/tests/properties/crs_transform.py +++ b/python/tests/properties/crs_transform.py @@ -30,7 +30,7 @@ num_partitions = 11 distance = 0.01 input_location_query_polygon = os.path.join(tests_resource, "crs-test-polygon.csv") -loop_times = 5 +loop_times = 1 query_envelope = Envelope(30.01, 40.01, -90.01, -80.01) query_point = Point(34.01, -84.01) top_k = 100 \ No newline at end of file diff --git a/python/tests/spatial_operator/test_linestring_knn.py b/python/tests/spatial_operator/test_linestring_knn.py index cd8ba2124f..36b3a948aa 100644 --- a/python/tests/spatial_operator/test_linestring_knn.py +++ b/python/tests/spatial_operator/test_linestring_knn.py @@ -34,7 +34,7 @@ class TestLineStringKnn(TestBase): - loop_times = 5 + loop_times = 1 query_point = Point(-84.01, 34.01) def test_spatial_knn_query(self): diff --git a/python/tests/spatial_operator/test_linestring_range.py b/python/tests/spatial_operator/test_linestring_range.py index 1b60e92482..d9041622e4 100644 --- a/python/tests/spatial_operator/test_linestring_range.py +++ b/python/tests/spatial_operator/test_linestring_range.py @@ -33,7 +33,7 @@ class TestLineStringRange(TestBase): - loop_times = 5 + loop_times = 1 query_envelope = Envelope(-85.01, -60.01, 34.01, 50.01) def test_spatial_range_query(self): diff --git a/python/tests/spatial_operator/test_point_knn.py b/python/tests/spatial_operator/test_point_knn.py index c911475d01..8d6d0274df 100644 --- a/python/tests/spatial_operator/test_point_knn.py +++ b/python/tests/spatial_operator/test_point_knn.py @@ -34,7 +34,7 @@ class TestPointKnn(TestBase): - loop_times = 5 + loop_times = 1 query_point = Point(-84.01, 34.01) top_k = 100 diff --git a/python/tests/spatial_operator/test_point_range.py b/python/tests/spatial_operator/test_point_range.py index 61ffba15fb..ef7c62afe0 100644 --- a/python/tests/spatial_operator/test_point_range.py +++ b/python/tests/spatial_operator/test_point_range.py @@ -47,7 +47,7 @@ class TestPointRange(TestBase): - loop_times = 5 + loop_times = 1 query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01) def test_spatial_range_query(self): diff --git a/python/tests/spatial_operator/test_polygon_knn.py b/python/tests/spatial_operator/test_polygon_knn.py index 49085b3eeb..2a341e9761 100644 --- a/python/tests/spatial_operator/test_polygon_knn.py +++ b/python/tests/spatial_operator/test_polygon_knn.py @@ -33,7 +33,7 @@ class TestPolygonKnn(TestBase): - loop_times = 5 + loop_times = 1 top_k = 100 query_point = Point(-84.01, 34.01) diff --git a/python/tests/spatial_operator/test_polygon_range.py b/python/tests/spatial_operator/test_polygon_range.py index 58c775c781..4537ffc06b 100644 --- a/python/tests/spatial_operator/test_polygon_range.py +++ b/python/tests/spatial_operator/test_polygon_range.py @@ -31,7 +31,7 @@ class TestPolygonRange(TestBase): - loop_times = 5 + loop_times = 1 query_envelope = Envelope(-85.01, -60.01, 34.01, 50.01) def test_spatial_range_query(self): diff --git a/python/tests/spatial_operator/test_rectangle_knn.py b/python/tests/spatial_operator/test_rectangle_knn.py index 7674bc521e..b032a67aa6 100644 --- a/python/tests/spatial_operator/test_rectangle_knn.py +++ b/python/tests/spatial_operator/test_rectangle_knn.py @@ -43,7 +43,7 @@ class TestRectangleKNN(TestBase): query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01) - loop_times = 5 + loop_times = 1 query_point = Point(-84.01, 34.01) top_k = 100 query_polygon = Polygon( diff --git a/python/tests/spatial_operator/test_rectangle_range.py b/python/tests/spatial_operator/test_rectangle_range.py index ac92a66354..dd25f3ac31 100644 --- a/python/tests/spatial_operator/test_rectangle_range.py +++ b/python/tests/spatial_operator/test_rectangle_range.py @@ -41,7 +41,7 @@ class TestRectangleRange(TestBase): query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01) - loop_times = 5 + loop_times = 1 def test_spatial_range_query(self): spatial_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True) diff --git a/python/tests/test_scala_example.py b/python/tests/test_scala_example.py index fd2d3a2461..adcf53e0df 100644 --- a/python/tests/test_scala_example.py +++ b/python/tests/test_scala_example.py @@ -43,7 +43,7 @@ knn_query_point = Point(-84.01, 34.01) range_query_window = Envelope(-90.01, -80.01, 30.01, 40.01) join_query_partitioning_type = GridType.QUADTREE -each_query_loop_times = 20 +each_query_loop_times = 1 shape_file_input_location = os.path.join(tests_resource, "shapefiles/polygon") From ae1662a5c841963216abee54973d663f77f3e7b5 Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Tue, 8 Aug 2023 01:47:40 -0700 Subject: [PATCH 04/11] Remove calling storage level --- R/R/data_interface.R | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/R/R/data_interface.R b/R/R/data_interface.R index a7cbfdfa1c..0504a39e3d 100644 --- a/R/R/data_interface.R +++ b/R/R/data_interface.R @@ -147,7 +147,6 @@ sedona_read_dsv_to_typed_rdd <- function(sc, max(as.integer(repartition %||% 1L), 1L), fmt ) %>% - set_storage_level(storage_level) %>% new_spatial_rdd(type) } @@ -211,7 +210,6 @@ sedona_read_shapefile_to_typed_rdd <- function(sc, java_context(sc), location ) %>% - set_storage_level(storage_level) %>% new_spatial_rdd(type) } @@ -240,7 +238,6 @@ sedona_read_geojson_to_typed_rdd <- function(sc, has_non_spatial_attrs, max(as.integer(repartition %||% 1L), 1L) ) %>% - set_storage_level(storage_level) %>% new_spatial_rdd(type) } @@ -305,7 +302,6 @@ sedona_read_geojson <- function(sc, allow_invalid_geometries, skip_syntactically_invalid_geometries ) %>% - set_storage_level(storage_level) %>% new_spatial_rdd(NULL) } @@ -335,7 +331,6 @@ sedona_read_wkb <- function(sc, allow_invalid_geometries, skip_syntactically_invalid_geometries ) %>% - set_storage_level(storage_level) %>% new_spatial_rdd(NULL) } @@ -365,7 +360,6 @@ sedona_read_wkt <- function(sc, allow_invalid_geometries, skip_syntactically_invalid_geometries ) %>% - set_storage_level(storage_level) %>% new_spatial_rdd(NULL) } @@ -381,7 +375,6 @@ sedona_read_shapefile <- function(sc, java_context(sc), location ) %>% - set_storage_level(storage_level) %>% new_spatial_rdd(NULL) } @@ -800,18 +793,3 @@ to_delimiter_enum_value <- function(sc, delimiter) { sc$state$enums$delimiter[[delimiter]] } - -set_storage_level <- function(rdd, storage_level) { - sc <- spark_connection(rdd) - storage_level <- sc$state$object_cache$storage_levels[[storage_level]] %||% { - storage_level_obj <- invoke_static( - sc, "org.apache.spark.storage.StorageLevel", storage_level - ) - sc$state$object_cache$storage_levels[[storage_level]] <- storage_level_obj - - storage_level_obj - } - invoke(rdd, "analyze", storage_level) - - rdd -} From f65bb7ee5b46fc3fb6734e188435dc7f49ab8fc7 Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Tue, 8 Aug 2023 01:53:42 -0700 Subject: [PATCH 05/11] Fix the scala code --- .../ScalaEarthdataMapperRunnableExample.scala | 4 +- .../sedona/core/showcase/ScalaExample.scala | 58 +++------------ .../sedona/core/showcase/SpatialJoinShp.scala | 70 ------------------- 3 files changed, 11 insertions(+), 121 deletions(-) delete mode 100644 core/src/main/scala/org/apache/sedona/core/showcase/SpatialJoinShp.scala diff --git a/core/src/main/scala/org/apache/sedona/core/showcase/ScalaEarthdataMapperRunnableExample.scala b/core/src/main/scala/org/apache/sedona/core/showcase/ScalaEarthdataMapperRunnableExample.scala index c1f8f24918..4249065ca7 100644 --- a/core/src/main/scala/org/apache/sedona/core/showcase/ScalaEarthdataMapperRunnableExample.scala +++ b/core/src/main/scala/org/apache/sedona/core/showcase/ScalaEarthdataMapperRunnableExample.scala @@ -57,7 +57,7 @@ object ScalaEarthdataMapperRunnableExample extends App { */ def testSpatialRangeQuery() { val earthdataHDFPoint = new EarthdataHDFPointMapper(HDFIncrement, HDFOffset, HDFRootGroupName, HDFDataVariableList, HDFDataVariableName, urlPrefix) - val spatialRDD = new PointRDD(sc, InputLocation, numPartitions, earthdataHDFPoint, StorageLevel.MEMORY_ONLY) + val spatialRDD = new PointRDD(sc, InputLocation, numPartitions, earthdataHDFPoint) var i = 0 while (i < loopTimes) { var resultSize = 0L @@ -71,7 +71,7 @@ object ScalaEarthdataMapperRunnableExample extends App { */ def testSpatialRangeQueryUsingIndex() { val earthdataHDFPoint = new EarthdataHDFPointMapper(HDFIncrement, HDFOffset, HDFRootGroupName, HDFDataVariableList, HDFDataVariableName, urlPrefix) - val spatialRDD = new PointRDD(sc, InputLocation, numPartitions, earthdataHDFPoint, StorageLevel.MEMORY_ONLY) + val spatialRDD = new PointRDD(sc, InputLocation, numPartitions, earthdataHDFPoint) spatialRDD.buildIndex(IndexType.RTREE, false) var i = 0 while (i < loopTimes) { diff --git a/core/src/main/scala/org/apache/sedona/core/showcase/ScalaExample.scala b/core/src/main/scala/org/apache/sedona/core/showcase/ScalaExample.scala index ee730fa523..235aa0b6c1 100644 --- a/core/src/main/scala/org/apache/sedona/core/showcase/ScalaExample.scala +++ b/core/src/main/scala/org/apache/sedona/core/showcase/ScalaExample.scala @@ -64,7 +64,7 @@ object ScalaExample extends App { val kNNQueryPoint = geometryFactory.createPoint(new Coordinate(-84.01, 34.01)) val rangeQueryWindow = new Envelope(-90.01, -80.01, 30.01, 40.01) val joinQueryPartitioningType = GridType.QUADTREE - val eachQueryLoopTimes = 5 + val eachQueryLoopTimes = 1 var ShapeFileInputLocation = resourceFolder + "shapefiles/polygon" @@ -76,8 +76,6 @@ object ScalaExample extends App { testSpatialJoinQueryUsingIndex() testDistanceJoinQuery() testDistanceJoinQueryUsingIndex() - testCRSTransformationSpatialRangeQuery() - testCRSTransformationSpatialRangeQueryUsingIndex() sc.stop() System.out.println("All DEMOs passed!") @@ -88,7 +86,7 @@ object ScalaExample extends App { * @throws Exception the exception */ def testSpatialRangeQuery() { - val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY) + val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true) objectRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY) for (i <- 1 to eachQueryLoopTimes) { val resultSize = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, SpatialPredicate.COVERED_BY, false).count @@ -102,7 +100,7 @@ object ScalaExample extends App { * @throws Exception the exception */ def testSpatialRangeQueryUsingIndex() { - val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY) + val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true) objectRDD.buildIndex(PointRDDIndexType, false) objectRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY) for (i <- 1 to eachQueryLoopTimes) { @@ -117,7 +115,7 @@ object ScalaExample extends App { * @throws Exception the exception */ def testSpatialKnnQuery() { - val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY) + val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true) objectRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY) for (i <- 1 to eachQueryLoopTimes) { val result = KNNQuery.SpatialKnnQuery(objectRDD, kNNQueryPoint, 1000, false) @@ -130,7 +128,7 @@ object ScalaExample extends App { * @throws Exception the exception */ def testSpatialKnnQueryUsingIndex() { - val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY) + val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true) objectRDD.buildIndex(PointRDDIndexType, false) objectRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY) for (i <- 1 to eachQueryLoopTimes) { @@ -145,7 +143,7 @@ object ScalaExample extends App { */ def testSpatialJoinQuery() { val queryWindowRDD = new PolygonRDD(sc, PolygonRDDInputLocation, PolygonRDDStartOffset, PolygonRDDEndOffset, PolygonRDDSplitter, true) - val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY) + val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true) objectRDD.spatialPartitioning(joinQueryPartitioningType) queryWindowRDD.spatialPartitioning(objectRDD.getPartitioner) @@ -164,7 +162,7 @@ object ScalaExample extends App { */ def testSpatialJoinQueryUsingIndex() { val queryWindowRDD = new PolygonRDD(sc, PolygonRDDInputLocation, PolygonRDDStartOffset, PolygonRDDEndOffset, PolygonRDDSplitter, true) - val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY) + val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true) objectRDD.spatialPartitioning(joinQueryPartitioningType) queryWindowRDD.spatialPartitioning(objectRDD.getPartitioner) @@ -185,7 +183,7 @@ object ScalaExample extends App { * @throws Exception the exception */ def testDistanceJoinQuery() { - val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY) + val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true) val queryWindowRDD = new CircleRDD(objectRDD, 0.1) objectRDD.spatialPartitioning(GridType.QUADTREE) @@ -205,7 +203,7 @@ object ScalaExample extends App { * @throws Exception the exception */ def testDistanceJoinQueryUsingIndex() { - val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY) + val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true) val queryWindowRDD = new CircleRDD(objectRDD, 0.1) objectRDD.spatialPartitioning(GridType.QUADTREE) @@ -221,44 +219,6 @@ object ScalaExample extends App { } } - @throws[Exception] - def testCRSTransformationSpatialRangeQuery(): Unit = { - val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005") - objectRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY) - var i = 0 - while ( { - i < eachQueryLoopTimes - }) { - val resultSize = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, SpatialPredicate.COVERED_BY, false).count - assert(resultSize > -1) - - { - i += 1; - i - 1 - } - } - } - - - @throws[Exception] - def testCRSTransformationSpatialRangeQueryUsingIndex(): Unit = { - val objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005") - objectRDD.buildIndex(PointRDDIndexType, false) - objectRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY) - var i = 0 - while ( { - i < eachQueryLoopTimes - }) { - val resultSize = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, SpatialPredicate.COVERED_BY, true).count - assert(resultSize > -1) - - { - i += 1; - i - 1 - } - } - } - @throws[Exception] def testLoadShapefileIntoPolygonRDD(): Unit = { val shapefileRDD = new ShapefileRDD(sc, ShapeFileInputLocation) diff --git a/core/src/main/scala/org/apache/sedona/core/showcase/SpatialJoinShp.scala b/core/src/main/scala/org/apache/sedona/core/showcase/SpatialJoinShp.scala deleted file mode 100644 index 00a64cf89f..0000000000 --- a/core/src/main/scala/org/apache/sedona/core/showcase/SpatialJoinShp.scala +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sedona.core.showcase - -import org.apache.log4j.{Level, Logger} -import org.apache.sedona.core.formatMapper.shapefileParser.ShapefileRDD -import org.apache.sedona.core.spatialRDD.PolygonRDD -import org.apache.spark.storage.StorageLevel -import org.apache.spark.{SparkConf, SparkContext} -import org.locationtech.jts.geom.Polygon - -object SpatialJoinShp extends App { - - val conf = new SparkConf().setAppName("SpatialJoinSpeciesPA").setMaster("local[4]") - - - Logger.getLogger("org").setLevel(Level.WARN) - Logger.getLogger("akka").setLevel(Level.WARN) - val sc = new SparkContext(conf) - val shp1 = new ShapefileRDD(sc, "/Users/jiayu/Downloads/spark4geo_subset/wdpa") - val wdpa = new PolygonRDD(shp1.getPolygonRDD, StorageLevel.MEMORY_ONLY) - val shp2 = new ShapefileRDD(sc, "/Users/jiayu/Downloads/spark4geo_subset/amphib") - val species = new PolygonRDD(shp2.getPolygonRDD, StorageLevel.MEMORY_ONLY) - val result = shp2.getShapeRDD.collect(); - - //wdpa.spatialPartitioning(GridType.QUADTREE) - //species.spatialPartitioning(wdpa.partitionTree) - - def loadShapefile(path: String, numPartitions: Int = 20): PolygonRDD = { - val shp = new ShapefileRDD(sc, path) - val polygon = new PolygonRDD(shp.getPolygonRDD, StorageLevel.MEMORY_ONLY) - //polygon.rawSpatialRDD = polygon.rawSpatialRDD.repartition(numPartitions) - //polygon.analyze() - polygon - } - - for (a <- 1 until result.size()) { - println("print..." + result.get(a).getUserData + " END"); - } - - //val query = JoinQuery.SpatialJoinQuery(wdpa, species, false, false) - - println("polygon is " + shp2.getPolygonRDD.take(100).get(55)) - println("userdata is " + wdpa.rawSpatialRDD.take(100).get(55).asInstanceOf[Polygon].getUserData) - println(species.rawSpatialRDD.count()) - - - //val user_data_sample = JoinQuery.SpatialJoinQuery(wdpa, species, false, false).first()._1.getUserData - //if (user_data_sample.toString.isEmpty) println("UserData is empty") else println(user_data_sample) - - // val join_result = query.rdd.map((tuple: (Polygon, util.HashSet[Polygon])) => (tuple._1, tuple._2.asScala.map(tuple._1.intersection(_).getArea)) ) - // val intersections = join_result.collect() -} From c487aef0120bc45e2a91dc22b80f72b6bcb23ba5 Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Tue, 8 Aug 2023 02:24:25 -0700 Subject: [PATCH 06/11] Fix a bunch of failed test cases --- .../EarthdataMapperRunnableExample.java | 6 +- .../apache/sedona/core/showcase/Example.java | 51 +--- .../spatialOperator/LineStringJoinTest.java | 190 ------------- .../core/spatialOperator/PointJoinTest.java | 250 ----------------- .../core/spatialOperator/PolygonJoinTest.java | 258 ------------------ .../spatialOperator/RectangleJoinTest.java | 181 ------------ .../core/spatialRDD/LineStringRDDTest.java | 4 + .../sedona/core/spatialRDD/PointRDDTest.java | 3 + .../core/spatialRDD/PolygonRDDTest.java | 7 + .../core/spatialRDD/RectangleRDDTest.java | 3 + core/src/test/resources/point.test.properties | 5 +- .../test/resources/polygon.test.properties | 5 +- 12 files changed, 31 insertions(+), 932 deletions(-) delete mode 100644 core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringJoinTest.java delete mode 100644 core/src/test/java/org/apache/sedona/core/spatialOperator/PointJoinTest.java delete mode 100644 core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonJoinTest.java delete mode 100644 core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleJoinTest.java diff --git a/core/src/main/java/org/apache/sedona/core/showcase/EarthdataMapperRunnableExample.java b/core/src/main/java/org/apache/sedona/core/showcase/EarthdataMapperRunnableExample.java index a8ba812ef1..7552305124 100644 --- a/core/src/main/java/org/apache/sedona/core/showcase/EarthdataMapperRunnableExample.java +++ b/core/src/main/java/org/apache/sedona/core/showcase/EarthdataMapperRunnableExample.java @@ -143,7 +143,8 @@ public static void testSpatialRangeQuery() { EarthdataHDFPointMapper earthdataHDFPoint = new EarthdataHDFPointMapper(HDFIncrement, HDFOffset, HDFRootGroupName, HDFDataVariableList, HDFDataVariableName, urlPrefix); - PointRDD spatialRDD = new PointRDD(sc, InputLocation, numPartitions, earthdataHDFPoint, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, numPartitions, earthdataHDFPoint); + spatialRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY()); for (int i = 0; i < loopTimes; i++) { long resultSize; try { @@ -164,7 +165,7 @@ public static void testSpatialRangeQueryUsingIndex() { EarthdataHDFPointMapper earthdataHDFPoint = new EarthdataHDFPointMapper(HDFIncrement, HDFOffset, HDFRootGroupName, HDFDataVariableList, HDFDataVariableName, urlPrefix); - PointRDD spatialRDD = new PointRDD(sc, InputLocation, numPartitions, earthdataHDFPoint, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sc, InputLocation, numPartitions, earthdataHDFPoint); try { spatialRDD.buildIndex(IndexType.RTREE, false); } @@ -172,6 +173,7 @@ public static void testSpatialRangeQueryUsingIndex() // TODO Auto-generated catch block e1.printStackTrace(); } + spatialRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY()); for (int i = 0; i < loopTimes; i++) { try { long resultSize; diff --git a/core/src/main/java/org/apache/sedona/core/showcase/Example.java b/core/src/main/java/org/apache/sedona/core/showcase/Example.java index f1bf7dd9d0..e220d5d341 100644 --- a/core/src/main/java/org/apache/sedona/core/showcase/Example.java +++ b/core/src/main/java/org/apache/sedona/core/showcase/Example.java @@ -191,8 +191,6 @@ public static void main(String[] args) testSpatialJoinQueryUsingIndex(); testDistanceJoinQuery(); testDistanceJoinQueryUsingIndex(); - testCRSTransformationSpatialRangeQuery(); - testCRSTransformationSpatialRangeQueryUsingIndex(); testLoadShapefileIntoPolygonRDD(); } catch (Exception e) { @@ -212,7 +210,7 @@ public static void main(String[] args) public static void testSpatialRangeQuery() throws Exception { - objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY()); + objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true); objectRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY()); for (int i = 0; i < eachQueryLoopTimes; i++) { long resultSize = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, false).count(); @@ -228,7 +226,7 @@ public static void testSpatialRangeQuery() public static void testSpatialRangeQueryUsingIndex() throws Exception { - objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY()); + objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true); objectRDD.buildIndex(PointRDDIndexType, false); objectRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY()); for (int i = 0; i < eachQueryLoopTimes; i++) { @@ -245,7 +243,7 @@ public static void testSpatialRangeQueryUsingIndex() public static void testSpatialKnnQuery() throws Exception { - objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY()); + objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true); objectRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY()); for (int i = 0; i < eachQueryLoopTimes; i++) { List result = KNNQuery.SpatialKnnQuery(objectRDD, kNNQueryPoint, 1000, false); @@ -261,7 +259,7 @@ public static void testSpatialKnnQuery() public static void testSpatialKnnQueryUsingIndex() throws Exception { - objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY()); + objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true); objectRDD.buildIndex(PointRDDIndexType, false); objectRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY()); for (int i = 0; i < eachQueryLoopTimes; i++) { @@ -279,7 +277,7 @@ public static void testSpatialJoinQuery() throws Exception { queryWindowRDD = new PolygonRDD(sc, PolygonRDDInputLocation, PolygonRDDStartOffset, PolygonRDDEndOffset, PolygonRDDSplitter, true); - objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY()); + objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true); objectRDD.spatialPartitioning(joinQueryPartitioningType); queryWindowRDD.spatialPartitioning(objectRDD.getPartitioner()); @@ -301,7 +299,7 @@ public static void testSpatialJoinQueryUsingIndex() throws Exception { queryWindowRDD = new PolygonRDD(sc, PolygonRDDInputLocation, PolygonRDDStartOffset, PolygonRDDEndOffset, PolygonRDDSplitter, true); - objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY()); + objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true); objectRDD.spatialPartitioning(joinQueryPartitioningType); queryWindowRDD.spatialPartitioning(objectRDD.getPartitioner()); @@ -325,7 +323,7 @@ public static void testSpatialJoinQueryUsingIndex() public static void testDistanceJoinQuery() throws Exception { - objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY()); + objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true); CircleRDD queryWindowRDD = new CircleRDD(objectRDD, 0.1); objectRDD.spatialPartitioning(GridType.QUADTREE); @@ -349,7 +347,7 @@ public static void testDistanceJoinQuery() public static void testDistanceJoinQueryUsingIndex() throws Exception { - objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY()); + objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true); CircleRDD queryWindowRDD = new CircleRDD(objectRDD, 0.1); objectRDD.spatialPartitioning(GridType.QUADTREE); @@ -366,39 +364,6 @@ public static void testDistanceJoinQueryUsingIndex() } } - /** - * Test CRS transformation spatial range query. - * - * @throws Exception the exception - */ - public static void testCRSTransformationSpatialRangeQuery() - throws Exception - { - objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005"); - objectRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY()); - for (int i = 0; i < eachQueryLoopTimes; i++) { - long resultSize = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, false).count(); - assert resultSize > -1; - } - } - - /** - * Test CRS transformation spatial range query using index. - * - * @throws Exception the exception - */ - public static void testCRSTransformationSpatialRangeQueryUsingIndex() - throws Exception - { - objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005"); - objectRDD.buildIndex(PointRDDIndexType, false); - objectRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY()); - for (int i = 0; i < eachQueryLoopTimes; i++) { - long resultSize = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, true).count(); - assert resultSize > -1; - } - } - public static void testLoadShapefileIntoPolygonRDD() throws Exception { diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringJoinTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringJoinTest.java deleted file mode 100644 index 477cedfec2..0000000000 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringJoinTest.java +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.sedona.core.spatialOperator; - -import org.apache.sedona.core.enums.GridType; -import org.apache.sedona.core.enums.IndexType; -import org.apache.sedona.core.enums.JoinBuildSide; -import org.apache.sedona.core.spatialRDD.LineStringRDD; -import org.apache.sedona.core.spatialRDD.PolygonRDD; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.locationtech.jts.geom.LineString; -import org.locationtech.jts.geom.Polygon; -import scala.Tuple2; - -import java.util.Arrays; -import java.util.Collection; -import java.util.List; - -import static org.junit.Assert.assertEquals; - -@RunWith(Parameterized.class) -public class LineStringJoinTest - extends JoinTestBase -{ - - private static long expectedMatchCount; - private static long expectedMatchWithOriginalDuplicatesCount; - - public LineStringJoinTest(GridType gridType, int numPartitions) - { - super(gridType, numPartitions); - } - - @Parameterized.Parameters - public static Collection testParams() - { - return Arrays.asList(new Object[][] { - {GridType.QUADTREE, 11}, - {GridType.KDBTREE, 11}, - }); - } - - /** - * Once executed before all. - */ - @BeforeClass - public static void onceExecutedBeforeAll() - { - initialize("LineStringJoin", "linestring.test.properties"); - expectedMatchCount = Long.parseLong(prop.getProperty("matchCount")); - expectedMatchWithOriginalDuplicatesCount = - Long.parseLong(prop.getProperty("matchWithOriginalDuplicatesCount")); - } - - /** - * Tear down. - */ - @AfterClass - public static void TearDown() - { - sc.stop(); - } - - /** - * Test spatial join query with line string RDD. - * - * @throws Exception the exception - */ - @Test - public void testNestedLoop() - throws Exception - { - - PolygonRDD queryRDD = createPolygonRDD(); - LineStringRDD spatialRDD = createLineStringRDD(); - - partitionRdds(queryRDD, spatialRDD); - - List>> result = JoinQuery.SpatialJoinQuery(spatialRDD, queryRDD, false, true).collect(); - - sanityCheckJoinResults(result); - long expectedCount = expectToPreserveOriginalDuplicates() - ? expectedMatchWithOriginalDuplicatesCount : expectedMatchCount; - assertEquals(expectedCount, countJoinResults(result)); - } - - /** - * Test spatial join query with polygon RDD using R tree index. - * - * @throws Exception the exception - */ - @Test - public void testRTree() - throws Exception - { - testIndexInt(IndexType.RTREE); - } - - /** - * Test spatial join query with polygon RDD using quad tree index. - * - * @throws Exception the exception - */ - @Test - public void testQuadTree() - throws Exception - { - testIndexInt(IndexType.QUADTREE); - } - - private void testIndexInt(IndexType indexType) - throws Exception - { - PolygonRDD queryRDD = createPolygonRDD(); - - LineStringRDD spatialRDD = createLineStringRDD(); - - partitionRdds(queryRDD, spatialRDD); - spatialRDD.buildIndex(indexType, true); - - List>> result = JoinQuery.SpatialJoinQuery(spatialRDD, queryRDD, false, true).collect(); - - sanityCheckJoinResults(result); - long expectedCount = expectToPreserveOriginalDuplicates() - ? expectedMatchWithOriginalDuplicatesCount : expectedMatchCount; - assertEquals(expectedCount, countJoinResults(result)); - } - - @Test - public void testDynamicRTree() - throws Exception - { - testDynamicIndexInt(IndexType.RTREE); - } - - @Test - public void testDynamicQuadTree() - throws Exception - { - testDynamicIndexInt(IndexType.QUADTREE); - } - - private void testDynamicIndexInt(IndexType indexType) - throws Exception - { - PolygonRDD queryRDD = createPolygonRDD(); - LineStringRDD spatialRDD = createLineStringRDD(); - - partitionRdds(queryRDD, spatialRDD); - - JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, true, indexType, JoinBuildSide.LEFT); - List> results = JoinQuery.spatialJoin(queryRDD, spatialRDD, joinParams).collect(); - - sanityCheckFlatJoinResults(results); - - long expectedCount = expectToPreserveOriginalDuplicates() - ? expectedMatchWithOriginalDuplicatesCount : expectedMatchCount; - assertEquals(expectedCount, results.size()); - } - - private LineStringRDD createLineStringRDD() - { - return createLineStringRDD(InputLocation); - } - - private PolygonRDD createPolygonRDD() - { - return createPolygonRDD(InputLocationQueryPolygon); - } -} \ No newline at end of file diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/PointJoinTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/PointJoinTest.java deleted file mode 100644 index b3820598d5..0000000000 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/PointJoinTest.java +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.sedona.core.spatialOperator; - -import org.apache.sedona.core.enums.GridType; -import org.apache.sedona.core.enums.IndexType; -import org.apache.sedona.core.enums.JoinBuildSide; -import org.apache.sedona.core.spatialRDD.PointRDD; -import org.apache.sedona.core.spatialRDD.PolygonRDD; -import org.apache.sedona.core.spatialRDD.RectangleRDD; -import org.apache.sedona.core.spatialRDD.SpatialRDD; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.locationtech.jts.geom.Point; -import org.locationtech.jts.geom.Polygon; -import scala.Tuple2; - -import java.util.Arrays; -import java.util.Collection; -import java.util.List; - -import static org.junit.Assert.assertEquals; - -@RunWith(Parameterized.class) -public class PointJoinTest - extends JoinTestBase -{ - - private static long expectedRectangleMatchCount; - private static long expectedRectangleMatchWithOriginalDuplicatesCount; - private static long expectedPolygonMatchCount; - private static long expectedPolygonMatchWithOriginalDuplicatesCount; - - public PointJoinTest(GridType gridType, int numPartitions) - { - super(gridType, numPartitions); - } - - @Parameterized.Parameters - public static Collection testParams() - { - return Arrays.asList(new Object[][] { - {GridType.QUADTREE, 11}, - {GridType.KDBTREE, 11}, - }); - } - - /** - * Once executed before all. - */ - @BeforeClass - public static void onceExecutedBeforeAll() - { - initialize("PointJoin", "point.test.properties"); - expectedRectangleMatchCount = Long.parseLong(prop.getProperty("rectangleMatchCount")); - expectedRectangleMatchWithOriginalDuplicatesCount = - Long.parseLong(prop.getProperty("rectangleMatchWithOriginalDuplicatesCount")); - expectedPolygonMatchCount = Long.parseLong(prop.getProperty("polygonMatchCount")); - expectedPolygonMatchWithOriginalDuplicatesCount = - Long.parseLong(prop.getProperty("polygonMatchWithOriginalDuplicatesCount")); - } - - /** - * Tear down. - */ - @AfterClass - public static void TearDown() - { - sc.stop(); - } - - /** - * Test spatial join query. - * - * @throws Exception the exception - */ - @Test - public void testNestedLoopWithRectangles() - throws Exception - { - RectangleRDD queryRDD = createRectangleRDD(); - testNestedLoopInt(queryRDD, expectedRectangleMatchCount); - } - - /** - * Test spatial join query with polygon RDD. - * - * @throws Exception the exception - */ - @Test - public void testNestedLoopWithPolygons() - throws Exception - { - PolygonRDD queryRDD = createPolygonRDD(); - final long expectedCount = expectToPreserveOriginalDuplicates() - ? expectedPolygonMatchWithOriginalDuplicatesCount : expectedPolygonMatchCount; - testNestedLoopInt(queryRDD, expectedCount); - } - - private void testNestedLoopInt(SpatialRDD queryRDD, long expectedCount) - throws Exception - { - PointRDD spatialRDD = createPointRDD(); - - partitionRdds(queryRDD, spatialRDD); - - List>> result = JoinQuery.SpatialJoinQuery(spatialRDD, queryRDD, false, true).collect(); - - sanityCheckJoinResults(result); - assertEquals(expectedCount, countJoinResults(result)); - } - - /** - * Test spatial join query with rectangle RDD using rtree index. - * - * @throws Exception the exception - */ - @Test - public void testRTreeWithRectangles() - throws Exception - { - RectangleRDD queryRDD = createRectangleRDD(); - testIndexInt(queryRDD, IndexType.RTREE, expectedRectangleMatchCount); - } - - /** - * Test spatial join query with polygon RDD using R tree index. - * - * @throws Exception the exception - */ - @Test - public void testRTreeWithPolygons() - throws Exception - { - PolygonRDD queryRDD = createPolygonRDD(); - final long expectedCount = expectToPreserveOriginalDuplicates() - ? expectedPolygonMatchWithOriginalDuplicatesCount : expectedPolygonMatchCount; - testIndexInt(queryRDD, IndexType.RTREE, expectedCount); - } - - /** - * Test spatial join query with rectangle RDD using quadtree index. - * - * @throws Exception the exception - */ - @Test - public void testQuadTreeWithRectangles() - throws Exception - { - RectangleRDD queryRDD = createRectangleRDD(); - testIndexInt(queryRDD, IndexType.QUADTREE, expectedRectangleMatchCount); - } - - /** - * Test spatial join query with polygon RDD using quad tree index. - * - * @throws Exception the exception - */ - @Test - public void testQuadTreeWithPolygons() - throws Exception - { - PolygonRDD queryRDD = createPolygonRDD(); - final long expectedCount = expectToPreserveOriginalDuplicates() - ? expectedPolygonMatchWithOriginalDuplicatesCount : expectedPolygonMatchCount; - testIndexInt(queryRDD, IndexType.QUADTREE, expectedCount); - } - - private void testIndexInt(SpatialRDD queryRDD, IndexType indexType, long expectedCount) - throws Exception - { - PointRDD spatialRDD = createPointRDD(); - - partitionRdds(queryRDD, spatialRDD); - spatialRDD.buildIndex(indexType, true); - - List>> result = JoinQuery.SpatialJoinQuery(spatialRDD, queryRDD, false, true).collect(); - - sanityCheckJoinResults(result); - assertEquals(expectedCount, countJoinResults(result)); - } - - @Test - public void testDynamicRTreeWithRectangles() - throws Exception - { - final RectangleRDD rectangleRDD = createRectangleRDD(); - final long expectedCount = expectToPreserveOriginalDuplicates() - ? expectedRectangleMatchWithOriginalDuplicatesCount : expectedRectangleMatchCount; - testDynamicRTreeInt(rectangleRDD, IndexType.RTREE, expectedCount); - } - - @Test - public void testDynamicRTreeWithPolygons() - throws Exception - { - PolygonRDD polygonRDD = createPolygonRDD(); - final long expectedCount = expectToPreserveOriginalDuplicates() - ? expectedPolygonMatchWithOriginalDuplicatesCount : expectedPolygonMatchCount; - testDynamicRTreeInt(polygonRDD, IndexType.RTREE, expectedCount); - } - - private void testDynamicRTreeInt(SpatialRDD queryRDD, IndexType indexType, long expectedCount) - throws Exception - { - PointRDD spatialRDD = createPointRDD(); - - partitionRdds(queryRDD, spatialRDD); - - JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, true, indexType, JoinBuildSide.LEFT); - List> results = JoinQuery.spatialJoin(queryRDD, spatialRDD, joinParams).collect(); - - sanityCheckFlatJoinResults(results); - assertEquals(expectedCount, results.size()); - } - - private RectangleRDD createRectangleRDD() - { - return createRectangleRDD(InputLocationQueryWindow); - } - - private PolygonRDD createPolygonRDD() - { - return createPolygonRDD(InputLocationQueryPolygon); - } - - private PointRDD createPointRDD() - { - return createPointRDD(InputLocation); - } -} \ No newline at end of file diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonJoinTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonJoinTest.java deleted file mode 100644 index 97b14e7a6c..0000000000 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonJoinTest.java +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.sedona.core.spatialOperator; - -import org.apache.sedona.core.enums.GridType; -import org.apache.sedona.core.enums.IndexType; -import org.apache.sedona.core.enums.JoinBuildSide; -import org.apache.sedona.core.spatialRDD.PolygonRDD; -import org.apache.spark.storage.StorageLevel; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.locationtech.jts.geom.Polygon; -import scala.Tuple2; - -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -import static org.junit.Assert.assertEquals; - -@RunWith(Parameterized.class) -public class PolygonJoinTest - extends JoinTestBase -{ - - private static long expectedContainsMatchCount; - private static long expectedIntersectsMatchCount; - private static long expectedContainsWithOriginalDuplicatesCount; - private static long expectedIntersectsWithOriginalDuplicatesCount; - - public PolygonJoinTest(GridType gridType, int numPartitions) - { - super(gridType, numPartitions); - } - - @Parameterized.Parameters - public static Collection testParams() - { - return Arrays.asList(new Object[][] { - {GridType.QUADTREE, 11}, - {GridType.KDBTREE, 11}, - }); - } - - /** - * Once executed before all. - */ - @BeforeClass - public static void onceExecutedBeforeAll() - { - initialize("PolygonJoin", "polygon.test.properties"); - - expectedContainsMatchCount = Long.parseLong(prop.getProperty("containsMatchCount")); - expectedContainsWithOriginalDuplicatesCount = - Long.parseLong(prop.getProperty("containsMatchWithOriginalDuplicatesCount")); - expectedIntersectsMatchCount = Long.parseLong(prop.getProperty("intersectsMatchCount")); - expectedIntersectsWithOriginalDuplicatesCount = - Long.parseLong(prop.getProperty("intersectsMatchWithOriginalDuplicatesCount")); - } - - /** - * Tear down. - */ - @AfterClass - public static void TearDown() - { - sc.stop(); - } - - @Test - public void testDynamicRTreeAndContains() - throws Exception - { - testDynamicIndexInt(false, IndexType.RTREE); - } - - @Test - public void testDynamicQuadTreeAndContains() - throws Exception - { - testDynamicIndexInt(false, IndexType.QUADTREE); - } - - @Test - public void testDynamicRTreeAndIntersects() - throws Exception - { - testDynamicIndexInt(true, IndexType.RTREE); - } - - @Test - public void testDynamicQuadTreeAndIntersects() - throws Exception - { - testDynamicIndexInt(true, IndexType.QUADTREE); - } - - private void testDynamicIndexInt(boolean intersects, IndexType indexType) - throws Exception - { - final PolygonRDD queryRDD = createPolygonRDD(InputLocationQueryPolygon); - final PolygonRDD spatialRDD = createPolygonRDD(InputLocation); - partitionRdds(queryRDD, spatialRDD); - - final JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, intersects, indexType, JoinBuildSide.LEFT); - final List> results = JoinQuery.spatialJoin(queryRDD, spatialRDD, joinParams).collect(); - sanityCheckFlatJoinResults(results); - - final long expectedCount = expectToPreserveOriginalDuplicates() - ? getExpectedWithOriginalDuplicatesCount(intersects) : getExpectedCount(intersects); - assertEquals(expectedCount, results.size()); - } - - /** - * Test spatial join query with polygon RDD. - * - * @throws Exception the exception - */ - @Test - public void testNestedLoopAndContains() - throws Exception - { - testNestedLoopInt(false); - } - - @Test - public void testNestedLoopAndIntersects() - throws Exception - { - testNestedLoopInt(true); - } - - private void testNestedLoopInt(boolean intersects) - throws Exception - { - PolygonRDD queryRDD = createPolygonRDD(InputLocationQueryPolygon); - PolygonRDD spatialRDD = createPolygonRDD(InputLocation); - - partitionRdds(queryRDD, spatialRDD); - - List>> result = JoinQuery.SpatialJoinQuery(spatialRDD, queryRDD, false, intersects).collect(); - sanityCheckJoinResults(result); - assertEquals(getExpectedWithOriginalDuplicatesCount(intersects), countJoinResults(result)); - } - - /** - * Test spatial join query with polygon RDD using R tree index. - * - * @throws Exception the exception - */ - @Test - public void testRTreeAndContains() - throws Exception - { - testIndexInt(false, IndexType.RTREE); - } - - @Test - public void testRTreeAndIntersects() - throws Exception - { - testIndexInt(true, IndexType.RTREE); - } - - @Test - public void testQuadTreeAndContains() - throws Exception - { - testIndexInt(false, IndexType.QUADTREE); - } - - @Test - public void testQuadTreeAndIntersects() - throws Exception - { - testIndexInt(true, IndexType.QUADTREE); - } - - private void testIndexInt(boolean intersects, IndexType indexType) - throws Exception - { - PolygonRDD queryRDD = createPolygonRDD(InputLocationQueryPolygon); - PolygonRDD spatialRDD = createPolygonRDD(InputLocation); - - partitionRdds(queryRDD, spatialRDD); - spatialRDD.buildIndex(indexType, true); - - List>> result = JoinQuery.SpatialJoinQuery(spatialRDD, queryRDD, true, intersects).collect(); - sanityCheckJoinResults(result); - assertEquals(getExpectedWithOriginalDuplicatesCount(intersects), countJoinResults(result)); - } - - private long getExpectedCount(boolean intersects) - { - return intersects ? expectedIntersectsMatchCount : expectedContainsMatchCount; - } - - private long getExpectedWithOriginalDuplicatesCount(boolean intersects) - { - return intersects ? expectedIntersectsWithOriginalDuplicatesCount : expectedContainsWithOriginalDuplicatesCount; - } - - @Test - public void testJoinWithSingletonRDD() throws Exception - { - PolygonRDD queryRDD = createPolygonRDD(InputLocationQueryPolygon); - PolygonRDD spatialRDD = createPolygonRDD(InputLocation); - PolygonRDD singletonRDD = new PolygonRDD(); - Polygon queryPolygon = queryRDD.rawSpatialRDD.first(); - singletonRDD.rawSpatialRDD = sc.parallelize(Collections.singletonList(queryPolygon), 1); - singletonRDD.analyze(StorageLevel.MEMORY_ONLY()); - - // Joining with a singleton RDD is essentially the same with a range query - long expectedResultCount = RangeQuery.SpatialRangeQuery(spatialRDD, queryPolygon, true, false).count(); - - partitionRdds(singletonRDD, spatialRDD); - List> result = JoinQuery.SpatialJoinQueryFlat(spatialRDD, singletonRDD, false, true).collect(); - sanityCheckFlatJoinResults(result); - assertEquals(expectedResultCount, result.size()); - - partitionRdds(spatialRDD, singletonRDD); - result = JoinQuery.SpatialJoinQueryFlat(singletonRDD, spatialRDD, false, true).collect(); - sanityCheckFlatJoinResults(result); - assertEquals(expectedResultCount, result.size()); - - partitionRdds(singletonRDD, spatialRDD); - spatialRDD.buildIndex(indexType, true); - result = JoinQuery.SpatialJoinQueryFlat(spatialRDD, singletonRDD, true, true).collect(); - sanityCheckFlatJoinResults(result); - assertEquals(expectedResultCount, result.size()); - - partitionRdds(spatialRDD, singletonRDD); - singletonRDD.buildIndex(indexType, true); - result = JoinQuery.SpatialJoinQueryFlat(singletonRDD, spatialRDD, true, true).collect(); - sanityCheckFlatJoinResults(result); - assertEquals(expectedResultCount, result.size()); - } -} diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleJoinTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleJoinTest.java deleted file mode 100644 index 0b72723772..0000000000 --- a/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleJoinTest.java +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.sedona.core.spatialOperator; - -import org.apache.sedona.core.enums.GridType; -import org.apache.sedona.core.enums.IndexType; -import org.apache.sedona.core.enums.JoinBuildSide; -import org.apache.sedona.core.spatialRDD.RectangleRDD; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.locationtech.jts.geom.Polygon; -import scala.Tuple2; - -import java.util.Arrays; -import java.util.Collection; -import java.util.List; - -import static org.junit.Assert.assertEquals; - -@RunWith(Parameterized.class) -public class RectangleJoinTest - extends JoinTestBase -{ - - private static long expectedMatchCount; - private static long expectedMatchWithOriginalDuplicatesCount; - - public RectangleJoinTest(GridType gridType, int numPartitions) - { - super(gridType, numPartitions); - } - - @Parameterized.Parameters - public static Collection testParams() - { - return Arrays.asList(new Object[][] { - {GridType.QUADTREE, 11}, - {GridType.KDBTREE, 11}, - }); - } - - /** - * Once executed before all. - */ - @BeforeClass - public static void onceExecutedBeforeAll() - { - initialize("RectangleJoin", "rectangle.test.properties"); - expectedMatchCount = Long.parseLong(prop.getProperty("matchCount")); - expectedMatchWithOriginalDuplicatesCount = - Long.parseLong(prop.getProperty("matchWithOriginalDuplicatesCount")); - } - - /** - * Tear down. - */ - @AfterClass - public static void TearDown() - { - sc.stop(); - } - - /** - * Test spatial join query with rectangle RDD. - * - * @throws Exception the exception - */ - @Test - public void testNestedLoop() - throws Exception - { - RectangleRDD queryRDD = createRectangleRDD(); - RectangleRDD spatialRDD = createRectangleRDD(); - - partitionRdds(queryRDD, spatialRDD); - - List>> result = JoinQuery.SpatialJoinQuery(spatialRDD, queryRDD, false, true).collect(); - - sanityCheckJoinResults(result); - long expectedCount = expectToPreserveOriginalDuplicates() - ? expectedMatchWithOriginalDuplicatesCount : expectedMatchCount; - assertEquals(expectedCount, countJoinResults(result)); - } - - /** - * Test spatial join query with rectangle RDD using rtree index. - * - * @throws Exception the exception - */ - @Test - public void testRTree() - throws Exception - { - testIndexInt(IndexType.RTREE); - } - - /** - * Test spatial join query with rectangle RDD using quadtree index. - * - * @throws Exception the exception - */ - @Test - public void testQuadTree() - throws Exception - { - testIndexInt(IndexType.QUADTREE); - } - - private void testIndexInt(IndexType indexType) - throws Exception - { - RectangleRDD queryRDD = createRectangleRDD(); - RectangleRDD spatialRDD = createRectangleRDD(); - - partitionRdds(queryRDD, spatialRDD); - spatialRDD.buildIndex(indexType, true); - - List>> result = JoinQuery.SpatialJoinQuery(spatialRDD, queryRDD, false, true).collect(); - - sanityCheckJoinResults(result); - long expectedCount = expectToPreserveOriginalDuplicates() - ? expectedMatchWithOriginalDuplicatesCount : expectedMatchCount; - assertEquals(expectedCount, countJoinResults(result)); - } - - @Test - public void testDynamicRTree() - throws Exception - { - testDynamicIndexInt(IndexType.RTREE); - } - - @Test - public void testDynamicQuadTree() - throws Exception - { - testDynamicIndexInt(IndexType.QUADTREE); - } - - private void testDynamicIndexInt(IndexType indexType) - throws Exception - { - RectangleRDD queryRDD = createRectangleRDD(); - RectangleRDD spatialRDD = createRectangleRDD(); - - partitionRdds(queryRDD, spatialRDD); - - JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, true, indexType, JoinBuildSide.LEFT); - List> result = JoinQuery.spatialJoin(queryRDD, spatialRDD, joinParams).collect(); - - sanityCheckFlatJoinResults(result); - - final long expectedCount = expectToPreserveOriginalDuplicates() - ? expectedMatchWithOriginalDuplicatesCount : expectedMatchCount; - assertEquals(expectedCount, result.size()); - } - - private RectangleRDD createRectangleRDD() - { - return createRectangleRDD(InputLocation); - } -} \ No newline at end of file diff --git a/core/src/test/java/org/apache/sedona/core/spatialRDD/LineStringRDDTest.java b/core/src/test/java/org/apache/sedona/core/spatialRDD/LineStringRDDTest.java index 1020961b93..186f18862c 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialRDD/LineStringRDDTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialRDD/LineStringRDDTest.java @@ -67,6 +67,7 @@ public void testConstructor() throws Exception { LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions); + spatialRDD.analyze(); assertEquals(inputCount, spatialRDD.approximateTotalCount); assertEquals(inputBoundary, spatialRDD.boundaryEnvelope); } @@ -76,6 +77,7 @@ public void testEmptyConstructor() throws Exception { LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions); + spatialRDD.analyze(); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.RTREE, true); // Create an empty spatialRDD and manually assemble it @@ -108,6 +110,7 @@ public void testBuildRtreeIndex() throws Exception { LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions); + spatialRDD.analyze(); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.RTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { @@ -128,6 +131,7 @@ public void testBuildQuadtreeIndex() throws Exception { LineStringRDD spatialRDD = new LineStringRDD(sc, InputLocation, splitter, true, numPartitions); + spatialRDD.analyze(); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.QUADTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { diff --git a/core/src/test/java/org/apache/sedona/core/spatialRDD/PointRDDTest.java b/core/src/test/java/org/apache/sedona/core/spatialRDD/PointRDDTest.java index c00fdd11a0..651e0b880a 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialRDD/PointRDDTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialRDD/PointRDDTest.java @@ -65,6 +65,7 @@ public static void TearDown() public void testConstructor() { PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions); + spatialRDD.analyze(); assertEquals(inputCount, spatialRDD.approximateTotalCount); assertEquals(inputBoundary, spatialRDD.boundaryEnvelope); assert spatialRDD.rawSpatialRDD.take(9).get(0).getUserData().equals("testattribute0\ttestattribute1\ttestattribute2"); @@ -109,6 +110,7 @@ public void testBuildRtreeIndex() throws Exception { PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions); + spatialRDD.analyze(); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.RTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { @@ -129,6 +131,7 @@ public void testBuildQuadtreeIndex() throws Exception { PointRDD spatialRDD = new PointRDD(sc, InputLocation, offset, splitter, true, numPartitions); + spatialRDD.analyze(); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.QUADTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { diff --git a/core/src/test/java/org/apache/sedona/core/spatialRDD/PolygonRDDTest.java b/core/src/test/java/org/apache/sedona/core/spatialRDD/PolygonRDDTest.java index 5edbd789b4..e5abbd0b72 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialRDD/PolygonRDDTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialRDD/PolygonRDDTest.java @@ -76,6 +76,7 @@ public static void TearDown() public void testConstructor() { PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions); + spatialRDD.analyze(); assertEquals(inputCount, spatialRDD.approximateTotalCount); assertEquals(inputBoundary, spatialRDD.boundaryEnvelope); } @@ -85,6 +86,7 @@ public void testEmptyConstructor() throws Exception { PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions); + spatialRDD.analyze(); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.RTREE, true); // Create an empty spatialRDD and manually assemble it @@ -98,6 +100,7 @@ public void testEmptyConstructor() public void testGeoJSONConstructor() { PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocationGeojson, FileDataSplitter.GEOJSON, true, 4); + spatialRDD.analyze(); assert spatialRDD.approximateTotalCount == 1001; assert spatialRDD.boundaryEnvelope != null; assertEquals(spatialRDD.rawSpatialRDD.take(1).get(0).getUserData(), "01\t077\t011501\t5\t1500000US010770115015\t010770115015\t5\tBG\t6844991\t32636"); @@ -109,6 +112,7 @@ public void testGeoJSONConstructor() public void testWktConstructor() { PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocationWkt, FileDataSplitter.WKT, true); + spatialRDD.analyze(); assert spatialRDD.approximateTotalCount == 103; assert spatialRDD.boundaryEnvelope != null; assert spatialRDD.rawSpatialRDD.take(1).get(0).getUserData().equals("31\t039\t00835841\t31039\tCuming\tCuming County\t06\tH1\tG4020\t\t\t\tA\t1477895811\t10447360\t+41.9158651\t-096.7885168"); @@ -118,6 +122,7 @@ public void testWktConstructor() public void testWkbConstructor() { PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocationWkb, FileDataSplitter.WKB, true); + spatialRDD.analyze(); assert spatialRDD.approximateTotalCount == 103; assert spatialRDD.boundaryEnvelope != null; assert spatialRDD.rawSpatialRDD.take(1).get(0).getUserData().equals("31\t039\t00835841\t31039\tCuming\tCuming County\t06\tH1\tG4020\t\t\t\tA\t1477895811\t10447360\t+41.9158651\t-096.7885168"); @@ -146,6 +151,7 @@ public void testBuildRtreeIndex() throws Exception { PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions); + spatialRDD.analyze(); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.RTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { @@ -166,6 +172,7 @@ public void testBuildQuadtreeIndex() throws Exception { PolygonRDD spatialRDD = new PolygonRDD(sc, InputLocation, splitter, true, numPartitions); + spatialRDD.analyze(); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.QUADTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { diff --git a/core/src/test/java/org/apache/sedona/core/spatialRDD/RectangleRDDTest.java b/core/src/test/java/org/apache/sedona/core/spatialRDD/RectangleRDDTest.java index 2541004521..f4d5bfec16 100644 --- a/core/src/test/java/org/apache/sedona/core/spatialRDD/RectangleRDDTest.java +++ b/core/src/test/java/org/apache/sedona/core/spatialRDD/RectangleRDDTest.java @@ -67,6 +67,7 @@ public void testConstructor() throws Exception { RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, numPartitions); + spatialRDD.analyze(); assertEquals(inputCount, spatialRDD.approximateTotalCount); assertEquals(inputBoundary, spatialRDD.boundaryEnvelope); } @@ -107,6 +108,7 @@ public void testBuildRtreeIndex() throws Exception { RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, numPartitions); + spatialRDD.analyze(); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.RTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { @@ -127,6 +129,7 @@ public void testBuildQuadtreeIndex() throws Exception { RectangleRDD spatialRDD = new RectangleRDD(sc, InputLocation, offset, splitter, true, numPartitions); + spatialRDD.analyze(); spatialRDD.spatialPartitioning(gridType); spatialRDD.buildIndex(IndexType.QUADTREE, true); if (spatialRDD.indexedRDD.take(1).get(0) instanceof STRtree) { diff --git a/core/src/test/resources/point.test.properties b/core/src/test/resources/point.test.properties index 2a308a4c76..3dfb438325 100644 --- a/core/src/test/resources/point.test.properties +++ b/core/src/test/resources/point.test.properties @@ -9,7 +9,4 @@ distance=0.01 queryPolygonSet=primaryroads-polygon.csv inputCount=3000 inputBoundary=-173.120769, -84.965961, 30.244859, 71.355134 -rectangleMatchCount=103 -rectangleMatchWithOriginalDuplicatesCount=103 -polygonMatchCount=472 -polygonMatchWithOriginalDuplicatesCount=562 + diff --git a/core/src/test/resources/polygon.test.properties b/core/src/test/resources/polygon.test.properties index 05526e1c90..f52668ade6 100644 --- a/core/src/test/resources/polygon.test.properties +++ b/core/src/test/resources/polygon.test.properties @@ -12,7 +12,4 @@ inputLocationWkt=county_small.tsv inputLocationWkb=county_small_wkb.tsv inputCount=3000 inputBoundary=-158.104182, -66.03575, 17.986328, 48.645133 -containsMatchCount=6941 -containsMatchWithOriginalDuplicatesCount=9334 -intersectsMatchCount=24323 -intersectsMatchWithOriginalDuplicatesCount=32726 + From a7a30a2d0416ff3599cbb51ecc8ecb48bd0d29af Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Tue, 8 Aug 2023 11:45:15 -0700 Subject: [PATCH 07/11] Fix a bunch of failed test cases --- .../org/apache/sedona/sql/adapterTestScala.scala | 3 +-- .../org/apache/sedona/viz/showcase/Example.java | 14 +++++++------- .../apache/sedona/viz/showcase/ScalaExample.scala | 12 ++++++------ .../org/apache/sedona/viz/ChoroplethmapTest.java | 8 ++++---- .../java/org/apache/sedona/viz/HeatmapTest.java | 8 ++++---- .../sedona/viz/ParallelVisualizationTest.java | 10 +++++----- .../org/apache/sedona/viz/ScatterplotTest.java | 12 ++++++------ .../org/apache/sedona/viz/rdd/scalaTest.scala | 14 +++++++------- 8 files changed, 40 insertions(+), 41 deletions(-) diff --git a/sql/common/src/test/scala/org/apache/sedona/sql/adapterTestScala.scala b/sql/common/src/test/scala/org/apache/sedona/sql/adapterTestScala.scala index 2fb58a8224..76132c7b3e 100644 --- a/sql/common/src/test/scala/org/apache/sedona/sql/adapterTestScala.scala +++ b/sql/common/src/test/scala/org/apache/sedona/sql/adapterTestScala.scala @@ -28,7 +28,6 @@ import org.apache.sedona.core.spatialRDD.{CircleRDD, PointRDD, PolygonRDD} import org.apache.sedona.sql.utils.Adapter import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT import org.apache.spark.sql.types._ -import org.apache.spark.storage.StorageLevel import org.locationtech.jts.geom.Point import org.scalatest.GivenWhenThen @@ -195,7 +194,7 @@ class adapterTestScala extends TestBaseScala with GivenWhenThen{ val urlPrefix = resourceFolder + "modis/" val HDFDataVariableList:Array[String] = Array("LST", "QC", "Error_LST", "Emis_31", "Emis_32") val earthdataHDFPoint = new EarthdataHDFPointMapper(HDFincrement, HDFoffset, HDFrootGroupName, HDFDataVariableList, HDFDataVariableName, urlPrefix) - val spatialRDD = new PointRDD(sparkSession.sparkContext, InputLocation, numPartitions, earthdataHDFPoint, StorageLevel.MEMORY_ONLY) + val spatialRDD = new PointRDD(sparkSession.sparkContext, InputLocation, numPartitions, earthdataHDFPoint) import scala.jdk.CollectionConverters._ spatialRDD.fieldNames = HDFDataVariableList.dropRight(4).toList.asJava val spatialDf = Adapter.toDf(spatialRDD, sparkSession) diff --git a/viz/src/main/java/org/apache/sedona/viz/showcase/Example.java b/viz/src/main/java/org/apache/sedona/viz/showcase/Example.java index 990129ccc5..11cdec33e0 100644 --- a/viz/src/main/java/org/apache/sedona/viz/showcase/Example.java +++ b/viz/src/main/java/org/apache/sedona/viz/showcase/Example.java @@ -206,7 +206,7 @@ public class Example public static boolean buildScatterPlot(String outputPath) { try { - PolygonRDD spatialRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions); ScatterPlot visualizationOperator = new ScatterPlot(1000, 600, USMainLandBoundary, false); visualizationOperator.CustomizeColor(255, 255, 255, 255, Color.GREEN, true); visualizationOperator.Visualize(sparkContext, spatialRDD); @@ -241,7 +241,7 @@ public static boolean buildScatterPlot(String outputPath) public static boolean buildHeatMap(String outputPath) { try { - RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions); HeatMap visualizationOperator = new HeatMap(1000, 600, USMainLandBoundary, false, 2); visualizationOperator.Visualize(sparkContext, spatialRDD); SedonaVizImageGenerator imageGenerator = new SedonaVizImageGenerator(); @@ -263,8 +263,8 @@ public static boolean buildHeatMap(String outputPath) public static boolean buildChoroplethMap(String outputPath) { try { - PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions, StorageLevel.MEMORY_ONLY()); - PolygonRDD queryRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions); + PolygonRDD queryRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions); spatialRDD.spatialPartitioning(GridType.KDBTREE); queryRDD.spatialPartitioning(spatialRDD.getPartitioner()); spatialRDD.buildIndex(IndexType.RTREE, true); @@ -300,7 +300,7 @@ public static boolean buildChoroplethMap(String outputPath) public static boolean parallelFilterRenderNoStitch(String outputPath) { try { - RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions); HeatMap visualizationOperator = new HeatMap(1000, 600, USMainLandBoundary, false, 2, 4, 4, true, true); visualizationOperator.Visualize(sparkContext, spatialRDD); SedonaVizImageGenerator imageGenerator = new SedonaVizImageGenerator(); @@ -322,7 +322,7 @@ public static boolean parallelFilterRenderNoStitch(String outputPath) public static boolean parallelFilterRenderStitch(String outputPath) { try { - RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions); HeatMap visualizationOperator = new HeatMap(1000, 600, USMainLandBoundary, false, 2, 4, 4, true, true); visualizationOperator.Visualize(sparkContext, spatialRDD); ImageGenerator imageGenerator = new ImageGenerator(); @@ -348,7 +348,7 @@ public static boolean earthdataVisualization(String outputPath) try { EarthdataHDFPointMapper earthdataHDFPoint = new EarthdataHDFPointMapper(HDFIncrement, HDFOffset, HDFRootGroupName, HDFDataVariableList, HDFDataVariableName, HDFswitchXY, urlPrefix); - PointRDD spatialRDD = new PointRDD(sparkContext, earthdataInputLocation, earthdataNumPartitions, earthdataHDFPoint, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sparkContext, earthdataInputLocation, earthdataNumPartitions, earthdataHDFPoint); ScatterPlot visualizationOperator = new ScatterPlot(1000, 600, spatialRDD.boundaryEnvelope, ColorizeOption.EARTHOBSERVATION, false, false); visualizationOperator.CustomizeColor(255, 255, 255, 255, Color.BLUE, true); visualizationOperator.Visualize(sparkContext, spatialRDD); diff --git a/viz/src/main/scala/org/apache/sedona/viz/showcase/ScalaExample.scala b/viz/src/main/scala/org/apache/sedona/viz/showcase/ScalaExample.scala index f159474f4f..f88bf6af92 100644 --- a/viz/src/main/scala/org/apache/sedona/viz/showcase/ScalaExample.scala +++ b/viz/src/main/scala/org/apache/sedona/viz/showcase/ScalaExample.scala @@ -126,7 +126,7 @@ object ScalaExample extends App { * @return true, if successful */ def buildHeatMap(outputPath: String): Boolean = { - val spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY) + val spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions) val visualizationOperator = new HeatMap(1000, 600, USMainLandBoundary, false, 2) visualizationOperator.Visualize(sparkContext, spatialRDD) val imageGenerator = new ImageGenerator @@ -141,8 +141,8 @@ object ScalaExample extends App { * @return true, if successful */ def buildChoroplethMap(outputPath: String): Boolean = { - val spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions, StorageLevel.MEMORY_ONLY) - val queryRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions, StorageLevel.MEMORY_ONLY) + val spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions) + val queryRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions) spatialRDD.spatialPartitioning(GridType.KDBTREE) queryRDD.spatialPartitioning(spatialRDD.getPartitioner) spatialRDD.buildIndex(IndexType.RTREE, true) @@ -167,7 +167,7 @@ object ScalaExample extends App { * @return true, if successful */ def parallelFilterRenderNoStitch(outputPath: String): Boolean = { - val spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY) + val spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions) val visualizationOperator = new HeatMap(1000, 600, USMainLandBoundary, false, 2, 4, 4, true, true) visualizationOperator.Visualize(sparkContext, spatialRDD) val imageGenerator = new ImageGenerator @@ -182,7 +182,7 @@ object ScalaExample extends App { * @return true, if successful */ def parallelFilterRenderStitch(outputPath: String): Boolean = { - val spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY) + val spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions) val visualizationOperator = new HeatMap(1000, 600, USMainLandBoundary, false, 2, 4, 4, true, true) visualizationOperator.Visualize(sparkContext, spatialRDD) val imageGenerator = new ImageGenerator @@ -193,7 +193,7 @@ object ScalaExample extends App { def earthdataVisualization(outputPath: String): Boolean = { val earthdataHDFPoint = new EarthdataHDFPointMapper(HDFIncrement, HDFOffset, HDFRootGroupName, HDFDataVariableList, HDFDataVariableName, HDFswitchXY, urlPrefix) - val spatialRDD = new PointRDD(sparkContext, earthdataInputLocation, earthdataNumPartitions, earthdataHDFPoint, StorageLevel.MEMORY_ONLY) + val spatialRDD = new PointRDD(sparkContext, earthdataInputLocation, earthdataNumPartitions, earthdataHDFPoint) val visualizationOperator = new ScatterPlot(1000, 600, spatialRDD.boundaryEnvelope, ColorizeOption.EARTHOBSERVATION, false, false) visualizationOperator.CustomizeColor(255, 255, 255, 255, Color.BLUE, true) visualizationOperator.Visualize(sparkContext, spatialRDD) diff --git a/viz/src/test/java/org/apache/sedona/viz/ChoroplethmapTest.java b/viz/src/test/java/org/apache/sedona/viz/ChoroplethmapTest.java index 812d2feabe..eb1764178b 100644 --- a/viz/src/test/java/org/apache/sedona/viz/ChoroplethmapTest.java +++ b/viz/src/test/java/org/apache/sedona/viz/ChoroplethmapTest.java @@ -53,8 +53,8 @@ public class ChoroplethmapTest public void testRectangleRDDVisualization() throws Exception { - PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions, StorageLevel.MEMORY_ONLY()); - RectangleRDD queryRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions); + RectangleRDD queryRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions); spatialRDD.spatialPartitioning(GridType.KDBTREE); queryRDD.spatialPartitioning(spatialRDD.getPartitioner()); spatialRDD.buildIndex(IndexType.RTREE, true); @@ -85,8 +85,8 @@ public void testPolygonRDDVisualization() throws Exception { //UserSuppliedPolygonMapper userSuppliedPolygonMapper = new UserSuppliedPolygonMapper(); - PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions, StorageLevel.MEMORY_ONLY()); - PolygonRDD queryRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions); + PolygonRDD queryRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions); spatialRDD.spatialPartitioning(GridType.KDBTREE); queryRDD.spatialPartitioning(spatialRDD.getPartitioner()); spatialRDD.buildIndex(IndexType.RTREE, true); diff --git a/viz/src/test/java/org/apache/sedona/viz/HeatmapTest.java b/viz/src/test/java/org/apache/sedona/viz/HeatmapTest.java index 7f62e7998f..cc540b38ff 100644 --- a/viz/src/test/java/org/apache/sedona/viz/HeatmapTest.java +++ b/viz/src/test/java/org/apache/sedona/viz/HeatmapTest.java @@ -47,7 +47,7 @@ public class HeatmapTest public void testPointRDDVisualization() throws Exception { - PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions); HeatMap visualizationOperator = new HeatMap(800, 500, USMainLandBoundary, false, 3); visualizationOperator.Visualize(sparkContext, spatialRDD); ImageGenerator imageGenerator = new ImageGenerator(); @@ -63,7 +63,7 @@ public void testPointRDDVisualization() public void testRectangleRDDVisualization() throws Exception { - RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions); HeatMap visualizationOperator = new HeatMap(800, 500, USMainLandBoundary, false, 2, 4, 4, false, true); visualizationOperator.Visualize(sparkContext, spatialRDD); @@ -82,7 +82,7 @@ public void testPolygonRDDVisualization() throws Exception { //UserSuppliedPolygonMapper userSuppliedPolygonMapper = new UserSuppliedPolygonMapper(); - PolygonRDD spatialRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions); HeatMap visualizationOperator = new HeatMap(800, 500, USMainLandBoundary, false, 2); visualizationOperator.Visualize(sparkContext, spatialRDD); @@ -99,7 +99,7 @@ public void testPolygonRDDVisualization() public void testLineStringRDDVisualization() throws Exception { - LineStringRDD spatialRDD = new LineStringRDD(sparkContext, LineStringInputLocation, LineStringSplitter, false, LineStringNumPartitions, StorageLevel.MEMORY_ONLY()); + LineStringRDD spatialRDD = new LineStringRDD(sparkContext, LineStringInputLocation, LineStringSplitter, false, LineStringNumPartitions); HeatMap visualizationOperator = new HeatMap(800, 500, USMainLandBoundary, false, 2); visualizationOperator.Visualize(sparkContext, spatialRDD); diff --git a/viz/src/test/java/org/apache/sedona/viz/ParallelVisualizationTest.java b/viz/src/test/java/org/apache/sedona/viz/ParallelVisualizationTest.java index a37fc395bf..1892279ca9 100644 --- a/viz/src/test/java/org/apache/sedona/viz/ParallelVisualizationTest.java +++ b/viz/src/test/java/org/apache/sedona/viz/ParallelVisualizationTest.java @@ -67,7 +67,7 @@ public class ParallelVisualizationTest public void testPointRDDVisualization() throws Exception { - PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions); HeatMap visualizationOperator = new HeatMap(resolutionX, resolutionY, USMainLandBoundary, false, 2, partitionX, partitionY, true, true); visualizationOperator.Visualize(sparkContext, spatialRDD); @@ -85,7 +85,7 @@ public void testPointRDDVisualization() public void testRectangleRDDVisualizationWithTiles() throws Exception { - RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions); HeatMap visualizationOperator = new HeatMap(resolutionX, resolutionY, USMainLandBoundary, false, 2, partitionX, partitionY, true, true); visualizationOperator.Visualize(sparkContext, spatialRDD); @@ -103,7 +103,7 @@ public void testRectangleRDDVisualizationWithTiles() public void testRectangleRDDVisualizationNoTiles() throws Exception { - RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions); HeatMap visualizationOperator = new HeatMap(resolutionX, resolutionY, USMainLandBoundary, false, 5, partitionX, partitionY, true, true); visualizationOperator.Visualize(sparkContext, spatialRDD); @@ -122,7 +122,7 @@ public void testPolygonRDDVisualization() throws Exception { //UserSuppliedPolygonMapper userSuppliedPolygonMapper = new UserSuppliedPolygonMapper(); - PolygonRDD spatialRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions); HeatMap visualizationOperator = new HeatMap(resolutionX, resolutionY, USMainLandBoundary, false, 2, partitionX, partitionY, true, true); visualizationOperator.Visualize(sparkContext, spatialRDD); @@ -140,7 +140,7 @@ public void testPolygonRDDVisualization() public void testLineStringRDDVisualization() throws Exception { - LineStringRDD spatialRDD = new LineStringRDD(sparkContext, LineStringInputLocation, LineStringSplitter, false, LineStringNumPartitions, StorageLevel.MEMORY_ONLY()); + LineStringRDD spatialRDD = new LineStringRDD(sparkContext, LineStringInputLocation, LineStringSplitter, false, LineStringNumPartitions); HeatMap visualizationOperator = new HeatMap(resolutionX, resolutionY, USMainLandBoundary, false, 2, partitionX, partitionY, true, true); visualizationOperator.Visualize(sparkContext, spatialRDD); diff --git a/viz/src/test/java/org/apache/sedona/viz/ScatterplotTest.java b/viz/src/test/java/org/apache/sedona/viz/ScatterplotTest.java index cbc0de9478..2bf9179c0a 100644 --- a/viz/src/test/java/org/apache/sedona/viz/ScatterplotTest.java +++ b/viz/src/test/java/org/apache/sedona/viz/ScatterplotTest.java @@ -79,7 +79,7 @@ public void testEncodeDecodeId() public void testPointRDDVisualization() throws Exception { - PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions); ScatterPlot visualizationOperator = new ScatterPlot(1000, 600, USMainLandBoundary, false); visualizationOperator.CustomizeColor(255, 255, 255, 255, Color.GREEN, true); visualizationOperator.Visualize(sparkContext, spatialRDD); @@ -104,7 +104,7 @@ public void testPointRDDVisualization() public void testPointRDDVisualizationWithParallelRendering() throws Exception { - PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions); ScatterPlot visualizationOperator = new ScatterPlot(1000, 600, USMainLandBoundary, ColorizeOption.NORMAL, false, 4, 4, true, false); visualizationOperator.CustomizeColor(255, 255, 255, 255, Color.GREEN, true); @@ -124,7 +124,7 @@ public void testPointRDDVisualizationWithParallelRendering() public void testSaveAsDistributedFile() throws Exception { - PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions, StorageLevel.MEMORY_ONLY()); + PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions); ScatterPlot visualizationOperator = new ScatterPlot(1000, 600, USMainLandBoundary, false, 2, 2, true, false); visualizationOperator.CustomizeColor(255, 255, 255, 255, Color.GREEN, true); visualizationOperator.Visualize(sparkContext, spatialRDD); @@ -152,7 +152,7 @@ public void testSaveAsDistributedFile() public void testRectangleRDDVisualization() throws Exception { - RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY()); + RectangleRDD spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions); ScatterPlot visualizationOperator = new ScatterPlot(1000, 600, USMainLandBoundary, false); visualizationOperator.CustomizeColor(255, 255, 255, 255, Color.RED, true); visualizationOperator.Visualize(sparkContext, spatialRDD); @@ -178,7 +178,7 @@ public void testPolygonRDDVisualization() throws Exception { //UserSuppliedPolygonMapper userSuppliedPolygonMapper = new UserSuppliedPolygonMapper(); - PolygonRDD spatialRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions, StorageLevel.MEMORY_ONLY()); + PolygonRDD spatialRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions); ScatterPlot visualizationOperator = new ScatterPlot(1000, 600, USMainLandBoundary, false); visualizationOperator.CustomizeColor(255, 255, 255, 255, Color.GREEN, true); visualizationOperator.Visualize(sparkContext, spatialRDD); @@ -206,7 +206,7 @@ public void testLineStringRDDVisualization() int resolutionY = 800; int resolutionX = RasterizationUtils.GetWidthFromHeight(resolutionY, USMainLandBoundary); //UserSuppliedLineStringMapper userSuppliedLineStringMapper = new UserSuppliedLineStringMapper(); - LineStringRDD spatialRDD = new LineStringRDD(sparkContext, LineStringInputLocation, LineStringSplitter, false, LineStringNumPartitions, StorageLevel.MEMORY_ONLY()); + LineStringRDD spatialRDD = new LineStringRDD(sparkContext, LineStringInputLocation, LineStringSplitter, false, LineStringNumPartitions); ScatterPlot visualizationOperator = new ScatterPlot(resolutionX, resolutionY, USMainLandBoundary, false); visualizationOperator.CustomizeColor(255, 255, 255, 255, Color.GREEN, true); visualizationOperator.Visualize(sparkContext, spatialRDD); diff --git a/viz/src/test/scala/org/apache/sedona/viz/rdd/scalaTest.scala b/viz/src/test/scala/org/apache/sedona/viz/rdd/scalaTest.scala index 12d248bbbb..e417718a49 100644 --- a/viz/src/test/scala/org/apache/sedona/viz/rdd/scalaTest.scala +++ b/viz/src/test/scala/org/apache/sedona/viz/rdd/scalaTest.scala @@ -97,7 +97,7 @@ class scalaTest extends FunSpec with BeforeAndAfterAll{ describe("SedonaViz in Scala") { it("should pass scatter plot") { - val spatialRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions, StorageLevel.MEMORY_ONLY) + val spatialRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions) var visualizationOperator = new ScatterPlot(1000, 600, USMainLandBoundary, false) visualizationOperator.CustomizeColor(255, 255, 255, 255, Color.GREEN, true) visualizationOperator.Visualize(sparkContext, spatialRDD) @@ -119,7 +119,7 @@ class scalaTest extends FunSpec with BeforeAndAfterAll{ } it("should pass heat map") { - val spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY) + val spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions) val visualizationOperator = new HeatMap(1000, 600, USMainLandBoundary, false, 2) visualizationOperator.Visualize(sparkContext, spatialRDD) val imageGenerator = new ImageGenerator @@ -128,8 +128,8 @@ class scalaTest extends FunSpec with BeforeAndAfterAll{ } it("should pass choropleth map") { - val spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions, StorageLevel.MEMORY_ONLY) - val queryRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions, StorageLevel.MEMORY_ONLY) + val spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions) + val queryRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions) spatialRDD.spatialPartitioning(GridType.KDBTREE) queryRDD.spatialPartitioning(spatialRDD.getPartitioner) spatialRDD.buildIndex(IndexType.RTREE, true) @@ -148,7 +148,7 @@ class scalaTest extends FunSpec with BeforeAndAfterAll{ } it("should pass parallel filtering and rendering without stitching image tiles") { - val spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY) + val spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions) val visualizationOperator = new HeatMap(1000, 600, USMainLandBoundary, false, 2, 4, 4, true, true) visualizationOperator.Visualize(sparkContext, spatialRDD) val imageGenerator = new ImageGenerator @@ -157,7 +157,7 @@ class scalaTest extends FunSpec with BeforeAndAfterAll{ } it("should pass parallel filtering and rendering with stitching image tiles") { - val spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions, StorageLevel.MEMORY_ONLY) + val spatialRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions) val visualizationOperator = new HeatMap(1000, 600, USMainLandBoundary, false, 2, 4, 4, true, true) visualizationOperator.Visualize(sparkContext, spatialRDD) val imageGenerator = new ImageGenerator @@ -169,7 +169,7 @@ class scalaTest extends FunSpec with BeforeAndAfterAll{ ignore("should pass earth data hdf scatter plot") { val earthdataHDFPoint = new EarthdataHDFPointMapper(HDFIncrement, HDFOffset, HDFRootGroupName, HDFDataVariableList, HDFDataVariableName, HDFswitchXY, urlPrefix) - val spatialRDD = new PointRDD(sparkContext, earthdataInputLocation, earthdataNumPartitions, earthdataHDFPoint, StorageLevel.MEMORY_ONLY) + val spatialRDD = new PointRDD(sparkContext, earthdataInputLocation, earthdataNumPartitions, earthdataHDFPoint) val visualizationOperator = new ScatterPlot(1000, 600, spatialRDD.boundaryEnvelope, ColorizeOption.EARTHOBSERVATION, false, false) visualizationOperator.CustomizeColor(255, 255, 255, 255, Color.BLUE, true) visualizationOperator.Visualize(sparkContext, spatialRDD) From 86ec83f3f108e171ca03ad4285ee9d3ab767bd44 Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Tue, 8 Aug 2023 12:59:57 -0700 Subject: [PATCH 08/11] Fix more test cases --- .../org/apache/sedona/viz/ChoroplethmapTest.java | 2 ++ .../org/apache/sedona/viz/rdd/scalaTest.scala | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/viz/src/test/java/org/apache/sedona/viz/ChoroplethmapTest.java b/viz/src/test/java/org/apache/sedona/viz/ChoroplethmapTest.java index eb1764178b..ab93264260 100644 --- a/viz/src/test/java/org/apache/sedona/viz/ChoroplethmapTest.java +++ b/viz/src/test/java/org/apache/sedona/viz/ChoroplethmapTest.java @@ -55,6 +55,7 @@ public void testRectangleRDDVisualization() { PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions); RectangleRDD queryRDD = new RectangleRDD(sparkContext, RectangleInputLocation, RectangleSplitter, false, RectangleNumPartitions); + spatialRDD.analyze(); spatialRDD.spatialPartitioning(GridType.KDBTREE); queryRDD.spatialPartitioning(spatialRDD.getPartitioner()); spatialRDD.buildIndex(IndexType.RTREE, true); @@ -87,6 +88,7 @@ public void testPolygonRDDVisualization() //UserSuppliedPolygonMapper userSuppliedPolygonMapper = new UserSuppliedPolygonMapper(); PointRDD spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions); PolygonRDD queryRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions); + spatialRDD.analyze(); spatialRDD.spatialPartitioning(GridType.KDBTREE); queryRDD.spatialPartitioning(spatialRDD.getPartitioner()); spatialRDD.buildIndex(IndexType.RTREE, true); diff --git a/viz/src/test/scala/org/apache/sedona/viz/rdd/scalaTest.scala b/viz/src/test/scala/org/apache/sedona/viz/rdd/scalaTest.scala index e417718a49..7d40e13f3e 100644 --- a/viz/src/test/scala/org/apache/sedona/viz/rdd/scalaTest.scala +++ b/viz/src/test/scala/org/apache/sedona/viz/rdd/scalaTest.scala @@ -18,11 +18,6 @@ */ package org.apache.sedona.viz.rdd -import java.awt.Color // scalastyle:ignore illegal.imports -import java.io.FileInputStream -import java.util.Properties - -import org.locationtech.jts.geom.Envelope import org.apache.log4j.{Level, Logger} import org.apache.sedona.common.enums.FileDataSplitter import org.apache.sedona.core.enums.{GridType, IndexType} @@ -30,14 +25,18 @@ import org.apache.sedona.core.formatMapper.EarthdataHDFPointMapper import org.apache.sedona.core.spatialOperator.JoinQuery import org.apache.sedona.core.spatialRDD.{PointRDD, PolygonRDD, RectangleRDD} import org.apache.sedona.viz.`extension`.visualizationEffect.{ChoroplethMap, HeatMap, ScatterPlot} -import org.apache.sedona.viz.core.{ImageGenerator, RasterOverlayOperator} import org.apache.sedona.viz.core.Serde.SedonaVizKryoRegistrator +import org.apache.sedona.viz.core.{ImageGenerator, RasterOverlayOperator} import org.apache.sedona.viz.utils.{ColorizeOption, ImageType} import org.apache.spark.serializer.KryoSerializer -import org.apache.spark.storage.StorageLevel import org.apache.spark.{SparkConf, SparkContext} +import org.locationtech.jts.geom.Envelope import org.scalatest.{BeforeAndAfterAll, FunSpec} +import java.awt.Color +import java.io.FileInputStream +import java.util.Properties + class scalaTest extends FunSpec with BeforeAndAfterAll{ val sparkConf = new SparkConf().setAppName("scalaTest").setMaster("local[*]") sparkConf.set("spark.serializer", classOf[KryoSerializer].getName) @@ -130,6 +129,7 @@ class scalaTest extends FunSpec with BeforeAndAfterAll{ it("should pass choropleth map") { val spatialRDD = new PointRDD(sparkContext, PointInputLocation, PointOffset, PointSplitter, false, PointNumPartitions) val queryRDD = new PolygonRDD(sparkContext, PolygonInputLocation, PolygonSplitter, false, PolygonNumPartitions) + spatialRDD.analyze(); spatialRDD.spatialPartitioning(GridType.KDBTREE) queryRDD.spatialPartitioning(spatialRDD.getPartitioner) spatialRDD.buildIndex(IndexType.RTREE, true) From 4634f880feaba09d73ec8616a25a5074904c5ccf Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Wed, 9 Aug 2023 00:48:06 -0700 Subject: [PATCH 09/11] Fix a bunch of test cases --- .../java/org/apache/sedona/core/spatialRDD/LineStringRDD.java | 3 ++- .../main/java/org/apache/sedona/core/spatialRDD/PointRDD.java | 3 ++- .../java/org/apache/sedona/core/spatialRDD/PolygonRDD.java | 3 ++- .../java/org/apache/sedona/core/spatialRDD/RectangleRDD.java | 2 +- .../java/org/apache/sedona/core/spatialRDD/SpatialRDD.java | 1 + 5 files changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/sedona/core/spatialRDD/LineStringRDD.java b/core/src/main/java/org/apache/sedona/core/spatialRDD/LineStringRDD.java index 4131569a0d..8c9b911081 100644 --- a/core/src/main/java/org/apache/sedona/core/spatialRDD/LineStringRDD.java +++ b/core/src/main/java/org/apache/sedona/core/spatialRDD/LineStringRDD.java @@ -47,7 +47,7 @@ public LineStringRDD() {} */ public LineStringRDD(JavaRDD rawSpatialRDD) { - this.rawSpatialRDD = rawSpatialRDD; + this.setRawSpatialRDD(rawSpatialRDD); } /** @@ -138,5 +138,6 @@ public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Intege this.setRawSpatialRDD(rawTextRDD.mapPartitions(new LineStringFormatMapper(splitter, carryInputData))); } if (splitter.equals(FileDataSplitter.GEOJSON)) { this.fieldNames = FormatMapper.readGeoJsonPropertyNames(rawTextRDD.take(1).get(0).toString()); } + this.analyze(); } } diff --git a/core/src/main/java/org/apache/sedona/core/spatialRDD/PointRDD.java b/core/src/main/java/org/apache/sedona/core/spatialRDD/PointRDD.java index 9e5bc9d693..1a54f11829 100644 --- a/core/src/main/java/org/apache/sedona/core/spatialRDD/PointRDD.java +++ b/core/src/main/java/org/apache/sedona/core/spatialRDD/PointRDD.java @@ -51,7 +51,7 @@ public PointRDD() {} */ public PointRDD(JavaRDD rawSpatialRDD) { - this.rawSpatialRDD = rawSpatialRDD; + this.setRawSpatialRDD(rawSpatialRDD); } /** @@ -136,5 +136,6 @@ public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Off if (Offset != null) {this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PointFormatMapper(Offset, splitter, carryInputData)));} else {this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PointFormatMapper(splitter, carryInputData)));} if (splitter.equals(FileDataSplitter.GEOJSON)) { this.fieldNames = FormatMapper.readGeoJsonPropertyNames(rawTextRDD.take(1).get(0).toString()); } + this.analyze(); } } diff --git a/core/src/main/java/org/apache/sedona/core/spatialRDD/PolygonRDD.java b/core/src/main/java/org/apache/sedona/core/spatialRDD/PolygonRDD.java index 097d7c1ab9..30461acb28 100644 --- a/core/src/main/java/org/apache/sedona/core/spatialRDD/PolygonRDD.java +++ b/core/src/main/java/org/apache/sedona/core/spatialRDD/PolygonRDD.java @@ -59,7 +59,7 @@ public PolygonRDD() {} */ public PolygonRDD(JavaRDD rawSpatialRDD) { - this.rawSpatialRDD = rawSpatialRDD; + this.setRawSpatialRDD(rawSpatialRDD); } /** @@ -150,6 +150,7 @@ public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer s this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PolygonFormatMapper(splitter, carryInputData))); } if (splitter.equals(FileDataSplitter.GEOJSON)) { this.fieldNames = FormatMapper.readGeoJsonPropertyNames(rawTextRDD.take(1).get(0).toString()); } + this.analyze(); } /** diff --git a/core/src/main/java/org/apache/sedona/core/spatialRDD/RectangleRDD.java b/core/src/main/java/org/apache/sedona/core/spatialRDD/RectangleRDD.java index 253e7e6d97..50965a28e0 100644 --- a/core/src/main/java/org/apache/sedona/core/spatialRDD/RectangleRDD.java +++ b/core/src/main/java/org/apache/sedona/core/spatialRDD/RectangleRDD.java @@ -49,7 +49,7 @@ public RectangleRDD() {} */ public RectangleRDD(JavaRDD rawSpatialRDD) { - this.rawSpatialRDD = rawSpatialRDD; + this.setRawSpatialRDD(rawSpatialRDD); } /** diff --git a/core/src/main/java/org/apache/sedona/core/spatialRDD/SpatialRDD.java b/core/src/main/java/org/apache/sedona/core/spatialRDD/SpatialRDD.java index 20720b38d6..06e82d565f 100644 --- a/core/src/main/java/org/apache/sedona/core/spatialRDD/SpatialRDD.java +++ b/core/src/main/java/org/apache/sedona/core/spatialRDD/SpatialRDD.java @@ -447,6 +447,7 @@ public JavaRDD getRawSpatialRDD() public void setRawSpatialRDD(JavaRDD rawSpatialRDD) { this.rawSpatialRDD = rawSpatialRDD; + this.analyze(); } /** From dd1183788d5f57338190c47f38bf12777c91b0c0 Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Wed, 9 Aug 2023 01:31:00 -0700 Subject: [PATCH 10/11] Revert "Remove calling storage level" This reverts commit ae1662a5c841963216abee54973d663f77f3e7b5. --- R/R/data_interface.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/R/R/data_interface.R b/R/R/data_interface.R index 0504a39e3d..a7cbfdfa1c 100644 --- a/R/R/data_interface.R +++ b/R/R/data_interface.R @@ -147,6 +147,7 @@ sedona_read_dsv_to_typed_rdd <- function(sc, max(as.integer(repartition %||% 1L), 1L), fmt ) %>% + set_storage_level(storage_level) %>% new_spatial_rdd(type) } @@ -210,6 +211,7 @@ sedona_read_shapefile_to_typed_rdd <- function(sc, java_context(sc), location ) %>% + set_storage_level(storage_level) %>% new_spatial_rdd(type) } @@ -238,6 +240,7 @@ sedona_read_geojson_to_typed_rdd <- function(sc, has_non_spatial_attrs, max(as.integer(repartition %||% 1L), 1L) ) %>% + set_storage_level(storage_level) %>% new_spatial_rdd(type) } @@ -302,6 +305,7 @@ sedona_read_geojson <- function(sc, allow_invalid_geometries, skip_syntactically_invalid_geometries ) %>% + set_storage_level(storage_level) %>% new_spatial_rdd(NULL) } @@ -331,6 +335,7 @@ sedona_read_wkb <- function(sc, allow_invalid_geometries, skip_syntactically_invalid_geometries ) %>% + set_storage_level(storage_level) %>% new_spatial_rdd(NULL) } @@ -360,6 +365,7 @@ sedona_read_wkt <- function(sc, allow_invalid_geometries, skip_syntactically_invalid_geometries ) %>% + set_storage_level(storage_level) %>% new_spatial_rdd(NULL) } @@ -375,6 +381,7 @@ sedona_read_shapefile <- function(sc, java_context(sc), location ) %>% + set_storage_level(storage_level) %>% new_spatial_rdd(NULL) } @@ -793,3 +800,18 @@ to_delimiter_enum_value <- function(sc, delimiter) { sc$state$enums$delimiter[[delimiter]] } + +set_storage_level <- function(rdd, storage_level) { + sc <- spark_connection(rdd) + storage_level <- sc$state$object_cache$storage_levels[[storage_level]] %||% { + storage_level_obj <- invoke_static( + sc, "org.apache.spark.storage.StorageLevel", storage_level + ) + sc$state$object_cache$storage_levels[[storage_level]] <- storage_level_obj + + storage_level_obj + } + invoke(rdd, "analyze", storage_level) + + rdd +} From dbad25367c0f1b0acee3868b17dfd310fcd34ced Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Thu, 10 Aug 2023 01:03:55 -0700 Subject: [PATCH 11/11] Fix failed test cases --- R/tests/testthat/test-data-interface.R | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/R/tests/testthat/test-data-interface.R b/R/tests/testthat/test-data-interface.R index 5ead06548d..e19fbff01d 100644 --- a/R/tests/testthat/test-data-interface.R +++ b/R/tests/testthat/test-data-interface.R @@ -35,8 +35,7 @@ test_rdd_with_non_spatial_attrs <- invoke_new( 1L, # offset sc$state$enums$delimiter$csv, TRUE, - 1L, # numPartitions - sc$state$object_cache$storage_levels$memory_only + 1L # numPartitions ) %>% apache.sedona:::new_spatial_rdd("point") @@ -575,8 +574,7 @@ test_that("sedona_write_wkb() works as expected", { 0L, # offset sc$state$enums$delimiter$wkb, TRUE, - 1L, # numPartitions - sc$state$object_cache$storage_levels$memory_only + 1L # numPartitions ) expect_result_matches_original(pt_rdd) @@ -593,8 +591,7 @@ test_that("sedona_write_wkt() works as expected", { 0L, # offset sc$state$enums$delimiter$wkt, TRUE, - 1L, # numPartitions - sc$state$object_cache$storage_levels$memory_only + 1L # numPartitions ) expect_result_matches_original(pt_rdd) @@ -611,8 +608,7 @@ test_that("sedona_write_geojson() works as expected", { 0L, # offset sc$state$enums$delimiter$geojson, TRUE, - 1L, # numPartitions - sc$state$object_cache$storage_levels$memory_only + 1L # numPartitions ) expect_result_matches_original_geojson(pt_rdd)