-
Notifications
You must be signed in to change notification settings - Fork 220
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial commit wrapping the GMT `gmtselect` module (for #1427), which selects data table subsets based on multiple spatial criteria. The original GMT `gmtselect` documentation is at https://docs.generic-mapping-tools.org/6.2/gmtselect.html. The non-common optional parameters I and Z are aliased as reverse and z_subregion, respectively.
- Loading branch information
Showing
5 changed files
with
212 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -81,6 +81,7 @@ Operations on tabular data: | |
|
||
blockmean | ||
blockmedian | ||
select | ||
surface | ||
|
||
Operations on grids: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,7 @@ | |
grdtrack, | ||
info, | ||
makecpt, | ||
select, | ||
surface, | ||
which, | ||
x2sys_cross, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
""" | ||
select - Select data table subsets based on multiple spatial criteria. | ||
""" | ||
import pandas as pd | ||
from pygmt.clib import Session | ||
from pygmt.helpers import ( | ||
GMTTempFile, | ||
build_arg_string, | ||
fmt_docstring, | ||
kwargs_to_strings, | ||
use_alias, | ||
) | ||
|
||
|
||
@fmt_docstring
@use_alias(
    I="reverse",
    J="projection",
    R="region",
    V="verbose",
    Z="z_subregion",
    b="binary",
    d="nodata",
    e="find",
    f="coltypes",
    g="gap",
    h="header",
    i="incols",
    o="outcols",
    r="registration",
    s="skiprows",
    w="wrap",
)
@kwargs_to_strings(R="sequence")
def select(table=None, outfile=None, **kwargs):
    r"""
    Select data table subsets based on multiple spatial criteria.

    This is a filter that reads (x, y) or (longitude, latitude) positions from
    the first 2 columns of *table* and uses a combination of 1-7 criteria to
    pass or reject the records. Records can be selected based on whether or not
    they are:

    1. inside a rectangular region (**region** [and **projection**])
    2. within *dist* km of any point in *pointfile*
    3. within *dist* km of any line in *linefile*
    4. inside one of the polygons in the *polygonfile*
    5. inside geographical features (based on coastlines)
    6. has z-values within a given range, or
    7. inside bins of a grid mask whose nodes are non-zero

    The sense of the tests can be reversed for each of these 7 criteria by
    using the **reverse** option.

    Full option list at :gmt-docs:`gmtselect.html`

    {aliases}

    Parameters
    ----------
    table : str or {table-like}
        Pass in either a file name to an ASCII data table, a 2D
        {table-classes}.
    outfile : str
        The file name for the output ASCII file.
    reverse : str
        [**cflrsz**].
        Reverses the sense of the test for each of the criteria specified:

        - **c** select records NOT inside any point's circle of influence.
        - **f** select records NOT inside any of the polygons.
        - **g** will pass records inside the cells with z equal zero of the
          grid mask in **-G**.
        - **l** select records NOT within the specified distance of any line.
        - **r** select records NOT inside the specified rectangular region.
        - **s** select records NOT considered inside as specified by **-N**
          (and **-A**, **-D**).
        - **z** select records NOT within the range specified by
          **z_subregion**.
    z_subregion : str
        *min*\ [/*max*]\ [**+a**]\ [**+c**\ *col*]\ [**+i**].
        Pass all records whose 3rd column (*z*; *col* = 2) lies within the
        given range or is NaN (use **skiprows** to skip NaN records). If *max*
        is omitted then we test if *z* equals *min* instead. This means
        equality within 5 ULPs (unit of least precision;
        http://en.wikipedia.org/wiki/Unit_in_the_last_place). Input file must
        have at least three columns. To indicate no limit on min or max,
        specify a hyphen (-). If your 3rd column is absolute time then remember
        to supply ``coltypes="2T"``. To specify another column, append
        **+c**\ *col*, and to specify several tests just repeat the
        **z_subregion** option as many times as you have columns to test.
        **Note**: When more than one **z_subregion** option is given then the
        ``reverse="z"`` option cannot be used. In the case of multiple tests
        you may use these modifiers as well: **+a** passes any record that
        passes at least one of your *z* tests [Default is all tests must pass],
        and **+i** reverses the tests to pass record with *z* value NOT in the
        given range. Finally, if **+c** is not used then it is automatically
        incremented for each new **z_subregion** option, starting with 2.
    {J}
    {R}
    {V}
    {b}
    {d}
    {e}
    {f}
    {g}
    {h}
    {i}
    {o}
    {r}
    {s}
    {w}

    Returns
    -------
    output : pandas.DataFrame or None
        Return type depends on whether the ``outfile`` parameter is set:

        - :class:`pandas.DataFrame` table if ``outfile`` is not set.
        - None if ``outfile`` is set (filtered output will be stored in file
          set by ``outfile``).
    """
    with GMTTempFile(suffix=".csv") as tmpfile:
        # Record the caller's intent once, before ``outfile`` is redirected to
        # the temporary file, instead of recovering it later by comparing
        # path strings.
        return_table = outfile is None
        if return_table:
            outfile = tmpfile.name

        with Session() as lib:
            # Choose how data will be passed into the module
            table_context = lib.virtualfile_from_data(check_kind="vector", data=table)
            with table_context as infile:
                arg_str = " ".join([infile, build_arg_string(kwargs), "->" + outfile])
                lib.call_module(module="gmtselect", args=arg_str)

        result = None  # stays None when the output went to the user's outfile
        if return_table:
            # Only the attribute lookup is guarded: inputs without ``columns``
            # (file names, matrices, ...) fall back to unnamed columns, while
            # an AttributeError raised while parsing the file is not masked.
            try:
                column_names = table.columns.to_list()
            except AttributeError:  # e.g. 'str' object has no attribute 'columns'
                column_names = None

            # The temporary file must be read before the enclosing context
            # manager exits.
            if column_names is None:
                result = pd.read_csv(
                    tmpfile.name, sep="\t", header=None, comment=">"
                )
            else:
                result = pd.read_csv(tmpfile.name, sep="\t", names=column_names)

    return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
""" | ||
Tests for select. | ||
""" | ||
import os | ||
|
||
import numpy.testing as npt | ||
import pandas as pd | ||
import pytest | ||
from pygmt import select | ||
from pygmt.datasets import load_sample_bathymetry | ||
from pygmt.exceptions import GMTInvalidInput | ||
from pygmt.helpers import GMTTempFile, data_kind | ||
|
||
|
||
@pytest.fixture(scope="module", name="dataframe")
def fixture_dataframe():
    """
    Provide the sample bathymetry table data, loaded once per test module.
    """
    bathymetry = load_sample_bathymetry()
    return bathymetry
|
||
|
||
def test_select_input_dataframe(dataframe):
    """
    Check that select accepts a pandas.DataFrame and returns a DataFrame with
    the same column names.
    """
    bounds = [250, 251, 26, 27]
    selected = select(table=dataframe, region=bounds)

    assert isinstance(selected, pd.DataFrame)
    assert (dataframe.columns == selected.columns).all()
    assert selected.shape == (65, 3)

    expected_median = [250.31464, 26.33893, -270.0]
    npt.assert_allclose(selected.median(), expected_median)
|
||
|
||
def test_select_input_table_matrix(dataframe):
    """
    Check that select accepts a plain matrix (the DataFrame's underlying
    values) as table input.

    The reverse (I) alias is exercised at the same time.
    """
    matrix = dataframe.values
    bounds = [245.5, 254.5, 20.5, 29.5]
    selected = select(table=matrix, region=bounds, reverse="r")

    assert isinstance(selected, pd.DataFrame)
    assert selected.shape == (9177, 3)

    expected_median = [247.235, 20.48624, -3241.0]
    npt.assert_allclose(selected.median(), expected_median)
|
||
|
||
def test_select_input_filename():
    """
    Check that select accepts an ASCII text file name as input and writes its
    result to the requested output file.

    The z_subregion (Z) alias is exercised at the same time.
    """
    with GMTTempFile() as tmpfile:
        returned = select(
            table="@tut_ship.xyz",
            region=[250, 251, 26, 27],
            z_subregion=["-/-630", "-120/0+a"],
            outfile=tmpfile.name,
        )
        # Nothing is returned when outfile is set; the data lives in the file.
        assert returned is None
        assert os.path.exists(path=tmpfile.name)

        written = pd.read_csv(tmpfile.name, sep="\t", header=None)
        assert written.shape == (5, 3)

        expected_median = [250.12149, 26.04296, -674.0]
        npt.assert_allclose(written.median(), expected_median)