Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix unicode handling for property keys, layer names. #5

Merged
merged 5 commits into from
Sep 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# CircleCI configuration (format version 2) defining a single "build" job.
version: 2
jobs:
build:
docker:
# The job runs inside this Python 2.7.15 (Debian stretch) image.
- image: circleci/python:2.7.15-stretch
steps:
- checkout
- run:
name: Checkout submodules
# Fetch all git submodules, including nested ones.
command: git submodule update --init --recursive
- run:
name: Install C++ dependencies
# Compiler toolchain plus the GEOS and Boost.Python development packages.
command: sudo apt install build-essential libgeos-dev libboost-python-dev
- run:
name: Install Python dependencies
# Shapely is imported by the tests to construct geometries.
command: sudo pip install shapely
- run:
name: Build library
command: python setup.py build
- run:
name: Unit tests
command: python setup.py test
12 changes: 9 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ Where:
You will need to install a C++11 build system and the GEOS library. For example, if you are on Ubuntu or Debian:

```
sudo apt install build-essential libgeos-dev
sudo apt install build-essential libgeos-dev libboost-python-dev
```

You will also need the [Shapely](http://toblerity.org/shapely/) Python library. Install (with or without `sudo` depending on whether you're installing it globally or locally):

```
pip install shapely
```

**NOTE: this list of dependencies is probably incomplete. Please [file an issue](https://github.com/tilezen/coanacatl/issues/new) if you find you need additional dependencies.**
Expand All @@ -41,8 +47,8 @@ python setup.py install
## Current limitations

* Only point, linestring, polygon and multi-versions of those are supported. Linear rings and geometry collections are currently not supported.
* Property dictionary keys must be strings, as per the MVT spec. Property dictionary values can be boolean, integer, floating point or strings.
* There are **no tests**!
* Property dictionary keys must be strings (or `unicode`), as per the MVT spec. Property dictionary values can be boolean, integer, floating point or strings.
* There are **very few tests**!
* Error checking of return values from the GEOS API is inadequate, and needs shoring up.
* There needs to be a better way to return warnings/errors to the user, perhaps as a list of objects, so that the user can determine if it's enough to fail the tile or just log.

Expand Down
43 changes: 34 additions & 9 deletions coanacatl/coanacatl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,36 @@ void _coanacatl_printf(const char *fmt, ...) {
#define FINISH_GEOS finishGEOS_r
#endif

namespace {

/**
 * Extract a string from a Python object.
 *
 * - A unicode object is encoded to UTF-8 and the encoded bytes are returned.
 * - A str object is returned as-is; its bytes are not re-encoded or checked.
 * - Any other type is rejected with an exception whose message names the
 *   object's type and includes its repr.
 *
 * @param value Python object which must be either a str or unicode instance.
 * @return The extracted string.
 * @throws std::runtime_error if value is neither str nor unicode.
 */
std::string extract_utf8_string(bp::object value) {
  PyObject *value_ptr = value.ptr();

  if (PyUnicode_Check(value_ptr)) {
    // Call unicode.encode() on the object itself. Wrapping it in bp::str()
    // first would run Python's str() on it, which under Python 2 goes through
    // the default (ASCII) codec and raises UnicodeEncodeError for any
    // non-ASCII character before the UTF-8 encode is ever reached.
    bp::object encoded = value.attr("encode")("utf-8");
    std::string v = bp::extract<std::string>(encoded);
    return v;
  }

  if (PyString_Check(value_ptr)) {
    std::string v = bp::extract<std::string>(value);
    return v;
  }

  // Neither str nor unicode: report the offending type and value.
  std::ostringstream out;
  bp::object repr_py = value.attr("__repr__")();
  std::string repr = bp::extract<std::string>(repr_py);
  out << "Unable to convert Python object of type "
      << value_ptr->ob_type->tp_name << " to string: " << repr;
  throw std::runtime_error(out.str());
}

} // end anonymous namespace

class encoder {
public:
encoder(bp::tuple bounds, size_t extents)
Expand Down Expand Up @@ -121,7 +151,7 @@ class encoder {
};

void encoder::encode_layer(bp::object layer) {
std::string layer_name = bp::extract<std::string>(layer["name"]);
std::string layer_name = extract_utf8_string(layer["name"]);

if (m_layer_names.count(layer_name) > 0) {
throw std::runtime_error("Duplicate layer names are not allowed.");
Expand Down Expand Up @@ -158,7 +188,7 @@ void encoder::add_properties(vtzero::feature_builder &fb, bp::dict props) {
const size_t num_items = bp::len(items);
for (size_t i = 0; i < num_items; ++i) {
bp::object item = items[i];
std::string k = bp::extract<std::string>(item[0]);
std::string k = extract_utf8_string(item[0]);
bp::object value = item[1];
PyObject *value_ptr = value.ptr();

Expand All @@ -174,13 +204,8 @@ void encoder::add_properties(vtzero::feature_builder &fb, bp::dict props) {
int64_t v = bp::extract<int64_t>(value);
fb.add_property(k, v);

} else if (PyUnicode_Check(value_ptr)) {
bp::object encoded = bp::str(value).encode("utf-8");
std::string v = bp::extract<std::string>(encoded);
fb.add_property(k, v);

} else if (PyString_Check(value_ptr)) {
std::string v = bp::extract<std::string>(value);
} else if (PyUnicode_Check(value_ptr) || PyString_Check(value_ptr)) {
std::string v = extract_utf8_string(value);
fb.add_property(k, v);

} else {
Expand Down
249 changes: 189 additions & 60 deletions test.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,189 @@
import coanacatl
from shapely.geometry import Point
from shapely.geometry import LineString
from shapely.geometry import Polygon
from shapely.geometry import MultiPoint
from shapely.geometry import MultiLineString
from shapely.geometry import MultiPolygon


features = [
dict(
geometry=Point(0, 0),
properties={
'string': 'string_value',
'long': 4294967297L,
'int': 1,
'float': 1.0,
'bool': True,
},
id=1
),
dict(
geometry=LineString([(0, 0), (1, 1)]),
properties={'baz': 'bat'},
id=None
),
dict(
geometry=Point(0, 0).buffer(1),
properties={'blah': 'blah', 'id': 123},
id=3
),
dict(
geometry=MultiPoint([(0, 0), (1, 1)]),
properties={'foo': 'bar', 'boolean': False},
id=None
),
dict(
geometry=MultiLineString([[(0, 0), (1, 0)], [(0, 1), (1, 1)]]),
properties={'foo': 'bar'},
id=None
),
dict(
geometry=Point(0, 0).buffer(0.4).union(Point(1, 1).buffer(0.4)),
properties={'blah': 'blah'},
id=4
),
]

layers = [dict(
name='layer',
features=features,
)]

bounds = (0, 0, 1, 1)
extents = 4096

tile_data = coanacatl.encode(layers, bounds, extents)
print repr(tile_data)
with open('foo.mvt', 'w') as fh:
fh.write(tile_data)
from unittest import TestCase


class GeometryTest(TestCase):
    """Encode one feature of each geometry kind and check a tile comes back."""

    def _generate_tile(self, features):
        """Encode ``features`` as a single layer called 'layer'.

        Asserts that the encoder output is truthy, then returns it.
        """
        import coanacatl

        only_layer = dict(name='layer', features=features)
        tile_data = coanacatl.encode([only_layer], (0, 0, 1, 1), 4096)
        self.assertTrue(tile_data)
        return tile_data

    def test_point(self):
        """A single point with an integer id."""
        from shapely.geometry import Point

        shape = Point(0, 0)
        self._generate_tile([dict(geometry=shape, properties={}, id=1)])

    def test_linestring(self):
        """A two-vertex line with no id."""
        from shapely.geometry import LineString

        shape = LineString([(0, 0), (1, 1)])
        self._generate_tile([dict(geometry=shape, properties={}, id=None)])

    def test_polygon(self):
        """A polygon produced by buffering a point."""
        from shapely.geometry import Point

        shape = Point(0, 0).buffer(1)
        self._generate_tile([dict(geometry=shape, properties={}, id=3)])

    def test_multipoint(self):
        """Two points with no id."""
        from shapely.geometry import MultiPoint

        shape = MultiPoint([(0, 0), (1, 1)])
        self._generate_tile([dict(geometry=shape, properties={}, id=None)])

    def test_multilinestring(self):
        """Two separate two-vertex lines with no id."""
        from shapely.geometry import MultiLineString

        lower = [(0, 0), (1, 0)]
        upper = [(0, 1), (1, 1)]
        shape = MultiLineString([lower, upper])
        self._generate_tile([dict(geometry=shape, properties={}, id=None)])

    def test_multipolygon(self):
        """The union of two buffered points."""
        from shapely.geometry import Point

        first_disc = Point(0, 0).buffer(0.4)
        second_disc = Point(1, 1).buffer(0.4)
        shape = first_disc.union(second_disc)
        self._generate_tile([dict(geometry=shape, properties={}, id=4)])


class PropertyTest(TestCase):

def _generate_tile(self, features):
import coanacatl

layers = [dict(
name='layer',
features=features,
)]

bounds = (0, 0, 1, 1)
extents = 4096

tile_data = coanacatl.encode(layers, bounds, extents)
self.assertTrue(tile_data)
return tile_data

def test_property_types(self):
from shapely.geometry import Point

features = [
dict(
geometry=Point(0, 0),
properties={
'string': 'string_value',
'long': 4294967297L,
'int': 1,
'float': 1.0,
'bool': True,
},
id=1
),
]

self._generate_tile(features)

def test_unicode_property_value(self):
from shapely.geometry import Point

features = [
dict(
geometry=Point(0, 0),
properties={
'string': unicode('unicode_value'),
},
id=1
),
]

self._generate_tile(features)

def test_unicode_property_key(self):
from shapely.geometry import Point

features = [
dict(
geometry=Point(0, 0),
properties={
unicode('unicode'): 'string_value',
},
id=1
),
]

self._generate_tile(features)

def test_unicode_layer_name(self):
import coanacatl
from shapely.geometry import Point

layers = [dict(
name=unicode('layer'),
features=[
dict(
geometry=Point(0, 0),
properties={
'foo': 'bar',
},
id=1
),
],
)]

bounds = (0, 0, 1, 1)
extents = 4096

tile_data = coanacatl.encode(layers, bounds, extents)
self.assertTrue(tile_data)
return tile_data