Use 2 letter language codes #975

Merged 4 commits on Aug 19, 2016 (changes shown from 3 commits)
24 changes: 12 additions & 12 deletions SEMANTIC-VERSIONING.md
@@ -132,18 +132,18 @@ In addition to the `common` **name** locals call a place, the following `common`

#### Common languages:

1. `name:ara` Arabic
1. `name:zho` Chinese, traditional or simplified
1. `name:eng` English
1. `name:fra` French
1. `name:rus` Russian
1. `name:spa` Spanish
1. `name:deu` German
1. `name:gre` Greek
1. `name:ita` Italian
1. `name:jpn` Japanese
1. `name:kor` Korean
1. `name:vie` Vietnamese
1. `name:ar` Arabic
1. `name:zh` Chinese, traditional or simplified
1. `name:en` English
1. `name:fr` French
1. `name:ru` Russian
1. `name:es` Spanish
1. `name:de` German
1. `name:gr` Greek
1. `name:it` Italian
1. `name:jp` Japanese
Member: This should probably be `ja` instead of `jp`.

1. `name:ko` Korean
1. `name:vi` Vietnamese

Arabic, Chinese, English, French, Russian and Spanish are used by the United Nations for meetings and official documents. The other languages listed are frequently used in OpenStreetMap and Who's On First.
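The 3-letter to 2-letter rewrite this PR applies to the list above can be sketched with a small hand-written lookup (illustration only; the real code derives the mapping via pycountry, and note that the correct ISO 639-1 codes for Greek and Japanese are `el` and `ja`, not the `gr`/`jp` shown in the list):

```python
# Hand-written ISO 639-3 -> ISO 639-1 mapping for the common languages
# listed above (for illustration; the PR computes this with pycountry).
ISO_639_3_TO_1 = {
    'ara': 'ar', 'zho': 'zh', 'eng': 'en', 'fra': 'fr',
    'rus': 'ru', 'spa': 'es', 'deu': 'de', 'ell': 'el',
    'ita': 'it', 'jpn': 'ja', 'kor': 'ko', 'vie': 'vi',
}

def to_two_letter(name_key):
    """Rewrite e.g. 'name:eng' to 'name:en'; leave unknown codes alone."""
    prefix, _, code = name_key.partition(':')
    return '%s:%s' % (prefix, ISO_639_3_TO_1.get(code, code))

print(to_two_letter('name:eng'))  # -> name:en
```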

2 changes: 1 addition & 1 deletion docs/layers.md
@@ -16,7 +16,7 @@ Mapzen primarily sources from OpenStreetMap, but includes a variety of other ope

#### Name localization

Mapzen vector tile features include the default `name` property. We include all language variants of the `name:*`, `alt_name:*`, `alt_name_`, `old_name:*` values to enable full internationalization (when different than `name`). Tangram supports all language scripts.
Mapzen vector tile features include the default `name` property. We include all language variants of the `name:*`, `alt_name:*`, `alt_name_`, `old_name:*` values to enable full internationalization (when different than `name`). Tangram supports all language scripts. Language variants are identified by an ISO 639-1 two-letter language code and optional country, for example `en_GB` for British English.

For features in the `boundaries` layer, we support two additional variants `left:name:*` and `right:name:*` to support oriented labeling on the appropriate side of the boundary line (so the labeled polygon's text can appear inside that polygon consistently).
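A variant key such as `en_GB` can be split into its language and country parts as below. This is a minimal sketch of the shape of the parsing; the PR's actual `_convert_osm_l10n_name` in `vectordatasource/transform.py` also validates both parts with pycountry:

```python
def split_language_variant(code):
    """Split a variant like 'en_GB' into (language, country).
    A plain language code such as 'en' yields ('en', None)."""
    if '_' in code:
        lang, country = code.split('_', 1)
        return lang, country
    return code, None

print(split_language_variant('en_GB'))  # -> ('en', 'GB')
```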

14 changes: 7 additions & 7 deletions integration-test/418-wof-l10n_name.py
@@ -5,7 +5,7 @@
{ 'id': 85826037, 'kind': 'neighbourhood',
'source': "whosonfirst.mapzen.com",
'name': 'Hollywood',
'name:kor': '\xed\x97\x90\xeb\xa6\xac\xec\x9a\xb0\xeb\x93\x9c' })
'name:ko': '\xed\x97\x90\xeb\xa6\xac\xec\x9a\xb0\xeb\x93\x9c' })

# San Francisco (wof neighbourhood)
# https://whosonfirst.mapzen.com/data/858/826/41/85882641.geojson
@@ -14,7 +14,7 @@
{ 'id': 85882641, 'kind': 'neighbourhood',
'source': "whosonfirst.mapzen.com",
'name': 'San Francisco',
'name:spa': type(None) })
'name:es': type(None) })

# San Francisco (osm city)
# http://www.openstreetmap.org/node/26819236
@@ -23,20 +23,20 @@
{ 'id': 26819236, 'kind': 'locality', 'kind_detail': 'city',
'source': "openstreetmap.org",
'name': 'San Francisco',
'name:zho': '\xe8\x88\x8a\xe9\x87\x91\xe5\xb1\xb1\xe5\xb8\x82\xe8\x88\x87\xe7\xb8\xa3' })
'name:zh': '\xe8\x88\x8a\xe9\x87\x91\xe5\xb1\xb1\xe5\xb8\x82\xe8\x88\x87\xe7\xb8\xa3' })

# Node: Londonderry/Derry (267762522)
# http://www.openstreetmap.org/node/267762522
assert_has_feature(
16, 31436, 20731, 'places',
{ 'id': 267762522, 'name:eng_GB': 'Londonderry'})
{ 'id': 267762522, 'name:en_GB': 'Londonderry'})

# Node: Jerusalem (29090735)
# http://www.openstreetmap.org/node/29090735
assert_has_feature(
16, 39180, 26661, 'places',
{ 'id': 29090735,
'name:nan': 'I\xc3\xa2-l\xc5\x8d\xcd\x98-sat-l\xc3\xa9ng',
'name:zho': '\xe8\x80\xb6\xe8\xb7\xaf\xe6\x92\x92\xe5\x86\xb7',
'name:yue': '\xe8\x80\xb6\xe8\xb7\xaf\xe6\x92\x92\xe5\x86\xb7',
'name:zh-min-nan': 'I\xc3\xa2-l\xc5\x8d\xcd\x98-sat-l\xc3\xa9ng',
'name:zh': '\xe8\x80\xb6\xe8\xb7\xaf\xe6\x92\x92\xe5\x86\xb7',
'name:zh-yue': '\xe8\x80\xb6\xe8\xb7\xaf\xe6\x92\x92\xe5\x86\xb7',
})
20 changes: 10 additions & 10 deletions test/test_transform.py
@@ -58,29 +58,29 @@ def _call_fut(self, x):

def test_osm_convert_2_3(self):
eng = self._call_fut('en')
self.assertEquals(eng, 'eng')
self.assertEquals(eng, 'en')

def test_osm_convert_3(self):
eng = self._call_fut('eng')
self.assertEquals(eng, 'eng')
self.assertEquals(eng, 'en')

def test_osm_convert_not_found(self):
invalid = self._call_fut('foo')
self.assertIsNone(invalid)

def test_osm_convert_country(self):
eng_gb = self._call_fut('en_GB')
self.assertEquals(eng_gb, 'eng_GB')
self.assertEquals(eng_gb, 'en_GB')

def test_osm_convert_country_invalid(self):
not_found = self._call_fut('en_foo')
self.assertIsNone(not_found)

def test_osm_convert_lookup(self):
zh_min_nan = self._call_fut('zh-min-nan')
self.assertEquals(zh_min_nan, 'nan')
self.assertEquals(zh_min_nan, 'zh-min-nan')
zh_min_nan = self._call_fut('zh-yue')
self.assertEquals(zh_min_nan, 'yue')
self.assertEquals(zh_min_nan, 'zh-yue')


class L10nWofTransformTest(unittest.TestCase):
@@ -92,7 +92,7 @@ def _call_fut(self, x):

def test_osm_convert_valid(self):
eng = self._call_fut('eng_x')
self.assertEquals(eng, 'eng')
self.assertEquals(eng, 'en')

def test_osm_convert_invalid(self):
invalid = self._call_fut('zzz_x')
@@ -118,14 +118,14 @@ def _call_fut(self, source, name_key, name_val):

def test_osm_source(self):
shape, props, fid = self._call_fut('openstreetmap.org', 'en', 'foo')
self.assertTrue('name:eng' in props)
self.assertEquals('foo', props['name:eng'])
self.assertTrue('name:en' in props)
self.assertEquals('foo', props['name:en'])

def test_wof_source(self):
shape, props, fid = self._call_fut('whosonfirst.mapzen.com',
'eng_x', 'foo')
self.assertTrue('name:eng' in props)
self.assertEquals('foo', props['name:eng'])
self.assertTrue('name:en' in props)
self.assertEquals('foo', props['name:en'])


class DropFeaturesMinPixelsTest(unittest.TestCase):
51 changes: 27 additions & 24 deletions vectordatasource/transform.py
@@ -346,10 +346,10 @@ def _convert_wof_l10n_name(x):
if len(lang_str_iso_639_3) != 3:
return None
try:
pycountry.languages.get(iso639_3_code=lang_str_iso_639_3)
lang = pycountry.languages.get(iso639_3_code=lang_str_iso_639_3)
except KeyError:
return None
return lang_str_iso_639_3
return lang.iso639_1_code


def _normalize_osm_lang_code(x):
@@ -366,8 +366,8 @@ def _normalize_osm_lang_code(x):
lang = pycountry.languages.get(iso639_3_code=x)
except KeyError:
return None
iso639_3_code = lang.iso639_3_code.encode('utf-8')
return iso639_3_code
try:
iso639_1_code = lang.iso639_1_code.encode('utf-8')
except AttributeError:
return None
return iso639_1_code
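Not every ISO 639-3 language has a two-letter equivalent (Cantonese `yue`, for example, has none), which is why the new code catches `AttributeError` when reading `iso639_1_code`. The same guard can be sketched without pycountry, using a stand-in record class:

```python
class Lang(object):
    """Stand-in for a pycountry language record; the attribute is only
    present when a two-letter code exists (an assumption for illustration)."""
    def __init__(self, iso639_1_code=None):
        if iso639_1_code is not None:
            self.iso639_1_code = iso639_1_code

def two_letter_or_none(lang):
    try:
        return lang.iso639_1_code
    except AttributeError:
        return None  # no ISO 639-1 equivalent (e.g. 'yue')

print(two_letter_or_none(Lang('en')))  # -> en
print(two_letter_or_none(Lang()))      # -> None
```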


def _normalize_country_code(x):
@@ -386,38 +389,38 @@ def _normalize_country_code(x):
return alpha2_code


osm_l10n_lookup = {
'zh-min-nan': 'nan',
'zh-yue': 'yue',
}


def osm_l10n_name_lookup(x):
lookup = osm_l10n_lookup.get(x)
if lookup is not None:
return lookup
else:
return x
osm_l10n_lookup = set([
'zh-min-nan',
'zh-yue'
])


def _convert_osm_l10n_name(x):
x = osm_l10n_name_lookup(x)
if x in osm_l10n_lookup:
return x
Member: Do we want to preserve these variations, or normalize them?

Member (author): I assumed that if we handled them specially before, we'd want to keep them now? @nvkelso what do you think?


if '_' not in x:
return _normalize_osm_lang_code(x)
lang_code_candidate = x
country_candidate = None

fields_by_underscore = x.split('_', 1)
lang_code_candidate, country_candidate = fields_by_underscore
else:
fields_by_underscore = x.split('_', 1)
lang_code_candidate, country_candidate = fields_by_underscore

lang_code_result = _normalize_osm_lang_code(lang_code_candidate)
if lang_code_result is None:
return None

country_result = _normalize_country_code(country_candidate)
if country_result is None:
return None
if country_candidate:
country_result = _normalize_country_code(country_candidate)
if country_result is None:
return None
Member: This matches the previous behavior, but I'm now wondering if it's better to keep the language code rather than dropping it on an invalid country code.

Member (author): I'm wondering if we have something like name:en="Foo", name:en_XY="Bar" — do we want the name:en_XY to normalise to name:en and potentially clash with the other entry?

Member: Good point. We can set it up so that we'll only set the language code if one doesn't already exist in this scenario, but the added complexity might not be worth it.

Member (author): I've changed the behaviour in 4df4863 to keep the most specific language code. At the moment, that just means that any with an omitted country are less specific and won't overwrite a code without a country, e.g. en always beats en_NOEXIST.

Member: Great idea with priorities! ⚡ My first thoughts were also to just return a more complex value object, but just whether the country code was invalid or not. The priority notion is a better generalization of that.


result = '%s_%s' % (lang_code_result, country_result)

else:
result = lang_code_result

result = '%s_%s' % (lang_code_result, country_result)
return result
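Putting the pieces together, the new conversion behaviour can be sketched as follows. This is a simplified stand-in: the `LANGS` and `COUNTRIES` tables here are tiny hand-written substitutes for the pycountry-backed `_normalize_osm_lang_code` and `_normalize_country_code` in the real code:

```python
# Codes kept verbatim rather than normalized, as in osm_l10n_lookup above.
PRESERVED = {'zh-min-nan', 'zh-yue'}

# Illustrative stand-ins for the pycountry-backed normalizers.
LANGS = {'en': 'en', 'eng': 'en', 'de': 'de', 'deu': 'de'}
COUNTRIES = {'GB', 'US', 'DE'}

def convert_osm_l10n_name(code):
    if code in PRESERVED:
        return code
    if '_' in code:
        lang, country = code.split('_', 1)
    else:
        lang, country = code, None
    lang = LANGS.get(lang)
    if lang is None:
        return None
    if country is not None:
        # An invalid country code still drops the whole variant here,
        # matching the behaviour this revision of the PR implements.
        if country not in COUNTRIES:
            return None
        return '%s_%s' % (lang, country)
    return lang

print(convert_osm_l10n_name('eng_GB'))   # -> en_GB
print(convert_osm_l10n_name('zh-yue'))   # -> zh-yue
print(convert_osm_l10n_name('en_foo'))   # -> None
```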

