Skip to content

Commit

Permalink
Merge pull request #22 from mundialis/SN
Browse files Browse the repository at this point in the history
SN DSM and DTM tile indices
  • Loading branch information
anikaweinmann authored Dec 22, 2023
2 parents e80bbc2 + 4f57cd4 commit 8b83f5a
Show file tree
Hide file tree
Showing 8 changed files with 326 additions and 16 deletions.
10 changes: 2 additions & 8 deletions DSM/SN/README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
This folder contains a tileindex for a DSM ("DOM") of Sachsen. It was created based on a .csv file containing list of URLs.
This folder contains a tileindex for a DSM ("DOM") of Sachsen. It was created with the script `SN_DOM_tileindex.py` and requests the site "https://www.geodaten.sachsen.de/batch-download-4719.html".

Tile index:
- `SN_DOM_tileindex_proj.gpkg.gz`
- contains one poylgon for every DSM
- download URL for each poylgon is stored as attribute

Generate Tile_index with .csv file:
- Download links of each file need to be manually copied from https://www.geodaten.sachsen.de/batch-download-4719.html for whole saxony, product: "Digitales Oberflächenmodell 1m (DOM1)"
- a .csv file is created, containing each download URL for all DGMs (no seperator, one URL per line) including the prefix "/vsizip/vsicurl/"
- based on .csv, a tileindex.gpkg can be generated by running: `gdaltindex -f GPKG SN_DOM_tileindex.gpkg --optfile SN_DOM_tiles.csv`
- reprojected via QGIS
- vsicurl download URL for each poylgon is stored as attribute

License:
- `license.txt`
Expand Down
161 changes: 161 additions & 0 deletions DSM/SN/SN_DOM_tileindex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
############################################################################
#
# NAME: SN_DOM_tileindex.py
#
# AUTHOR(S): Anika Weinmann
# mundialis GmbH & Co. KG, Bonn
# https://www.mundialis.de
#
# PURPOSE: Create tile index of DOM tiff files
#
# Data source: https://www.geodaten.sachsen.de/batch-download-4719.html
#
# COPYRIGHT: (C) 2023 by Anika Weinmann, mundialis
#
# REQUIREMENTS: gdal, gzip
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
############################################################################

# Usage:
# python3 DSM/SN/SN_DOM_tileindex.py
# Output:
# DSM/SN/SN_DOM_tileindex_proj.gpkg.gz


from bs4 import BeautifulSoup
import requests as req
import json
import re
import os

SN_URL = "https://www.geodaten.sachsen.de/batch-download-4719.html"
DATA_TYPE = "DOM1_TIFF"
TILESIZE = 1000
TINDEX = "SN_DOM_tileindex_proj.gpkg.gz"

os.chdir("DSM/SN/")


def get_all_grid_ids(grid_ids):
all_grid_ids = []
for i in range(0, len(grid_ids), 2):
id = grid_ids[i]
num = grid_ids[i + 1]
for n in range(num):
grid_id = id + n
all_grid_ids.append(grid_id)
return all_grid_ids


# get HTML from SN_URL and extract js script
web = req.get(SN_URL)
html = BeautifulSoup(web.text, 'lxml')
js_scripts = html.find_all("script", type="text/javascript")
js_script = ""
for jss in js_scripts:
if "function copyURLs" in str(jss):
js_script = str(jss)

# extract vars from js script
## data url
data_base_url = "".join([x.strip() for x in js_script.split(
"function createGeoCloudURL", 1
)[-1].split("}", 1)[0].split("return")[-1].split("+")]).replace(
"(", ""
).replace(")", "").replace("'", "")
## product
batchConfig_products_str = "{" + re.findall(r"batchConfig.products={(.*?)}*\n", js_script)[0].strip()
batchConfig_products = json.loads(batchConfig_products_str)
products = batchConfig_products[DATA_TYPE]
product_id = products["share_id"]
filename_tpl = products["filename"]
# resolution = int(products["packagesize"]) / 1000.
existing_not_computed = get_all_grid_ids(products["existing_not_computed"])
computed_not_existing = get_all_grid_ids(products["computed_not_existing"])
## mapping
batchConfig_mapping_str = "{" + re.findall(r"batchConfig.mapping={(.*?)}*\n", js_script)[0].strip()
batchConfig_mapping = json.loads(batchConfig_mapping_str)

# create GeoJson
geojson_dict = {
"type": "FeatureCollection",
"name": "tindex",
"crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::25833"}},
"features": []
}

# create tile urls
urls = []
fid = 0
for map in batchConfig_mapping.values():
grid_id_list = map["grid_id"]
for i in range(0, len(grid_id_list), 2):
first_grid_id = grid_id_list[i]
num = grid_id_list[i + 1]
for n in range(num):
grid_id = first_grid_id + n
if (
grid_id not in existing_not_computed
and grid_id not in computed_not_existing
):
rechtswert = str(grid_id)[:3]
hochwert = str(grid_id)[3:]
filename = filename_tpl.replace("$Rechtswert$", rechtswert).replace("$Hochwert$", hochwert)
url = data_base_url.replace("productId", product_id).replace("filename", filename)
vsi_url = f"/vsizip/vsicurl/{url}/{filename.replace('_tiff', '').replace('.zip', '.tif')}"
if vsi_url not in urls:
urls.append(vsi_url)
fid += 1
x1 = int(rechtswert) * 1000
y1 = int(hochwert) * 1000
x2 = x1 + TILESIZE
y2 = y1 + TILESIZE
feat = {
"type": "Feature", "properties": {
"fid": fid,
"location": vsi_url
},
"geometry": {
"type": "Polygon", "coordinates": [[[x1, y1], [x2, y1], [x2, y2], [x1, y2], [x1, y1]]]
}
}
geojson_dict["features"].append(feat)

print(f"Found {len(urls)} {DATA_TYPE} files.")

with open("tindex.geojson", "w") as f:
json.dump(geojson_dict, f, indent=4)

# create GPKG from GeoJson
gpkg_file = f"{TINDEX.split('.')[0]}.gpkg"
stream = os.popen(f"ogr2ogr {gpkg_file} tindex.geojson")
ogr2ogr_out = stream.read()

# verify
print("Verifying vector tile index:")
stream = os.popen(f"ogrinfo -so -al {gpkg_file}")
tindex_verification = stream.read()
print(tindex_verification)

# package
if os.path.isfile(TINDEX):
os.remove(TINDEX)
stream = os.popen(f"gzip {gpkg_file}")
create_gz = stream.read()
print(f"<{TINDEX}> created")

# cleanup
if os.path.isfile("tindex.geojson"):
os.remove("tindex.geojson")
if os.path.isfile(gpkg_file):
os.remove(gpkg_file)
Binary file modified DSM/SN/SN_DOM_tileindex_proj.gpkg.gz
Binary file not shown.
Binary file removed DSM/SN/SN_DOM_tiles.csv.gz
Binary file not shown.
10 changes: 2 additions & 8 deletions DTM/SN/README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
This folder contains a tileindex for a DTM ("DGM") of Sachsen. It was created based on a .csv file containing list of URLs.
This folder contains a tileindex for a DTM ("DGM") of Sachsen. It was created with the script `SN_DOM_tileindex.py` and requests the site "https://www.geodaten.sachsen.de/batch-download-4719.html".

Tile index:
- `SN_DGM_tileindex_proj.gpkg.gz`
- contains one poylgon for every DTM
- download URL for each poylgon is stored as attribute

Generate Tile_index with .csv file:
- Download links of each file need to be manually copied from https://www.geodaten.sachsen.de/batch-download-4719.html for whole saxony, product: "Digitales Geländemodell 1m (DGM1)"
- a .csv file is created, containing each download URL for all DGMs (no seperator, one URL per line) including the prefix "/vsizip/vsicurl/"
- based on .csv, a tileindex.gpkg can be generated by running: `gdaltindex -f GPKG SN_DGM_tileindex.gpkg --optfile SN_DGM_tiles.csv`
- reprojected via QGIS
- vsicurl download URL for each poylgon is stored as attribute

License:
- `license.txt`
Expand Down
161 changes: 161 additions & 0 deletions DTM/SN/SN_DGM_tileindex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
############################################################################
#
# NAME: SN_DGM_tileindex.py
#
# AUTHOR(S): Anika Weinmann
# mundialis GmbH & Co. KG, Bonn
# https://www.mundialis.de
#
# PURPOSE: Create tile index of DGM tiff files
#
# Data source: https://www.geodaten.sachsen.de/batch-download-4719.html
#
# COPYRIGHT: (C) 2023 by Anika Weinmann, mundialis
#
# REQUIREMENTS: gdal, gzip
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
############################################################################

# Usage:
# python3 DTM/SN/SN_DGM_tileindex.py
# Output:
# DTM/SN/SN_DGM_tileindex_proj.gpkg.gz


from bs4 import BeautifulSoup
import requests as req
import json
import re
import os

SN_URL = "https://www.geodaten.sachsen.de/batch-download-4719.html"
DATA_TYPE = "DGM1_TIFF"
TILESIZE = 1000
TINDEX = "SN_DGM_tileindex_proj.gpkg.gz"

os.chdir("DTM/SN/")


def get_all_grid_ids(grid_ids):
all_grid_ids = []
for i in range(0, len(grid_ids), 2):
id = grid_ids[i]
num = grid_ids[i + 1]
for n in range(num):
grid_id = id + n
all_grid_ids.append(grid_id)
return all_grid_ids


# get HTML from SN_URL and extract js script
web = req.get(SN_URL)
html = BeautifulSoup(web.text, 'lxml')
js_scripts = html.find_all("script", type="text/javascript")
js_script = ""
for jss in js_scripts:
if "function copyURLs" in str(jss):
js_script = str(jss)

# extract vars from js script
## data url
data_base_url = "".join([x.strip() for x in js_script.split(
"function createGeoCloudURL", 1
)[-1].split("}", 1)[0].split("return")[-1].split("+")]).replace(
"(", ""
).replace(")", "").replace("'", "")
## product
batchConfig_products_str = "{" + re.findall(r"batchConfig.products={(.*?)}*\n", js_script)[0].strip()
batchConfig_products = json.loads(batchConfig_products_str)
products = batchConfig_products[DATA_TYPE]
product_id = products["share_id"]
filename_tpl = products["filename"]
# resolution = int(products["packagesize"]) / 1000.
existing_not_computed = get_all_grid_ids(products["existing_not_computed"])
computed_not_existing = get_all_grid_ids(products["computed_not_existing"])
## mapping
batchConfig_mapping_str = "{" + re.findall(r"batchConfig.mapping={(.*?)}*\n", js_script)[0].strip()
batchConfig_mapping = json.loads(batchConfig_mapping_str)

# create GeoJson
geojson_dict = {
"type": "FeatureCollection",
"name": "tindex",
"crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::25833"}},
"features": []
}

# create tile urls
urls = []
fid = 0
for map in batchConfig_mapping.values():
grid_id_list = map["grid_id"]
for i in range(0, len(grid_id_list), 2):
first_grid_id = grid_id_list[i]
num = grid_id_list[i + 1]
for n in range(num):
grid_id = first_grid_id + n
if (
grid_id not in existing_not_computed
and grid_id not in computed_not_existing
):
rechtswert = str(grid_id)[:3]
hochwert = str(grid_id)[3:]
filename = filename_tpl.replace("$Rechtswert$", rechtswert).replace("$Hochwert$", hochwert)
url = data_base_url.replace("productId", product_id).replace("filename", filename)
vsi_url = f"/vsizip/vsicurl/{url}/{filename.replace('_tiff', '').replace('.zip', '.tif')}"
if vsi_url not in urls:
urls.append(vsi_url)
fid += 1
x1 = int(rechtswert) * 1000
y1 = int(hochwert) * 1000
x2 = x1 + TILESIZE
y2 = y1 + TILESIZE
feat = {
"type": "Feature", "properties": {
"fid": fid,
"location": vsi_url
},
"geometry": {
"type": "Polygon", "coordinates": [[[x1, y1], [x2, y1], [x2, y2], [x1, y2], [x1, y1]]]
}
}
geojson_dict["features"].append(feat)

print(f"Found {len(urls)} {DATA_TYPE} files.")

with open("tindex.geojson", "w") as f:
json.dump(geojson_dict, f, indent=4)

# create GPKG from GeoJson
gpkg_file = f"{TINDEX.split('.')[0]}.gpkg"
stream = os.popen(f"ogr2ogr {gpkg_file} tindex.geojson")
ogr2ogr_out = stream.read()

# verify
print("Verifying vector tile index:")
stream = os.popen(f"ogrinfo -so -al {gpkg_file}")
tindex_verification = stream.read()
print(tindex_verification)

# package
if os.path.isfile(TINDEX):
os.remove(TINDEX)
stream = os.popen(f"gzip {gpkg_file}")
create_gz = stream.read()
print(f"<{TINDEX}> created")

# cleanup
if os.path.isfile("tindex.geojson"):
os.remove("tindex.geojson")
if os.path.isfile(gpkg_file):
os.remove(gpkg_file)
Binary file modified DTM/SN/SN_DGM_tileindex_proj.gpkg.gz
Binary file not shown.
Binary file removed DTM/SN/SN_DGM_tiles.csv.gz
Binary file not shown.

0 comments on commit 8b83f5a

Please sign in to comment.