From fb9939d647877cada694d51dcf4c6cdf036b7636 Mon Sep 17 00:00:00 2001 From: Roelof Rietbroek Date: Wed, 20 Mar 2024 15:26:18 +0100 Subject: [PATCH] move static gravity fields, degree 1 and 2 corrrections from geoslurp to this module --- pyproject.toml | 4 + src/shxarray/geoslurp/deg1n2.py | 365 +++++++++++++++++++++++++++++ src/shxarray/geoslurp/gravity.py | 130 ++++++++++ src/shxarray/geoslurp/icgem.py | 126 ++++++++++ src/shxarray/geoslurp/icgemdset.py | 78 ++++++ 5 files changed, 703 insertions(+) create mode 100644 src/shxarray/geoslurp/deg1n2.py create mode 100644 src/shxarray/geoslurp/gravity.py create mode 100644 src/shxarray/geoslurp/icgem.py create mode 100644 src/shxarray/geoslurp/icgemdset.py diff --git a/pyproject.toml b/pyproject.toml index 1d575ed..271aaa7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,4 +39,8 @@ shlib = "shxarray.shlib:SHComputeBackend" "Homepage" = "https://github.com/ITC-Water-Resources/shxarray" "Bug Tracker" = "https://github.com/ITC-Water-Resources/shxarray/issues" +[project.entry-points."geoslurp.dsetfactories"] +deg1n2corr = "shxarray.geoslurp.deg1n2:getDeg1n2corrDsets" +[project.entry-points."geoslurp.dsets"] +icgemstatic = "shxarray.geoslurp.icgemdset:ICGEMstatic" diff --git a/src/shxarray/geoslurp/deg1n2.py b/src/shxarray/geoslurp/deg1n2.py new file mode 100644 index 0000000..0798b1b --- /dev/null +++ b/src/shxarray/geoslurp/deg1n2.py @@ -0,0 +1,365 @@ +# This file is part of the shxarray software which is licensed +# under the Apache License version 2.0 (see the LICENSE file in the main repository) +# Copyright Roelof Rietbroek (r.rietbroek@utwente.nl), 2024 +# + + +from geoslurp.dataset import DataSet +from geoslurp.datapull.ftp import Uri as ftp +from geoslurp.datapull.http import Uri as http +from datetime import datetime,timedelta +from shxarray.core.time import decyear2dt +import os +import re +from shxarray.geoslurp.gravity import GravitySHTBase,GravitySHinDBTBase,Trig,JSONSHArchive +import xarray as xr + 
+schema="shxarray" + +# class geocenter_Rietbroeketal2016upd(DataSet): + # fout="Geocenter_dec2017.tgz" + # sqrt3timesRE=11047256.4063275 + # schema=schema + # table=type("geocenter_Rietbroeketal2016updTable", (GravitySHTBase,), {}) + # def __init__(self,dbconn): + # super().__init__(dbconn) + # # super().__init__(direc=direc,uri='https://wobbly.earth/data/Geocenter_dec2017.tgz',order=['c10','c11','s11'],lastupdate=datetime(2018,10,16)) + + # def pull(self): + # """Pulls the geocenter ascii files in the cache""" + + # uri=http("https://wobbly.earth/data/"+self.fout,lastmod=datetime(2018,10,16)).download(self.cacheDir(),check=True) + + + # def register(self): + # self.truncateTable() + # #set general settings + # self._dbinvent.data={"citation":"Rietbroek, R., Brunnabend, S.-E., Kusche, J., Schröter, J., Dahle, C., 2016. " \ + # "Revisiting the Contemporary Sea Level Budget on Global and Regional Scales. " \ + # "Proceedings of the National Academy of Sciences 201519132. " \ + # "https://doi.org/10.1073/pnas.1519132113"} + + # with tarfile.open(os.path.join(self.cacheDir(),self.fout),'r:gz') as tf: + + # metacomb=[] + + # files=['Geocenter/GeocentCM-CF_Green.txt', + # 'Geocenter/GeocentCM-CF_Antarctica.txt', + # 'Geocenter/GeocentCM-CF_Hydrology.txt', + # 'Geocenter/GeocentCM-CF_LandGlaciers.txt', + # 'Geocenter/GeocentCM-CF_GIA.txt', + # 'Geocenter/GeocentCM-CF_TotSurfload.txt'] + + # order=[(1,1,Trig.c),(1,1,Trig.s),(1,0,Trig.c)] + # lastupdate=datetime.now() + # for file in files: + # #get files + # with tf.extractfile(file) as fid: + # for ln in fid: + # shar=JSONSHArchive(1) + # lnspl=ln.decode('utf-8').split() + # tcent=decyear2dt(float(lnspl[0])) + # tstart,tend=dt2monthlyinterval(tcent) + + # meta={"type":file.split('_')[-1][:-4],"time":tcent,"tstart":tstart,"tend":tend,"lastupdate":lastupdate,"nmax":1,"omax":1,"origin":"CF","format":"JSONB","uri":"self:data","gm":0.3986004415e+15,"re":0.6378136460e+07 +# } + + # for el,val in zip(order,lnspl[1:4]): + # # 
# import pdb;pdb.set_trace()
#                         shar["cnm"][shar.idx(el)]=float(val)/self.sqrt3timesRE
#
#                     #also add sigmas
#                     for el,val in zip(order,lnspl[4:7]):
#                         shar["sigcnm"][shar.idx(el)]=float(val)/self.sqrt3timesRE
#                     meta["data"]=shar.dict
#                     self.addEntry(meta)
#         self.updateInvent()


def parseGSMDate(dtstr):
    """Parse a date string as found in GSM files (yyyymmdd.00000).

    Only the first eight characters (year, month, day) are interpreted;
    anything after them is ignored.
    """
    return datetime(int(dtstr[0:4]), int(dtstr[4:6]), int(dtstr[6:8]))


class geocenter_GRCRL06_TN13(DataSet):
    """Degree-1 (geocenter) coefficients from a GRACE technical note 13 file.

    The file name is expected in the class attribute ``fout``; it is not set
    here but injected by the subclasses built in getDeg1n2corrDsets().
    """
    schema = schema
    rooturl = "http://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-docs/grace/open/docs/"

    def __init__(self, dbconn):
        # the table class is created per (sub)class so each dataset gets a table named after it
        self.table = type(self.__class__.__name__.lower().replace('-', "_") + "Table", (GravitySHinDBTBase,), {})
        super().__init__(dbconn)

    def pull(self):
        """Pulls the geocenter ascii files in the cache"""
        http(self.rooturl + self.fout, lastmod=datetime(2019, 12, 1)).download(self.cacheDir(), check=True)

    def register(self):
        """Parse the cached TN-13 file and add one table entry per epoch.

        Rows are accumulated until a row with order m == 1 has been read; at
        that point the collected cosine/sine coefficients are stored as one
        entry and the accumulators are reset.
        """
        self.truncateTable()
        #set general settings
        self._dbinvent.data = {"citation": "GRACE technical note 13"}
        lastupdate = datetime.now()
        with open(os.path.join(self.cacheDir(), self.fout), 'r') as fid:
            #skip header
            for ln in fid:
                if re.search("end of header", ln):
                    break

            nv = []
            mv = []
            tv = []
            cnmv = []
            sigcnmv = []
            #loop over entry lines
            for ln in fid:
                fields = ln.split()
                if not fields:
                    # tolerate blank lines instead of failing on the unpack below
                    continue
                _, n, m, cnm, snm, sigcnm, sigsnm, ts, te = fields

                #Append cosine coefficients
                n = int(n)
                m = int(m)
                nv.append(n)
                mv.append(m)
                tv.append(0)
                cnmv.append(float(cnm))
                sigcnmv.append(float(sigcnm))

                #append sine coefficients
                if m > 0:
                    nv.append(n)
                    mv.append(m)
                    tv.append(1)
                    cnmv.append(float(snm))
                    sigcnmv.append(float(sigsnm))

                if m == 1:
                    #register the accumulated entry
                    tstart = parseGSMDate(ts)
                    tend = parseGSMDate(te)
                    #get the central time
                    tcent = tstart + (tend - tstart) / 2
                    #snap the central epoch to the 15th of the month of the central time
                    # tcent=datetime(tstart.year,tstart.month,15)

                    meta = {"type": "GSM", "time": tcent, "tstart": tstart, "tend": tend,
                            "lastupdate": lastupdate, "nmax": 1, "omax": 1, "origin": "CF",
                            "format": "JSONB", "gm": 0.3986004415e+15, "re": 0.6378136460e+07}
                    meta["data"] = xr.Dataset(
                        data_vars=dict(cnm=(["shg"], cnmv), sigcnm=(["shg"], sigcnmv)),
                        coords=dict(n=(["shg"], nv), m=(["shg"], mv), t=(["shg"], tv)))

                    self.addEntry(meta)
                    nv = []
                    mv = []
                    tv = []
                    cnmv = []
                    sigcnmv = []

        self.updateInvent()


class GeocenterRIESCFCM(DataSet):
    """Geocenter series read from the two CSR SLR files (30 and 60 day versions)."""
    fout30 = "GCN_L1_L2_30d_CF-CM.txt"
    fout60 = "GCN_L1_L2_60d_CF-CM.txt"
    #note also embed mm to meter conversion in here (e3)
    sqrt3timesRE = 11047256.23312e3
    schema = schema
    table = type("geocenter_ries_cfcmTable", (GravitySHTBase,), {})

    def __init__(self, dbconn):
        super().__init__(dbconn)
        # super().__init__(direc=direc,uri='https://wobbly.earth/data/Geocenter_dec2017.tgz',order=['c10','c11','s11'],lastupdate=datetime(2018,10,16))

    def pull(self):
        """Pulls the geocenter ascii files in the cache"""
        http("http://download.csr.utexas.edu/pub/slr/geocenter/" + self.fout30).download(self.cacheDir())
        http("http://download.csr.utexas.edu/pub/slr/geocenter/" + self.fout60).download(self.cacheDir())

    def register(self):
        """Empty the table and register the entries of both cached files."""
        self.truncateTable()
        #set general settings
        self._dbinvent.data = {"citation": "Ries, J.C., 2016. Reconciling estimates of annual geocenter motion from space geodesy, in: Proceedings of the 20th International Workshop on Laser Ranging, Potsdam, Germany. pp. 10–14."}

        self.extractSLR(os.path.join(self.cacheDir(), self.fout30))
        self.extractSLR(os.path.join(self.cacheDir(), self.fout60))
        self.updateInvent()

    def extractSLR(self, filen):
        """Add one entry per data line of the file *filen*.

        The half-width of the time interval is 15 days when the file name
        contains '30d' and 30 days otherwise. Columns 1-3 hold the values and
        columns 4-6 the sigmas, in the order C11, S11, C10; both are divided
        by ``sqrt3timesRE``.
        """
        lastupdate = datetime.now()
        order = [(1, 1, Trig.c), (1, 1, Trig.s), (1, 0, Trig.c)]
        if re.search('30d', filen):
            dt = timedelta(days=15)
        else:
            dt = timedelta(days=30)

        with open(filen, 'r') as fid:
            #skip header
            fid.readline()
            for ln in fid:
                shar = JSONSHArchive(1)
                lnspl = ln.split()
                #note little hack as the 60d file version has an empty line at the back
                if not lnspl:
                    break
                tcent = decyear2dt(float(lnspl[0]))
                tstart = tcent - dt
                tend = tcent + dt

                # characters 10-12 of the file name ('30d' or '60d') are appended to the type
                meta = {"type": "GSM" + os.path.basename(filen)[10:13], "time": tcent,
                        "tstart": tstart, "tend": tend, "lastupdate": lastupdate,
                        "nmax": 1, "omax": 1, "origin": "CF", "format": "JSONB",
                        "uri": "self:data", "gm": 0.3986004415e+15, "re": 0.6378136460e+07}

                for el, val in zip(order, lnspl[1:4]):
                    shar["cnm"][shar.idx(el)] = float(val) / self.sqrt3timesRE

                #also add sigmas
                for el, val in zip(order, lnspl[4:7]):
                    shar["sigcnm"][shar.idx(el)] = float(val) / self.sqrt3timesRE
                meta["data"] = shar.dict
                self.addEntry(meta)


class TN14SLRGSFC(DataSet):
    """C20 (and, where present, C30) replacement values from GRACE technical note 14."""
    schema = schema
    rooturl = "ftp://isdcftp.gfz-potsdam.de/grace-fo/DOCUMENTS/TECHNICAL_NOTES/"
    fout = "TN-14_C30_C20_SLR_GSFC.txt"

    def __init__(self, dbconn):
        self.table = type(self.__class__.__name__.lower().replace('-', "_") + "Table", (GravitySHinDBTBase,), {})
        super().__init__(dbconn)

    def pull(self):
        """Pulls the C20 Technical note 14"""
        ftp(self.rooturl + self.fout, lastmod=datetime(2019, 12, 1)).download(self.cacheDir(), check=True)

    def register(self):
        """Parse the cached TN-14 file and add one table entry per data line.

        Start and end epochs are derived from the modified Julian day columns.
        A C30 coefficient is only added when its column is not the string
        "NaN"; the stored ``nmax`` reflects whether the entry holds C30.
        """
        self.truncateTable()
        #set general settings
        self._dbinvent.data = {"citation": "GRACE technical note 14"}
        lastupdate = datetime.now()
        # reference epoch to which the day counts in the file are added
        mjd00 = datetime(1858, 11, 17)
        omax = 0
        with open(os.path.join(self.cacheDir(), self.fout), 'r') as fid:
            #skip header
            for ln in fid:
                if re.search("^Product:", ln):
                    break

            #loop over entry lines
            for ln in fid:
                fields = ln.split()
                if not fields:
                    # tolerate blank lines instead of failing on the unpack below
                    continue
                mjd0, _, c20, dc20, sigc20, c30, dc30, sigc30, mjd1, _ = fields

                # NOTE(review): dc20/dc30 are scaled by 1e-10 but the sigma columns are
                # stored unscaled (as in the original code) - confirm the sigma units.
                #c20 coefficients
                nv = [2]
                mv = [0]
                tv = [0]
                cnmv = [float(c20)]
                dcnmv = [float(dc20) * 1e-10]
                sigcnmv = [float(sigc20)]
                # nmax is decided per entry, so a row without C30 is never labelled degree 3
                nmax = 2
                if c30 != "NaN":
                    nmax = 3
                    nv.append(3)
                    mv.append(0)
                    tv.append(0)
                    cnmv.append(float(c30))
                    dcnmv.append(float(dc30) * 1e-10)
                    sigcnmv.append(float(sigc30))

                #register the accumulated entry
                tstart = mjd00 + timedelta(days=float(mjd0))
                tend = mjd00 + timedelta(days=float(mjd1))
                tcent = tstart + (tend - tstart) / 2

                meta = {"type": "GSM", "time": tcent, "tstart": tstart, "tend": tend,
                        "lastupdate": lastupdate, "nmax": nmax, "omax": omax,
                        "format": "JSONB", "gm": 0.3986004415e+15, "re": 0.6378136460e+07}
                meta["data"] = xr.Dataset(
                    data_vars=dict(cnm=(["shg"], cnmv), dcnm=(["shg"], dcnmv), sigcnm=(["shg"], sigcnmv)),
                    coords=dict(n=(["shg"], nv), m=(["shg"], mv), t=(["shg"], tv)))

                self.addEntry(meta)
        self.updateInvent()


def getDeg1n2corrDsets(conf):
    """Return the dataset classes for the degree 1 and 2 corrections.

    One geocenter_GRCRL06_TN13 subclass is generated per processing center
    (CSR, GFZ, JPL), each carrying its own ``fout`` file name, followed by
    GeocenterRIESCFCM and TN14SLRGSFC. The *conf* argument is not used.
    """
    out = [
        type("geocenter_" + center + "RL06_TN13", (geocenter_GRCRL06_TN13,),
             {"fout": "TN-13_GEOC_" + center + "_RL06.txt"})
        for center in ["CSR", "GFZ", "JPL"]
    ]
    out.append(GeocenterRIESCFCM)
    out.append(TN14SLRGSFC)
    return out

# class Sun2017Comb(LowdegreeSource):
#     def __init__(self,direc,uri=None):
#         if not uri:
#             uri='https://d1rkab7tlqy5f1.cloudfront.net/CiTG/Over%20faculteit/Afdelingen/' \
#                 'Geoscience%20%26%20Remote%20sensing/Research/Gravity/models%20and%20data/3_Deg1_C20_CMB.txt'
#         lastupdate=datetime(2018,1,1)
#         super().__init__(direc=direc,uri=uri,lastupdate=lastupdate)
#     def extract(self):
#         singlemeta=self.meta
#
#         citation="Reference: Yu Sun, Pavel Ditmar, Riccardo Riva (2017), Statistically optimal" \
#             " estimation of degree-1 and C20 coefficients based on GRACE data and an " \
#             "ocean bottom pressure model Geophysical Journal International, 210(3), " \
#             "1305-1322, 2017.
doi:10.1093/gji/ggx241." + + + # valorder=['c10','c11','s11'] + # covorder=[('c10','c10'),('c10','c11'),('c10','s11'),('c10','c20'),('c11','c11'),('c11','s11'),('c11','c20'),('s11','s11'),('s11','c20'),('c20','c20')] + + # singlemeta['data']["citation"]=citation + # with open(os.path.join(self.direc,self.fout),'rt') as fid: + # dataregex=re.compile('^ +[0-9]') + # time=[] + # for ln in fid: + # if dataregex.match(ln): + # lnspl=ln.split() + # time.append(decyear2dt(float(lnspl[0]))) + # d12=[0]*4 + # for el,val in zip(valorder,lnspl[1:4]): + # d12[self.dindex(el)]=val + # singlemeta['data']['d12'].append(d12) + + + # covUpper=[0]*10 + # for (el1,el2),val in zip(covorder,lnspl[5:]): + # covUpper[self.covindex(el1,el2)]=val + # singlemeta['data']['covUpper'].append(covUpper) + + # singlemeta["data"]["time"]=[dt.isoformat() for dt in time] + # singlemeta['tstart']=min(time) + # singlemeta['tend']=max(time) + # singlemeta['lastupdate']=datetime.now() + + # return [singlemeta] + +# class Sun2017Comb_GIArestored(Sun2017Comb): + # def __init__(self,direc): + # uri="https://d1rkab7tlqy5f1.cloudfront.net/CiTG/Over%20faculteit/Afdelingen/" \ + # "Geoscience%20%26%20Remote%20sensing/Research/Gravity/models%20and%20data/" \ + # "4_Deg1_C20_CMB_GIA_restored.txt" + # super().__init__(direc,uri) + +# class GeocTable(GeocTBase): + # """Defines the Geocenter motion table""" + # __tablename__='deg1n2' + # id=Column(Integer,primary_key=True) + # name=Column(String,unique=True) + # lastupdate=Column(TIMESTAMP) + # tstart=Column(TIMESTAMP) + # tend=Column(TIMESTAMP) + # origin=Column(String) + # data=Column(JSONB) + + # "ftp://ftp.csr.utexas.edu/pub/slr/geocenter/" + # # {'name':'Rietbroeketal2016updated','uri':'https://wobbly.earth/data/Geocenter_dec2017.tgz','lastupdate':datetime(2018,10,16)}, + # # {'name':'SwensonWahr2008','uri':'ftp://podaac.jpl.nasa.gov/allData/tellus/L2/degree_1/deg1_coef.txt','lastupdate':datetime(2018,10,16)}, + # # 
# # {'name':'Sun2017Comb','uri':'https://d1rkab7tlqy5f1.cloudfront.net/CiTG/Over%20faculteit/Afdelingen/Geoscience%20%26%20Remote%20sensing/Research/Gravity/models%20and%20data/3_Deg1_C20_CMB.txt'
# # ,'lastupdate':datetime(2018,10,16)},
# ]


# diff --git a/src/shxarray/geoslurp/gravity.py b/src/shxarray/geoslurp/gravity.py
# new file mode 100644
# index 0000000..2a1a6e7
# --- /dev/null
# +++ b/src/shxarray/geoslurp/gravity.py
# @@ -0,0 +1,130 @@
# This file is part of geoslurp.
# geoslurp is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3 of the License, or (at your option) any later version.

# geoslurp is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public
# License along with geoslurp; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# Author Roelof Rietbroek (roelof@geod.uni-bonn.de), 2018

from sqlalchemy.ext.declarative import declared_attr, as_declarative
from sqlalchemy import MetaData
from sqlalchemy import Column,Integer,String, Boolean,Float
from sqlalchemy.dialects.postgresql import TIMESTAMP, JSONB
from geoslurp.types.json import DataArrayJSONType


schema="shxarray"

#define a declarative baseclass for spherical harmonics gravity data
@as_declarative(metadata=MetaData(schema=schema))
class GravitySHTBase(object):
    """Declarative base for gravity tables whose `data` column is plain JSONB."""
    @declared_attr
    def __tablename__(cls):
        #strip of the 'Table' from the class name
        return cls.__name__[:-5].lower()
    id = Column(Integer, primary_key=True)
    lastupdate = Column(TIMESTAMP)
    tstart = Column(TIMESTAMP, index=True)
    tend = Column(TIMESTAMP, index=True)
    time = Column(TIMESTAMP, index=True)
    nmax = Column(Integer)
    omax = Column(Integer)
    gm = Column(Float)
    re = Column(Float)
    tidesystem = Column(String)
    origin = Column(String)
    format = Column(String)
    type = Column(String)
    uri = Column(String)
    data = Column(JSONB)


@as_declarative(metadata=MetaData(schema=schema))
class GravitySHinDBTBase(object):
    """Declarative base for gravity tables whose `data` column uses DataArrayJSONType.

    Compared to GravitySHTBase there is no `uri` column, and the table
    arguments set `extend_existing`.
    """
    @declared_attr
    def __tablename__(cls):
        #strip of the 'Table' from the class name
        return cls.__name__[:-5].lower()
    id = Column(Integer, primary_key=True)
    lastupdate = Column(TIMESTAMP)
    tstart = Column(TIMESTAMP, index=True)
    tend = Column(TIMESTAMP, index=True)
    time = Column(TIMESTAMP, index=True)
    nmax = Column(Integer)
    omax = Column(Integer)
    gm = Column(Float)
    re = Column(Float)
    tidesystem = Column(String)
    origin = Column(String)
    format = Column(String)
    type = Column(String)
    data = Column(DataArrayJSONType)
    __table_args__ = {'extend_existing': True}


class Trig():
    """Enum to distinguish between a trigonometric cosine and sine coefficient"""
    c = 0
    s = 1


class JSONSHArchive():
    """JSON Archive which stores SH data, with sigmas and possibly a covariance
    Note this mimics the Archive interface of frommle without actually requiring its import"""

    def __init__(self, nmax=None, datadict=None):
        """Create the archive from a maximum degree or from an existing dict.

        :param nmax: maximum degree; when truthy, the index list "shg" is
            filled with (n,m,Trig.c) and, for m > 0, (n,m,Trig.s) tuples
        :param datadict: used as the underlying storage when nmax is not given
        :raises RuntimeError: when neither nmax nor datadict is supplied
        """
        if nmax:
            #create from maximum degree
            shg = []
            for n in range(nmax + 1):
                for m in range(n + 1):
                    shg.append((n, m, Trig.c))
                    if m > 0:
                        shg.append((n, m, Trig.s))
            self.data_ = {"attr": {"nmax": nmax}, "vars": {"shg": shg}}
        elif datadict:
            self.data_ = datadict
        else:
            raise RuntimeError("Can only construct a JSONSHArchive from either nmax or a datadict")

    def __getitem__(self, key):
        """retrieves a named variable, and lazily creates allowed variables when requested"""
        variables = self.data_["vars"]
        if key not in variables:
            nl = len(variables["shg"])
            if key in ["cnm", "sigcnm"]:
                variables[key] = [0] * nl
            elif key == "covcnm":
                # build independent rows; [[0]*nl]*nl would repeat one and the same row object
                variables[key] = [[0] * nl for _ in range(nl)]
        # any other unknown key ends up as a KeyError here
        return variables[key]

    def __setitem__(self, key, item):
        """stores item as the named variable key"""
        self.data_["vars"][key] = item

    def idx(self, nmt):
        """returns the index of the n,m,t tuple"""
        return self.data_["vars"]["shg"].index(nmt)

    @property
    def attr(self):
        """get the stored global attributes of the file"""
        return self.data_["attr"]

    @attr.setter
    def attr(self, attrdict):
        """sets the stored global attributes of the file"""
        self.data_["attr"] = attrdict

    @property
    def dict(self):
        """the underlying dictionary holding both attributes and variables"""
        return self.data_


# diff --git a/src/shxarray/geoslurp/icgem.py b/src/shxarray/geoslurp/icgem.py
# new file mode 100644
# index 0000000..e288c26
# --- /dev/null
# +++ b/src/shxarray/geoslurp/icgem.py
# @@ -0,0 +1,126 @@
# This file is part of geoslurp.
# geoslurp is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3 of the License, or (at your option) any later version.

# geoslurp is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public
# License along with Frommle; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# Author Roelof Rietbroek (roelof@geod.uni-bonn.de), 2018

from geoslurp.datapull import UriBase,CrawlerBase
from geoslurp.datapull.http import Uri as http
from geoslurp.config.slurplogger import slurplogger
import gzip as gz
import re
from lxml.etree import HTML as HTMLtree
import os
from datetime import datetime


def icgemMetaExtractor(uri):
    """Extract meta information from a gzipped icgem file

    :param uri: object providing the attributes `url` (path of the gzipped
        file) and `lastmod`
    :returns: dictionary with the keys nmax, lastupdate, format, gm, re, uri,
        type and data, plus tidesystem when the header states a zero_tide or
        tide_free system
    :raises ValueError: when a required header entry is missing or cannot be
        converted to a number
    """
    hdr = {}
    with gz.open(uri.url, 'rt') as fid:
        slurplogger().info("Extracting info from %s"%(uri.url))
        for ln in fid:
            # Stop at the end of the header. No check for a preceding begin_of_head
            # marker is made (that check was already disabled in the original code),
            # so every 2-column line up to this point counts as a header entry.
            if 'end_of_head' in ln:
                break

            spl = ln.split()
            if len(spl) == 2:
                hdr[spl[0]] = spl[1]

    try:
        meta = {"nmax": int(hdr["max_degree"]),
                "lastupdate": uri.lastmod,
                "format": "icgem",
                # numbers may be written with a Fortran style 'D' exponent
                "gm": float(hdr["earth_gravity_constant"].replace('D', 'E')),
                "re": float(hdr["radius"].replace('D', 'E')),
                "uri": uri.url,
                "type": "GSM",
                "data": {"name": hdr["modelname"]}
                }
    except (KeyError, ValueError) as exc:
        # fail with a clear message rather than continuing without a meta dictionary
        raise ValueError("Cannot extract the icgem header information from %s"%(uri.url)) from exc

    #add tide system (optional header entry)
    tidesys = hdr.get("tide_system", "")
    if re.search('zero_tide', tidesys):
        meta["tidesystem"] = "zero-tide"
    elif re.search('tide_free', tidesys):
        meta["tidesystem"] = "tide-free"

    return meta


class Uri(UriBase):
    """Holds an uri to an icgem static field"""
    def __init__(self, url, lastmod=None, name=None, ref=None, nmax=None, year=None):
        if year and not lastmod:
            #use year as the last modification time
            lastmod = datetime(year, 12, 31)
        super().__init__(url, lastmod)
        self.name = name
        self.ref = ref
        self.nmax = nmax


class Crawler(CrawlerBase):
    """Crawl icgem static fields"""
    def __init__(self):
        super().__init__(url="http://icgem.gfz-potsdam.de/tom_longtime")
        buf = http(self.rooturl).buffer()
        self._roothtml = HTMLtree(buf.getvalue())

    def uris(self):
        """List uris of available static models

        Yields one Uri per table row that has a downloadable model file; rows
        without a model file link are skipped.
        """
        rowregex = re.compile('(^tom-row(?!-header))|(^tom-row-odd)')
        for elem in self._roothtml.iterfind('.//tr'):
            # rows without a class attribute cannot be model rows
            if not rowregex.match(elem.attrib.get('class', '')):
                continue

            uridict = {}
            nameelem = elem.find(".//td[@class='tom-cell-name']")
            # the text is None when the cell starts directly with a child element
            celltext = nameelem.text or ''
            if celltext.strip() != '':
                #just find the name and strip line ending
                uridict["name"] = celltext.lstrip()[:-1]
            else:
                #find a name and reference
                nameelem = nameelem.find(".//a[@href]")
                uridict["name"] = nameelem.text
                uridict["ref"] = nameelem.attrib['href']

            #find the year, maximum degree, doi etc
            uridict["year"] = int(elem.find(".//td[@class='tom-cell-year']").text)
            uridict["nmax"] = int(elem.find(".//td[@class='tom-cell-degree']").text)
            try:
                uridict["url"] = os.path.dirname(self.rooturl) + elem.find(".//td[@class='tom-cell-modelfile']").find(".//a[@href]").attrib["href"]
            except AttributeError:
                #not available for download so skip this entry
                continue

            try:
                uridict["ref"] = elem.find(".//td[@class='tom-cell-doilink']").find(".//a[@href]").attrib["href"]
            except AttributeError:
                #no problem as this entry is optional just pass
                pass
            yield Uri(**uridict)


# diff --git a/src/shxarray/geoslurp/icgemdset.py b/src/shxarray/geoslurp/icgemdset.py
# new file mode 100644
# index 0000000..cac5cbd
# --- /dev/null
# +++ b/src/shxarray/geoslurp/icgemdset.py
# @@ -0,0 +1,78 @@
# This file is part of shxarray.
# This file is part of the shxarray software which is licensed
# under the Apache License version 2.0 (see the LICENSE file in the main repository)
# Copyright Roelof Rietbroek (r.rietbroek@utwente.nl), 2024
# provides a dataset and table for static gravity fields from the icgem website



from geoslurp.dataset import DataSet
from shxarray.geoslurp.gravity import GravitySHTBase
from shxarray.geoslurp.icgem import Crawler as IcgemCrawler
from shxarray.geoslurp.icgem import icgemMetaExtractor
from geoslurp.datapull.uri import findFiles
import re
from geoslurp.datapull import UriFile
import os

schema="shxarray"


class ICGEMstatic(DataSet):
    """Manages the static gravity fields which are hosted at http://icgem.gfz-potsdam.de/tom_longtime"""
    table = type("ICGEMstaticTable", (GravitySHTBase,), {})
    schema = schema
    stripuri = True

    def __init__(self, dbconn):
        super().__init__(dbconn)
        #initialize postgresql table
        GravitySHTBase.metadata.create_all(self.db.dbeng, checkfirst=True)
        # download results of the most recent pull() which reported an update
        self.updated = []

    def pull(self, pattern=None, list=False):
        """Pulls static gravity fields from the icgem website
        :param pattern: only download files whose name obeys this regular expression
        :param list (bool): only list available models"""
        self.updated = []
        crwl = IcgemCrawler()
        # compile once; None means that all models are accepted
        regex = re.compile(pattern) if pattern else None
        outdir = self.dataDir()
        if list:
            print("%12s %5s %4s"%("name","nmax", "year"))
        for uri in crwl.uris():
            if regex is not None and not regex.search(uri.name):
                continue
            if list:
                #only list available models
                print("%-12s %5d %4d"%(uri.name,uri.nmax,uri.lastmod.year))
            else:
                tmp, upd = uri.download(outdir, check=True, gzip=True)
                if upd:
                    self.updated.append(tmp)

    def register(self, pattern=None):
        """Register static gravity fields downloaded in the data directory
        :param pattern: only register files whose filename obeys this regular expression
        """
        if not pattern:
            # raw string: same pattern as before, without the invalid '\.' escape sequence
            pattern = r'.*\.gz'
        #create a list of files which need to be (re)registered
        if self.updated:
            files = self.updated
        else:
            files = [UriFile(file) for file in findFiles(self.dataDir(), pattern)]

        #loop over files
        for uri in files:
            urilike = os.path.basename(uri.url)

            if not self.uriNeedsUpdate(urilike, uri.lastmod):
                continue

            meta = icgemMetaExtractor(uri)
            self.addEntry(meta)

        self.updateInvent()