diff --git a/CMakeLists.txt b/CMakeLists.txt index 75cc5a752d..589d137ddd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1179,6 +1179,15 @@ IF(NOT FOUND_CURL) ENDIF() ENDIF(NOT FOUND_CURL) +# see if we have libxml2 +find_package(LibXml2) +IF(LibXml2_FOUND) +SET(HAVE_LIBXML2 TRUE) +INCLUDE_DIRECTORIES(${LIBXML2_INCLUDE_DIRS}) +ELSE() +SET(HAVE_LIBXML2 FALSE) +ENDIF() + IF(NOT ENABLE_BYTERANGE AND ENABLE_HDF5_ROS3) MESSAGE(WARNING "ROS3 support requires ENABLE_BYTERANGE=TRUE; disabling ROS3 support") SET(ENABLE_HDF5_ROS3 OFF CACHE BOOL "ROS3 support" FORCE) @@ -2080,6 +2089,7 @@ ENDIF() IF(ENABLE_DAP4) ADD_SUBDIRECTORY(libdap4) + ADD_SUBDIRECTORY(libncxml) ENDIF() IF(ENABLE_PLUGINS) diff --git a/Makefile.am b/Makefile.am index 98c2c5209f..7ffcee8f9c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -67,6 +67,7 @@ endif if ENABLE_DAP4 DAP4 = libdap4 +XML = libncxml NCDAP4TESTDIR = dap4_test endif #DAP4 @@ -112,7 +113,7 @@ endif # This is the list of subdirs for which Makefiles will be constructed # and run. ncgen must come before ncdump, because their tests # depend on it. -SUBDIRS = include $(H5_TEST_DIR) libdispatch libsrc $(LIBSRC4_DIR) \ +SUBDIRS = include $(H5_TEST_DIR) ${XML} libdispatch libsrc $(LIBSRC4_DIR) \ $(LIBSRCP) $(LIBHDF4) $(LIBHDF5) $(OCLIB) $(DAP2) ${DAP4} \ ${NCPOCO} ${ZARR} liblib \ $(NCGEN3) $(NCGEN) $(NCDUMP) ${PLUGIN_DIR} $(TESTDIRS) docs \ diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 45bfd2ff76..f631cff468 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release ## 4.8.2 - TBD +* [Enhancement] Support optional use of libxml2, otherwise default to ezxml. See [Github #2135](https://github.com/Unidata/netcdf-c/pull/2135) -- H/T to [Egbert Eich](https://github.com/e4t). * [Enhancement] Support byte-range reading of netcdf-3 files stored in private buckets in S3. 
See [Github #2134](https://github.com/Unidata/netcdf-c/pull/2134) * [Enhancement] Support Amazon S3 access for NCZarr. Also support use of the existing Amazon SDK credentials system. See [Github #2114](https://github.com/Unidata/netcdf-c/pull/2114) * [Bug Fix] Fix string allocation error in H5FDhttp.c. See [Github #2127](https://github.com/Unidata/netcdf-c/pull/2127). diff --git a/config.h.cmake.in b/config.h.cmake.in index 4a2f7d4252..0d944a0bd3 100644 --- a/config.h.cmake.in +++ b/config.h.cmake.in @@ -300,6 +300,9 @@ are set when opening a binary file on Windows. */ /* Define to 1 if you have the `pnetcdf' library (-lpnetcdf). */ #cmakedefine HAVE_LIBPNETCDF 1 +/* Define to 1 if you have the libxml2 library. */ +#cmakedefine HAVE_LIBXML2 1 + /* Define to 1 if you have the header file. */ #cmakedefine HAVE_LOCALE_H 1 diff --git a/configure.ac b/configure.ac index 6a053cc6ab..1154b981d5 100644 --- a/configure.ac +++ b/configure.ac @@ -1054,6 +1054,19 @@ if test "x$enable_dap4" = xyes; then AC_DEFINE([ENABLE_DAP4], [1], [if true, build DAP4 Client]) fi +# We can optionally use libxml2 for DAP4, if available +AC_CHECK_LIB([xml2],[xmlReadMemory],[have_libxml2=yes],[have_libxml2=no]) +if test "x$have_libxml2" = "xyes" ; then + AC_SEARCH_LIBS([xmlReadMemory],[xml2 xml2.dll cygxml2.dll], [],[]) +fi +if test "x$have_libxml2" = xyes; then + XML2FLAGS=`xml2-config --cflags` + AC_SUBST([XML2FLAGS],${XML2FLAGS}) + AC_DEFINE([HAVE_LIBXML2], [1], [if true, use libxml2]) +fi +# Need a condition for this +AM_CONDITIONAL(HAVE_LIBXML2, [test "x$have_libxml2" = xyes]) + # check for useful, but not essential, memio support AC_CHECK_FUNCS([memmove getpagesize sysconf]) @@ -1875,6 +1888,7 @@ AC_CONFIG_FILES([Makefile libhdf4/Makefile libnczarr/Makefile libncpoco/Makefile + libncxml/Makefile libdispatch/Makefile liblib/Makefile ncdump/cdl/Makefile diff --git a/include/Makefile.am b/include/Makefile.am index 16f1e25773..ac4067ca8c 100644 --- a/include/Makefile.am +++ 
b/include/Makefile.am @@ -20,7 +20,7 @@ nc4internal.h nctime.h nc3internal.h onstack.h ncrc.h ncauth.h \ ncoffsets.h nctestserver.h nc4dispatch.h nc3dispatch.h ncexternl.h \ ncpathmgr.h ncindex.h hdf4dispatch.h hdf5internal.h nc_provenance.h \ hdf5dispatch.h ncmodel.h isnan.h nccrc.h ncexhash.h ncxcache.h \ -ncfilter.h ncjson.h ezxml.h ncs3sdk.h +ncfilter.h ncjson.h ncxml.h ncs3sdk.h if USE_DAP noinst_HEADERS += ncdap.h @@ -32,10 +32,7 @@ endif EXTRA_DIST = CMakeLists.txt XGetopt.h netcdf_meta.h.in netcdf_dispatch.h.in -# netcdf_json.h is part of the distribution. -# If either of the files ncjson.h ../libdispatch/ncjson.c is changed -# then netcdf_json.h should be reconstructed using this recipe. -build_netcdf_json.h:: - sed -e 's/NCJSON_H/NETCDF_JSON_H/' -e '/ncjson.h/d' <${srcdir}/ncjson.h > $@ - sed -e '/ncjson.h/d' < ${srcdir}/../libdispatch/ncjson.c >> $@ - +# Give the recipe for rebuilding netcdf_json.h +makencjson:: + sed -e 's/NCJSON_H/NETCDF_JSON_H/' -e '/ncjson.h/d' <${srcdir}/ncjson.h > netcdf_json.h + sed -e '/ncjson.h/d' < ${srcdir}/../libdispatch/ncjson.c >> netcdf_json.h diff --git a/include/ncxml.h b/include/ncxml.h new file mode 100644 index 0000000000..011896f25e --- /dev/null +++ b/include/ncxml.h @@ -0,0 +1,44 @@ +/* Copyright 2018-2018 University Corporation for Atmospheric + Research/Unidata. 
*/ + +#ifndef NCXML_H +#define NCXML_H + +#ifdef _WIN32 + #ifdef DLL_EXPORT /* define when building the library */ + #define DECLSPEC __declspec(dllexport) + #else + #define DECLSPEC __declspec(dllimport) + #endif +#else + #define DECLSPEC +#endif + +typedef void* ncxml_t; +typedef void* ncxml_attr_t; +typedef void* ncxml_doc_t; + +#if defined(__cplusplus) +extern "C" { +#endif + +DECLSPEC void ncxml_initialize(void); +DECLSPEC void ncxml_finalize(void); +DECLSPEC ncxml_doc_t ncxml_parse(char* contents, size_t len); +DECLSPEC void ncxml_free(ncxml_doc_t doc0); +DECLSPEC ncxml_t ncxml_root(ncxml_doc_t doc); +DECLSPEC const char* ncxml_name(ncxml_t xml0); +DECLSPEC char* ncxml_attr(ncxml_t xml0, const char* key); +DECLSPEC ncxml_t ncxml_child(ncxml_t xml0, const char* name); +DECLSPEC ncxml_t ncxml_next(ncxml_t xml0, const char* name); +DECLSPEC char* ncxml_text(ncxml_t xml0); +DECLSPEC int ncxml_attr_pairs(ncxml_t xml0, char*** pairsp); +/* Nameless versions of child and next */ +DECLSPEC ncxml_t ncxml_child_first(ncxml_t xml0); +DECLSPEC ncxml_t ncxml_child_next(ncxml_t xml0); + +#if defined(__cplusplus) +} +#endif + +#endif /*NCXML_H*/ diff --git a/libdap4/Makefile.am b/libdap4/Makefile.am index 3070344906..f16cfd4e51 100644 --- a/libdap4/Makefile.am +++ b/libdap4/Makefile.am @@ -6,6 +6,8 @@ # Put together AM_CPPFLAGS and AM_LDFLAGS. include $(top_srcdir)/lib_flags.am +AM_CPPFLAGS += -I$(top_srcdir)/libncxml +libdap4_la_CPPFLAGS = ${AM_CPPFLAGS} # We may have to add to these later. 
DISTCLEANFILES = @@ -59,7 +61,7 @@ endif # Build convenience library noinst_LTLIBRARIES = libdap4.la libdap4_la_SOURCES = $(SRC) $(HDRS) -libdap4_la_CPPFLAGS = $(AM_CPPFLAGS) +libdap4_la_CPPFLAGS += $(AM_CPPFLAGS) libdap4_la_LIBADD = endif # ENABLE_DAP4 diff --git a/libdap4/d4data.c b/libdap4/d4data.c index a9a57b25a3..fdc0725344 100644 --- a/libdap4/d4data.c +++ b/libdap4/d4data.c @@ -6,7 +6,6 @@ #include "d4includes.h" #include #include -#include "ezxml.h" #include "d4includes.h" #include "d4odom.h" #include "nccrc.h" diff --git a/libdap4/d4fix.c b/libdap4/d4fix.c index 39f9a77b31..3d3edf1751 100644 --- a/libdap4/d4fix.c +++ b/libdap4/d4fix.c @@ -7,7 +7,6 @@ #include #include "d4includes.h" -#include "ezxml.h" /* The primary purpose of this code is to provide node and data walkers diff --git a/libdap4/d4meta.c b/libdap4/d4meta.c index 7e1af13475..6266b73f92 100644 --- a/libdap4/d4meta.c +++ b/libdap4/d4meta.c @@ -7,7 +7,6 @@ #include #include "nc4internal.h" #include "ncoffsets.h" -#include "ezxml.h" /** * Build the netcdf-4 metadata from the NCD4node nodes. 
diff --git a/libdap4/d4parser.c b/libdap4/d4parser.c index 4dc5e37ce3..5c648e54c6 100644 --- a/libdap4/d4parser.c +++ b/libdap4/d4parser.c @@ -6,7 +6,7 @@ #include "d4includes.h" #include #include -#include "ezxml.h" +#include "ncxml.h" /** * Implement the Dap4 Parser Using a DOM Parser @@ -103,8 +103,6 @@ static void setname(NCD4node* node, const char* name) /***************************************************/ -extern const char** ezxml_all_attr(ezxml_t xml, int* countp); - /* Forwards */ static int addOrigType(NCD4parser*, NCD4node* src, NCD4node* dst, const char* tag); @@ -112,9 +110,9 @@ static int defineAtomicTypes(NCD4meta*,NClist*); static void classify(NCD4node* container, NCD4node* node); static int convertString(union ATOMICS*, NCD4node* type, const char* s); static int downConvert(union ATOMICS*, NCD4node* type); -static int fillgroup(NCD4parser*, NCD4node* group, ezxml_t xml); -static NCD4node* getOpaque(NCD4parser*, ezxml_t varxml, NCD4node* group); -static int getValueStrings(NCD4parser*, NCD4node*, ezxml_t xattr, NClist*); +static int fillgroup(NCD4parser*, NCD4node* group, ncxml_t xml); +static NCD4node* getOpaque(NCD4parser*, ncxml_t varxml, NCD4node* group); +static int getValueStrings(NCD4parser*, NCD4node*, ncxml_t xattr, NClist*); static int isReserved(const char* name); static const KEYWORDINFO* keyword(const char* name); static NCD4node* lookupAtomicType(NClist*, const char* name); @@ -122,30 +120,30 @@ static NCD4node* lookFor(NClist* elems, const char* name, NCD4sort sort); static NCD4node* lookupFQN(NCD4parser*, const char* sfqn, NCD4sort); static int lookupFQNList(NCD4parser*, NClist* fqn, NCD4sort sort, NCD4node** result); static NCD4node* makeAnonDim(NCD4parser*, const char* sizestr); -static int makeNode(NCD4parser*, NCD4node* parent, ezxml_t, NCD4sort, nc_type, NCD4node**); +static int makeNode(NCD4parser*, NCD4node* parent, ncxml_t, NCD4sort, nc_type, NCD4node**); static int makeNodeStatic(NCD4meta* meta, NCD4node* parent, NCD4sort 
sort, nc_type subsort, NCD4node** nodep); -static int parseAtomicVar(NCD4parser*, NCD4node* container, ezxml_t xml, NCD4node**); -static int parseAttributes(NCD4parser*, NCD4node* container, ezxml_t xml); -static int parseDimensions(NCD4parser*, NCD4node* group, ezxml_t xml); -static int parseDimRefs(NCD4parser*, NCD4node* var, ezxml_t xml); -static int parseEconsts(NCD4parser*, NCD4node* en, ezxml_t xml); -static int parseEnumerations(NCD4parser*, NCD4node* group, ezxml_t dom); -static int parseFields(NCD4parser*, NCD4node* container, ezxml_t xml); -static int parseError(NCD4parser*, ezxml_t errxml); -static int parseGroups(NCD4parser*, NCD4node* group, ezxml_t dom); -static int parseMaps(NCD4parser*, NCD4node* var, ezxml_t xml); -static int parseMetaData(NCD4parser*, NCD4node* node, ezxml_t xml); -static int parseStructure(NCD4parser*, NCD4node* container, ezxml_t dom, NCD4node**); -static int parseSequence(NCD4parser*, NCD4node* container, ezxml_t dom,NCD4node**); +static int parseAtomicVar(NCD4parser*, NCD4node* container, ncxml_t xml, NCD4node**); +static int parseAttributes(NCD4parser*, NCD4node* container, ncxml_t xml); +static int parseDimensions(NCD4parser*, NCD4node* group, ncxml_t xml); +static int parseDimRefs(NCD4parser*, NCD4node* var, ncxml_t xml); +static int parseEconsts(NCD4parser*, NCD4node* en, ncxml_t xml); +static int parseEnumerations(NCD4parser*, NCD4node* group, ncxml_t dom); +static int parseFields(NCD4parser*, NCD4node* container, ncxml_t xml); +static int parseError(NCD4parser*, ncxml_t errxml); +static int parseGroups(NCD4parser*, NCD4node* group, ncxml_t dom); +static int parseMaps(NCD4parser*, NCD4node* var, ncxml_t xml); +static int parseMetaData(NCD4parser*, NCD4node* node, ncxml_t xml); +static int parseStructure(NCD4parser*, NCD4node* container, ncxml_t dom, NCD4node**); +static int parseSequence(NCD4parser*, NCD4node* container, ncxml_t dom,NCD4node**); static int parseLL(const char* text, long long*); static int parseULL(const 
char* text, unsigned long long*); -static int parseVariables(NCD4parser*, NCD4node* group, ezxml_t xml); -static int parseVariable(NCD4parser*, NCD4node* group, ezxml_t xml, NCD4node**); +static int parseVariables(NCD4parser*, NCD4node* group, ncxml_t xml); +static int parseVariable(NCD4parser*, NCD4node* group, ncxml_t xml, NCD4node**); static void reclaimParser(NCD4parser* parser); static void record(NCD4parser*, NCD4node* node); static int splitOrigType(NCD4parser*, const char* fqn, NCD4node* var); static void track(NCD4meta*, NCD4node* node); -static int traverse(NCD4parser*, ezxml_t dom); +static int traverse(NCD4parser*, ncxml_t dom); static int parseForwards(NCD4parser* parser, NCD4node* root); #ifndef FIXEDOPAQUE static int defineBytestringType(NCD4parser*); @@ -159,8 +157,8 @@ NCD4_parse(NCD4meta* metadata) { int ret = NC_NOERR; NCD4parser* parser = NULL; - int ilen; - ezxml_t dom = NULL; + ncxml_doc_t doc = NULL; + ncxml_t dom = NULL; /* fill in the atomic types for meta*/ metadata->atomictypes = nclistnew(); @@ -170,9 +168,9 @@ NCD4_parse(NCD4meta* metadata) parser = (NCD4parser*)calloc(1,sizeof(NCD4parser)); if(parser == NULL) {ret=NC_ENOMEM; goto done;} parser->metadata = metadata; - ilen = strlen(parser->metadata->serial.dmr); - dom = nc_ezxml_parse_str(parser->metadata->serial.dmr,ilen); - if(dom == NULL) {ret=NC_ENOMEM; goto done;} + doc = ncxml_parse(parser->metadata->serial.dmr,strlen(parser->metadata->serial.dmr)); + if(doc == NULL) {ret=NC_ENOMEM; goto done;} + dom = ncxml_root(doc); parser->types = nclistnew(); parser->dims = nclistnew(); parser->vars = nclistnew(); @@ -184,8 +182,8 @@ NCD4_parse(NCD4meta* metadata) ret = traverse(parser,dom); done: - if(dom != NULL) - nc_ezxml_free(dom); + if(doc != NULL) + ncxml_free(doc); reclaimParser(parser); return THROW(ret); } @@ -205,12 +203,12 @@ reclaimParser(NCD4parser* parser) /* Recursively walk the DOM tree to create the metadata */ static int -traverse(NCD4parser* parser, ezxml_t dom) 
+traverse(NCD4parser* parser, ncxml_t dom) { int ret = NC_NOERR; /* See if we have an or */ - if(strcmp(dom->name,"Error")==0) { + if(strcmp(ncxml_name(dom),"Error")==0) { ret=parseError(parser,dom); /* Report the error */ fprintf(stderr,"DAP4 Error: http-code=%d message=\"%s\" context=\"%s\"\n", @@ -220,19 +218,19 @@ traverse(NCD4parser* parser, ezxml_t dom) fflush(stderr); ret=NC_EDMR; goto done; - } else if(strcmp(dom->name,"Dataset")==0) { - const char* xattr = NULL; + } else if(strcmp(ncxml_name(dom),"Dataset")==0) { + char* xattr = NULL; if((ret=makeNode(parser,NULL,NULL,NCD4_GROUP,NC_NULL,&parser->metadata->root))) goto done; parser->metadata->root->group.isdataset = 1; parser->metadata->root->meta.id = parser->metadata->ncid; parser->metadata->groupbyid = nclistnew(); SETNAME(parser->metadata->root,"/"); - xattr = nc_ezxml_attr(dom,"name"); - if(xattr != NULL) parser->metadata->root->group.datasetname = strdup(xattr); - xattr = nc_ezxml_attr(dom,"dapVersion"); - if(xattr != NULL) parser->metadata->root->group.dapversion = strdup(xattr); - xattr = nc_ezxml_attr(dom,"dmrVersion"); - if(xattr != NULL) parser->metadata->root->group.dmrversion = strdup(xattr); + xattr = ncxml_attr(dom,"name"); + if(xattr != NULL) parser->metadata->root->group.datasetname = xattr; + xattr = ncxml_attr(dom,"dapVersion"); + if(xattr != NULL) parser->metadata->root->group.dapversion = xattr; + xattr = ncxml_attr(dom,"dmrVersion"); + if(xattr != NULL) parser->metadata->root->group.dmrversion = xattr; /* Recursively walk the tree */ if((ret = fillgroup(parser,parser->metadata->root,dom))) goto done; @@ -242,13 +240,13 @@ traverse(NCD4parser* parser, ezxml_t dom) if((ret = parseForwards(parser,parser->metadata->root))) goto done; } else - FAIL(NC_EINVAL,"Unexpected dom root name: %s",dom->name); + FAIL(NC_EINVAL,"Unexpected dom root name: %s",ncxml_name(dom)); done: return THROW(ret); } static int -fillgroup(NCD4parser* parser, NCD4node* group, ezxml_t xml) +fillgroup(NCD4parser* 
parser, NCD4node* group, ncxml_t xml) { int ret = NC_NOERR; @@ -267,23 +265,25 @@ fillgroup(NCD4parser* parser, NCD4node* group, ezxml_t xml) } static int -parseDimensions(NCD4parser* parser, NCD4node* group, ezxml_t xml) +parseDimensions(NCD4parser* parser, NCD4node* group, ncxml_t xml) { int ret = NC_NOERR; - ezxml_t x; - for(x=nc_ezxml_child(xml, "Dimension");x != NULL;x = nc_ezxml_next(x)) { + ncxml_t x; + for(x=ncxml_child(xml, "Dimension");x != NULL;x = ncxml_next(x,"Dimension")) { NCD4node* dimnode = NULL; unsigned long long size; - const char* sizestr; - const char* unlimstr; - sizestr = nc_ezxml_attr(x,"size"); + char* sizestr; + char* unlimstr; + sizestr = ncxml_attr(x,"size"); if(sizestr == NULL) FAIL(NC_EDIMSIZE,"Dimension has no size"); - unlimstr = nc_ezxml_attr(x,UCARTAGUNLIM); + unlimstr = ncxml_attr(x,UCARTAGUNLIM); if((ret = parseULL(sizestr,&size))) goto done; + nullfree(sizestr); if((ret=makeNode(parser,group,x,NCD4_DIM,NC_NULL,&dimnode))) goto done; dimnode->dim.size = (long long)size; dimnode->dim.isunlimited = (unlimstr != NULL); + nullfree(unlimstr); /* Process attributes */ if((ret = parseAttributes(parser,dimnode,x))) goto done; classify(group,dimnode); @@ -293,19 +293,20 @@ parseDimensions(NCD4parser* parser, NCD4node* group, ezxml_t xml) } static int -parseEnumerations(NCD4parser* parser, NCD4node* group, ezxml_t xml) +parseEnumerations(NCD4parser* parser, NCD4node* group, ncxml_t xml) { int ret = NC_NOERR; - ezxml_t x; + ncxml_t x; - for(x=nc_ezxml_child(xml, "Enumeration");x != NULL;x = nc_ezxml_next(x)) { + for(x=ncxml_child(xml, "Enumeration");x != NULL;x = ncxml_next(x, "Enumeration")) { NCD4node* node = NULL; NCD4node* basetype = NULL; - const char* fqn = nc_ezxml_attr(x,"basetype"); + char* fqn = ncxml_attr(x,"basetype"); basetype = lookupFQN(parser,fqn,NCD4_TYPE); if(basetype == NULL) { FAIL(NC_EBADTYPE,"Enumeration has unknown type: ",fqn); } + nullfree(fqn); if((ret=makeNode(parser,group,x,NCD4_TYPE,NC_ENUM,&node))) goto done; 
node->basetype = basetype; if((ret=parseEconsts(parser,node,x))) goto done; @@ -314,9 +315,10 @@ parseEnumerations(NCD4parser* parser, NCD4node* group, ezxml_t xml) classify(group,node); /* Finally, see if this type has UCARTAGORIGTYPE xml attribute */ if(parser->metadata->controller->controls.translation == NCD4_TRANSNC4) { - const char* typetag = nc_ezxml_attr(x,UCARTAGORIGTYPE); + char* typetag = ncxml_attr(x,UCARTAGORIGTYPE); if(typetag != NULL) { } + nullfree(typetag); } } done: @@ -324,24 +326,26 @@ parseEnumerations(NCD4parser* parser, NCD4node* group, ezxml_t xml) } static int -parseEconsts(NCD4parser* parser, NCD4node* en, ezxml_t xml) +parseEconsts(NCD4parser* parser, NCD4node* en, ncxml_t xml) { int ret = NC_NOERR; - ezxml_t x; + ncxml_t x; NClist* econsts = nclistnew(); - for(x=nc_ezxml_child(xml, "EnumConst");x != NULL;x = nc_ezxml_next(x)) { + for(x=ncxml_child(xml, "EnumConst");x != NULL;x = ncxml_next(x, "EnumConst")) { NCD4node* ec = NULL; - const char* name; - const char* svalue; - name = nc_ezxml_attr(x,"name"); + char* name; + char* svalue; + name = ncxml_attr(x,"name"); if(name == NULL) FAIL(NC_EBADNAME,"Enum const with no name"); if((ret=makeNode(parser,en,x,NCD4_ECONST,NC_NULL,&ec))) goto done ; - svalue = nc_ezxml_attr(x,"value"); + nullfree(name); + svalue = ncxml_attr(x,"value"); if(svalue == NULL) FAIL(NC_EINVAL,"Enumeration Constant has no value"); if((ret=convertString(&ec->en.ecvalue,en->basetype,svalue))) FAIL(NC_EINVAL,"Non-numeric Enumeration Constant: %s->%s",ec->name,svalue); + nullfree(svalue); PUSH(econsts,ec); } en->en.econsts = econsts; @@ -350,15 +354,15 @@ parseEconsts(NCD4parser* parser, NCD4node* en, ezxml_t xml) } static int -parseVariables(NCD4parser* parser, NCD4node* group, ezxml_t xml) +parseVariables(NCD4parser* parser, NCD4node* group, ncxml_t xml) { int ret = NC_NOERR; - ezxml_t x; - for(x=xml->child;x != NULL;x=x->ordered) { + ncxml_t x; + for(x=ncxml_child_first(xml);x != NULL;x=ncxml_child_next(x)) { NCD4node* 
node = NULL; - const KEYWORDINFO* info = keyword(x->name); + const KEYWORDINFO* info = keyword(ncxml_name(x)); if(info == NULL) - FAIL(NC_ETRANSLATION,"Unexpected node type: %s",x->name); + FAIL(NC_ETRANSLATION,"Unexpected node type: %s",ncxml_name(x)); /* Check if we need to process this node */ if(!ISVAR(info->sort)) continue; /* Handle elsewhere */ node = NULL; @@ -370,11 +374,11 @@ parseVariables(NCD4parser* parser, NCD4node* group, ezxml_t xml) } static int -parseVariable(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** nodep) +parseVariable(NCD4parser* parser, NCD4node* container, ncxml_t xml, NCD4node** nodep) { int ret = NC_NOERR; NCD4node* node = NULL; - const KEYWORDINFO* info = keyword(xml->name); + const KEYWORDINFO* info = keyword(ncxml_name(xml)); switch (info->subsort) { case NC_STRUCT: @@ -393,7 +397,7 @@ parseVariable(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** n } static int -parseMetaData(NCD4parser* parser, NCD4node* container, ezxml_t xml) +parseMetaData(NCD4parser* parser, NCD4node* container, ncxml_t xml) { int ret = NC_NOERR; /* Process dimrefs */ @@ -407,7 +411,7 @@ parseMetaData(NCD4parser* parser, NCD4node* container, ezxml_t xml) } static int -parseStructure(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** nodep) +parseStructure(NCD4parser* parser, NCD4node* container, ncxml_t xml, NCD4node** nodep) { int ret = NC_NOERR; NCD4node* var = NULL; @@ -440,10 +444,11 @@ parseStructure(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** /* See if this var has UCARTAGORIGTYPE attribute */ if(parser->metadata->controller->controls.translation == NCD4_TRANSNC4) { - const char* typetag = nc_ezxml_attr(xml,UCARTAGORIGTYPE); + char* typetag = ncxml_attr(xml,UCARTAGORIGTYPE); if(typetag != NULL) { /* yes, place it on the type */ if((ret=addOrigType(parser,var,type,typetag))) goto done; + nullfree(typetag); } } @@ -455,13 +460,13 @@ parseStructure(NCD4parser* parser, NCD4node* container, 
ezxml_t xml, NCD4node** } static int -parseFields(NCD4parser* parser, NCD4node* container, ezxml_t xml) +parseFields(NCD4parser* parser, NCD4node* container, ncxml_t xml) { int ret = NC_NOERR; - ezxml_t x; - for(x=xml->child;x != NULL;x=x->ordered) { - NCD4node* node = NULL; - const KEYWORDINFO* info = keyword(x->name); + ncxml_t x; + for(x=ncxml_child_first(xml);x != NULL;x=ncxml_child_next(x)) { + NCD4node* node = NULL; + const KEYWORDINFO* info = keyword(ncxml_name(x)); if(!ISVAR(info->sort)) continue; /* not a field */ ret = parseVariable(parser,container,x,&node); if(ret) goto done; @@ -475,13 +480,13 @@ Specialized version of parseFields that is used to attach a singleton field to a vlentype */ static int -parseVlenField(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** fieldp) +parseVlenField(NCD4parser* parser, NCD4node* container, ncxml_t xml, NCD4node** fieldp) { int ret = NC_NOERR; NCD4node* field = NULL; - ezxml_t x; - for(x=xml->child;x != NULL;x=x->ordered) { - const KEYWORDINFO* info = keyword(x->name); + ncxml_t x; + for(x=ncxml_child_first(xml);x != NULL;x=ncxml_child_next(x)) { + const KEYWORDINFO* info = keyword(ncxml_name(x)); if(!ISVAR(info->sort)) continue; /* not a field */ if(field != NULL) {ret = NC_EBADTYPE; goto done;} @@ -494,7 +499,7 @@ parseVlenField(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** } static int -parseSequence(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** nodep) +parseSequence(NCD4parser* parser, NCD4node* container, ncxml_t xml, NCD4node** nodep) { int ret = NC_NOERR; NCD4node* var = NULL; @@ -524,9 +529,10 @@ parseSequence(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** n Test: UCARTAGVLEN xml attribute is set */ if(parser->metadata->controller->controls.translation == NCD4_TRANSNC4) { - const char* vlentag = nc_ezxml_attr(xml,UCARTAGVLEN); + char* vlentag = ncxml_attr(xml,UCARTAGVLEN); if(vlentag != NULL) usevlen = 1; + nullfree(vlentag); } else 
usevlen = 0; @@ -581,10 +587,11 @@ parseSequence(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** n /* See if this var has UCARTAGORIGTYPE attribute */ if(parser->metadata->controller->controls.translation == NCD4_TRANSNC4) { - const char* typetag = nc_ezxml_attr(xml,UCARTAGORIGTYPE); + char* typetag = ncxml_attr(xml,UCARTAGORIGTYPE); if(typetag != NULL) { /* yes, place it on the type */ if((ret=addOrigType(parser,var,vlentype,typetag))) goto done; + nullfree(typetag); } } if(nodep) *nodep = var; @@ -595,14 +602,15 @@ parseSequence(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** n } static int -parseGroups(NCD4parser* parser, NCD4node* parent, ezxml_t xml) +parseGroups(NCD4parser* parser, NCD4node* parent, ncxml_t xml) { int ret = NC_NOERR; - ezxml_t x; - for(x=nc_ezxml_child(xml, "Group");x != NULL;x = nc_ezxml_next(x)) { + ncxml_t x; + for(x=ncxml_child(xml, "Group");x != NULL;x = ncxml_next(x,"Group")) { NCD4node* group = NULL; - const char* name = nc_ezxml_attr(x,"name"); + char* name = ncxml_attr(x,"name"); if(name == NULL) FAIL(NC_EBADNAME,"Group has no name"); + nullfree(name); if((ret=makeNode(parser,parent,x,NCD4_GROUP,NC_NULL,&group))) goto done; group->group.varbyid = nclistnew(); classify(parent,group); @@ -615,7 +623,7 @@ parseGroups(NCD4parser* parser, NCD4node* parent, ezxml_t xml) } static int -parseAtomicVar(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** nodep) +parseAtomicVar(NCD4parser* parser, NCD4node* container, ncxml_t xml, NCD4node** nodep) { int ret = NC_NOERR; NCD4node* node = NULL; @@ -625,7 +633,7 @@ parseAtomicVar(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** NCD4node* group; /* Check for aliases */ - for(typename=xml->name;;) { + for(typename=ncxml_name(xml);;) { info = keyword(typename); if(info->aliasfor == NULL) break; typename = info->aliasfor; @@ -633,11 +641,12 @@ parseAtomicVar(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** group = 
NCD4_groupFor(container); /* Locate its basetype; handle opaque and enum separately */ if(info->subsort == NC_ENUM) { - const char* enumfqn = nc_ezxml_attr(xml,"enum"); + char* enumfqn = ncxml_attr(xml,"enum"); if(enumfqn == NULL) base = NULL; else base = lookupFQN(parser,enumfqn,NCD4_TYPE); + nullfree(enumfqn); } else if(info->subsort == NC_OPAQUE) { /* See if the xml references an opaque type name */ base = getOpaque(parser,xml,group); @@ -654,10 +663,11 @@ parseAtomicVar(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** if((ret = parseMetaData(parser,node,xml))) goto done; /* See if this var has UCARTAGORIGTYPE attribute */ if(parser->metadata->controller->controls.translation == NCD4_TRANSNC4) { - const char* typetag = nc_ezxml_attr(xml,UCARTAGORIGTYPE); + char* typetag = ncxml_attr(xml,UCARTAGORIGTYPE); if(typetag != NULL) { /* yes, place it on the type */ if((ret=addOrigType(parser,node,node,typetag))) goto done; + nullfree(typetag); } } if(nodep) *nodep = node; @@ -666,22 +676,23 @@ parseAtomicVar(NCD4parser* parser, NCD4node* container, ezxml_t xml, NCD4node** } static int -parseDimRefs(NCD4parser* parser, NCD4node* var, ezxml_t xml) +parseDimRefs(NCD4parser* parser, NCD4node* var, ncxml_t xml) { int ret = NC_NOERR; - ezxml_t x; - for(x=nc_ezxml_child(xml, "Dim");x!= NULL;x=nc_ezxml_next(x)) { + ncxml_t x; + for(x=ncxml_child(xml, "Dim");x!= NULL;x=ncxml_next(x,"Dim")) { NCD4node* dim = NULL; - const char* fqn; + char* fqn; - fqn = nc_ezxml_attr(x,"name"); + fqn = ncxml_attr(x,"name"); if(fqn != NULL) { dim = lookupFQN(parser,fqn,NCD4_DIM); if(dim == NULL) { FAIL(NC_EBADDIM,"Cannot locate dim with name: %s",fqn); } + nullfree(fqn); } else { - const char* sizestr = nc_ezxml_attr(x,"size"); + char* sizestr = ncxml_attr(x,"size"); if(sizestr == NULL) { FAIL(NC_EBADDIM,"Dimension reference has no name and no size"); } @@ -689,6 +700,7 @@ parseDimRefs(NCD4parser* parser, NCD4node* var, ezxml_t xml) dim = makeAnonDim(parser,sizestr); if(dim == 
NULL) FAIL(NC_EBADDIM,"Cannot create anonymous dimension for size: %s",sizestr); + nullfree(sizestr); } PUSH(var->dims,dim); } @@ -697,58 +709,60 @@ parseDimRefs(NCD4parser* parser, NCD4node* var, ezxml_t xml) } static int -parseMaps(NCD4parser* parser, NCD4node* var, ezxml_t xml) +parseMaps(NCD4parser* parser, NCD4node* var, ncxml_t xml) { int ret = NC_NOERR; - ezxml_t x; + ncxml_t x; - for(x=nc_ezxml_child(xml, "Map");x!= NULL;x=nc_ezxml_next(x)) { - const char* fqn; - fqn = nc_ezxml_attr(x,"name"); + for(x=ncxml_child(xml, "Map");x!= NULL;x=ncxml_next(x,"Map")) { + char* fqn; + fqn = ncxml_attr(x,"name"); if(fqn == NULL) FAIL(NC_ENOTVAR," has no name attribute"); - PUSH(var->mapnames,strdup(fqn)); + PUSH(var->mapnames,fqn); } done: return THROW(ret); } static int -parseAttributes(NCD4parser* parser, NCD4node* container, ezxml_t xml) +parseAttributes(NCD4parser* parser, NCD4node* container, ncxml_t xml) { int ret = NC_NOERR; - ezxml_t x; + ncxml_t x; NClist* values = NULL; + char** pairs = NULL; /* First, transfer any reserved xml attributes */ { - int count = 0; - const char** all = NULL; - all = nc_ezxml_all_attr(xml,&count); - if(all != NULL && count > 0) { - const char** p; - container->xmlattributes = nclistnew(); - for(p=all;*p;p+=2) { - if(isReserved(*p)) { - PUSH(container->xmlattributes,strdup(p[0])); - PUSH(container->xmlattributes,strdup(p[1])); - } + char** p; + if(!ncxml_attr_pairs(xml,&pairs)) + {ret = NC_ENOMEM; goto done;} + if(container->xmlattributes) nclistfree(container->xmlattributes); + container->xmlattributes = nclistnew(); + for(p=pairs;*p;p+=2) { + if(isReserved(*p)) { + PUSH(container->xmlattributes,strdup(p[0])); + PUSH(container->xmlattributes,strdup(p[1])); } } } - for(x=nc_ezxml_child(xml, "Attribute");x!= NULL;x=nc_ezxml_next(x)) { - const char* name = nc_ezxml_attr(x,"name"); - const char* type = nc_ezxml_attr(x,"type"); + for(x=ncxml_child(xml, "Attribute");x!= NULL;x=ncxml_next(x,"Attribute")) { + char* name = 
ncxml_attr(x,"name"); + char* type = ncxml_attr(x,"type"); NCD4node* attr = NULL; NCD4node* basetype; if(name == NULL) FAIL(NC_EBADNAME,"Missing name"); + nullfree(name); #ifdef HYRAXHACK /* Hyrax specifies type="container" for container types */ if(strcmp(type,"container")==0 - || strcmp(type,"Container")==0) + || strcmp(type,"Container")==0) { + nullfree(type); type = NULL; + } #endif if(type == NULL) { /* containers not supported; ignore */ @@ -759,6 +773,7 @@ parseAttributes(NCD4parser* parser, NCD4node* container, ezxml_t xml) basetype = lookupFQN(parser,type,NCD4_TYPE); if(basetype == NULL) FAIL(NC_EBADTYPE,"Unknown type: %s",type); + nullfree(type); if(basetype->subsort == NC_NAT && basetype->subsort != NC_ENUM) FAIL(NC_EBADTYPE," type must be atomic or enum: %s",type); attr->basetype = basetype; @@ -770,6 +785,11 @@ parseAttributes(NCD4parser* parser, NCD4node* container, ezxml_t xml) PUSH(container->attributes,attr); } done: + if(pairs) { + char** p = pairs; + for(;*p;p++) nullfree(*p); + free(pairs); + } if(ret != NC_NOERR) { nclistfreeall(values); } @@ -777,26 +797,27 @@ parseAttributes(NCD4parser* parser, NCD4node* container, ezxml_t xml) } static int -parseError(NCD4parser* parser, ezxml_t errxml) +parseError(NCD4parser* parser, ncxml_t errxml) { - const char* shttpcode = nc_ezxml_attr(errxml,"httpcode"); - ezxml_t x; - if(shttpcode == NULL) shttpcode = "400"; + char* shttpcode = ncxml_attr(errxml,"httpcode"); + ncxml_t x; + if(shttpcode == NULL) shttpcode = strdup("400"); if(sscanf(shttpcode,"%d",&parser->metadata->error.httpcode) != 1) nclog(NCLOGERR,"Malformed response"); - x=nc_ezxml_child(errxml, "Message"); + nullfree(shttpcode); + x=ncxml_child(errxml, "Message"); if(x != NULL) { - const char* txt = nc_ezxml_txt(x); - parser->metadata->error.message = (txt == NULL ? NULL : strdup(txt)); + char* txt = ncxml_text(x); + parser->metadata->error.message = (txt == NULL ? 
NULL : txt); } - x = nc_ezxml_child(errxml, "Context"); + x = ncxml_child(errxml, "Context"); if(x != NULL) { - const char* txt = nc_ezxml_txt(x); + const char* txt = ncxml_text(x); parser->metadata->error.context = (txt == NULL ? NULL : strdup(txt)); } - x=nc_ezxml_child(errxml, "OtherInformation"); + x=ncxml_child(errxml, "OtherInformation"); if(x != NULL) { - const char* txt = nc_ezxml_txt(x); + const char* txt = ncxml_text(x); parser->metadata->error.otherinfo = (txt == NULL ? NULL : strdup(txt)); } return THROW(NC_NOERR); @@ -806,12 +827,12 @@ parseError(NCD4parser* parser, ezxml_t errxml) Find or create an opaque type */ static NCD4node* -getOpaque(NCD4parser* parser, ezxml_t varxml, NCD4node* group) +getOpaque(NCD4parser* parser, ncxml_t varxml, NCD4node* group) { int i, ret = NC_NOERR; long long len; NCD4node* opaquetype = NULL; - const char* xattr; + char* xattr; #ifndef FIXEDOPAQUE len = 0; @@ -820,12 +841,13 @@ getOpaque(NCD4parser* parser, ezxml_t varxml, NCD4node* group) #endif if(parser->metadata->controller->controls.translation == NCD4_TRANSNC4) { /* See if this var has UCARTAGOPAQUE attribute */ - xattr = nc_ezxml_attr(varxml,UCARTAGOPAQUE); + xattr = ncxml_attr(varxml,UCARTAGOPAQUE); if(xattr != NULL) { long long tmp = 0; if((ret = parseLL(xattr,&tmp)) || (tmp < 0)) FAIL(NC_EINVAL,"Illegal opaque len: %s",xattr); len = tmp; + nullfree(xattr); } } #ifndef FIXEDOPAQUE @@ -861,33 +883,35 @@ getOpaque(NCD4parser* parser, ezxml_t varxml, NCD4node* group) /* get all value strings */ static int -getValueStrings(NCD4parser* parser, NCD4node* type, ezxml_t xattr, NClist* svalues) +getValueStrings(NCD4parser* parser, NCD4node* type, ncxml_t xattr, NClist* svalues) { - const char* s; + char* s; /* See first if we have a "value" xml attribute */ - s = nc_ezxml_attr(xattr,"value"); - if(s != NULL) - PUSH(svalues,strdup(s)); + s = ncxml_attr(xattr,"value"); + if(s != NULL) + {PUSH(svalues,s); s = NULL;} else {/* look for subnodes */ - ezxml_t x; - 
for(x=nc_ezxml_child(xattr, "Value");x != NULL;x = nc_ezxml_next(x)) { + ncxml_t x; + for(x=ncxml_child(xattr, "Value");x != NULL;x = ncxml_next(x,"Value")) { char* es; char* ds; /* We assume that either their is a single xml attribute called "value", or there is a single chunk of text containing possibly multiple values. */ - s = nc_ezxml_attr(x,"value"); + s = ncxml_attr(x,"value"); if(s == NULL) {/* See if there is a text part. */ - s = x->txt; - if(s == NULL) s = ""; + s = ncxml_text(x); + if(s == NULL) s = strdup(""); } /* Need to de-escape the string */ es = NCD4_entityescape(s); ds = NCD4_deescape(es); - PUSH(svalues,ds); - nullfree(es); + PUSH(svalues,ds); ds = NULL; + nullfree(es); es = NULL; + nullfree(s); s = NULL; } } + nullfree(s); return THROW(NC_NOERR); } @@ -1232,7 +1256,7 @@ lookupAtomicType(NClist* atomictypes, const char* name) /**************************************************/ static int -makeNode(NCD4parser* parser, NCD4node* parent, ezxml_t xml, NCD4sort sort, nc_type subsort, NCD4node** nodep) +makeNode(NCD4parser* parser, NCD4node* parent, ncxml_t xml, NCD4sort sort, nc_type subsort, NCD4node** nodep) { int ret = NC_NOERR; NCD4node* node = NULL; @@ -1242,12 +1266,13 @@ makeNode(NCD4parser* parser, NCD4node* parent, ezxml_t xml, NCD4sort sort, nc_ty /* Set node name, if it exists */ if(xml != NULL) { - const char* name = nc_ezxml_attr(xml,"name"); + char* name = ncxml_attr(xml,"name"); if(name != NULL) { if(strlen(name) > NC_MAX_NAME) { nclog(NCLOGERR,"Name too long: %s",name); } SETNAME(node,name); + nullfree(name); } } record(parser,node); diff --git a/libdap4/d4swap.c b/libdap4/d4swap.c index 2819a2bca7..2e557e9966 100644 --- a/libdap4/d4swap.c +++ b/libdap4/d4swap.c @@ -6,7 +6,6 @@ #include "d4includes.h" #include #include "d4includes.h" -#include "ezxml.h" /* The primary purpose of this code is to recursively traverse diff --git a/libdispatch/CMakeLists.txt b/libdispatch/CMakeLists.txt index b89c57bea9..156c86c7e2 100644 --- 
a/libdispatch/CMakeLists.txt +++ b/libdispatch/CMakeLists.txt @@ -5,7 +5,7 @@ # See netcdf-c/COPYRIGHT file for more info. SET(libdispatch_SOURCES dparallel.c dcopy.c dfile.c ddim.c datt.c dattinq.c dattput.c dattget.c derror.c dvar.c dvarget.c dvarput.c dvarinq.c ddispatch.c nclog.c dstring.c dutf8.c dinternal.c doffsets.c ncuri.c nclist.c ncbytes.c nchashmap.c nctime.c nc.c nclistmgr.c utf8proc.h utf8proc.c dpathmgr.c dutil.c drc.c dauth.c dreadonly.c dnotnc4.c dnotnc3.c daux.c dinfermodel.c -dcrc32.c dcrc32.h dcrc64.c ncexhash.c ncxcache.c ncjson.c ezxml.c ds3util.c) +dcrc32.c dcrc32.h dcrc64.c ncexhash.c ncxcache.c ncjson.c ds3util.c) # Netcdf-4 only functions. Must be defined even if not used SET(libdispatch_SOURCES ${libdispatch_SOURCES} dgroup.c dvlen.c dcompound.c dtype.c denum.c dopaque.c dfilter.c) diff --git a/libdispatch/Makefile.am b/libdispatch/Makefile.am index 70c2fd8b15..1a695e1c5d 100644 --- a/libdispatch/Makefile.am +++ b/libdispatch/Makefile.am @@ -21,7 +21,7 @@ dvarinq.c dinternal.c ddispatch.c dutf8.c nclog.c dstring.c ncuri.c \ nclist.c ncbytes.c nchashmap.c nctime.c nc.c nclistmgr.c dauth.c \ doffsets.c dpathmgr.c dutil.c dreadonly.c dnotnc4.c dnotnc3.c \ daux.c dinfermodel.c \ -dcrc32.c dcrc32.h dcrc64.c ncexhash.c ncxcache.c ncjson.c ezxml.c ds3util.c +dcrc32.c dcrc32.h dcrc64.c ncexhash.c ncxcache.c ncjson.c ds3util.c # Add the utf8 codebase libdispatch_la_SOURCES += utf8proc.c utf8proc.h @@ -77,6 +77,8 @@ ncsettings.c: $(top_srcdir)/libnetcdf.settings ncsettings.hdr # Show what is needed to insert a new version of ezxml # primary fix: The original ezxml.[ch] uses '//' comments; # unpack and replace with '/*..*/' + +REPO=https://downloads.sourceforge.net/project/ezxml/ EZXML=ezxml-0.8.6.tar.gz ezxml:: rm -fr ./ezxml ./ezxml.[ch] ./license.txt diff --git a/libdispatch/ddispatch.c b/libdispatch/ddispatch.c index bca0ed7724..12ba329958 100644 --- a/libdispatch/ddispatch.c +++ b/libdispatch/ddispatch.c @@ -11,6 +11,7 @@ See LICENSE.txt for 
license information. #include "ncrc.h" #include "ncoffsets.h" #include "ncpathmgr.h" +#include "ncxml.h" /* Required for getcwd, other functions. */ #ifdef HAVE_UNISTD_H @@ -125,6 +126,9 @@ NCDISPATCH_finalize(void) ncrc_freeglobalstate(); #if defined(ENABLE_BYTERANGE) || defined(ENABLE_DAP) || defined(ENABLE_DAP4) curl_global_cleanup(); +#endif +#if defined(ENABLE_DAP4) + ncxml_finalize(); #endif return status; } diff --git a/liblib/CMakeLists.txt b/liblib/CMakeLists.txt index 93f004ada1..acbc09a25d 100644 --- a/liblib/CMakeLists.txt +++ b/liblib/CMakeLists.txt @@ -31,6 +31,7 @@ ENDIF() IF(ENABLE_DAP4) SET(liblib_LIBS ${liblib_LIBS} dap4) + SET(liblib_LIBS ${liblib_LIBS} ncxml) ENDIF() IF(ENABLE_NCZARR) @@ -125,6 +126,10 @@ IF(ENABLE_S3_SDK) TARGET_LINK_LIBRARIES(netcdf ${AWS_LINK_LIBRARIES}) ENDIF() +IF(HAVE_LIBXML2) + SET(TLL_LIBS ${TLL_LIBS} ${LIBXML2_LIBRARIES}) +ENDIF() + IF(NOT WIN32) IF(NOT APPLE) IF(CMAKE_DL_LIBS) diff --git a/liblib/Makefile.am b/liblib/Makefile.am index 55f6a82f6e..e0ff052be2 100644 --- a/liblib/Makefile.am +++ b/liblib/Makefile.am @@ -62,6 +62,8 @@ endif # ENABLE_DAP if ENABLE_DAP4 AM_CPPFLAGS += -I${top_srcdir}/libdap4 libnetcdf_la_LIBADD += ${top_builddir}/libdap4/libdap4.la +AM_CPPFLAGS += -I${top_srcdir}/libncxml +libnetcdf_la_LIBADD += ${top_builddir}/libncxml/libncxml.la endif # ENABLE_DAP4 # NetCDF-4 ... 
diff --git a/libncxml/CMakeLists.txt b/libncxml/CMakeLists.txt new file mode 100644 index 0000000000..87188d2845 --- /dev/null +++ b/libncxml/CMakeLists.txt @@ -0,0 +1,14 @@ +IF(HAVE_LIBXML2) +SET(libncxml_SOURCES ncxml_xml2.c) +ELSE() +SET(libncxml_SOURCES ncxml_ezxml.c ezxml.c ezxml.h) +ENDIF() + +add_library(ncxml OBJECT ${libncxml_SOURCES}) +TARGET_INCLUDE_DIRECTORIES(ncxml PUBLIC ./include) + +#IF(NOT MSVC) +#target_compile_features(ncxml PUBLIC cxx_std_11) +#ENDIF() + +ADD_EXTRA_DIST(CMakeLists.txt license.txt) diff --git a/libncxml/Makefile.am b/libncxml/Makefile.am new file mode 100644 index 0000000000..4baceb4e90 --- /dev/null +++ b/libncxml/Makefile.am @@ -0,0 +1,54 @@ +# WARNING: this is not the libxml2 library +# It comes from https://github.com/recp/xml + +# Copyright 2009, UCAR/Unidata +# See the COPYRIGHT file for more information. + +# Use automake or CMake for building under nix +# Use CMake for building under windows + +# Get AM_CPPFLAGS and AM_LDFLAGS + +include $(top_srcdir)/lib_flags.am + +if HAVE_LIBXML2 +AM_CPPFLAGS += ${XML2FLAGS} +endif + +# This is our output. The ZARR convenience library. +noinst_LTLIBRARIES = libncxml.la +libncxml_la_LIBADD = +libncxml_la_LDFLAGS = + +if HAVE_LIBXML2 +libncxml_la_SOURCES = ncxml_xml2.c +else +libncxml_la_SOURCES = ncxml_ezxml.c ezxml.c ezxml.h +endif + +EXTRA_DIST = CMakeLists.txt license.txt + +# Construct ezxml from latest sources +REPO=https://downloads.sourceforge.net/project/ezxml/ +EZXML=ezxml-0.8.6.tar.gz +makelib:: + rm -fr ./ezxml.[ch] ./license.txt ./ezxml + tar -zxf ./${EZXML} + echo '#define EZXML_NOMMAP 1' > ezxml.c + cat /d' | \ + sed -e 's|//\(.*\)|/*\1*/|' \ + sed -e 's|//\(.*\)|/*\1*/|' \ + cat >./ezxml.c + sed -e 's|//\(.*\)|/*\1*/|' ./ezxml.h + cp ezxml/license.txt . 
+ rm -fr ezxml + +# Define path to the xml github dir; this value assumes it is in a parallel directory to netcdf-c (YMMV) +GITSRC=${top_srcdir}/../tinyxml2 +xmakelib:: + rm -f readme.md LICENSE.txt + rm -fr ./tinyxml2.cpp ./tinyxml2.h + cp ${GITSRC}/readme.md . + cp ${GITSRC}/LICENSE.txt . + cp ${GITSRC}/tinyxml2.h ${GITSRC}/tinyxml2.cpp . diff --git a/libdispatch/ezxml.c b/libncxml/ezxml.c similarity index 81% rename from libdispatch/ezxml.c rename to libncxml/ezxml.c index 95252d3393..96af31f97d 100644 --- a/libdispatch/ezxml.c +++ b/libncxml/ezxml.c @@ -1,8 +1,4 @@ -/* - * Copyright 1998-2018 University Corporation for Atmospheric Research/Unidata - * See the LICENSE file for more information. - */ - +#define EZXML_NOMMAP 1 /* ezxml.c * * Copyright 2004-2006 Aaron Voisine @@ -27,21 +23,18 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "config.h" #include #include #include #include #include -#ifdef HAVE_UNISTD_H -#include -#endif +#include +#ifndef EZXML_NOMMAP +#include +#endif /* EZXML_NOMMAP*/ +#include #include "ezxml.h" -#ifndef vsnprintf -extern int vsnprintf(char*, size_t, const char*, va_list ap); -#endif - #define EZXML_WS "\t\r\n " /* whitespace*/ #define EZXML_ERRL 128 /* maximum error string length*/ @@ -49,30 +42,22 @@ typedef struct ezxml_root *ezxml_root_t; struct ezxml_root { /* additional data for the root tag*/ struct ezxml xml; /* is a super-struct built on top of ezxml struct*/ ezxml_t cur; /* current xml tree insertion point*/ - char* m; /* original xml string*/ + char *m; /* original xml string*/ size_t len; /* length of allocated memory for mmap, -1 for malloc*/ - char* u; /* UTF-8 conversion of string if original was UTF-16*/ - char* s; /* start of work area*/ - char* e; /* end of work area*/ - char* *ent; /* general entities (ampersand sequences)*/ - char* **attr; /* default attributes*/ - char* **pi; /* processing instructions*/ + char *u; /* UTF-8 conversion of string if original was UTF-16*/ + char *s; /* 
start of work area*/ + char *e; /* end of work area*/ + char **ent; /* general entities (ampersand sequences)*/ + char ***attr; /* default attributes*/ + char ***pi; /* processing instructions*/ short standalone; /* non-zero if */ char err[EZXML_ERRL]; /* error string*/ }; -static const char* EZXML_NIL[] = { NULL }; /* empty, null terminated array of strings*/ - -/* Forward */ -static ezxml_t ezxml_add_child(ezxml_t xml, const char* name, size_t off); -static ezxml_t ezxml_set_flag(ezxml_t xml, short flag); -static ezxml_t ezxml_err(ezxml_root_t root, char* s, const char* err, ...); -static ezxml_t ezxml_new(const char* name); -static ezxml_t ezxml_vget(ezxml_t xml, va_list ap); +char *EZXML_NIL[] = { NULL }; /* empty, null terminated array of strings*/ /* returns the first child tag with the given name or NULL if not found*/ -ezxml_t -nc_ezxml_child(ezxml_t xml, const char* name) +ezxml_t ezxml_child(ezxml_t xml, const char *name) { xml = (xml) ? xml->child : NULL; while (xml && strcmp(name, xml->name)) xml = xml->sibling; @@ -81,16 +66,14 @@ nc_ezxml_child(ezxml_t xml, const char* name) /* returns the Nth tag with the same name in the same subsection or NULL if not*/ /* found*/ -ezxml_t -nc_ezxml_idx(ezxml_t xml, int idx) +ezxml_t ezxml_idx(ezxml_t xml, int idx) { for (; xml && idx; idx--) xml = xml->next; return xml; } /* returns the value of the requested tag attribute or NULL if not found*/ -const char* -nc_ezxml_attr(ezxml_t xml, const char* attr) +const char *ezxml_attr(ezxml_t xml, const char *attr) { int i = 0, j = 1; ezxml_root_t root = (ezxml_root_t)xml; @@ -107,17 +90,16 @@ nc_ezxml_attr(ezxml_t xml, const char* attr) } /* same as ezxml_get but takes an already initialized va_list*/ -static ezxml_t -ezxml_vget(ezxml_t xml, va_list ap) +ezxml_t ezxml_vget(ezxml_t xml, va_list ap) { - char* name = va_arg(ap, char* ); + char *name = va_arg(ap, char *); int idx = -1; if (name && *name) { - idx = va_arg(ap, int); - xml = nc_ezxml_child(xml, name); + idx = 
va_arg(ap, int); + xml = ezxml_child(xml, name); } - return (idx < 0) ? xml : ezxml_vget(nc_ezxml_idx(xml, idx), ap); + return (idx < 0) ? xml : ezxml_vget(ezxml_idx(xml, idx), ap); } /* Traverses the xml tree to retrieve a specific subtag. Takes a variable*/ @@ -126,8 +108,7 @@ ezxml_vget(ezxml_t xml, va_list ap) /* title = ezxml_get(library, "shelf", 0, "book", 2, "title", -1);*/ /* This retrieves the title of the 3rd book on the 1st shelf of library.*/ /* Returns NULL if not found.*/ -ezxml_t -nc_ezxml_get(ezxml_t xml, ...) +ezxml_t ezxml_get(ezxml_t xml, ...) { va_list ap; ezxml_t r; @@ -140,26 +121,24 @@ nc_ezxml_get(ezxml_t xml, ...) /* returns a null terminated array of processing instructions for the given*/ /* target*/ -const char** -nc_ezxml_pi(ezxml_t xml, const char* target) +const char **ezxml_pi(ezxml_t xml, const char *target) { ezxml_root_t root = (ezxml_root_t)xml; int i = 0; - if (! root) return (const char* *)EZXML_NIL; + if (! root) return (const char **)EZXML_NIL; while (root->xml.parent) root = (ezxml_root_t)root->xml.parent; /* root tag*/ while (root->pi[i] && strcmp(target, root->pi[i][0])) i++; /* find target*/ - return ((root->pi[i]) ? (const char**)(root->pi[i] + 1) : EZXML_NIL); + return (const char **)((root->pi[i]) ? root->pi[i] + 1 : EZXML_NIL); } /* set an error string and return root*/ -static ezxml_t -ezxml_err(ezxml_root_t root, char* s, const char* err, ...) +ezxml_t ezxml_err(ezxml_root_t root, char *s, const char *err, ...) { va_list ap; int line = 1; - char* t, fmt[EZXML_ERRL]; - + char *t, fmt[EZXML_ERRL]; + for (t = root->s; t < s; t++) if (*t == '\n') line++; snprintf(fmt, EZXML_ERRL, "[error near line %d]: %s", line, err); @@ -176,10 +155,9 @@ ezxml_err(ezxml_root_t root, char* s, const char* err, ...) /* for cdata sections, ' ' for attribute normalization, or '*' for non-cdata*/ /* attribute normalization. 
Returns s, or if the decoded string is longer than*/ /* s, returns a malloced string that must be freed.*/ -static char* -ezxml_decode(char* s, char* *ent, char t) +char *ezxml_decode(char *s, char **ent, char t) { - char* e, *r = s, *m = s; + char *e, *r = s, *m = s; long b, c, d, l; for (; *s; s++) { /* normalize line endings*/ @@ -187,8 +165,9 @@ ezxml_decode(char* s, char* *ent, char t) *(s++) = '\n'; if (*s == '\n') memmove(s, (s + 1), strlen(s)); } + if (!*s) break; // bug#19 / CVE-2019-20200 } - + for (s = r; ; ) { while (*s && *s != '&' && (*s != '%' || t != '%') && !isspace(*s)) s++; @@ -199,9 +178,11 @@ ezxml_decode(char* s, char* *ent, char t) if (! c || *e != ';') { s++; continue; } /* not a character ref*/ if (c < 0x80) *(s++) = c; /* US-ASCII subset*/ - else { /* multi-byte UTF-8 sequence*/ + else { /* multi-byte UTF-8 sequence*/ for (b = 0, d = c; d; d /= 2) b++; /* number of bits in c*/ - b = (b - 2) / 5; /* number of bytes in payload*/ + // UTF-8 can ecode max 36 bits (standard says 21) - noop on 32 bit. + if (b > 36) { s++; continue; } // bug#15 / CVE-2019-20006 / bug#17 / CVE-2019-20202 + b = (b - 2) / 5; /* number of bytes in payload*/ *(s++) = (0xFF << (7 - b)) | (c >> (6 * b)); /* head*/ while (b) *(s++) = 0x80 | ((c >> (6 * --b)) & 0x3F); /* payload*/ } @@ -214,16 +195,15 @@ ezxml_decode(char* s, char* *ent, char t) b += 2); /* find entity in entity list*/ if (ent[b++]) { /* found a match*/ - if ((c = strlen(ent[b])) - 1 > (e = strchr(s, ';')) - s) { + + if ((c = strlen(ent[b])) - 1 > (e = strchr(s, ';')) - s) { + if (!e) { s++; continue; } // bug#18 / CVE-2019-20199 l = (d = (s - r)) + c + strlen(e); /* new length*/ r = (r == m) ? 
strcpy(malloc(l), r) : realloc(r, l); e = strchr((s = r + d), ';'); /* fix up pointers*/ + if (!e) { s++; continue; } // bug#18 / CVE-2019-20199 } - if(c > strlen(s) || strlen(e) > strlen(s + c)) { /* Patch 28 */ - fprintf(stderr, "Error: ezxml_decode(): memmove() past end of buffer!"); - exit(-1); - } memmove(s + c, e + 1, strlen(e)); /* shift rest of string*/ strncpy(s, ent[b], c); /* copy in replacement text*/ } @@ -244,11 +224,10 @@ ezxml_decode(char* s, char* *ent, char t) } /* called when parser finds start of new tag*/ -static void -ezxml_open_tag(ezxml_root_t root, char* name, char* *attr) +void ezxml_open_tag(ezxml_root_t root, char *name, char **attr) { ezxml_t xml = root->cur; - + if (xml->name) xml = ezxml_add_child(xml, name, strlen(xml->txt)); else xml->name = name; /* first open tag*/ @@ -257,11 +236,10 @@ ezxml_open_tag(ezxml_root_t root, char* name, char* *attr) } /* called when parser finds character content between open and closing tag*/ -static void -ezxml_char_content(ezxml_root_t root, char* s, size_t len, char t) +void ezxml_char_content(ezxml_root_t root, char *s, size_t len, char t) { ezxml_t xml = root->cur; - char* m = s; + char *m = s; size_t l; if (! xml || ! xml->name || ! len) return; /* sanity check*/ @@ -282,8 +260,7 @@ ezxml_char_content(ezxml_root_t root, char* s, size_t len, char t) } /* called when parser finds closing tag*/ -static ezxml_t -ezxml_close_tag(ezxml_root_t root, char* name, char* s) +ezxml_t ezxml_close_tag(ezxml_root_t root, char *name, char *s) { if (! root->cur || ! 
root->cur->name || strcmp(name, root->cur->name)) return ezxml_err(root, s, "unexpected closing tag ", name); @@ -294,8 +271,7 @@ ezxml_close_tag(ezxml_root_t root, char* name, char* s) /* checks for circular entity references, returns non-zero if no circular*/ /* references are found, zero otherwise*/ -static int -ezxml_ent_ok(char* name, char* s, char* *ent) +int ezxml_ent_ok(char *name, char *s, char **ent) { int i; @@ -309,11 +285,10 @@ ezxml_ent_ok(char* name, char* s, char* *ent) } /* called when the parser finds a processing instruction*/ -static void -ezxml_proc_inst(ezxml_root_t root, char* s, size_t len) +void ezxml_proc_inst(ezxml_root_t root, char *s, size_t len) { int i = 0, j = 1; - char* target = s; + char *target = s; s[len] = '\0'; /* null terminate instruction*/ if (*(s += strcspn(s, EZXML_WS))) { @@ -327,19 +302,19 @@ ezxml_proc_inst(ezxml_root_t root, char* s, size_t len) return; } - if (! root->pi[0]) *(root->pi = malloc(sizeof(char* *))) = NULL; /*first pi*/ + if (! root->pi[0]) *(root->pi = malloc(sizeof(char **))) = NULL; /*first pi*/ while (root->pi[i] && strcmp(target, root->pi[i][0])) i++; /* find target*/ if (! root->pi[i]) { /* new target*/ - root->pi = realloc(root->pi, sizeof(char* *) * (i + 2)); - root->pi[i] = malloc(sizeof(char* ) * 3); + root->pi = realloc(root->pi, sizeof(char **) * (i + 2)); + root->pi[i] = malloc(sizeof(char *) * 3); root->pi[i][0] = target; - root->pi[i][1] = (char* )(root->pi[i + 1] = NULL); /* terminate pi list*/ + root->pi[i][1] = (char *)(root->pi[i + 1] = NULL); /* terminate pi list*/ root->pi[i][2] = strdup(""); /* empty document position list*/ } while (root->pi[i][j]) j++; /* find end of instruction list for this target*/ - root->pi[i] = realloc(root->pi[i], sizeof(char* ) * (j + 3)); + root->pi[i] = realloc(root->pi[i], sizeof(char *) * (j + 3)); root->pi[i][j + 2] = realloc(root->pi[i][j + 1], j + 1); strcpy(root->pi[i][j + 2] + j - 1, (root->xml.name) ? 
">" : "<"); root->pi[i][j + 1] = NULL; /* null terminate pi list for this target*/ @@ -347,13 +322,13 @@ ezxml_proc_inst(ezxml_root_t root, char* s, size_t len) } /* called when the parser finds an internal doctype subset*/ -static short -ezxml_internal_dtd(ezxml_root_t root, char* s, size_t len) +short ezxml_internal_dtd(ezxml_root_t root, char *s, size_t len) { char q, *c, *t, *n = NULL, *v, **ent, **pe; int i, j; + /* Bug CVE-2021-31229 */ size_t n_len, n_off; - + pe = memcpy(malloc(sizeof(EZXML_NIL)), EZXML_NIL, sizeof(EZXML_NIL)); for (s[len] = '\0'; s; ) { @@ -363,13 +338,12 @@ ezxml_internal_dtd(ezxml_root_t root, char* s, size_t len) else if (! strncmp(s, "= n_len) { ezxml_err(root, NULL, "write past buffer (ent; ent[i]; i++); - ent = realloc(ent, (i + 3) * sizeof(char* )); /* space for next ent*/ + ent = realloc(ent, (i + 3) * sizeof(char *)); /* space for next ent*/ if (*c == '%') pe = ent; else root->ent = ent; @@ -400,11 +374,12 @@ ezxml_internal_dtd(ezxml_root_t root, char* s, size_t len) if (! *t) { ezxml_err(root, t, "unclosed ")) == '>') continue; else *s = '\0'; /* null terminate tag name*/ - for (i = 0; n && root->attr[i] && strcmp(n, root->attr[i][0]); i++); /* patch 25 */ + /* Bug CVE-2021-30485 */ + for (i = 0; n && root->attr[i] && strcmp(n, root->attr[i][0]); i++); - for(;;) { - s++; - if(!(*(n = s + strspn(s, EZXML_WS)) && *n != '>')) break; + for(;;) { + s++; + if(!(*(n = s + strspn(s, EZXML_WS)) && *n != '>')) break; if (*(s = n + strcspn(n, EZXML_WS))) *s = '\0'; /* attr name*/ else { ezxml_err(root, t, "malformed attr[i]) { /* new tag name*/ - root->attr = (! i) ? malloc(2 * sizeof(char* *)) + root->attr = (! i) ? 
malloc(2 * sizeof(char **)) : realloc(root->attr, - (i + 2) * sizeof(char* *)); - root->attr[i] = malloc(2 * sizeof(char* )); + (i + 2) * sizeof(char **)); + root->attr[i] = malloc(2 * sizeof(char *)); root->attr[i][0] = t; /* set tag name*/ - root->attr[i][1] = (char* )(root->attr[i + 1] = NULL); + root->attr[i][1] = (char *)(root->attr[i + 1] = NULL); } for (j = 1; root->attr[i][j]; j += 3); /* find end of list*/ root->attr[i] = realloc(root->attr[i], - (j + 4) * sizeof(char* )); + (j + 4) * sizeof(char *)); root->attr[i][j + 3] = NULL; /* null terminate list*/ root->attr[i][j + 2] = c; /* is it cdata?*/ @@ -462,10 +437,9 @@ ezxml_internal_dtd(ezxml_root_t root, char* s, size_t len) /* Converts a UTF-16 string to UTF-8. Returns a new string that must be freed*/ /* or NULL if no conversion was needed.*/ -static char* -ezxml_str2utf8(char* *s, size_t *len) +char *ezxml_str2utf8(char **s, size_t *len) { - char* u; + char *u; size_t l = 0, sl, max = *len; long c, d; int b, be = (**s == '\xFE') ? 1 : (**s == '\xFF') ? 0 : -1; @@ -495,13 +469,11 @@ ezxml_str2utf8(char* *s, size_t *len) } /* frees a tag attribute list*/ -static void -ezxml_free_attr(char* *attr) -{ +void ezxml_free_attr(char **attr) { int i = 0; - char* m; - - if (! attr || ((const char**)attr) == EZXML_NIL) return; /* nothing to free*/ + char *m; + + if (! attr || attr == EZXML_NIL) return; /* nothing to free*/ while (attr[i]) i += 2; /* find end of attribute list*/ m = attr[i + 1]; /* list of which names and values are malloced*/ for (i = 0; m[i]; i++) { @@ -513,18 +485,19 @@ ezxml_free_attr(char* *attr) } /* parse the given xml string and return an ezxml structure*/ -ezxml_t -nc_ezxml_parse_str(char* s, size_t len) +ezxml_t ezxml_parse_str(char *s, size_t len) { ezxml_root_t root = (ezxml_root_t)ezxml_new(NULL); char q, e, *d, **attr, **a = NULL; /* initialize a to avoid compile warning*/ int l, i, j; + if (!root) return NULL; // bug#21 / CVE-2021-26221 root->m = s; if (! 
len) return ezxml_err(root, NULL, "root tag missing"); root->u = ezxml_str2utf8(&s, &len); /* convert utf-16 to utf-8*/ + if (! s) return ezxml_err(root, NULL, "invalid root tag"); // bug#13 / CVE-2019-20007 root->e = (root->s = s) + len; /* record start and end of work area*/ - + e = s[len - 1]; /* save end char*/ s[len - 1] = '\0'; /* turn end char into null terminator*/ @@ -532,22 +505,22 @@ nc_ezxml_parse_str(char* s, size_t len) if (! *s) return ezxml_err(root, s, "root tag missing"); for (; ; ) { - attr = (char* *)EZXML_NIL; + attr = (char **)EZXML_NIL; d = ++s; - + if (isalpha(*s) || *s == '_' || *s == ':' || *s < '\0') { /* new tag*/ if (! root->cur) return ezxml_err(root, d, "markup outside of root element"); s += strcspn(s, EZXML_WS "/>"); while (isspace(*s)) *(s++) = '\0'; /* null terminate tag name*/ - + if (*s && *s != '/' && *s != '>') /* find tag in default attr list*/ for (i = 0; (a = root->attr[i]) && strcmp(a[0], d); i++); for (l = 0; *s && *s != '/' && *s != '>'; l += 2) { /* new attrib*/ - attr = (l) ? realloc(attr, (l + 4) * sizeof(char* )) - : malloc(4 * sizeof(char* )); /* allocate space*/ + attr = (l) ? realloc(attr, (l + 4) * sizeof(char *)) + : malloc(4 * sizeof(char *)); /* allocate space*/ attr[l + 3] = (l) ? 
realloc(attr[l + 1], (l / 2) + 2) : malloc(2); /* mem for list of maloced vals*/ strcpy(attr[l + 3] + (l / 2), " "); /* value is not malloced*/ @@ -556,7 +529,7 @@ nc_ezxml_parse_str(char* s, size_t len) attr[l] = s; /* set attribute name*/ s += strcspn(s, EZXML_WS "=/>"); - if (*s == '=' || isspace(*s)) { + if (*s == '=' || isspace(*s)) { *(s++) = '\0'; /* null terminate tag attribute name*/ q = *(s += strspn(s, EZXML_WS "=")); if (q == '"' || q == '\'') { /* attribute value*/ @@ -594,7 +567,7 @@ nc_ezxml_parse_str(char* s, size_t len) } else { if (l) ezxml_free_attr(attr); - return ezxml_err(root, d, "missing >"); + return ezxml_err(root, d, "missing >"); } } else if (*s == '/') { /* close tag*/ @@ -614,22 +587,23 @@ nc_ezxml_parse_str(char* s, size_t len) else return ezxml_err(root, d, "unclosed ') || (l && (*s != ']' || + for (l = 0; *s && ((! l && *s != '>') || (l && (*s != ']' || *(s + strspn(s + 1, EZXML_WS) + 1) != '>'))); l = (*s == '[') ? 1 : l) s += strcspn(s + 1, "[]>") + 1; - if (! *s) /* patch 27 */ + /* Bug CVE-2021-31348 / CVE-2021-31347 */ + if (! *s ) return ezxml_err(root, d, "unclosed xml; } else if (*s == '?') { /* processing instructions*/ do { s = strchr(s, '?'); } while (s && *(++s) && *s != '>'); - if (! s || (! *s && e != '>')) + if (! s || (! *s && e != '>')) return ezxml_err(root, d, "unclosed ", root->cur->name); } +/* Wrapper for ezxml_parse_str() that accepts a file stream. Reads the entire*/ +/* stream into memory and then parses it. For xml files, use ezxml_parse_file()*/ +/* or ezxml_parse_fd()*/ +ezxml_t ezxml_parse_fp(FILE *fp) +{ + ezxml_root_t root; + size_t l, len = 0; + char *s; + + if (! (s = malloc(EZXML_BUFSIZE))) return NULL; + do { + len += (l = fread((s + len), 1, EZXML_BUFSIZE, fp)); + if (l == EZXML_BUFSIZE) s = realloc(s, len + EZXML_BUFSIZE); + } while (s && l == EZXML_BUFSIZE); + + if (! 
s) return NULL; + if (!(root = (ezxml_root_t)ezxml_parse_str(s, len))) { free(s); return NULL; }; // bug#21 / CVE-2021-26221 + root = (ezxml_root_t)ezxml_parse_str(s, len); + root->len = -1; /* so we know to free s in ezxml_free()*/ + return &root->xml; +} + +/* A wrapper for ezxml_parse_str() that accepts a file descriptor. First*/ +/* attempts to mem map the file. Failing that, reads the file into memory.*/ +/* Returns NULL on failure.*/ +ezxml_t ezxml_parse_fd(int fd) +{ + ezxml_root_t root; + struct stat st; + size_t l; + void *m; + + if (fd < 0) return NULL; + fstat(fd, &st); + +#ifndef EZXML_NOMMAP + l = (st.st_size + sysconf(_SC_PAGESIZE) - 1) & ~(sysconf(_SC_PAGESIZE) -1); + if ((m = mmap(NULL, l, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0)) != + MAP_FAILED) { + madvise(m, l, MADV_SEQUENTIAL); /* optimize for sequential access*/ + root = (ezxml_root_t)ezxml_parse_str(m, st.st_size); + if (!root) { munmap(m,l); return NULL; }; // bug#21 / CVE-2021-26221 + madvise(m, root->len = l, MADV_NORMAL); /* put it back to normal*/ + } + else { /* mmap failed, read file into memory*/ +#endif /* EZXML_NOMMAP*/ + l = read(fd, m = malloc(st.st_size), st.st_size); + root = (ezxml_root_t)ezxml_parse_str(m, l); + if (!root) { free(m); return NULL; }; // bug#21 / CVE-2021-26221 + root->len = -1; /* so we know to free s in ezxml_free()*/ +#ifndef EZXML_NOMMAP + } +#endif /* EZXML_NOMMAP*/ + return &root->xml; +} + +/* a wrapper for ezxml_parse_fd that accepts a file name*/ +ezxml_t ezxml_parse_file(const char *file) +{ + int fd = open(file, O_RDONLY, 0); + ezxml_t xml = ezxml_parse_fd(fd); + + if (fd >= 0) close(fd); + return xml; +} + /* Encodes ampersand sequences appending the results to *dst, reallocating *dst*/ /* if length excedes max. a is non-zero for attribute encoding. 
Returns *dst*/ -static char* -ezxml_ampencode(const char* s, size_t len, char* *dst, size_t *dlen, size_t *max, short a) +char *ezxml_ampencode(const char *s, size_t len, char **dst, size_t *dlen, + size_t *max, short a) { - const char* e; - + const char *e; + for (e = s + len; s != e; s++) { while (*dlen + 10 > *max) *dst = realloc(*dst, *max += EZXML_BUFSIZE); @@ -674,11 +714,11 @@ ezxml_ampencode(const char* s, size_t len, char* *dst, size_t *dlen, size_t *max /* Recursively converts each tag to xml appending it to *s. Reallocates *s if*/ /* its length excedes max. start is the location of the previous tag in the*/ /* parent tag's character content. Returns *s.*/ -static char* -ezxml_toxml_r(ezxml_t xml, char* *s, size_t *len, size_t *max, size_t start, char* **attr) +char *ezxml_toxml_r(ezxml_t xml, char **s, size_t *len, size_t *max, + size_t start, char ***attr) { int i, j; - char* txt = (xml->parent) ? xml->parent->txt : ""; + char *txt = (xml->parent) ? xml->parent->txt : ""; size_t off = 0; /* parent character content up to this tag*/ @@ -689,7 +729,7 @@ ezxml_toxml_r(ezxml_t xml, char* *s, size_t *len, size_t *max, size_t start, cha *len += sprintf(*s + *len, "<%s", xml->name); /* open tag*/ for (i = 0; xml->attr[i]; i += 2) { /* tag attributes*/ - if (nc_ezxml_attr(xml, xml->attr[i]) != xml->attr[i + 1]) continue; + if (ezxml_attr(xml, xml->attr[i]) != xml->attr[i + 1]) continue; while (*len + strlen(xml->attr[i]) + 7 > *max) /* reallocate s*/ *s = realloc(*s, *max += EZXML_BUFSIZE); @@ -700,7 +740,7 @@ ezxml_toxml_r(ezxml_t xml, char* *s, size_t *len, size_t *max, size_t start, cha for (i = 0; attr[i] && strcmp(attr[i][0], xml->name); i++); for (j = 1; attr[i] && attr[i][j]; j += 3) { /* default attributes*/ - if (! attr[i][j + 1] || nc_ezxml_attr(xml, attr[i][j]) != attr[i][j + 1]) + if (! 
attr[i][j + 1] || ezxml_attr(xml, attr[i][j]) != attr[i][j + 1]) continue; /* skip duplicates and non-values*/ while (*len + strlen(attr[i][j]) + 7 > *max) /* reallocate s*/ *s = realloc(*s, *max += EZXML_BUFSIZE); @@ -713,7 +753,7 @@ ezxml_toxml_r(ezxml_t xml, char* *s, size_t *len, size_t *max, size_t start, cha *s = (xml->child) ? ezxml_toxml_r(xml->child, s, len, max, 0, attr) /*child*/ : ezxml_ampencode(xml->txt, -1, s, len, max, 0); /*data*/ - + while (*len + strlen(xml->name) + 4 > *max) /* reallocate s*/ *s = realloc(*s, *max += EZXML_BUFSIZE); @@ -726,15 +766,17 @@ ezxml_toxml_r(ezxml_t xml, char* *s, size_t *len, size_t *max, size_t start, cha /* Converts an ezxml structure back to xml. Returns a string of xml data that*/ /* must be freed.*/ -char* -nc_ezxml_toxml(ezxml_t xml) +char *ezxml_toxml(ezxml_t xml) { ezxml_t p = (xml) ? xml->parent : NULL, o = (xml) ? xml->ordered : NULL; ezxml_root_t root = (ezxml_root_t)xml; size_t len = 0, max = EZXML_BUFSIZE; - char* s = strcpy(malloc(max), ""), *t, *n; + char *s = malloc(max), *t, *n; // bug#23 / CVE-2021-26220 + int i, j, k; + if (!s) return (NULL);// bug#23 / CVE-2021-26220 + s = strcpy(s, "");// bug#23 / CVE-2021-26220 if (! xml || ! xml->name) return realloc(s, len + 1); while (root->xml.parent) root = (ezxml_root_t)root->xml.parent; /* root tag*/ @@ -766,19 +808,18 @@ nc_ezxml_toxml(ezxml_t xml) } /* free the memory allocated for the ezxml structure*/ -void -nc_ezxml_free(ezxml_t xml) +void ezxml_free(ezxml_t xml) { ezxml_root_t root = (ezxml_root_t)xml; int i, j; - char* *a, *s; + char **a, *s; if (! xml) return; - nc_ezxml_free(xml->child); - nc_ezxml_free(xml->ordered); + ezxml_free(xml->child); + ezxml_free(xml->ordered); if (! 
xml->parent) { /* free root tag allocations*/ - for (i = 10; root->ent[i]; i += 2) /* 0 - 9 are default entities (<>&"')*/ + for (i = 10; root->ent[i]; i += 2) /* 0 - 9 are default entites (<>&"')*/ if ((s = root->ent[i + 1]) < root->s || s > root->e) free(s); free(root->ent); /* free list of general entities*/ @@ -793,13 +834,13 @@ nc_ezxml_free(ezxml_t xml) for (j = 1; root->pi[i][j]; j++); free(root->pi[i][j + 1]); free(root->pi[i]); - } + } if (root->pi[0]) free(root->pi); /* free processing instructions*/ if (root->len == -1) free(root->m); /* malloced xml data*/ -#ifdef EZXML_MMAP +#ifndef EZXML_NOMMAP else if (root->len) munmap(root->m, root->len); /* mem mapped xml data*/ -#endif /* EZXML_MMAP*/ +#endif /* EZXML_NOMMAP*/ if (root->u) free(root->u); /* utf8 conversion*/ } @@ -810,32 +851,36 @@ nc_ezxml_free(ezxml_t xml) } /* return parser error message or empty string if none*/ -const char* -nc_ezxml_error(ezxml_t xml) +const char *ezxml_error(ezxml_t xml) { while (xml && xml->parent) xml = xml->parent; /* find root tag*/ return (xml) ? 
((ezxml_root_t)xml)->err : ""; } /* returns a new empty ezxml structure with the given root tag name*/ -static ezxml_t -ezxml_new(const char* name) +ezxml_t ezxml_new(const char *name) { - static const char* entities[] = { "lt;", "<", "gt;", ">", "quot;", """, + static char *ent[] = { "lt;", "<", "gt;", ">", "quot;", """, "apos;", "'", "amp;", "&", NULL }; - ezxml_root_t root = (ezxml_root_t)memset(malloc(sizeof(struct ezxml_root)), - '\0', sizeof(struct ezxml_root)); - root->xml.name = (char* )name; + ezxml_root_t root = NULL; + /* Bug CVE-2021-26222 */ + char **p_ent; + + if (!(root = calloc(1,sizeof(struct ezxml_root)))) return NULL; // bug#21 / CVE-2021-26221 + memset(root, '\0', sizeof(struct ezxml_root)); + /* Bug CVE-2021-26222 */ + if (!(p_ent = calloc(1,sizeof(ent)))) { free(root); return NULL; }; + root->xml.name = (char *)name; root->cur = &root->xml; strcpy(root->err, root->xml.txt = ""); - root->ent = memcpy(malloc(sizeof(entities)), entities, sizeof(entities)); - root->attr = root->pi = (char* **)(root->xml.attr = (char**)EZXML_NIL); + /* Bug CVE-2021-26222 */ + root->ent = memcpy(p_ent, ent, sizeof(ent)); + root->attr = root->pi = (char ***)(root->xml.attr = (char**)EZXML_NIL); return &root->xml; } /* inserts an existing tag into an ezxml structure*/ -static ezxml_t -ezxml_insert(ezxml_t xml, ezxml_t dest, size_t off) +ezxml_t ezxml_insert(ezxml_t xml, ezxml_t dest, size_t off) { ezxml_t cur, prev, head; @@ -878,105 +923,33 @@ ezxml_insert(ezxml_t xml, ezxml_t dest, size_t off) /* Adds a child tag. off is the offset of the child tag relative to the start*/ /* of the parent tag's character content. Returns the child tag.*/ -static ezxml_t -ezxml_add_child(ezxml_t xml, const char* name, size_t off) +ezxml_t ezxml_add_child(ezxml_t xml, const char *name, size_t off) { ezxml_t child; if (! 
xml) return NULL; child = (ezxml_t)memset(malloc(sizeof(struct ezxml)), '\0', sizeof(struct ezxml)); - child->name = (char* )name; + child->name = (char *)name; child->attr = (char**)EZXML_NIL; child->txt = ""; return ezxml_insert(child, xml, off); } -/* sets a flag for the given tag and returns the tag*/ -static ezxml_t -ezxml_set_flag(ezxml_t xml, short flag) -{ - if (xml) xml->flags |= flag; - return xml; -} - -/** -Extra ezxml functionality -*/ - -/** -Get list of all the xml attributes. -Returns NULL, if none -WARNING: returns actual list, so do not free -*/ -const char** -nc_ezxml_all_attr(ezxml_t xml, int* countp) -{ - if(xml && xml->attr) { - char** p; - int count = 0; - for(p=xml->attr;*p;p+=2) count += 2; /* get number of attributes */ - return (const char**)xml->attr; - } - return NULL; -} - -#ifdef EZXML_TEST /* test harness*/ -int main(int argc, char* *argv) -{ - ezxml_t xml; - char* s; - int i; - - if (argc != 2) return fprintf(stderr, "usage: %s xmlfile\n", argv[0]); - - xml = ezxml_parse_file(argv[1]); - printf("%s\n", (s = ezxml_toxml(xml))); - free(s); - i = fprintf(stderr, "%s", ezxml_error(xml)); - ezxml_free(xml); - return (i) ? 1 : 0; -} -#endif /* EZXML_TEST*/ - -#if -0 -/* Wrapper for ezxml_parse_str() that accepts a file stream. Reads the entire*/ -/* stream into memory and then parses it. For xml files, use ezxml_parse_file()*/ -/* or ezxml_parse_fd()*/ -ezxml_t ezxml_parse_fp(FILE *fp) -{ - ezxml_root_t root; - size_t l, len = 0; - char *s; - - if (! (s = malloc(EZXML_BUFSIZE))) return NULL; - do { - len += (l = fread((s + len), 1, EZXML_BUFSIZE, fp)); - if (l == EZXML_BUFSIZE) s = realloc(s, len + EZXML_BUFSIZE); - } while (s && l == EZXML_BUFSIZE); - - if (! 
s) return NULL; - root = (ezxml_root_t)ezxml_parse_str(s, len); - root->len = -1; /* so we know to free s in ezxml_free()*/ - return &root->xml; -} - /* sets the character content for the given tag and returns the tag*/ -static ezxml_t -ezxml_set_txt(ezxml_t xml, const char* txt) +ezxml_t ezxml_set_txt(ezxml_t xml, const char *txt) { if (! xml) return NULL; if (xml->flags & EZXML_TXTM) free(xml->txt); /* existing txt was malloced*/ xml->flags &= ~EZXML_TXTM; - xml->txt = (char* )txt; + xml->txt = (char *)txt; return xml; } /* Sets the given tag attribute or adds a new attribute if not found. A value*/ /* of NULL will remove the specified attribute. Returns the tag given.*/ -static ezxml_t -ezxml_set_attr(ezxml_t xml, const char* name, const char* value) +ezxml_t ezxml_set_attr(ezxml_t xml, const char *name, const char *value) { int l = 0, c; @@ -984,31 +957,31 @@ ezxml_set_attr(ezxml_t xml, const char* name, const char* value) while (xml->attr[l] && strcmp(xml->attr[l], name)) l += 2; if (! xml->attr[l]) { /* not found, add as new attribute*/ if (! 
value) return xml; /* nothing to do*/ - if (xml->attr == (char**)EZXML_NIL) { /* first attribute*/ - xml->attr = malloc(4 * sizeof(char* )); + if (xml->attr == EZXML_NIL) { /* first attribute*/ + xml->attr = malloc(4 * sizeof(char *)); xml->attr[1] = strdup(""); /* empty list of malloced names/vals*/ } - else xml->attr = realloc(xml->attr, (l + 4) * sizeof(char* )); + else xml->attr = realloc(xml->attr, (l + 4) * sizeof(char *)); - xml->attr[l] = (char* )name; /* set attribute name*/ + xml->attr[l] = (char *)name; /* set attribute name*/ xml->attr[l + 2] = NULL; /* null terminate attribute list*/ xml->attr[l + 3] = realloc(xml->attr[l + 1], (c = strlen(xml->attr[l + 1])) + 2); strcpy(xml->attr[l + 3] + c, " "); /* set name/value as not malloced*/ if (xml->flags & EZXML_DUP) xml->attr[l + 3][c] = EZXML_NAMEM; } - else if (xml->flags & EZXML_DUP) free((char* )name); /* name was strduped*/ + else if (xml->flags & EZXML_DUP) free((char *)name); /* name was strduped*/ for (c = l; xml->attr[c]; c += 2); /* find end of attribute list*/ if (xml->attr[c + 1][l / 2] & EZXML_TXTM) free(xml->attr[l + 1]); /*old val*/ if (xml->flags & EZXML_DUP) xml->attr[c + 1][l / 2] |= EZXML_TXTM; else xml->attr[c + 1][l / 2] &= ~EZXML_TXTM; - if (value) xml->attr[l + 1] = (char* )value; /* set attribute value*/ + if (value) xml->attr[l + 1] = (char *)value; /* set attribute value*/ else { /* remove attribute*/ if (xml->attr[c + 1][l / 2] & EZXML_NAMEM) free(xml->attr[l]); memmove(xml->attr + l, xml->attr + l + 2, (c - l + 2) * sizeof(char*)); - xml->attr = realloc(xml->attr, (c + 2) * sizeof(char* )); + xml->attr = realloc(xml->attr, (c + 2) * sizeof(char *)); memmove(xml->attr[c + 1] + (l / 2), xml->attr[c + 1] + (l / 2) + 1, (c / 2) - (l / 2)); /* fix list of which name/vals are malloced*/ } @@ -1016,9 +989,15 @@ ezxml_set_attr(ezxml_t xml, const char* name, const char* value) return xml; } +/* sets a flag for the given tag and returns the tag*/ +ezxml_t ezxml_set_flag(ezxml_t xml, short 
flag) +{ + if (xml) xml->flags |= flag; + return xml; +} + /* removes a tag along with its subtags without freeing its memory*/ -static ezxml_t -ezxml_cut(ezxml_t xml) +ezxml_t ezxml_cut(ezxml_t xml) { ezxml_t cur; @@ -1045,9 +1024,31 @@ ezxml_cut(ezxml_t xml) while (cur->next && cur->next != xml) cur = cur->next; if (cur->next) cur->next = cur->next->next; /* patch next list*/ - } + } } xml->ordered = xml->sibling = xml->next = NULL; return xml; } -#endif + +#ifdef EZXML_TEST /* test harness*/ +int main(int argc, char **argv) +{ + ezxml_t xml; + char *s; + int i; + + if (argc != 2) return fprintf(stderr, "usage: %s xmlfile\n", argv[0]); + + xml = ezxml_parse_file(argv[1]); + + s = ezxml_toxml(xml); + if (s) { + printf("%s\n", s); + free(s); + } // bug#23 / CVE-2021-26220 + + i = fprintf(stderr, "%s", ezxml_error(xml)); + ezxml_free(xml); + return (i) ? 1 : 0; +} +#endif /* EZXML_TEST*/ diff --git a/include/ezxml.h b/libncxml/ezxml.h similarity index 88% rename from include/ezxml.h rename to libncxml/ezxml.h index 9a9637b1eb..be87cc93f9 100644 --- a/include/ezxml.h +++ b/libncxml/ezxml.h @@ -57,74 +57,69 @@ struct ezxml { /* structure. For efficiency, modifies the data by adding null terminators*/ /* and decoding ampersand sequences. If you don't want this, copy the data and*/ /* pass in the copy. Returns NULL on failure.*/ -ezxml_t nc_ezxml_parse_str(char *s, size_t len); +ezxml_t ezxml_parse_str(char *s, size_t len); + +/* A wrapper for ezxml_parse_str() that accepts a file descriptor. First*/ +/* attempts to mem map the file. Failing that, reads the file into memory.*/ +/* Returns NULL on failure.*/ +ezxml_t ezxml_parse_fd(int fd); + +/* a wrapper for ezxml_parse_fd() that accepts a file name*/ +ezxml_t ezxml_parse_file(const char *file); + +/* Wrapper for ezxml_parse_str() that accepts a file stream. Reads the entire*/ +/* stream into memory and then parses it. 
For xml files, use ezxml_parse_file()*/ +/* or ezxml_parse_fd()*/ +ezxml_t ezxml_parse_fp(FILE *fp); /* returns the first child tag (one level deeper) with the given name or NULL*/ /* if not found*/ -ezxml_t nc_ezxml_child(ezxml_t xml, const char *name); +ezxml_t ezxml_child(ezxml_t xml, const char *name); /* returns the next tag of the same name in the same section and depth or NULL*/ /* if not found*/ -#define nc_ezxml_next(xml) ((xml) ? (xml)->next : NULL) +#define ezxml_next(xml) ((xml) ? xml->next : NULL) /* Returns the Nth tag with the same name in the same section at the same depth*/ /* or NULL if not found. An index of 0 returns the tag given.*/ -ezxml_t nc_ezxml_idx(ezxml_t xml, int idx); +ezxml_t ezxml_idx(ezxml_t xml, int idx); /* returns the name of the given tag*/ -#define nc_ezxml_name(xml) ((xml) ? xml->name : NULL) +#define ezxml_name(xml) ((xml) ? xml->name : NULL) /* returns the given tag's character content or empty string if none*/ -#define nc_ezxml_txt(xml) ((xml) ? xml->txt : "") +#define ezxml_txt(xml) ((xml) ? xml->txt : "") /* returns the value of the requested tag attribute, or NULL if not found*/ -const char *nc_ezxml_attr(ezxml_t xml, const char *attr); +const char *ezxml_attr(ezxml_t xml, const char *attr); -/* Traverses the ezxml structure to retrieve a specific subtag. Takes a*/ +/* Traverses the ezxml structure to retrieve a specific subtag. Takes a*/ /* variable length list of tag names and indexes. The argument list must be*/ /* terminated by either an index of -1 or an empty string tag name. Example: */ /* title = ezxml_get(library, "shelf", 0, "book", 2, "title", -1);*/ /* This retrieves the title of the 3rd book on the 1st shelf of library.*/ /* Returns NULL if not found.*/ -ezxml_t nc_ezxml_get(ezxml_t xml, ...); +ezxml_t ezxml_get(ezxml_t xml, ...); /* Converts an ezxml structure back to xml. 
Returns a string of xml data that*/ /* must be freed.*/ -char *nc_ezxml_toxml(ezxml_t xml); +char *ezxml_toxml(ezxml_t xml); /* returns a NULL terminated array of processing instructions for the given*/ /* target*/ -const char **nc_ezxml_pi(ezxml_t xml, const char *target); +const char **ezxml_pi(ezxml_t xml, const char *target); /* frees the memory allocated for an ezxml structure*/ -void nc_ezxml_free(ezxml_t xml); - +void ezxml_free(ezxml_t xml); + /* returns parser error message or empty string if none*/ -const char *nc_ezxml_error(ezxml_t xml); - -const char** nc_ezxml_all_attr(ezxml_t xml, int* countp); - - -#if 0 +const char *ezxml_error(ezxml_t xml); /* returns a new empty ezxml structure with the given root tag name*/ -ezxml_t nc_ezxml_new(const char *name); +ezxml_t ezxml_new(const char *name); /* wrapper for ezxml_new() that strdup()s name*/ -#define nc_ezxml_new_d(name) ezxml_set_flag(ezxml_new(strdup(name)), EZXML_NAMEM) - -/* A wrapper for ezxml_parse_str() that accepts a file descriptor. First*/ -/* attempts to mem map the file. Failing that, reads the file into memory.*/ -/* Returns NULL on failure.*/ -ezxml_t ezxml_parse_fd(int fd); - -/* a wrapper for ezxml_parse_fd() that accepts a file name*/ -ezxml_t ezxml_parse_file(const char *file); - -/* Wrapper for ezxml_parse_str() that accepts a file stream. Reads the entire*/ -/* stream into memory and then parses it. For xml files, use ezxml_parse_file()*/ -/* or ezxml_parse_fd()*/ -ezxml_t ezxml_parse_fp(FILE *fp); +#define ezxml_new_d(name) ezxml_set_flag(ezxml_new(strdup(name)), EZXML_NAMEM) /* Adds a child tag. off is the offset of the child tag relative to the start*/ /* of the parent tag's character content. 
Returns the child tag.*/ @@ -165,8 +160,6 @@ ezxml_t ezxml_insert(ezxml_t xml, ezxml_t dest, size_t off); /* removes a tag along with all its subtags*/ #define ezxml_remove(xml) ezxml_free(ezxml_cut(xml)) -#endif /*0*/ - #ifdef __cplusplus } #endif diff --git a/libncxml/license.txt b/libncxml/license.txt new file mode 100644 index 0000000000..80e4e88460 --- /dev/null +++ b/libncxml/license.txt @@ -0,0 +1,20 @@ +Copyright 2004-2006 Aaron Voisine + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/libncxml/ncxml_ezxml.c b/libncxml/ncxml_ezxml.c new file mode 100644 index 0000000000..77a3adbcef --- /dev/null +++ b/libncxml/ncxml_ezxml.c @@ -0,0 +1,121 @@ +/* Copyright 2018-2018 University Corporation for Atmospheric + Research/Unidata. 
*/ + +#include +#include +#include "ncxml.h" +#include "ezxml.h" + +#ifndef nulldup +#define nulldup(s) ((s)?strdup(s):NULL) +#endif + +static int ncxml_initialized = 0; + +void +ncxml_initialize(void) +{ + ncxml_initialized = 1; +} + +void +ncxml_finalize(void) +{ + ncxml_initialized = 0; +} + +ncxml_doc_t +ncxml_parse(char* contents, size_t len) +{ + return (ncxml_t)ezxml_parse_str(contents,len); +} + +void +ncxml_free(ncxml_doc_t doc0) +{ + ezxml_t doc = (ezxml_t)doc0; + ezxml_free(doc); +} + +ncxml_t +ncxml_root(ncxml_doc_t doc0) +{ + ezxml_t doc = (ezxml_t)doc0; + return (ncxml_t)doc; +} + +const char* +ncxml_name(ncxml_t xml0) +{ + ezxml_t xml = (ezxml_t)xml0; + return (xml?xml->name:NULL); +} + +char* +ncxml_attr(ncxml_t xml0, const char* key) +{ + ezxml_t xml = (ezxml_t)xml0; + return nulldup(ezxml_attr(xml,key)); +} + +ncxml_t +ncxml_child(ncxml_t xml0, const char* name) +{ + ezxml_t xml = (ezxml_t)xml0; + return (ncxml_t)ezxml_child(xml,name); +} + +ncxml_t +ncxml_next(ncxml_t xml0, const char* name) +{ + ezxml_t xml = (ezxml_t)xml0; + (void)name; /* unused */ + return (ncxml_t)ezxml_next(xml); +} + +char* +ncxml_text(ncxml_t xml0) +{ + ezxml_t xml = (ezxml_t)xml0; + return (xml?strdup(xml->txt):strdup("")); +} + +/* Nameless versions of child and next */ +ncxml_t +ncxml_child_first(ncxml_t xml0) +{ + ezxml_t xml = (ezxml_t)xml0; + return (xml?xml->child:NULL); +} + +ncxml_t +ncxml_child_next(ncxml_t xml0) +{ + ezxml_t xml = (ezxml_t)xml0; + return (xml?xml->ordered:NULL); +} + +int +ncxml_attr_pairs(ncxml_t xml0, char*** pairsp) +{ + char** pairs = NULL; + ezxml_t xml = (ezxml_t)xml0; + if(xml) { + /* First count */ + int i,count = 0; + const char** p = (const char**)xml->attr; + for(count=0;*p;p+=2) + count++; /* pair count */ + pairs = (char**)malloc(sizeof(char*)*((2*count)+1)); + if(pairs == NULL) return 0; + p = (const char**)xml->attr; + for(i=0;*p;p+=2,i+=2) { + pairs[i] = strdup(p[0]); + pairs[i+1] = strdup(p[1]); + } + pairs[2*count] = NULL; + 
if(pairsp) *pairsp = pairs; + return 1; + } + return 0; +} diff --git a/libncxml/ncxml_xml2.c b/libncxml/ncxml_xml2.c new file mode 100644 index 0000000000..67e5f53f19 --- /dev/null +++ b/libncxml/ncxml_xml2.c @@ -0,0 +1,161 @@ +/* Copyright 2018-2018 University Corporation for Atmospheric Research/Unidata. */ + +#include +#include +#include +#include +#include "ncxml.h" + +#ifndef nulldup +#define nulldup(s) ((s)?strdup(s):NULL) +#endif + +static int ncxml_initialized = 0; + +void +ncxml_initialize(void) +{ + ncxml_initialized = 1; +} + +void +ncxml_finalize(void) +{ + ncxml_initialized = 0; + xmlCleanupParser(); +} + +ncxml_doc_t +ncxml_parse(char* contents, size_t len) +{ + xmlDocPtr doc; /* the resulting document tree */ + doc = xmlReadMemory(contents, (int)len, "dap4.xml", NULL, 0); + return (ncxml_doc_t)doc; +} + +void +ncxml_free(ncxml_doc_t doc0) +{ + xmlDoc *doc = (xmlDoc*)doc0; + xmlFreeDoc(doc); + } + +ncxml_t +ncxml_root(ncxml_doc_t doc0) +{ + xmlDoc *doc = (xmlDoc*)doc0; + return (ncxml_t)xmlDocGetRootElement(doc); +} + +const char* +ncxml_name(ncxml_t xml0) +{ + xmlNode* xml = (xmlNode*)xml0; + return (xml?xml->name:NULL); +} + +char* +ncxml_attr(ncxml_t xml0, const char* key) +{ + xmlNode* xml = (xmlNode*)xml0; + xmlChar* value = NULL; + char* s = NULL; + + value = xmlGetProp(xml,key); + s = nulldup((char*)value); + xmlFree(value); + return s; +} + +/* First child by name */ +ncxml_t +ncxml_child(ncxml_t xml0, const char* name) +{ + xmlNode* xml = (xmlNode*)xml0; + xmlNode* child = NULL; + + for(child=xml->children;child; child = child->next) { + if(child->type == XML_ELEMENT_NODE && strcmp(child->name,name)==0) + return (ncxml_t)child; + } + return NULL; +} + +ncxml_t +ncxml_next(ncxml_t xml0, const char* name) +{ + xmlNode* xml = (xmlNode*)xml0; + xmlNode* next = NULL; + + for(next=xml->next;next; next = next->next) { + if(next->type == XML_ELEMENT_NODE && strcmp(next->name,name)==0) + return (ncxml_t)next; + } + return NULL; +} + +char* 
+ncxml_text(ncxml_t xml0) +{ + xmlNode* xml = (xmlNode*)xml0; + xmlChar* txt = NULL; + char* s = NULL; + if(xml == NULL) return NULL; + txt = xmlNodeGetContent(xml); + s = nulldup((char*)txt); + xmlFree(txt); + return s; +} + +/* Nameless versions of child and next */ +ncxml_t +ncxml_child_first(ncxml_t xml0) +{ + xmlNode* xml = (xmlNode*)xml0; + xmlNode* child = NULL; + + if(xml == NULL) return NULL; + for(child=xml->children;child; child = child->next) { + if(child->type == XML_ELEMENT_NODE) return child; + } + return NULL; +} + +ncxml_t +ncxml_child_next(ncxml_t xml0) +{ + xmlNode* xml = (xmlNode*)xml0; + + if(xml == NULL) return NULL; + for(xml=xml->next;xml; xml = xml->next) { + if(xml->type == XML_ELEMENT_NODE) return xml; + } + return NULL; +} + +int +ncxml_attr_pairs(ncxml_t xml0, char*** pairsp) +{ + char** pairs = NULL; + xmlNode* xml = (xmlNode*)xml0; + xmlAttr* attr = NULL; + int i,count = 0; + + if(xml == NULL) return 0; + /* First count */ + for(attr=xml->properties;attr;attr=attr->next) count++; + /* Allocate */ + pairs = (char**)malloc(sizeof(char*)*((2*count)+1)); + if(pairs == NULL) return 0; + /* Collect */ + for(i=0,attr=xml->properties;attr;i+=2,attr=attr->next) { + xmlChar* value; + pairs[i] = nulldup((char*)attr->name); + value = xmlNodeListGetString(xml->doc, attr->children, 1); + pairs[i+1] = nulldup((char*)value); + xmlFree(value); + } + pairs[2*count] = NULL; + if(pairsp) *pairsp = pairs; + return 1; +} diff --git a/nczarr_test/run_interop.sh b/nczarr_test/run_interop.sh index 37fd6d6a5f..8867e9f549 100755 --- a/nczarr_test/run_interop.sh +++ b/nczarr_test/run_interop.sh @@ -87,9 +87,5 @@ exit # Cleanup rm -fr ${execdir}/ref_power_901_constants.file rm -f ${execdir}/ref_zarr_test_data.cdl -if test "x$srcdir" != "x$execdir" ; then - rm -fr ${execdir}/ref_power_901_constants.zip - rm -fr ${execdir}/ref_quotes.zip -fi exit 0