From 9652eca3e9557e728879c6339c752b8451ecd47f Mon Sep 17 00:00:00 2001 From: Remi Chateauneu Date: Sun, 28 Feb 2021 19:56:21 +0000 Subject: [PATCH 1/5] Speedup. --- rdflib/plugins/parsers/notation3.py | 264 +++++++++++++++------------- 1 file changed, 139 insertions(+), 125 deletions(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index ad90e67b3..d67185f58 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -137,7 +137,7 @@ def join(here, there): return here + frag # join('mid:foo@example', '../foo') bzzt - if here[bcolonl + 1 : bcolonl + 2] != "/": + if here[bcolonl + 1] != "/": raise ValueError( "Base <%s> has no slash after " "colon - with relative '%s'." % (here, there) @@ -503,18 +503,19 @@ def tok(self, tok, argstr, i, colon=False): """ assert tok[0] not in _notNameChars # not for punctuation - if argstr[i : i + 1] == "@": - i = i + 1 + if argstr[i] == "@": + i += 1 else: if tok not in self.keywords: return -1 # No, this has neither keywords declaration nor "@" + len_tok = len(tok) if ( - argstr[i : i + len(tok)] == tok - and (argstr[i + len(tok)] in _notKeywordsChars) - or (colon and argstr[i + len(tok)] == ":") + argstr[i : i + len_tok] == tok + and (argstr[i + len_tok] in _notKeywordsChars) + or (colon and argstr[i + len_tok] == ":") ): - i = i + len(tok) + i += len_tok return i else: return -1 @@ -527,10 +528,11 @@ def sparqlTok(self, tok, argstr, i): assert tok[0] not in _notNameChars # not for punctuation - if argstr[i : i + len(tok)].lower() == tok.lower() and ( - argstr[i + len(tok)] in _notQNameChars + len_tok = len(tok) + if argstr[i : i + len_tok].lower() == tok.lower() and ( + argstr[i + len_tok] in _notQNameChars ): - i = i + len(tok) + i += len_tok return i else: return -1 @@ -802,10 +804,10 @@ def verb(self, argstr, i, res): res.append(("<-", self._store.newSymbol(Logic_NS + "implies"))) return i + 2 - if argstr[i : i + 1] == "=": + if argstr[i] == "=": if self.turtle: self.BadSyntax(argstr, i, "Found '=' in Turtle mode") - if argstr[i + 1 : i + 2] == ">": + if argstr[i + 1] == ">": res.append(("->", self._store.newSymbol(Logic_NS + "implies"))) return i + 2 res.append(("->", DAML_sameAs)) @@ -845,8 +847,8 @@ def path(self, argstr, i, res): if j < 0: return j # nope - while argstr[j : j + 1] in "!^": # no spaces, must follow exactly (?) - ch = argstr[j : j + 1] + while argstr[j] in "!^": # no spaces, must follow exactly (?) + ch = argstr[j] subj = res.pop() obj = self.blankNode(uri=self.here(j)) j = self.node(argstr, j + 1, res) @@ -880,7 +882,7 @@ def node(self, argstr, i, res, subjectAlready=None): if j < 0: return j # eof i = j - ch = argstr[i : i + 1] # Quick 1-character checks first: + ch = argstr[i] # Quick 1-character checks first: if ch == "[": bnodeID = self.here(i) @@ -888,7 +890,7 @@ def node(self, argstr, i, res, subjectAlready=None): if j < 0: self.BadSyntax(argstr, i, "EOF after '['") # Hack for "is" binding name to anon node - if argstr[j : j + 1] == "=": + if argstr[j] == "=": if self.turtle: self.BadSyntax( argstr, j, "Found '[=' or '[ =' when in turtle mode." @@ -906,8 +908,8 @@ def node(self, argstr, i, res, subjectAlready=None): self.BadSyntax( argstr, i, "EOF when objectList expected after [ = " ) - if argstr[j : j + 1] == ";": - j = j + 1 + if argstr[j] == ";": + j += 1 else: self.BadSyntax(argstr, i, "objectList expected after [= ") @@ -923,7 +925,7 @@ def node(self, argstr, i, res, subjectAlready=None): self.BadSyntax( argstr, i, "EOF when ']' expected after [ " ) - if argstr[j : j + 1] != "]": + if argstr[j] != "]": self.BadSyntax(argstr, j, "']' expected") res.append(subj) return j + 1 @@ -932,7 +934,7 @@ def node(self, argstr, i, res, subjectAlready=None): # if self.turtle: # self.BadSyntax(argstr, i, # "found '{' while in Turtle mode, Formulas not supported!") - ch2 = argstr[i + 1 : i + 2] + ch2 = argstr[i + 1] if ch2 == "$": # a set i += 1 @@ -948,7 +950,7 @@ def node(self, argstr, i, res, subjectAlready=None): break if not first_run: - if argstr[i : i + 1] == ",": + if argstr[i] == ",": i += 1 else: self.BadSyntax(argstr, i, "expected: ','") @@ -983,7 +985,7 @@ def node(self, argstr, i, res, subjectAlready=None): if i < 0: self.BadSyntax(argstr, i, "needed '}', found end.") - if argstr[i : i + 1] == "}": + if argstr[i] == "}": j = i + 1 break @@ -1002,7 +1004,7 @@ def node(self, argstr, i, res, subjectAlready=None): if ch == "(": thing_type = self._store.newList - ch2 = argstr[i + 1 : i + 2] + ch2 = argstr[i + 1] if ch2 == "$": thing_type = self._store.newSet i += 1 @@ -1013,7 +1015,7 @@ def node(self, argstr, i, res, subjectAlready=None): i = self.skipSpace(argstr, j) if i < 0: self.BadSyntax(argstr, i, "needed ')', found end.") - if argstr[i : i + 1] == ")": + if argstr[i] == ")": j = i + 1 break @@ -1096,9 +1098,9 @@ def property_list(self, argstr, i, subj): j = self.skipSpace(argstr, i) if j < 0: self.BadSyntax(argstr, j, "EOF found in list of objects") - if argstr[i : i + 1] != ";": + if argstr[i] != ";": return i - i = i + 1 # skip semicolon and continue + i += 1 # skip semicolon and continue def commaSeparatedList(self, argstr, j, res, what): """return value: -1 bad syntax; >1 new position in argstr @@ -1117,7 +1119,7 @@ def commaSeparatedList(self, argstr, j, res, what): j = self.skipSpace(argstr, i) if j < 0: return j # eof - ch = argstr[j : j + 1] + ch = argstr[j] if ch != ",": if ch != ".": return -1 @@ -1134,7 +1136,7 @@ def objectList(self, argstr, i, res): j = self.skipSpace(argstr, i) if j < 0: self.BadSyntax(argstr, j, "EOF found after object") - if argstr[j : j + 1] != ",": + if argstr[j] != ",": return j # Found something else! i = self.object(argstr, j + 1, res) if i < 0: @@ -1144,11 +1146,12 @@ def checkDot(self, argstr, i): j = self.skipSpace(argstr, i) if j < 0: return j # eof - if argstr[j : j + 1] == ".": + ch = argstr[j] + if ch == ".": return j + 1 # skip - if argstr[j : j + 1] == "}": + if ch == "}": return j # don't skip it - if argstr[j : j + 1] == "]": + if ch == "]": return j self.BadSyntax(argstr, j, "expected '.' or '}' or ']' at end of statement") @@ -1178,10 +1181,7 @@ def uri_ref2(self, argstr, i, res): else: self.BadSyntax(argstr, i, 'Prefix "%s:" not bound' % (pfx)) symb = self._store.newSymbol(ns + ln) - if symb in self._variables: - res.append(self._variables[symb]) - else: - res.append(symb) # @@@ "#" CONVENTION + res.append(self._variables.get(symb, symb)) return j i = self.skipSpace(argstr, i) @@ -1197,31 +1197,26 @@ def uri_ref2(self, argstr, i, res): return -1 elif argstr[i] == "<": - i = i + 1 - st = i - while i < len(argstr): - if argstr[i] == ">": - uref = argstr[st:i] # the join should dealt with "": - - # expand unicode escapes - uref = unicodeEscape8.sub(unicodeExpand, uref) - uref = unicodeEscape4.sub(unicodeExpand, uref) - - if self._baseURI: - uref = join(self._baseURI, uref) # was: uripath.join - else: - assert ( - ":" in uref - ), "With no base URI, cannot deal with relative URIs" - if argstr[i - 1 : i] == "#" and not uref[-1:] == "#": - uref = uref + "#" # She meant it! Weirdness in urlparse? - symb = self._store.newSymbol(uref) - if symb in self._variables: - res.append(self._variables[symb]) - else: - res.append(symb) - return i + 1 - i = i + 1 + st = i + 1 + i = argstr.find(">", st) + if i >= 0: + uref = argstr[st:i] # the join should dealt with "": + + # expand unicode escapes + uref = unicodeEscape8.sub(unicodeExpand, uref) + uref = unicodeEscape4.sub(unicodeExpand, uref) + + if self._baseURI: + uref = join(self._baseURI, uref) # was: uripath.join + else: + assert ( + ":" in uref + ), "With no base URI, cannot deal with relative URIs" + if argstr[i - 1] == "#" and not uref[-1:] == "#": + uref += "#" # She meant it! Weirdness in urlparse? + symb = self._store.newSymbol(uref) + res.append(self._variables.get(symb, symb)) + return i + 1 self.BadSyntax(argstr, j, "unterminated URI reference") elif self.keywordsSet: @@ -1239,20 +1234,31 @@ def uri_ref2(self, argstr, i, res): def skipSpace(self, argstr, i): """Skip white space, newlines and comments. return -1 if EOF, else position of first non-ws character""" + + # Most common case is a non-commented line starting with few spaces and tabs. + try: + while True: + ch = argstr[i] + if ch in " \t": + i += 1 + continue + elif ch not in "#\r\n": + return i + break + except IndexError: + return -1 + while 1: m = eol.match(argstr, i) if m is None: break - self.lines = self.lines + 1 - i = m.end() # Point to first character unmatched - self.startOfLine = i + self.lines += 1 + self.startOfLine = i = m.end() # Point to first character unmatched m = ws.match(argstr, i) if m is not None: i = m.end() m = eof.match(argstr, i) - if m is not None: - return -1 - return i + return i if m is None else -1 def variable(self, argstr, i, res): """ ?abc -> variable(:abc) @@ -1262,14 +1268,15 @@ def variable(self, argstr, i, res): if j < 0: return -1 - if argstr[j : j + 1] != "?": + if argstr[j] != "?": return -1 - j = j + 1 + j += 1 i = j if argstr[j] in "0123456789-": self.BadSyntax(argstr, j, "Varible name can't start with '%s'" % argstr[j]) - while i < len(argstr) and argstr[i] not in _notKeywordsChars: - i = i + 1 + len_argstr = len(argstr) + while i < len_argstr and argstr[i] not in _notKeywordsChars: + i += 1 if self._parentContext is None: varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) if varURI not in self._variables: @@ -1300,8 +1307,9 @@ def bareWord(self, argstr, i, res): if argstr[j] in "0123456789-" or argstr[j] in _notKeywordsChars: return -1 i = j - while i < len(argstr) and argstr[i] not in _notKeywordsChars: - i = i + 1 + len_argstr = len(argstr) + while i < len_argstr and argstr[i] not in _notKeywordsChars: + i += 1 res.append(argstr[j:i]) return i @@ -1319,27 +1327,27 @@ def qname(self, argstr, i, res): c = argstr[i] if c in "0123456789-+.": return -1 + len_argstr = len(argstr) if c not in _notNameChars: - ln = c - i = i + 1 - while i < len(argstr): - c = argstr[i] - if c not in _notNameChars: - ln = ln + c - i = i + 1 - else: - break + j = i + i += 1 + + try: + while argstr[i] not in _notNameChars: + i += 1 + except IndexError: + pass # Very rare. if argstr[i - 1] == ".": # qname cannot end with "." - ln = ln[:-1] - if not ln: - return -1 i -= 1 + if i == j: + return -1 + ln = argstr[j:i] else: # First character is non-alpha ln = "" # Was: None - TBL (why? useful?) - if i < len(argstr) and argstr[i] == ":": + if i < len_argstr and argstr[i] == ":": pfx = ln # bnodes names have different rules if pfx == "_": @@ -1347,18 +1355,18 @@ def qname(self, argstr, i, res): else: allowedChars = _notQNameChars - i = i + 1 + i += 1 lastslash = False - # start = i # TODO first char . + start = i ln = "" - while i < len(argstr): + while i < len_argstr: c = argstr[i] - if not lastslash and c == "\\": + if c == "\\" and not lastslash: # Very rare. lastslash = True - i += 1 - - elif lastslash or c not in allowedChars: - + if start < i: + ln += argstr[start:i] + start = i + 1 + elif c not in allowedChars or lastslash: # Most common case is "a-zA-Z" if lastslash: if c not in escapeChars: raise BadSyntax( @@ -1368,7 +1376,7 @@ def qname(self, argstr, i, res): i, "illegal escape " + c, ) - elif c == "%": + elif c == "%": # Very rare. if ( argstr[i + 1] not in hexChars or argstr[i + 2] not in hexChars @@ -1380,12 +1388,10 @@ def qname(self, argstr, i, res): i, "illegal hex escape " + c, ) - - ln = ln + c - i = i + 1 lastslash = False else: break + i += 1 if lastslash: raise BadSyntax( @@ -1394,11 +1400,13 @@ def qname(self, argstr, i, res): if argstr[i - 1] == ".": # localname cannot end in . - ln = ln[:-1] - if not ln: + if len(ln) == 0 and start == i: return -1 i -= 1 + if start < i: + ln += argstr[start:i] + res.append((pfx, ln)) return i @@ -1419,12 +1427,15 @@ def object(self, argstr, i, res): else: i = j - if argstr[i] in self.string_delimiters: - if argstr[i : i + 3] == argstr[i] * 3: - delim = argstr[i] * 3 + ch = argstr[i] + if ch in self.string_delimiters: + ch_three = ch * 3 + if argstr[i : i + 3] == ch_three: + delim = ch_three + i += 3 else: - delim = argstr[i] - i = i + len(delim) + delim = ch + i += 1 j, s = self.strconst(argstr, i, delim) @@ -1467,17 +1478,19 @@ def nodeOrLiteral(self, argstr, i, res): # return -1 ## or fall through? - if argstr[i] in self.string_delimiters: - if argstr[i : i + 3] == argstr[i] * 3: - delim = argstr[i] * 3 + ch_three = ch * 3 + if ch in self.string_delimiters: + if argstr[i : i + 3] == ch_three: + delim = ch_three + i += 3 else: - delim = argstr[i] - i = i + len(delim) + delim = ch + i += 1 dt = None j, s = self.strconst(argstr, i, delim) lang = None - if argstr[j : j + 1] == "@": # Language? + if argstr[j] == "@": # Language? m = langcode.match(argstr, j + 1) if m is None: raise BadSyntax( @@ -1515,7 +1528,8 @@ def strconst(self, argstr, i, delim): j = i ustr = "" # Empty unicode string startline = self.lines # Remember where for error messages - while j < len(argstr): + len_argstr = len(argstr) + while j < len_argstr: if argstr[j] == delim1: if delim == delim1: # done when delim is " or ' i = j + 1 @@ -1525,19 +1539,19 @@ def strconst(self, argstr, i, delim): ): # done when delim is """ or ''' and, respectively ... if argstr[j : j + 5] == delim5: # ... we have "" or '' before i = j + 5 - ustr = ustr + delim2 + ustr += delim2 return i, ustr if argstr[j : j + 4] == delim4: # ... we have " or ' before i = j + 4 - ustr = ustr + delim1 + ustr += delim1 return i, ustr if argstr[j : j + 3] == delim3: # current " or ' is part of delim i = j + 3 return i, ustr # we are inside of the string and current char is " or ' - j = j + 1 - ustr = ustr + delim1 + j += 1 + ustr += delim1 continue m = interesting.search(argstr, j) # was argstr[j:]. @@ -1549,7 +1563,7 @@ def strconst(self, argstr, i, delim): i = m.start() try: - ustr = ustr + argstr[j:i] + ustr += argstr[j:i] except UnicodeError: err = "" for c in argstr[j:i]: @@ -1570,8 +1584,8 @@ def strconst(self, argstr, i, delim): if ch == delim1: j = i continue - elif ch in ('"', "'") and ch != delim1: - ustr = ustr + ch + elif ch in "\"'" and ch != delim1: + ustr += ch j = i + 1 continue elif ch in "\r\n": @@ -1583,14 +1597,14 @@ def strconst(self, argstr, i, delim): i, "newline found in string literal", ) - self.lines = self.lines + 1 - ustr = ustr + ch + self.lines += 1 + ustr += ch j = i + 1 self.startOfLine = j elif ch == "\\": j = i + 1 - ch = argstr[j : j + 1] # Will be empty if string ends + ch = argstr[j] # Will be empty if string ends if not ch: raise BadSyntax( self._thisDoc, @@ -1602,14 +1616,14 @@ def strconst(self, argstr, i, delim): k = "abfrtvn\\\"'".find(ch) if k >= 0: uch = "\a\b\f\r\t\v\n\\\"'"[k] - ustr = ustr + uch - j = j + 1 + ustr += uch + j += 1 elif ch == "u": j, ch = self.uEscape(argstr, j + 1, startline) - ustr = ustr + ch + ustr += ch elif ch == "U": j, ch = self.UEscape(argstr, j + 1, startline) - ustr = ustr + ch + ustr += ch else: self.BadSyntax(argstr, i, "bad escape") From 5c6e279820bb6f930a8e896fb4bf36cb80c18512 Mon Sep 17 00:00:00 2001 From: Remi Chateauneu Date: Mon, 1 Mar 2021 11:12:03 +0000 Subject: [PATCH 2/5] Cleaner code. --- rdflib/plugins/parsers/notation3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index d67185f58..e4eaccb7b 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -1584,7 +1584,7 @@ def strconst(self, argstr, i, delim): if ch == delim1: j = i continue - elif ch in "\"'" and ch != delim1: + elif ch in ['"', "'"] and ch != delim1: ustr += ch j = i + 1 continue From 6580da099a518f05561f2c15d8b1cdb6fabfd7fb Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Mon, 1 Mar 2021 17:13:03 +0100 Subject: [PATCH 3/5] Use lists instead of strings for set of chars --- rdflib/plugins/parsers/notation3.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index e4eaccb7b..99f6cc161 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -303,14 +303,15 @@ def becauseSubexpression(*args, **kargs): # characters. The XML spec switched to assuming unknown things were name # characaters. # _namechars = string.lowercase + string.uppercase + string.digits + '_-' -_notQNameChars = "\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~" # else valid qname :-/ -_notKeywordsChars = _notQNameChars + "." -_notNameChars = _notQNameChars + ":" # Assume anything else valid name :-/ +_notQNameChars = list("\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~") # else valid qname :-/ +_notKeywordsChars = _notQNameChars + ["."] +_notNameChars = _notQNameChars + [":"] # Assume anything else valid name :-/ _rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" -hexChars = "ABCDEFabcdef0123456789" -escapeChars = "(_~.-!$&'()*+,;=/?#@%)" # valid for \ escapes in localnames - +hexChars = list("ABCDEFabcdef0123456789") +escapeChars = list("(_~.-!$&'()*+,;=/?#@%)") # valid for \ escapes in localnames +numberChars = list("0123456789-") +numberCharsPlus = numberChars + ["+", "."] def unicodeExpand(m): try: @@ -847,7 +848,7 @@ def path(self, argstr, i, res): if j < 0: return j # nope - while argstr[j] in "!^": # no spaces, must follow exactly (?) + while argstr[j] in ["!", "^"]: # no spaces, must follow exactly (?) ch = argstr[j] subj = res.pop() obj = self.blankNode(uri=self.here(j)) @@ -1239,10 +1240,10 @@ def skipSpace(self, argstr, i): try: while True: ch = argstr[i] - if ch in " \t": + if ch in [" ", "\t"]: i += 1 continue - elif ch not in "#\r\n": + elif ch not in ["#", "\r", "\n"]: return i break except IndexError: @@ -1272,7 +1273,7 @@ def variable(self, argstr, i, res): return -1 j += 1 i = j - if argstr[j] in "0123456789-": + if argstr[j] in numberChars: self.BadSyntax(argstr, j, "Varible name can't start with '%s'" % argstr[j]) len_argstr = len(argstr) while i < len_argstr and argstr[i] not in _notKeywordsChars: @@ -1304,7 +1305,7 @@ def bareWord(self, argstr, i, res): if j < 0: return -1 - if argstr[j] in "0123456789-" or argstr[j] in _notKeywordsChars: + if argstr[j] in numberChars or argstr[j] in _notKeywordsChars: return -1 i = j len_argstr = len(argstr) @@ -1325,7 +1326,7 @@ def qname(self, argstr, i, res): return -1 c = argstr[i] - if c in "0123456789-+.": + if c in numberCharsPlus: return -1 len_argstr = len(argstr) if c not in _notNameChars: @@ -1457,7 +1458,7 @@ def nodeOrLiteral(self, argstr, i, res): i = j ch = argstr[i] - if ch in "-+0987654321.": + if ch in numberCharsPlus: m = exponent_syntax.match(argstr, i) if m: j = m.end() @@ -1588,7 +1589,7 @@ def strconst(self, argstr, i, delim): ustr += ch j = i + 1 continue - elif ch in "\r\n": + elif ch in ["\r", "\n"]: if delim == delim1: raise BadSyntax( self._thisDoc, From eefaa374a01dbfef45da3984699b83f75962a0cb Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Mon, 1 Mar 2021 17:43:39 +0100 Subject: [PATCH 4/5] Use set instead of string or list for set of chars --- rdflib/plugins/parsers/notation3.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index 99f6cc161..4bb6b5dfa 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -303,15 +303,15 @@ def becauseSubexpression(*args, **kargs): # characters. The XML spec switched to assuming unknown things were name # characaters. # _namechars = string.lowercase + string.uppercase + string.digits + '_-' -_notQNameChars = list("\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~") # else valid qname :-/ -_notKeywordsChars = _notQNameChars + ["."] -_notNameChars = _notQNameChars + [":"] # Assume anything else valid name :-/ +_notQNameChars = set("\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~") # else valid qname :-/ +_notKeywordsChars = _notQNameChars | {"."} +_notNameChars = _notQNameChars | {":"} # Assume anything else valid name :-/ _rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" -hexChars = list("ABCDEFabcdef0123456789") -escapeChars = list("(_~.-!$&'()*+,;=/?#@%)") # valid for \ escapes in localnames -numberChars = list("0123456789-") -numberCharsPlus = numberChars + ["+", "."] +hexChars = set("ABCDEFabcdef0123456789") +escapeChars = set("(_~.-!$&'()*+,;=/?#@%)") # valid for \ escapes in localnames +numberChars = set("0123456789-") +numberCharsPlus = numberChars | {"+", "."} def unicodeExpand(m): try: @@ -848,7 +848,7 @@ def path(self, argstr, i, res): if j < 0: return j # nope - while argstr[j] in ["!", "^"]: # no spaces, must follow exactly (?) + while argstr[j] in {"!", "^"}: # no spaces, must follow exactly (?) ch = argstr[j] subj = res.pop() obj = self.blankNode(uri=self.here(j)) @@ -1240,10 +1240,10 @@ def skipSpace(self, argstr, i): try: while True: ch = argstr[i] - if ch in [" ", "\t"]: + if ch in {" ", "\t"}: i += 1 continue - elif ch not in ["#", "\r", "\n"]: + elif ch not in {"#", "\r", "\n"}: return i break except IndexError: @@ -1585,11 +1585,11 @@ def strconst(self, argstr, i, delim): if ch == delim1: j = i continue - elif ch in ['"', "'"] and ch != delim1: + elif ch in {'"', "'"} and ch != delim1: ustr += ch j = i + 1 continue - elif ch in ["\r", "\n"]: + elif ch in {"\r", "\n"}: if delim == delim1: raise BadSyntax( self._thisDoc, From 9653eefb1a51de751c44dab3c072cf190a85844e Mon Sep 17 00:00:00 2001 From: Remi Chateauneu Date: Mon, 1 Mar 2021 17:51:58 +0000 Subject: [PATCH 5/5] Speedup tok. --- rdflib/plugins/parsers/notation3.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index 4bb6b5dfa..3cf19ace5 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -510,14 +510,13 @@ def tok(self, tok, argstr, i, colon=False): if tok not in self.keywords: return -1 # No, this has neither keywords declaration nor "@" - len_tok = len(tok) + i_plus_len_tok = i + len(tok) if ( - argstr[i : i + len_tok] == tok - and (argstr[i + len_tok] in _notKeywordsChars) - or (colon and argstr[i + len_tok] == ":") + argstr[i : i_plus_len_tok] == tok + and (argstr[i_plus_len_tok] in _notKeywordsChars) + or (colon and argstr[i_plus_len_tok] == ":") ): - i += len_tok - return i + return i_plus_len_tok else: return -1