Skip to content

Commit

Permalink
[JSInterp] Improve performance
Browse files Browse the repository at this point in the history
Pull out some constant regex expressions and change a loop over assignment operators into a regex alternative match
  • Loading branch information
dirkf committed Feb 17, 2022
1 parent 74f8cc4 commit 200ce6c
Showing 1 changed file with 35 additions and 28 deletions.
63 changes: 35 additions & 28 deletions youtube_dl/jsinterp.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ class Nonlocal:
('/', operator.truediv),
('*', operator.mul),
]
_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right)))
_ASSIGN_OPERATORS = dict((op + '=', opfunc) for op, opfunc in _OPERATORS)
_ASSIGN_OPERATORS['='] = lambda cur, right: right

_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'

Expand Down Expand Up @@ -84,6 +84,28 @@ def __repr__(self):


class JSInterpreter(object):
_EXPR_SPLIT_RE = (
r'''(?x)
(?P<pre_sign>\+\+|--)(?P<var1>%(_NAME_RE)s)|
(?P<var2>%(_NAME_RE)s)(?P<post_sign>\+\+|--)'''
% {'_NAME_RE': _NAME_RE, })
_VARNAME_RE = r'(?!if|return|true|false|null)(?P<name>%s)$' % _NAME_RE
_ARRAY_REF_RE = r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE
_FN_CALL_RE = r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE
_MEMBER_REF_RE = (
r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*'
% _NAME_RE)
_FN_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
_FN_DEF_RE = (
r'(?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}'
% _FN_NAME_RE)
_ASSIGN_EXPR_RE = (
r'''(?x)
(?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
\s*(?P<op>%s)
(?P<expr>.*)$'''
% (_NAME_RE, '|'.join(re.escape(op) for op in _ASSIGN_OPERATORS.keys())))

def __init__(self, code, objects=None):
if objects is None:
objects = {}
Expand Down Expand Up @@ -269,9 +291,7 @@ def raise_constructor_error(c):
for sub_expr in sub_expressions:
self.interpret_expression(sub_expr, local_vars, allow_recursion)

for m in re.finditer(r'''(?x)
(?P<pre_sign>\+\+|--)(?P<var1>%(_NAME_RE)s)|
(?P<var2>%(_NAME_RE)s)(?P<post_sign>\+\+|--)''' % globals(), expr):
for m in re.finditer(self._EXPR_SPLIT_RE, expr):
var = m.group('var1') or m.group('var2')
start, end = m.span()
sign = m.group('pre_sign') or m.group('post_sign')
Expand All @@ -281,13 +301,10 @@ def raise_constructor_error(c):
ret = local_vars[var]
expr = expr[:start] + json.dumps(ret) + expr[end:]

for op, opfunc in _ASSIGN_OPERATORS:
m = re.match(r'''(?x)
(?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
\s*%s
(?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
if not m:
continue
m = re.match(self._ASSIGN_EXPR_RE, expr)
if m:
op = m.group('op')
opfunc = _ASSIGN_OPERATORS[op]
right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion)

if m.groupdict().get('index'):
Expand All @@ -313,9 +330,7 @@ def raise_constructor_error(c):
elif expr == 'continue':
raise JS_Continue()

var_m = re.match(
r'(?!if|return|true|false|null)(?P<name>%s)$' % _NAME_RE,
expr)
var_m = re.match(self._VARNAME_RE, expr)
if var_m:
return local_vars[var_m.group('name')]

Expand All @@ -324,8 +339,7 @@ def raise_constructor_error(c):
except ValueError:
pass

m = re.match(
r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
m = re.match(self._ARRAY_REF_RE, expr)
if m:
val = local_vars[m.group('in')]
idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
Expand All @@ -350,9 +364,7 @@ def raise_expr_error(where, op, exp):
raise_expr_error('right-side', op, expr)
return opfunc(left_val or 0, right_val)

m = re.match(
r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*' % _NAME_RE,
expr)
m = re.match(self._MEMBER_REF_RE, expr)
if m:
variable = m.group('var')
nl = Nonlocal()
Expand Down Expand Up @@ -469,7 +481,7 @@ def eval_method():
else:
return eval_method()

m = re.match(r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
m = re.match(self._FN_CALL_RE, expr)
if m:
fname = m.group('func')
argvals = tuple([
Expand All @@ -485,22 +497,17 @@ def eval_method():
raise ExtractorError('Unsupported JS expression %r' % expr)

def extract_object(self, objname):
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
obj = {}
obj_m = re.search(
r'''(?x)
(?<!this\.)%s\s*=\s*{\s*
(?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
}\s*;
''' % (re.escape(objname), _FUNC_NAME_RE),
''' % (re.escape(objname), self._FN_NAME_RE),
self.code)
fields = obj_m.group('fields')
# Currently, it only supports function definitions
fields_m = re.finditer(
r'''(?x)
(?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
''' % _FUNC_NAME_RE,
fields)
fields_m = re.finditer(self._FN_DEF_RE, fields)
for f in fields_m:
argnames = f.group('args').split(',')
obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))
Expand Down

0 comments on commit 200ce6c

Please sign in to comment.