Skip to content

Commit

Permalink
CLN: Lint for lists instead of generators in built-in Python functions (
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored and jreback committed Nov 19, 2017
1 parent f1b1158 commit 410ad37
Show file tree
Hide file tree
Showing 60 changed files with 169 additions and 159 deletions.
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/frame_ctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def setup(self, offset, n_steps):
offset = getattr(offsets, offset)
self.idx = get_index_for_offset(offset(n_steps, **kwargs))
self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx)
self.d = dict([(col, self.df[col]) for col in self.df.columns])
self.d = dict(self.df.items())

def time_frame_ctor(self, offset, n_steps):
DataFrame(self.d)
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ class read_json_lines(object):
def setup(self):
self.N = 100000
self.C = 5
self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]))
self.df = DataFrame({'float{0}'.format(i): randn(self.N) for i in range(self.C)})
self.df.to_json(self.fname,orient="records",lines=True)

def teardown(self):
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def _setup(self):
self.N = 100000
self.C = 5
self.index = date_range('20000101', periods=self.N, freq='H')
self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index)
self.df = DataFrame(dict(('float{0}'.format(i), randn(self.N)) for i in range(self.C)), index=self.index)
self.df2 = self.df.copy()
self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)]
self.remove(self.f)
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/vbench_to_asv.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def visit_ClassDef(self, node):
return node

def visit_TryExcept(self, node):
if any([isinstance(x, (ast.Import, ast.ImportFrom)) for x in node.body]):
if any(isinstance(x, (ast.Import, ast.ImportFrom)) for x in node.body):
self.imports.append(node)
else:
self.generic_visit(node)
Expand Down
13 changes: 13 additions & 0 deletions ci/lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,19 @@ if [ "$LINT" ]; then
fi
echo "Check for invalid testing DONE"

echo "Check for use of lists instead of generators in built-in Python functions"

# Example: Avoid `any([i for i in some_iterator])` in favor of `any(i for i in some_iterator)`
#
# Check the following functions:
# any(), all(), sum(), max(), min(), list(), dict(), set(), frozenset(), tuple(), str.join()
grep -R --include="*.py*" -E "[^_](any|all|sum|max|min|list|dict|set|frozenset|tuple|join)\(\[.* for .* in .*\]\)"

if [ $? = "0" ]; then
RET=1
fi
echo "Check for use of lists instead of generators in built-in Python functions DONE"

else
echo "NOT Linting"
fi
Expand Down
4 changes: 2 additions & 2 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
# JP: added from sphinxdocs
autosummary_generate = False

if any([re.match("\s*api\s*", l) for l in index_rst_lines]):
if any(re.match("\s*api\s*", l) for l in index_rst_lines):
autosummary_generate = True

files_to_delete = []
Expand All @@ -89,7 +89,7 @@

_file_basename = os.path.splitext(f)[0]
_regex_to_match = "\s*{}\s*$".format(_file_basename)
if not any([re.match(_regex_to_match, line) for line in index_rst_lines]):
if not any(re.match(_regex_to_match, line) for line in index_rst_lines):
files_to_delete.append(f)

if files_to_delete:
Expand Down
2 changes: 1 addition & 1 deletion doc/sphinxext/ipython_sphinxext/ipython_directive.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ def process_output(self, data, output_prompt,
source = self.directive.state.document.current_source
content = self.directive.content
# Add tabs and join into a single string.
content = '\n'.join([TAB + line for line in content])
content = '\n'.join(TAB + line for line in content)

# Make sure the output contains the output prompt.
ind = found.find(output_prompt)
Expand Down
2 changes: 1 addition & 1 deletion doc/sphinxext/numpydoc/compiler_unparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ def _Return(self, t):
self._fill("return ")
if t.value:
if isinstance(t.value, Tuple):
text = ', '.join([ name.name for name in t.value.asList() ])
text = ', '.join(name.name for name in t.value.asList())
self._write(text)
else:
self._dispatch(t.value)
Expand Down
4 changes: 2 additions & 2 deletions doc/sphinxext/numpydoc/docscrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def _parse_summary(self):
# If several signatures present, take the last one
while True:
summary = self._doc.read_to_next_empty_line()
summary_str = " ".join([s.strip() for s in summary]).strip()
summary_str = " ".join(s.strip() for s in summary).strip()
if re.compile('^([\w., ]+=)?\s*[\w\.]+\(.*\)$').match(summary_str):
self['Signature'] = summary_str
if not self._is_at_section():
Expand All @@ -289,7 +289,7 @@ def _parse(self):

for (section,content) in self._read_sections():
if not section.startswith('..'):
section = ' '.join([s.capitalize() for s in section.split(' ')])
section = ' '.join(s.capitalize() for s in section.split(' '))
if section in ('Parameters', 'Returns', 'Raises', 'Warns',
'Other Parameters', 'Attributes', 'Methods'):
self[section] = self._parse_param_list(content)
Expand Down
4 changes: 2 additions & 2 deletions doc/sphinxext/numpydoc/docscrape_sphinx.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def _str_member_list(self, name):
out += [''] + autosum

if others:
maxlen_0 = max(3, max([len(x[0]) for x in others]))
maxlen_0 = max(3, max(len(x[0]) for x in others))
hdr = sixu("=")*maxlen_0 + sixu(" ") + sixu("=")*10
fmt = sixu('%%%ds %%s ') % (maxlen_0,)
out += ['', hdr]
Expand Down Expand Up @@ -203,7 +203,7 @@ def _str_references(self):
m = re.match(r'.. \[([a-z0-9._-]+)\]', line, re.I)
if m:
items.append(m.group(1))
out += [' ' + ", ".join(["[%s]_" % item for item in items]), '']
out += [' ' + ", ".join("[%s]_" % item for item in items), '']
return out

def _str_examples(self):
Expand Down
2 changes: 1 addition & 1 deletion doc/sphinxext/numpydoc/phantom_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def import_phantom_module(xml_file):
# Sort items so that
# - Base classes come before classes inherited from them
# - Modules come before their contents
all_nodes = dict([(n.attrib['id'], n) for n in root])
all_nodes = dict((n.attrib['id'], n) for n in root)

def _get_bases(node, recurse=False):
bases = [x.attrib['ref'] for x in node.findall('base')]
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ cdef class TextReader:
msg = self.orig_header
if isinstance(msg, list):
msg = "[%s], len of %d," % (
','.join([ str(m) for m in msg ]), len(msg))
','.join(str(m) for m in msg), len(msg))
raise ParserError(
'Passed header=%s but only %d lines in file'
% (msg, self.parser.lines))
Expand Down Expand Up @@ -2227,7 +2227,7 @@ def _concatenate_chunks(list chunks):
for name in names:
arrs = [chunk.pop(name) for chunk in chunks]
# Check each arr for consistent types.
dtypes = set([a.dtype for a in arrs])
dtypes = set(a.dtype for a in arrs)
if len(dtypes) > 1:
common_type = np.find_common_type(dtypes, [])
if common_type == np.object:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1309,7 +1309,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,

# we try to coerce datetime w/tz but must all have the same tz
if seen.datetimetz_:
if len(set([getattr(val, 'tzinfo', None) for val in objects])) == 1:
if len({getattr(val, 'tzinfo', None) for val in objects}) == 1:
from pandas import DatetimeIndex
return DatetimeIndex(objects)
seen.object_ = 1
Expand Down
5 changes: 2 additions & 3 deletions pandas/_libs/tslibs/resolution.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ class Resolution(object):
'U': 'N',
'N': None}

_str_reso_map = dict([(v, k) for k, v in _reso_str_map.items()])
_str_reso_map = {v: k for k, v in _reso_str_map.items()}

_reso_freq_map = {
'year': 'A',
Expand All @@ -232,8 +232,7 @@ class Resolution(object):
'microsecond': 'U',
'nanosecond': 'N'}

_freq_reso_map = dict([(v, k)
for k, v in _reso_freq_map.items()])
_freq_reso_map = {v: k for k, v in _reso_freq_map.items()}

@classmethod
def get_str(cls, reso):
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ class TimeRE(dict):
break
else:
return ''
regex = '|'.join([re.escape(stuff) for stuff in to_convert])
regex = '|'.join(re.escape(stuff) for stuff in to_convert)
regex = '(?P<%s>%s' % (directive, regex)
return '%s)' % regex

Expand Down
6 changes: 3 additions & 3 deletions pandas/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set([r.strip() for r in refnames.strip("()").split(",")])
refs = set(r.strip() for r in refnames.strip("()").split(","))
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
tags = set(r[len(TAG):] for r in refs if r.startswith(TAG))
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
Expand All @@ -154,7 +154,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r'\d', r)])
tags = set(r for r in refs if re.search(r'\d', r))
if verbose:
print("discarding '{}', no digits".format(",".join(refs - tags)))
if verbose:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def map_indices_py(arr):
Returns a dictionary with (element, index) pairs for each element in the
given array/list
"""
return dict([(x, i) for i, x in enumerate(arr)])
return dict((x, i) for i, x in enumerate(arr))


def union(*seqs):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ def _concat_datetimetz(to_concat, name=None):
it is used in DatetimeIndex.append also
"""
# do not pass tz to set because tzlocal cannot be hashed
if len(set([str(x.dtype) for x in to_concat])) != 1:
if len(set(str(x.dtype) for x in to_concat)) != 1:
raise ValueError('to_concat must have the same tz')
tz = to_concat[0].tz
# no need to localize because internal repr will not be changed
Expand Down
12 changes: 5 additions & 7 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3895,7 +3895,7 @@ def f(col):
return self._constructor_sliced(r, index=new_index,
dtype=r.dtype)

result = dict([(col, f(col)) for col in this])
result = dict((col, f(col)) for col in this)

# non-unique
else:
Expand All @@ -3906,9 +3906,7 @@ def f(i):
return self._constructor_sliced(r, index=new_index,
dtype=r.dtype)

result = dict([
(i, f(i)) for i, col in enumerate(this.columns)
])
result = dict((i, f(i)) for i, col in enumerate(this.columns))
result = self._constructor(result, index=new_index, copy=False)
result.columns = new_columns
return result
Expand Down Expand Up @@ -3986,7 +3984,7 @@ def _compare_frame_evaluate(self, other, func, str_rep, try_cast=True):
if self.columns.is_unique:

def _compare(a, b):
return dict([(col, func(a[col], b[col])) for col in a.columns])
return dict((col, func(a[col], b[col])) for col in a.columns)

new_data = expressions.evaluate(_compare, str_rep, self, other)
return self._constructor(data=new_data, index=self.index,
Expand All @@ -3995,8 +3993,8 @@ def _compare(a, b):
else:

def _compare(a, b):
return dict([(i, func(a.iloc[:, i], b.iloc[:, i]))
for i, col in enumerate(a.columns)])
return dict((i, func(a.iloc[:, i], b.iloc[:, i]))
for i, col in enumerate(a.columns))

new_data = expressions.evaluate(_compare, str_rep, self, other)
result = self._constructor(data=new_data, index=self.index,
Expand Down
26 changes: 13 additions & 13 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,21 +279,21 @@ def set_axis(a, i):

def _construct_axes_dict(self, axes=None, **kwargs):
"""Return an axes dictionary for myself."""
d = dict([(a, self._get_axis(a)) for a in (axes or self._AXIS_ORDERS)])
d = dict((a, self._get_axis(a)) for a in (axes or self._AXIS_ORDERS))
d.update(kwargs)
return d

@staticmethod
def _construct_axes_dict_from(self, axes, **kwargs):
"""Return an axes dictionary for the passed axes."""
d = dict([(a, ax) for a, ax in zip(self._AXIS_ORDERS, axes)])
d = dict((a, ax) for a, ax in zip(self._AXIS_ORDERS, axes))
d.update(kwargs)
return d

def _construct_axes_dict_for_slice(self, axes=None, **kwargs):
"""Return an axes dictionary for myself."""
d = dict([(self._AXIS_SLICEMAP[a], self._get_axis(a))
for a in (axes or self._AXIS_ORDERS)])
d = dict((self._AXIS_SLICEMAP[a], self._get_axis(a))
for a in (axes or self._AXIS_ORDERS))
d.update(kwargs)
return d

Expand Down Expand Up @@ -329,7 +329,7 @@ def _construct_axes_from_arguments(self, args, kwargs, require_all=False):
raise TypeError("not enough/duplicate arguments "
"specified!")

axes = dict([(a, kwargs.pop(a, None)) for a in self._AXIS_ORDERS])
axes = dict((a, kwargs.pop(a, None)) for a in self._AXIS_ORDERS)
return axes, kwargs

@classmethod
Expand Down Expand Up @@ -586,10 +586,10 @@ def transpose(self, *args, **kwargs):
# construct the args
axes, kwargs = self._construct_axes_from_arguments(args, kwargs,
require_all=True)
axes_names = tuple([self._get_axis_name(axes[a])
for a in self._AXIS_ORDERS])
axes_numbers = tuple([self._get_axis_number(axes[a])
for a in self._AXIS_ORDERS])
axes_names = tuple(self._get_axis_name(axes[a])
for a in self._AXIS_ORDERS)
axes_numbers = tuple(self._get_axis_number(axes[a])
for a in self._AXIS_ORDERS)

# we must have unique axes
if len(axes) != len(set(axes)):
Expand Down Expand Up @@ -699,8 +699,8 @@ def squeeze(self, axis=None):
(self._get_axis_number(axis),))
try:
return self.iloc[
tuple([0 if i in axis and len(a) == 1 else slice(None)
for i, a in enumerate(self.axes)])]
tuple(0 if i in axis and len(a) == 1 else slice(None)
for i, a in enumerate(self.axes))]
except Exception:
return self

Expand Down Expand Up @@ -4277,8 +4277,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
elif self.ndim == 3:

# fill in 2d chunks
result = dict([(col, s.fillna(method=method, value=value))
for col, s in self.iteritems()])
result = dict((col, s.fillna(method=method, value=value))
for col, s in self.iteritems())
new_obj = self._constructor.\
from_dict(result).__finalize__(self)
new_data = new_obj._data
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ def get_converter(s):
raise ValueError(msg)

converters = [get_converter(s) for s in index_sample]
names = [tuple([f(n) for f, n in zip(converters, name)])
names = [tuple(f(n) for f, n in zip(converters, name))
for name in names]

else:
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def conv(i):


def _sanitize_and_check(indexes):
kinds = list(set([type(index) for index in indexes]))
kinds = list({type(index) for index in indexes})

if list in kinds:
if len(kinds) > 1:
Expand All @@ -122,8 +122,8 @@ def _get_consensus_names(indexes):

# find the non-none names, need to tupleify to make
# the set hashable, then reverse on return
consensus_names = set([tuple(i.names) for i in indexes
if com._any_not_none(*i.names)])
consensus_names = set(tuple(i.names) for i in indexes
if com._any_not_none(*i.names))
if len(consensus_names) == 1:
return list(list(consensus_names)[0])
return [None] * indexes[0].nlevels
Expand Down
Loading

0 comments on commit 410ad37

Please sign in to comment.