CLN: For loops, boolean conditions, misc. (pandas-dev#25206)

Pingviinituutti · Feb 28, 2019 · commit aaa94d0
1 parent aa23a37
Show file tree

Hide file tree

Showing 24 changed files with 57 additions and 80 deletions.
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -2167,8 +2167,7 @@ def _reverse_indexer(self):
         r, counts = libalgos.groupsort_indexer(self.codes.astype('int64'),
                                                categories.size)
         counts = counts.cumsum()
-        result = [r[counts[indexer]:counts[indexer + 1]]
-                  for indexer in range(len(counts) - 1)]
+        result = (r[start:end] for start, end in zip(counts, counts[1:]))
         result = dict(zip(categories, result))
         return result
 

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -128,7 +128,7 @@ def _dt_array_cmp(cls, op):
     Wrap comparison operations to convert datetime-like to datetime64
     """
     opname = '__{name}__'.format(name=op.__name__)
-    nat_result = True if opname == '__ne__' else False
+    nat_result = opname == '__ne__'
 
     def wrapper(self, other):
         if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -561,7 +561,7 @@ def cmp_method(self, other):
             else:
                 mask = self._mask | mask
 
-            result[mask] = True if op_name == 'ne' else False
+            result[mask] = op_name == 'ne'
             return result
 
         name = '__{name}__'.format(name=op.__name__)

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -46,7 +46,7 @@ def _period_array_cmp(cls, op):
     Wrap comparison operations to convert Period-like to PeriodDtype
     """
     opname = '__{name}__'.format(name=op.__name__)
-    nat_result = True if opname == '__ne__' else False
+    nat_result = opname == '__ne__'
 
     def wrapper(self, other):
         op = getattr(self.asi8, opname)

diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -62,7 +62,7 @@ def _td_array_cmp(cls, op):
     Wrap comparison operations to convert timedelta-like to timedelta64
     """
     opname = '__{name}__'.format(name=op.__name__)
-    nat_result = True if opname == '__ne__' else False
+    nat_result = opname == '__ne__'
 
     def wrapper(self, other):
         if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):

diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py
@@ -252,7 +252,7 @@ def evaluate(self):
                              .format(slf=self))
 
         rhs = self.conform(self.rhs)
-        values = [TermValue(v, v, self.kind) for v in rhs]
+        values = [TermValue(v, v, self.kind).value for v in rhs]
 
         if self.is_in_table:
 
@@ -263,7 +263,7 @@ def evaluate(self):
                 self.filter = (
                     self.lhs,
                     filter_op,
-                    pd.Index([v.value for v in values]))
+                    pd.Index(values))
 
                 return self
             return None
@@ -275,7 +275,7 @@ def evaluate(self):
             self.filter = (
                 self.lhs,
                 filter_op,
-                pd.Index([v.value for v in values]))
+                pd.Index(values))
 
         else:
             raise TypeError("passing a filterable condition to a non-table "

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1111,11 +1111,9 @@ def find_common_type(types):
     # this is different from numpy, which casts bool with float/int as int
     has_bools = any(is_bool_dtype(t) for t in types)
     if has_bools:
-        has_ints = any(is_integer_dtype(t) for t in types)
-        has_floats = any(is_float_dtype(t) for t in types)
-        has_complex = any(is_complex_dtype(t) for t in types)
-        if has_ints or has_floats or has_complex:
-            return np.object
+        for t in types:
+            if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t):
+                return np.object
 
     return np.find_common_type(types, [])
 

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -123,8 +123,6 @@ def is_nonempty(x):
         except Exception:
             return True
 
-    nonempty = [x for x in to_concat if is_nonempty(x)]
-
     # If all arrays are empty, there's nothing to convert, just short-cut to
     # the concatenation, #3121.
     #
@@ -148,11 +146,11 @@ def is_nonempty(x):
     elif 'sparse' in typs:
         return _concat_sparse(to_concat, axis=axis, typs=typs)
 
-    extensions = [is_extension_array_dtype(x) for x in to_concat]
-    if any(extensions) and axis == 1:
+    all_empty = all(not is_nonempty(x) for x in to_concat)
+    if any(is_extension_array_dtype(x) for x in to_concat) and axis == 1:
         to_concat = [np.atleast_2d(x.astype('object')) for x in to_concat]
 
-    if not nonempty:
+    if all_empty:
         # we have all empties, but may need to coerce the result dtype to
         # object if we have non-numeric type operands (numpy would otherwise
         # cast this to float)

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -414,8 +414,7 @@ def _hash_categories(categories, ordered=True):
             cat_array = hash_tuples(categories)
         else:
             if categories.dtype == 'O':
-                types = [type(x) for x in categories]
-                if not len(set(types)) == 1:
+                if len({type(x) for x in categories}) != 1:
                     # TODO: hash_array doesn't handle mixed types. It casts
                     # everything to a str first, which means we treat
                     # {'1', '2'} the same as {'1', 2}

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1535,8 +1535,8 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
                     result_index = Index([], name=index)
             else:
                 try:
-                    to_remove = [arr_columns.get_loc(field) for field in index]
-                    index_data = [arrays[i] for i in to_remove]
+                    index_data = [arrays[arr_columns.get_loc(field)]
+                                  for field in index]
                     result_index = ensure_index_from_sequences(index_data,
                                                                names=index)
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1564,14 +1564,14 @@ def _is_label_reference(self, key, axis=0):
         -------
         is_label: bool
         """
-        axis = self._get_axis_number(axis)
-        other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]
-
         if self.ndim > 2:
             raise NotImplementedError(
                 "_is_label_reference is not implemented for {type}"
                 .format(type=type(self)))
 
+        axis = self._get_axis_number(axis)
+        other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis)
+
         return (key is not None and
                 is_hashable(key) and
                 any(key in self.axes[ax] for ax in other_axes))
@@ -1623,15 +1623,14 @@ def _check_label_or_level_ambiguity(self, key, axis=0):
         ------
         ValueError: `key` is ambiguous
         """
-
-        axis = self._get_axis_number(axis)
-        other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]
-
         if self.ndim > 2:
             raise NotImplementedError(
                 "_check_label_or_level_ambiguity is not implemented for {type}"
                 .format(type=type(self)))
 
+        axis = self._get_axis_number(axis)
+        other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis)
+
         if (key is not None and
                 is_hashable(key) and
                 key in self.axes[axis].names and
@@ -1689,15 +1688,14 @@ def _get_label_or_level_values(self, key, axis=0):
             if `key` is ambiguous. This will become an ambiguity error in a
             future version
         """
-
-        axis = self._get_axis_number(axis)
-        other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]
-
         if self.ndim > 2:
             raise NotImplementedError(
                 "_get_label_or_level_values is not implemented for {type}"
                 .format(type=type(self)))
 
+        axis = self._get_axis_number(axis)
+        other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]
+
         if self._is_label_reference(key, axis=axis):
             self._check_label_or_level_ambiguity(key, axis=axis)
             values = self.xs(key, axis=other_axes[0])._values
@@ -1753,14 +1751,13 @@ def _drop_labels_or_levels(self, keys, axis=0):
         ValueError
             if any `keys` match neither a label nor a level
         """
-
-        axis = self._get_axis_number(axis)
-
         if self.ndim > 2:
             raise NotImplementedError(
                 "_drop_labels_or_levels is not implemented for {type}"
                 .format(type=type(self)))
 
+        axis = self._get_axis_number(axis)
+
         # Validate keys
         keys = com.maybe_make_list(keys)
         invalid_keys = [k for k in keys if not
@@ -8579,7 +8576,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
             cond = self._constructor(cond, **self._construct_axes_dict())
 
         # make sure we are boolean
-        fill_value = True if inplace else False
+        fill_value = bool(inplace)
         cond = cond.fillna(fill_value)
 
         msg = "Boolean array expected for the condition, not {dtype}"
@@ -10243,8 +10240,8 @@ def last_valid_index(self):
 
 def _doc_parms(cls):
     """Return a tuple of the doc parms."""
-    axis_descr = "{%s}" % ', '.join(["{0} ({1})".format(a, i)
-                                     for i, a in enumerate(cls._AXIS_ORDERS)])
+    axis_descr = "{%s}" % ', '.join("{0} ({1})".format(a, i)
+                                    for i, a in enumerate(cls._AXIS_ORDERS))
     name = (cls._constructor_sliced.__name__
             if cls._AXIS_LEN > 1 else 'scalar')
     name2 = cls.__name__

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -1462,8 +1462,8 @@ def _reindex_output(self, result):
         # reindex `result`, and then reset the in-axis grouper columns.
 
         # Select in-axis groupers
-        in_axis_grps = [(i, ping.name) for (i, ping)
-                        in enumerate(groupings) if ping.in_axis]
+        in_axis_grps = ((i, ping.name) for (i, ping)
+                        in enumerate(groupings) if ping.in_axis)
         g_nums, g_names = zip(*in_axis_grps)
 
         result = result.drop(labels=list(g_names), axis=1)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -443,12 +443,12 @@ def get_converter(s):
                     raise ValueError(msg)
 
             converters = [get_converter(s) for s in index_sample]
-            names = [tuple(f(n) for f, n in zip(converters, name))
-                     for name in names]
+            names = (tuple(f(n) for f, n in zip(converters, name))
+                     for name in names)
 
         else:
             converter = get_converter(index_sample)
-            names = [converter(name) for name in names]
+            names = (converter(name) for name in names)
 
         return [self.indices.get(name, []) for name in names]
 

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -195,9 +195,9 @@ def groups(self):
         return self.grouper.groups
 
     def __repr__(self):
-        attrs_list = ["{}={!r}".format(attr_name, getattr(self, attr_name))
+        attrs_list = ("{}={!r}".format(attr_name, getattr(self, attr_name))
                       for attr_name in self._attributes
-                      if getattr(self, attr_name) is not None]
+                      if getattr(self, attr_name) is not None)
         attrs = ", ".join(attrs_list)
         cls_name = self.__class__.__name__
         return "{}({})".format(cls_name, attrs)

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -780,8 +780,8 @@ def _concat_same_dtype(self, to_concat, name):
         Concatenate to_concat which has the same class
         ValueError if other is not in the categories
         """
-        to_concat = [self._is_dtype_compat(c) for c in to_concat]
-        codes = np.concatenate([c.codes for c in to_concat])
+        codes = np.concatenate([self._is_dtype_compat(c).codes
+                                for c in to_concat])
         result = self._create_from_codes(codes, name=name)
         # if name is None, _create_from_codes sets self.name
         result.name = name

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -347,10 +347,10 @@ def _setitem_with_indexer(self, indexer, value):
                         # must have all defined axes if we have a scalar
                         # or a list-like on the non-info axes if we have a
                         # list-like
-                        len_non_info_axes = [
+                        len_non_info_axes = (
                             len(_ax) for _i, _ax in enumerate(self.obj.axes)
                             if _i != i
-                        ]
+                        )
                         if any(not l for l in len_non_info_axes):
                             if not is_list_like_indexer(value):
                                 raise ValueError("cannot set a frame with no "

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
@@ -197,18 +197,12 @@ def init_dict(data, index, columns, dtype=None):
             arrays.loc[missing] = [val] * missing.sum()
 
     else:
-
-        for key in data:
-            if (isinstance(data[key], ABCDatetimeIndex) and
-                    data[key].tz is not None):
-                # GH#24096 need copy to be deep for datetime64tz case
-                # TODO: See if we can avoid these copies
-                data[key] = data[key].copy(deep=True)
-
         keys = com.dict_keys_to_ordered_list(data)
         columns = data_names = Index(keys)
-        arrays = [data[k] for k in keys]
-
+        # GH#24096 need copy to be deep for datetime64tz case
+        # TODO: See if we can avoid these copies
+        arrays = [data[k] if not is_datetime64tz_dtype(data[k]) else
+                  data[k].copy(deep=True) for k in keys]
     return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
 
 

diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -83,9 +83,9 @@ def __unicode__(self):
         """
         Provide a nice str repr of our rolling object.
         """
-        attrs = ["{k}={v}".format(k=k, v=getattr(self.groupby, k))
+        attrs = ("{k}={v}".format(k=k, v=getattr(self.groupby, k))
                  for k in self._attributes if
-                 getattr(self.groupby, k, None) is not None]
+                 getattr(self.groupby, k, None) is not None)
         return "{klass} [{attrs}]".format(klass=self.__class__.__name__,
                                           attrs=', '.join(attrs))
 

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -88,9 +88,9 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
         # the original values are ints
         # as we grouped with a NaN value
         # and then dropped, coercing to floats
-        for v in [v for v in values if v in data and v in agged]:
-            if (is_integer_dtype(data[v]) and
-                    not is_integer_dtype(agged[v])):
+        for v in values:
+            if (v in data and is_integer_dtype(data[v]) and
+                    v in agged and not is_integer_dtype(agged[v])):
                 agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype)
 
     table = agged

diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
@@ -372,14 +372,6 @@ def _bins_to_cuts(x, bins, right=True, labels=None,
     return result, bins
 
 
-def _trim_zeros(x):
-    while len(x) > 1 and x[-1] == '0':
-        x = x[:-1]
-    if len(x) > 1 and x[-1] == '.':
-        x = x[:-1]
-    return x
-
-
 def _coerce_to_type(x):
     """
     if the passed data is of datetime/timedelta type,

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
@@ -1872,7 +1872,7 @@ def _wrap_result(self, result, use_codes=True,
 
         if expand is None:
             # infer from ndim if expand is not specified
-            expand = False if result.ndim == 1 else True
+            expand = result.ndim != 1
 
         elif expand is True and not isinstance(self._orig, Index):
             # required when expand=True is explicitly specified

diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
@@ -138,7 +138,7 @@ def to_numeric(arg, errors='raise', downcast=None):
             values = values.astype(np.int64)
         else:
             values = ensure_object(values)
-            coerce_numeric = False if errors in ('ignore', 'raise') else True
+            coerce_numeric = errors not in ('ignore', 'raise')
             values = lib.maybe_convert_numeric(values, set(),
                                                coerce_numeric=coerce_numeric)
 

diff --git a/pandas/core/window.py b/pandas/core/window.py
@@ -164,9 +164,9 @@ def __unicode__(self):
         Provide a nice str repr of our rolling object.
         """
 
-        attrs = ["{k}={v}".format(k=k, v=getattr(self, k))
+        attrs = ("{k}={v}".format(k=k, v=getattr(self, k))
                  for k in self._attributes
-                 if getattr(self, k, None) is not None]
+                 if getattr(self, k, None) is not None)
         return "{klass} [{attrs}]".format(klass=self._window_type,
                                           attrs=','.join(attrs))
 

diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
@@ -339,7 +339,7 @@ def _compare_other(self, data, op_name, other):
         expected = pd.Series(op(data._data, other))
 
         # fill the nan locations
-        expected[data._mask] = True if op_name == '__ne__' else False
+        expected[data._mask] = op_name == '__ne__'
 
         tm.assert_series_equal(result, expected)
 
@@ -351,7 +351,7 @@ def _compare_other(self, data, op_name, other):
         expected = op(expected, other)
 
         # fill the nan locations
-        expected[data._mask] = True if op_name == '__ne__' else False
+        expected[data._mask] = op_name == '__ne__'
 
         tm.assert_series_equal(result, expected)