From 555e6e1cbb852b2b0c2cd13f6f575656254160f6 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 21 Nov 2019 16:27:39 +0000 Subject: [PATCH] TYP: disallow comment-based annotation syntax (#29741) --- ci/code_checks.sh | 4 +++ doc/source/development/contributing.rst | 10 +++--- pandas/_config/config.py | 8 ++--- pandas/_version.py | 2 +- pandas/compat/numpy/function.py | 24 ++++++------- pandas/core/accessor.py | 4 +-- pandas/core/algorithms.py | 2 +- pandas/core/apply.py | 3 +- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/integer.py | 4 +-- pandas/core/arrays/period.py | 6 ++-- pandas/core/arrays/timedeltas.py | 4 +-- pandas/core/base.py | 8 ++--- pandas/core/computation/expr.py | 2 +- pandas/core/dtypes/base.py | 2 +- pandas/core/dtypes/dtypes.py | 36 ++++++++++---------- pandas/core/frame.py | 6 ++-- pandas/core/generic.py | 20 +++++------ pandas/core/groupby/generic.py | 4 +-- pandas/core/groupby/groupby.py | 7 ++-- pandas/core/groupby/grouper.py | 11 +++--- pandas/core/groupby/ops.py | 8 ++--- pandas/core/indexes/base.py | 16 ++++----- pandas/core/indexes/datetimelike.py | 4 +-- pandas/core/indexes/range.py | 8 ++--- pandas/core/indexing.py | 2 +- pandas/core/internals/managers.py | 2 +- pandas/core/nanops.py | 2 +- pandas/core/ops/docstrings.py | 4 +-- pandas/core/resample.py | 2 +- pandas/core/reshape/merge.py | 3 +- pandas/core/reshape/pivot.py | 11 +++--- pandas/core/series.py | 9 ++--- pandas/core/strings.py | 4 +-- pandas/core/window/rolling.py | 8 ++--- pandas/io/common.py | 2 +- pandas/io/excel/_odfreader.py | 4 +-- pandas/io/excel/_openpyxl.py | 2 +- pandas/io/formats/format.py | 26 ++++++++------ pandas/io/formats/html.py | 9 +++-- pandas/io/formats/latex.py | 2 +- pandas/io/formats/printing.py | 2 +- pandas/io/json/_json.py | 4 ++- pandas/io/json/_normalize.py | 4 +-- pandas/io/parsers.py | 4 +-- pandas/io/pytables.py | 20 +++++------ pandas/plotting/_matplotlib/core.py | 2 +- pandas/tests/api/test_api.py | 10 +++--- pandas/tests/arrays/test_datetimelike.py | 2 +- pandas/tests/computation/test_eval.py | 4 +-- pandas/tests/dtypes/test_common.py | 6 ++-- pandas/tests/extension/base/ops.py | 8 ++--- pandas/tests/indexes/common.py | 2 +- pandas/tests/indexing/test_coercion.py | 2 +- pandas/tests/io/parser/conftest.py | 4 +-- pandas/tests/io/test_sql.py | 4 +-- pandas/tests/tseries/offsets/test_offsets.py | 12 +++---- pandas/tseries/frequencies.py | 2 +- pandas/tseries/holiday.py | 2 +- pandas/tseries/offsets.py | 4 +-- pandas/util/_decorators.py | 4 ++- 63 files changed, 209 insertions(+), 194 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index edd8fcd418c47..7c6c98d910492 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -194,6 +194,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' . RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Check for use of comment-based annotation syntax' ; echo $MSG + invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG set -o pipefail if [[ "$AZURE" == "true" ]]; then diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst index 33084d0d23771..042d6926d84f5 100644 --- a/doc/source/development/contributing.rst +++ b/doc/source/development/contributing.rst @@ -804,7 +804,7 @@ Types imports should follow the ``from typing import ...`` convention. So rather import typing - primes = [] # type: typing.List[int] + primes: typing.List[int] = [] You should write @@ -812,19 +812,19 @@ You should write from typing import List, Optional, Union - primes = [] # type: List[int] + primes: List[int] = [] ``Optional`` should be used where applicable, so instead of .. code-block:: python - maybe_primes = [] # type: List[Union[int, None]] + maybe_primes: List[Union[int, None]] = [] You should write .. code-block:: python - maybe_primes = [] # type: List[Optional[int]] + maybe_primes: List[Optional[int]] = [] In some cases in the code base classes may define class variables that shadow builtins. This causes an issue as described in `Mypy 1775 `_. The defensive solution here is to create an unambiguous alias of the builtin and use that without your annotation. For example, if you come across a definition like @@ -840,7 +840,7 @@ The appropriate way to annotate this would be as follows str_type = str class SomeClass2: - str = None # type: str_type + str: str_type = None In some cases you may be tempted to use ``cast`` from the typing module when you know better than the analyzer. This occurs particularly when using custom inference functions. For example diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 890db5b41907e..814f855cceeac 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -58,16 +58,16 @@ RegisteredOption = namedtuple("RegisteredOption", "key defval doc validator cb") # holds deprecated option metdata -_deprecated_options = {} # type: Dict[str, DeprecatedOption] +_deprecated_options: Dict[str, DeprecatedOption] = {} # holds registered option metdata -_registered_options = {} # type: Dict[str, RegisteredOption] +_registered_options: Dict[str, RegisteredOption] = {} # holds the current values for registered options -_global_config = {} # type: Dict[str, str] +_global_config: Dict[str, str] = {} # keys which have a special meaning -_reserved_keys = ["all"] # type: List[str] +_reserved_keys: List[str] = ["all"] class OptionError(AttributeError, KeyError): diff --git a/pandas/_version.py b/pandas/_version.py index 0cdedf3da3ea7..dfed9574c7cb0 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -47,7 +47,7 @@ class NotThisMethod(Exception): pass -HANDLERS = {} # type: Dict[str, Dict[str, Callable]] +HANDLERS: Dict[str, Dict[str, Callable]] = {} def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index ea5aaf6b6476d..fffe09a74571e 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -106,7 +106,7 @@ def validate_argmax_with_skipna(skipna, args, kwargs): return skipna -ARGSORT_DEFAULTS = OrderedDict() # type: OrderedDict[str, Optional[Union[int, str]]] +ARGSORT_DEFAULTS: "OrderedDict[str, Optional[Union[int, str]]]" = OrderedDict() ARGSORT_DEFAULTS["axis"] = -1 ARGSORT_DEFAULTS["kind"] = "quicksort" ARGSORT_DEFAULTS["order"] = None @@ -122,7 +122,7 @@ def validate_argmax_with_skipna(skipna, args, kwargs): # two different signatures of argsort, this second validation # for when the `kind` param is supported -ARGSORT_DEFAULTS_KIND = OrderedDict() # type: OrderedDict[str, Optional[int]] +ARGSORT_DEFAULTS_KIND: "OrderedDict[str, Optional[int]]" = OrderedDict() ARGSORT_DEFAULTS_KIND["axis"] = -1 ARGSORT_DEFAULTS_KIND["order"] = None validate_argsort_kind = CompatValidator( @@ -169,14 +169,14 @@ def validate_clip_with_axis(axis, args, kwargs): return axis -COMPRESS_DEFAULTS = OrderedDict() # type: OrderedDict[str, Any] +COMPRESS_DEFAULTS: "OrderedDict[str, Any]" = OrderedDict() COMPRESS_DEFAULTS["axis"] = None COMPRESS_DEFAULTS["out"] = None validate_compress = CompatValidator( COMPRESS_DEFAULTS, fname="compress", method="both", max_fname_arg_count=1 ) -CUM_FUNC_DEFAULTS = OrderedDict() # type: OrderedDict[str, Any] +CUM_FUNC_DEFAULTS: "OrderedDict[str, Any]" = OrderedDict() CUM_FUNC_DEFAULTS["dtype"] = None CUM_FUNC_DEFAULTS["out"] = None validate_cum_func = CompatValidator( @@ -202,7 +202,7 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): return skipna -ALLANY_DEFAULTS = OrderedDict() # type: OrderedDict[str, Optional[bool]] +ALLANY_DEFAULTS: "OrderedDict[str, Optional[bool]]" = OrderedDict() ALLANY_DEFAULTS["dtype"] = None ALLANY_DEFAULTS["out"] = None ALLANY_DEFAULTS["keepdims"] = False @@ -224,28 +224,28 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1 ) -RESHAPE_DEFAULTS = dict(order="C") # type: Dict[str, str] +RESHAPE_DEFAULTS: Dict[str, str] = dict(order="C") validate_reshape = CompatValidator( RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1 ) -REPEAT_DEFAULTS = dict(axis=None) # type: Dict[str, Any] +REPEAT_DEFAULTS: Dict[str, Any] = dict(axis=None) validate_repeat = CompatValidator( REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1 ) -ROUND_DEFAULTS = dict(out=None) # type: Dict[str, Any] +ROUND_DEFAULTS: Dict[str, Any] = dict(out=None) validate_round = CompatValidator( ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1 ) -SORT_DEFAULTS = OrderedDict() # type: OrderedDict[str, Optional[Union[int, str]]] +SORT_DEFAULTS: "OrderedDict[str, Optional[Union[int, str]]]" = OrderedDict() SORT_DEFAULTS["axis"] = -1 SORT_DEFAULTS["kind"] = "quicksort" SORT_DEFAULTS["order"] = None validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs") -STAT_FUNC_DEFAULTS = OrderedDict() # type: OrderedDict[str, Optional[Any]] +STAT_FUNC_DEFAULTS: "OrderedDict[str, Optional[Any]]" = OrderedDict() STAT_FUNC_DEFAULTS["dtype"] = None STAT_FUNC_DEFAULTS["out"] = None @@ -273,13 +273,13 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1 ) -STAT_DDOF_FUNC_DEFAULTS = OrderedDict() # type: OrderedDict[str, Optional[bool]] +STAT_DDOF_FUNC_DEFAULTS: "OrderedDict[str, Optional[bool]]" = OrderedDict() STAT_DDOF_FUNC_DEFAULTS["dtype"] = None STAT_DDOF_FUNC_DEFAULTS["out"] = None STAT_DDOF_FUNC_DEFAULTS["keepdims"] = False validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs") -TAKE_DEFAULTS = OrderedDict() # type: OrderedDict[str, Optional[str]] +TAKE_DEFAULTS: "OrderedDict[str, Optional[str]]" = OrderedDict() TAKE_DEFAULTS["out"] = None TAKE_DEFAULTS["mode"] = "raise" validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs") diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index fc60c01d7b808..182b07d57ea49 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -11,8 +11,8 @@ class DirNamesMixin: - _accessors = set() # type: Set[str] - _deprecations = frozenset() # type: FrozenSet[str] + _accessors: Set[str] = set() + _deprecations: FrozenSet[str] = frozenset() def _dir_deletions(self): """ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9c14102529b48..18adb12a9ad72 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -50,7 +50,7 @@ from pandas.core.construction import array, extract_array from pandas.core.indexers import validate_indices -_shared_docs = {} # type: Dict[str, str] +_shared_docs: Dict[str, str] = {} # --------------- # diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 34a8ed1fa7a83..8c49b2b803241 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -34,8 +34,9 @@ def frame_apply( """ construct and return a row or column based frame apply object """ axis = obj._get_axis_number(axis) + klass: Type[FrameApply] if axis == 0: - klass = FrameRowApply # type: Type[FrameApply] + klass = FrameRowApply elif axis == 1: klass = FrameColumnApply diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 82dabe735581b..fa0e025c22c88 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -29,7 +29,7 @@ _not_implemented_message = "{} does not implement {}." -_extension_array_shared_docs = dict() # type: Dict[str, str] +_extension_array_shared_docs: Dict[str, str] = dict() def try_cast_to_ea(cls_or_instance, obj, dtype=None): diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8e66db4c61032..dc3c49b7e06a9 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -51,7 +51,7 @@ class AttributesMixin: - _data = None # type: np.ndarray + _data: np.ndarray @classmethod def _simple_new(cls, values, **kwargs): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 8e3c727a14c99..71420e6e58090 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -320,7 +320,7 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps # ----------------------------------------------------------------- # Constructors - _dtype = None # type: Union[np.dtype, DatetimeTZDtype] + _dtype: Union[np.dtype, DatetimeTZDtype] _freq = None def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False): diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 63296b4a26354..12b76df9a5983 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -40,9 +40,9 @@ class _IntegerDtype(ExtensionDtype): The attributes name & type are set when these subclasses are created. """ - name = None # type: str + name: str base = None - type = None # type: Type + type: Type na_value = np.nan def __repr__(self) -> str: diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index f3d51b28ad399..41a8c48452647 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -161,7 +161,7 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps): _scalar_type = Period # Names others delegate to us - _other_ops = [] # type: List[str] + _other_ops: List[str] = [] _bool_ops = ["is_leap_year"] _object_ops = ["start_time", "end_time", "freq"] _field_ops = [ @@ -894,9 +894,9 @@ def period_array( data = np.asarray(data) + dtype: Optional[PeriodDtype] if freq: - # typed Optional here because the else block below assigns None - dtype = PeriodDtype(freq) # type: Optional[PeriodDtype] + dtype = PeriodDtype(freq) else: dtype = None diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 816beb758dd33..bacd0b9699e93 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -161,8 +161,8 @@ class TimedeltaArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps): _scalar_type = Timedelta __array_priority__ = 1000 # define my properties & methods for delegation - _other_ops = [] # type: List[str] - _bool_ops = [] # type: List[str] + _other_ops: List[str] = [] + _bool_ops: List[str] = [] _object_ops = ["freq"] _field_ops = ["days", "seconds", "microseconds", "nanoseconds"] _datetimelike_ops = _field_ops + _object_ops + _bool_ops diff --git a/pandas/core/base.py b/pandas/core/base.py index c9855701eeb03..176a92132e20a 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -36,7 +36,7 @@ from pandas.core.arrays import ExtensionArray import pandas.core.nanops as nanops -_shared_docs = dict() # type: Dict[str, str] +_shared_docs: Dict[str, str] = dict() _indexops_doc_kwargs = dict( klass="IndexOpsMixin", inplace="", @@ -603,7 +603,7 @@ def _is_builtin_func(self, arg): class ShallowMixin: - _attributes = [] # type: List[str] + _attributes: List[str] = [] def _shallow_copy(self, obj=None, **kwargs): """ @@ -627,7 +627,7 @@ class IndexOpsMixin: # ndarray compatibility __array_priority__ = 1000 - _deprecations = frozenset( + _deprecations: FrozenSet[str] = frozenset( [ "tolist", # tolist is not deprecated, just suppressed in the __dir__ "base", @@ -637,7 +637,7 @@ class IndexOpsMixin: "flags", "strides", ] - ) # type: FrozenSet[str] + ) def transpose(self, *args, **kwargs): """ diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 4d1fc42070ea8..253d64d50d0cd 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -378,7 +378,7 @@ class BaseExprVisitor(ast.NodeVisitor): preparser : callable """ - const_type = Constant # type: Type[Term] + const_type: Type[Term] = Constant term_type = Term binary_ops = _cmp_ops_syms + _bool_ops_syms + _arith_ops_syms diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index c90f1cdeaabfd..8acdf32c8768e 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -81,7 +81,7 @@ def __from_arrow__( provided for registering virtual subclasses. """ - _metadata = () # type: Tuple[str, ...] + _metadata: Tuple[str, ...] = () def __str__(self) -> str: return self.name diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 3d1388db371ca..523c8e8bd02d0 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -20,7 +20,7 @@ # GH26403: sentinel value used for the default value of ordered in the # CategoricalDtype constructor to detect when ordered=None is explicitly passed -ordered_sentinel = object() # type: object +ordered_sentinel: object = object() def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]: @@ -66,7 +66,7 @@ class Registry: """ def __init__(self): - self.dtypes = [] # type: List[Type[ExtensionDtype]] + self.dtypes: List[Type[ExtensionDtype]] = [] def register(self, dtype: Type[ExtensionDtype]) -> None: """ @@ -119,21 +119,21 @@ class PandasExtensionDtype(ExtensionDtype): THIS IS NOT A REAL NUMPY DTYPE """ - type = None # type: Any - kind = None # type: Any + type: Any + kind: Any # The Any type annotations above are here only because mypy seems to have a # problem dealing with with multiple inheritance from PandasExtensionDtype # and ExtensionDtype's @properties in the subclasses below. The kind and # type variables in those subclasses are explicitly typed below. subdtype = None - str = None # type: Optional[str_type] + str: Optional[str_type] = None num = 100 - shape = tuple() # type: Tuple[int, ...] + shape: Tuple[int, ...] = tuple() itemsize = 8 base = None isbuiltin = 0 isnative = 0 - _cache = {} # type: Dict[str_type, 'PandasExtensionDtype'] + _cache: Dict[str_type, "PandasExtensionDtype"] = {} def __str__(self) -> str_type: """ @@ -214,12 +214,12 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): # TODO: Document public vs. private API name = "category" - type = CategoricalDtypeType # type: Type[CategoricalDtypeType] - kind = "O" # type: str_type + type: Type[CategoricalDtypeType] = CategoricalDtypeType + kind: str_type = "O" str = "|O08" base = np.dtype("O") _metadata = ("categories", "ordered", "_ordered_from_sentinel") - _cache = {} # type: Dict[str_type, PandasExtensionDtype] + _cache: Dict[str_type, PandasExtensionDtype] = {} def __init__( self, categories=None, ordered: Union[Ordered, object] = ordered_sentinel @@ -650,15 +650,15 @@ class DatetimeTZDtype(PandasExtensionDtype): datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')] """ - type = Timestamp # type: Type[Timestamp] - kind = "M" # type: str_type + type: Type[Timestamp] = Timestamp + kind: str_type = "M" str = "|M8[ns]" num = 101 base = np.dtype("M8[ns]") na_value = NaT _metadata = ("unit", "tz") _match = re.compile(r"(datetime64|M8)\[(?P.+), (?P.+)\]") - _cache = {} # type: Dict[str_type, PandasExtensionDtype] + _cache: Dict[str_type, PandasExtensionDtype] = {} def __init__(self, unit="ns", tz=None): if isinstance(unit, DatetimeTZDtype): @@ -812,14 +812,14 @@ class PeriodDtype(PandasExtensionDtype): period[M] """ - type = Period # type: Type[Period] - kind = "O" # type: str_type + type: Type[Period] = Period + kind: str_type = "O" str = "|O08" base = np.dtype("O") num = 102 _metadata = ("freq",) _match = re.compile(r"(P|p)eriod\[(?P.+)\]") - _cache = {} # type: Dict[str_type, PandasExtensionDtype] + _cache: Dict[str_type, PandasExtensionDtype] = {} def __new__(cls, freq=None): """ @@ -972,13 +972,13 @@ class IntervalDtype(PandasExtensionDtype): """ name = "interval" - kind = None # type: Optional[str_type] + kind: Optional[str_type] = None str = "|O08" base = np.dtype("O") num = 103 _metadata = ("subtype",) _match = re.compile(r"(I|i)nterval\[(?P.+)\]") - _cache = {} # type: Dict[str_type, PandasExtensionDtype] + _cache: Dict[str_type, PandasExtensionDtype] = {} def __new__(cls, subtype=None): from pandas.core.dtypes.common import ( diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8b31b6d503eda..46b213b25df49 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -381,9 +381,9 @@ class DataFrame(NDFrame): def _constructor(self) -> Type["DataFrame"]: return DataFrame - _constructor_sliced = Series # type: Type[Series] - _deprecations = NDFrame._deprecations | frozenset([]) # type: FrozenSet[str] - _accessors = set() # type: Set[str] + _constructor_sliced: Type[Series] = Series + _deprecations: FrozenSet[str] = NDFrame._deprecations | frozenset([]) + _accessors: Set[str] = set() @property def _constructor_expanddim(self): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7f83bb9e69f7a..b16a72f01c739 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -92,7 +92,7 @@ # goal is to be able to define the docs close to function, while still being # able to share -_shared_docs = dict() # type: Dict[str, str] +_shared_docs: Dict[str, str] = dict() _shared_doc_kwargs = dict( axes="keywords for axes", klass="Series/DataFrame", @@ -154,7 +154,7 @@ class NDFrame(PandasObject, SelectionMixin): copy : bool, default False """ - _internal_names = [ + _internal_names: List[str] = [ "_data", "_cacher", "_item_cache", @@ -168,15 +168,15 @@ class NDFrame(PandasObject, SelectionMixin): "_metadata", "__array_struct__", "__array_interface__", - ] # type: List[str] - _internal_names_set = set(_internal_names) # type: Set[str] - _accessors = set() # type: Set[str] - _deprecations = frozenset( + ] + _internal_names_set: Set[str] = set(_internal_names) + _accessors: Set[str] = set() + _deprecations: FrozenSet[str] = frozenset( ["get_dtype_counts", "get_values", "ftypes", "ix"] - ) # type: FrozenSet[str] - _metadata = [] # type: List[str] + ) + _metadata: List[str] = [] _is_copy = None - _data = None # type: BlockManager + _data: BlockManager _attrs: Dict[Optional[Hashable], Any] # ---------------------------------------------------------------------- @@ -3599,7 +3599,7 @@ class animal locomotion result._set_is_copy(self, copy=not result._is_view) return result - _xs = xs # type: Callable + _xs: Callable = xs def __getitem__(self, item): raise AbstractMethodError(self) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 900e11dedb8b1..99ef281e842b1 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1105,7 +1105,7 @@ def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: axis = self.axis obj = self._obj_with_exclusions - result = OrderedDict() # type: OrderedDict + result: OrderedDict = OrderedDict() if axis != obj._info_axis_number: for name, data in self: fres = func(data, *args, **kwargs) @@ -1122,7 +1122,7 @@ def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame: # only for axis==0 obj = self._obj_with_exclusions - result = OrderedDict() # type: dict + result: OrderedDict = OrderedDict() cannot_agg = [] errors = None for item in obj: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 21c085c775399..9e12ac82fb3ae 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -345,7 +345,7 @@ def _group_selection_context(groupby): class _GroupBy(PandasObject, SelectionMixin): _group_selection = None - _apply_whitelist = frozenset() # type: FrozenSet[str] + _apply_whitelist: FrozenSet[str] = frozenset() def __init__( self, @@ -2518,12 +2518,11 @@ def get_groupby( mutated: bool = False, ): + klass: Union[Type["SeriesGroupBy"], Type["DataFrameGroupBy"]] if isinstance(obj, Series): from pandas.core.groupby.generic import SeriesGroupBy - klass = ( - SeriesGroupBy - ) # type: Union[Type["SeriesGroupBy"], Type["DataFrameGroupBy"]] + klass = SeriesGroupBy elif isinstance(obj, DataFrame): from pandas.core.groupby.generic import DataFrameGroupBy diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 2b946d1ff0a7a..308d4d1864bdd 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -93,7 +93,7 @@ class Grouper: >>> df.groupby(Grouper(level='date', freq='60s', axis=1)) """ - _attributes = ("key", "level", "freq", "axis", "sort") # type: Tuple[str, ...] + _attributes: Tuple[str, ...] = ("key", "level", "freq", "axis", "sort") def __new__(cls, *args, **kwargs): if kwargs.get("freq") is not None: @@ -373,8 +373,8 @@ def __repr__(self) -> str: def __iter__(self): return iter(self.indices) - _codes = None # type: np.ndarray - _group_index = None # type: Index + _codes: Optional[np.ndarray] = None + _group_index: Optional[Index] = None @property def ngroups(self) -> int: @@ -405,6 +405,7 @@ def result_index(self) -> Index: def group_index(self) -> Index: if self._group_index is None: self._make_codes() + assert self._group_index is not None return self._group_index def _make_codes(self) -> None: @@ -576,8 +577,8 @@ def get_grouper( else: levels = [level] * len(keys) - groupings = [] # type: List[Grouping] - exclusions = [] # type: List[Hashable] + groupings: List[Grouping] = [] + exclusions: List[Hashable] = [] # if the actual grouper should be obj[key] def is_in_axis(key) -> bool: diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index a7e0a901a5394..4780254e060e6 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -90,7 +90,7 @@ def __init__( self._filter_empty_groups = self.compressed = len(groupings) != 1 self.axis = axis - self._groupings = list(groupings) # type: List[grouper.Grouping] + self._groupings: List[grouper.Grouping] = list(groupings) self.sort = sort self.group_keys = group_keys self.mutated = mutated @@ -153,7 +153,7 @@ def apply(self, f, data: FrameOrSeries, axis: int = 0): group_keys = self._get_group_keys() result_values = None - sdata = splitter._get_sorted_data() # type: FrameOrSeries + sdata: FrameOrSeries = splitter._get_sorted_data() if sdata.ndim == 2 and np.any(sdata.dtypes.apply(is_extension_array_dtype)): # calling splitter.fast_apply will raise TypeError via apply_frame_axis0 # if we pass EA instead of ndarray @@ -551,7 +551,7 @@ def _cython_operation( if vdim == 1 and arity == 1: result = result[:, 0] - names = self._name_functions.get(how, None) # type: Optional[List[str]] + names: Optional[List[str]] = self._name_functions.get(how, None) if swapped: result = result.swapaxes(0, axis) @@ -923,7 +923,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: def get_splitter(data: FrameOrSeries, *args, **kwargs) -> DataSplitter: if isinstance(data, Series): - klass = SeriesSplitter # type: Type[DataSplitter] + klass: Type[DataSplitter] = SeriesSplitter else: # i.e. DataFrame klass = FrameSplitter diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8978a09825ee9..10c0f465f69da 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -205,11 +205,11 @@ class Index(IndexOpsMixin, PandasObject): """ # tolist is not actually deprecated, just suppressed in the __dir__ - _deprecations = ( + _deprecations: FrozenSet[str] = ( PandasObject._deprecations | IndexOpsMixin._deprecations | frozenset(["asobject", "contains", "dtype_str", "get_values", "set_value"]) - ) # type: FrozenSet[str] + ) # To hand over control to subclasses _join_precedence = 1 @@ -321,10 +321,9 @@ def __new__( # the DatetimeIndex construction. # Note we can pass copy=False because the .astype below # will always make a copy - result = DatetimeIndex( - data, copy=False, name=name, **kwargs - ) # type: "Index" - return result.astype(object) + return DatetimeIndex(data, copy=False, name=name, **kwargs).astype( + object + ) else: return DatetimeIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) @@ -332,8 +331,9 @@ def __new__( if is_dtype_equal(_o_dtype, dtype): # Note we can pass copy=False because the .astype below # will always make a copy - result = TimedeltaIndex(data, copy=False, name=name, **kwargs) - return result.astype(object) + return TimedeltaIndex(data, copy=False, name=name, **kwargs).astype( + object + ) else: return TimedeltaIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index df3420ea14e24..e420cf0cb0d78 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -826,9 +826,9 @@ class DatetimelikeDelegateMixin(PandasDelegate): """ # raw_methods : dispatch methods that shouldn't be boxed in an Index - _raw_methods = set() # type: Set[str] + _raw_methods: Set[str] = set() # raw_properties : dispatch properties that shouldn't be boxed in an Index - _raw_properties = set() # type: Set[str] + _raw_properties: Set[str] = set() name = None _data: ExtensionArray diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 6f677848b1c79..e68b340130b9b 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1,7 +1,7 @@ from datetime import timedelta import operator from sys import getsizeof -from typing import Union +from typing import Optional, Union import warnings import numpy as np @@ -73,10 +73,10 @@ class RangeIndex(Int64Index): _typ = "rangeindex" _engine_type = libindex.Int64Engine - _range = None # type: range + _range: range # check whether self._data has been called - _cached_data = None # type: np.ndarray + _cached_data: Optional[np.ndarray] = None # -------------------------------------------------------------------- # Constructors @@ -654,7 +654,7 @@ def _concat_same_dtype(self, indexes, name): non_empty_indexes = [obj for obj in indexes if len(obj)] for obj in non_empty_indexes: - rng = obj._range # type: range + rng: range = obj._range if start is None: # This is set by the first non-empty index diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 673764ef6a124..b52015b738c6e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -100,7 +100,7 @@ class IndexingError(Exception): class _NDFrameIndexer(_NDFrameIndexerBase): - _valid_types = None # type: str + _valid_types: str axis = None def __call__(self, axis=None): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d53fbe2e60e9a..5e60440f1577e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -126,7 +126,7 @@ def __init__( do_integrity_check: bool = True, ): self.axes = [ensure_index(ax) for ax in axes] - self.blocks = tuple(blocks) # type: Tuple[Block, ...] + self.blocks: Tuple[Block, ...] = tuple(blocks) for block in blocks: if self.ndim != block.ndim: diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 7e50348962fc5..a2a40bbf93604 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -660,7 +660,7 @@ def _get_counts_nanvar( count = np.nan d = np.nan else: - mask2 = count <= ddof # type: np.ndarray + mask2: np.ndarray = count <= ddof if mask2.any(): np.putmask(d, mask2, np.nan) np.putmask(count, mask2, np.nan) diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py index 5d3f9cd92aa1a..e3db65f11a332 100644 --- a/pandas/core/ops/docstrings.py +++ b/pandas/core/ops/docstrings.py @@ -233,7 +233,7 @@ def _make_flex_doc(op_name, typ): dtype: float64 """ -_op_descriptions = { +_op_descriptions: Dict[str, Dict[str, Optional[str]]] = { # Arithmetic Operators "add": { "op": "+", @@ -310,7 +310,7 @@ def _make_flex_doc(op_name, typ): "reverse": None, "series_examples": None, }, -} # type: Dict[str, Dict[str, Optional[str]]] +} _op_names = list(_op_descriptions.keys()) for key in _op_names: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 81ec4f45ec8e1..25731c4e1c54c 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -31,7 +31,7 @@ from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import DateOffset, Day, Nano, Tick -_shared_docs_kwargs = dict() # type: Dict[str, str] +_shared_docs_kwargs: Dict[str, str] = dict() class Resampler(_GroupBy, ShallowMixin): diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 4d838db6c95f6..fdd31b3b7c022 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -583,8 +583,9 @@ def __init__( self.indicator = indicator + self.indicator_name: Optional[str] if isinstance(self.indicator, str): - self.indicator_name = self.indicator # type: Optional[str] + self.indicator_name = self.indicator elif isinstance(self.indicator, bool): self.indicator_name = "_merge" if self.indicator else None else: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index b126b6e221ccc..c7d3adece521e 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -211,10 +211,9 @@ def _add_margins( if margins_name in table.columns.get_level_values(level): raise ValueError(msg) + key: Union[str, Tuple[str, ...]] if len(rows) > 1: - key = (margins_name,) + ("",) * ( - len(rows) - 1 - ) # type: Union[str, Tuple[str, ...]] + key = (margins_name,) + ("",) * (len(rows) - 1) else: key = margins_name @@ -564,7 +563,7 @@ def crosstab( if pass_objs: common_idx = get_objs_combined_axis(pass_objs, intersect=True, sort=False) - data = {} # type: dict + data: Dict = {} data.update(zip(rownames, index)) data.update(zip(colnames, columns)) @@ -615,11 +614,11 @@ def _normalize(table, normalize, margins: bool, margins_name="All"): if margins is False: # Actual Normalizations - normalizers = { + normalizers: Dict[Union[bool, str], Callable] = { "all": lambda x: x / x.sum(axis=1).sum(axis=0), "columns": lambda x: x / x.sum(), "index": lambda x: x.div(x.sum(axis=1), axis=0), - } # type: Dict[Union[bool, str], Callable] + } normalizers[True] = normalizers["all"] diff --git a/pandas/core/series.py b/pandas/core/series.py index a950b4496baa7..6045d6a654508 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -170,7 +170,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): Copy input data. """ - _metadata = [] # type: List[str] + _metadata: List[str] = [] _accessors = {"dt", "cat", "str", "sparse"} _deprecations = ( base.IndexOpsMixin._deprecations @@ -184,7 +184,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): hasnans = property( base.IndexOpsMixin.hasnans.func, doc=base.IndexOpsMixin.hasnans.__doc__ ) - _data = None # type: SingleBlockManager + _data: SingleBlockManager # ---------------------------------------------------------------------- # Constructors @@ -781,9 +781,10 @@ def __array_ufunc__( inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs) result = getattr(ufunc, method)(*inputs, **kwargs) + + name: Optional[Hashable] if len(set(names)) == 1: - # we require names to be hashable, right? - name = names[0] # type: Any + name = names[0] else: name = None diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 413e7e85eb6fe..137c37f938dfa 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -52,7 +52,7 @@ ) _cpython_optimized_decoders = _cpython_optimized_encoders + ("utf-16", "utf-32") -_shared_docs = dict() # type: Dict[str, str] +_shared_docs: Dict[str, str] = dict() def cat_core(list_of_columns: List, sep: str): @@ -3284,7 +3284,7 @@ def rindex(self, sub, start=0, end=None): """ # _doc_args holds dict of strings to use in substituting casemethod docs - _doc_args = {} # type: Dict[str, Dict[str, str]] + _doc_args: Dict[str, Dict[str, str]] = {} _doc_args["lower"] = dict(type="lowercase", method="lower", version="") _doc_args["upper"] = dict(type="uppercase", method="upper", version="") _doc_args["title"] = dict(type="titlecase", method="title", version="") diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index fd2e8aa2ad02f..6a35664ece765 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -53,7 +53,7 @@ class _Window(PandasObject, ShallowMixin, SelectionMixin): - _attributes = [ + _attributes: List[str] = [ "window", "min_periods", "center", @@ -61,8 +61,8 @@ class _Window(PandasObject, ShallowMixin, SelectionMixin): "axis", "on", "closed", - ] # type: List[str] - exclusions = set() # type: Set[str] + ] + exclusions: Set[str] = set() def __init__( self, @@ -449,7 +449,7 @@ def _apply( window_indexer = self._get_window_indexer() results = [] - exclude = [] # type: List[Scalar] + exclude: List[Scalar] = [] for i, b in enumerate(blocks): try: values = self._prep_values(b.values) diff --git a/pandas/io/common.py b/pandas/io/common.py index bd3808cf37b6b..c0eddb679c6f8 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -418,7 +418,7 @@ def _get_handle( except ImportError: need_text_wrapping = BufferedIOBase # type: ignore - handles = list() # type: List[IO] + handles: List[IO] = list() f = path_or_buf # Convert pathlib.Path/py.path.local or string diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 97556f9685001..78054936f50f2 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -76,12 +76,12 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: empty_rows = 0 max_row_len = 0 - table = [] # type: List[List[Scalar]] + table: List[List[Scalar]] = [] for i, sheet_row in enumerate(sheet_rows): sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names] empty_cells = 0 - table_row = [] # type: List[Scalar] + table_row: List[Scalar] = [] for j, sheet_cell in enumerate(sheet_cells): if sheet_cell.qname == table_cell_name: diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index d0d6096a4425e..d278c6b3bbef2 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -531,7 +531,7 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar: return cell.value def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: - data = [] # type: List[List[Scalar]] + data: List[List[Scalar]] = [] for row in sheet.rows: data.append([self._convert_cell(cell, convert_float) for cell in row]) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 41bddc7683764..b18f0db622b3e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -262,6 +262,8 @@ def __init__( def _chk_truncate(self) -> None: from pandas.core.reshape.concat import concat + self.tr_row_num: Optional[int] + min_rows = self.min_rows max_rows = self.max_rows # truncation determined by max_rows, actual truncated number of rows @@ -280,7 +282,7 @@ def _chk_truncate(self) -> None: else: row_num = max_rows // 2 series = concat((series.iloc[:row_num], series.iloc[-row_num:])) - self.tr_row_num = row_num # type: Optional[int] + self.tr_row_num = row_num else: self.tr_row_num = None self.tr_series = series @@ -448,13 +450,13 @@ def _get_adjustment() -> TextAdjustment: class TableFormatter: - show_dimensions = None # type: bool - is_truncated = None # type: bool - formatters = None # type: formatters_type - columns = None # type: Index + show_dimensions: bool + is_truncated: bool + formatters: formatters_type + columns: Index @property - def should_show_dimensions(self) -> Optional[bool]: + def should_show_dimensions(self) -> bool: return self.show_dimensions is True or ( self.show_dimensions == "truncate" and self.is_truncated ) @@ -616,6 +618,8 @@ def _chk_truncate(self) -> None: # Cut the data to the information actually printed max_cols = self.max_cols max_rows = self.max_rows + self.max_rows_adj: Optional[int] + max_rows_adj: Optional[int] if max_cols == 0 or max_rows == 0: # assume we are in the terminal (w, h) = get_terminal_size() @@ -631,7 +635,7 @@ def _chk_truncate(self) -> None: self.header = cast(bool, self.header) n_add_rows = self.header + dot_row + show_dimension_rows + prompt_row # rows available to fill with actual data - max_rows_adj = self.h - n_add_rows # type: Optional[int] + max_rows_adj = self.h - n_add_rows self.max_rows_adj = max_rows_adj # Format only rows and columns that could potentially fit the @@ -1073,7 +1077,7 @@ def _get_formatted_index(self, frame: "DataFrame") -> List[str]: return adjoined def _get_column_name_list(self) -> List[str]: - names = [] # type: List[str] + names: List[str] = [] columns = self.frame.columns if isinstance(columns, ABCMultiIndex): names.extend("" if name is None else name for name in columns.names) @@ -1124,8 +1128,9 @@ def format_array( List[str] """ + fmt_klass: Type[GenericArrayFormatter] if is_datetime64_dtype(values.dtype): - fmt_klass = Datetime64Formatter # type: Type[GenericArrayFormatter] + fmt_klass = Datetime64Formatter elif is_datetime64tz_dtype(values): fmt_klass = Datetime64TZFormatter elif is_timedelta64_dtype(values.dtype): @@ -1375,11 +1380,12 @@ def format_values_with(float_format): # There is a special default string when we are fixed-width # The default is otherwise to use str instead of a formatting string + float_format: Optional[float_format_type] if self.float_format is None: if self.fixed_width: float_format = partial( "{value: .{digits:d}f}".format, digits=self.digits - ) # type: Optional[float_format_type] + ) else: float_format = self.float_format else: diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 38f2e332017f0..0c6b0c1a5810b 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -45,7 +45,7 @@ def __init__( self.frame = self.fmt.frame self.columns = self.fmt.tr_frame.columns - self.elements = [] # type: List[str] + self.elements: List[str] = [] self.bold_rows = self.fmt.bold_rows self.escape = self.fmt.escape self.show_dimensions = self.fmt.show_dimensions @@ -138,11 +138,10 @@ def _write_cell( else: start_tag = "<{kind}>".format(kind=kind) + esc: Union[OrderedDict[str, str], Dict] if self.escape: # escape & first to prevent double escaping of & - esc = OrderedDict( - [("&", r"&"), ("<", r"<"), (">", r">")] - ) # type: Union[OrderedDict[str, str], Dict] + esc = OrderedDict([("&", r"&"), ("<", r"<"), (">", r">")]) else: esc = {} @@ -408,7 +407,7 @@ def _write_regular_rows( else: index_values = self.fmt.tr_frame.index.format() - row = [] # type: List[str] + row: List[str] = [] for i in range(nrows): if truncate_v and i == (self.fmt.tr_row_num): diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 6f903e770c86c..008a99427f3c7 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -133,7 +133,7 @@ def pad_empties(x): if self.fmt.has_index_names and self.fmt.show_index_names: nlevels += 1 strrows = list(zip(*strcols)) - self.clinebuf = [] # type: List[List[int]] + self.clinebuf: List[List[int]] = [] for i, row in enumerate(strrows): if i == nlevels and self.fmt.header: diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 061103820ca83..a4f1488fb6b69 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -513,7 +513,7 @@ def format_object_attrs( list of 2-tuple """ - attrs = [] # type: List[Tuple[str, Union[str, int]]] + attrs: List[Tuple[str, Union[str, int]]] = [] if hasattr(obj, "dtype") and include_dtype: # error: "Sequence[Any]" has no attribute "dtype" attrs.append(("dtype", "'{}'".format(obj.dtype))) # type: ignore diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 26a3248262f9a..89d5b52ffbf1e 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -62,8 +62,10 @@ def to_json( if orient == "table" and isinstance(obj, Series): obj = obj.to_frame(name=obj.name or "values") + + writer: Type["Writer"] if orient == "table" and isinstance(obj, DataFrame): - writer = JSONTableWriter # type: Type["Writer"] + writer = JSONTableWriter elif isinstance(obj, Series): writer = SeriesWriter elif isinstance(obj, DataFrame): diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 702241bde2b34..df513d4d37d71 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -267,10 +267,10 @@ def _pull_field(js, spec): meta = [m if isinstance(m, list) else [m] for m in meta] # Disastrously inefficient for now - records = [] # type: List + records: List = [] lengths = [] - meta_vals = defaultdict(list) # type: DefaultDict + meta_vals: DefaultDict = defaultdict(list) meta_keys = [sep.join(val) for val in meta] def _recursive_extract(data, path, seen_meta, level=0): diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index ff3583b79d79c..cf1511c1221b3 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -522,8 +522,8 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): _c_unsupported = {"skipfooter"} _python_unsupported = {"low_memory", "float_precision"} -_deprecated_defaults = {} # type: Dict[str, Any] -_deprecated_args = set() # type: Set[str] +_deprecated_defaults: Dict[str, Any] = {} +_deprecated_args: Set[str] = set() def _make_parser_function(name, default_sep=","): diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9a1bfdd2be798..ba53d8cfd0de5 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1343,7 +1343,7 @@ def copy( data = self.select(k) if s.is_table: - index = False # type: Union[bool, list] + index: Union[bool, list] = False if propindexes: index = [a.name for a in s.axes if a.is_indexed] new_store.append( @@ -2548,9 +2548,9 @@ class Fixed: group : the group node where the table resides """ - pandas_kind = None # type: str - obj_type = None # type: Type[Union[DataFrame, Series]] - ndim = None # type: int + pandas_kind: str + obj_type: Type[Union[DataFrame, Series]] + ndim: int is_table = False def __init__(self, parent, group, encoding=None, errors="strict", **kwargs): @@ -2708,7 +2708,7 @@ class GenericFixed(Fixed): _index_type_map = {DatetimeIndex: "datetime", PeriodIndex: "period"} _reverse_index_map = {v: k for k, v in _index_type_map.items()} - attributes = [] # type: List[str] + attributes: List[str] = [] # indexer helpders def _class_to_alias(self, cls) -> str: @@ -3254,7 +3254,7 @@ class Table(Fixed): """ pandas_kind = "wide_table" - table_type = None # type: str + table_type: str levels = 1 is_table = True is_shape_reversed = False @@ -4147,11 +4147,11 @@ class LegacyTable(Table): """ - _indexables = [ + _indexables: Optional[List[IndexCol]] = [ IndexCol(name="index", axis=1, pos=0), IndexCol(name="column", axis=2, pos=1, index_kind="columns_kind"), DataCol(name="fields", cname="values", kind_attr="fields", pos=2), - ] # type: Optional[List[IndexCol]] + ] table_type = "legacy" ndim = 3 @@ -4424,7 +4424,7 @@ class AppendableFrameTable(AppendableTable): pandas_kind = "frame_table" table_type = "appendable_frame" ndim = 2 - obj_type = DataFrame # type: Type[Union[DataFrame, Series]] + obj_type: Type[Union[DataFrame, Series]] = DataFrame @property def is_transposed(self) -> bool: @@ -4650,7 +4650,7 @@ def _reindex_axis(obj, axis: int, labels: Index, other=None): if other is not None: labels = ensure_index(other.unique()).intersection(labels, sort=False) if not labels.equals(ax): - slicer = [slice(None, None)] * obj.ndim # type: List[Union[slice, Index]] + slicer: List[Union[slice, Index]] = [slice(None, None)] * obj.ndim slicer[axis] = labels obj = obj.loc[tuple(slicer)] return obj diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 5341dc3a6338a..0c5375ccc5d5c 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -57,7 +57,7 @@ def _kind(self): _layout_type = "vertical" _default_rot = 0 - orientation = None # type: Optional[str] + orientation: Optional[str] = None _pop_attributes = [ "label", "style", diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 601fde80e9a94..5d11e160bbd71 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -43,7 +43,7 @@ class TestPDApi(Base): ] # these are already deprecated; awaiting removal - deprecated_modules = [] # type: List[str] + deprecated_modules: List[str] = [] # misc misc = ["IndexSlice", "NaT"] @@ -94,10 +94,10 @@ class TestPDApi(Base): classes.extend(["Panel", "SparseSeries", "SparseDataFrame"]) # these are already deprecated; awaiting removal - deprecated_classes = [] # type: List[str] + deprecated_classes: List[str] = [] # these should be deprecated in the future - deprecated_classes_in_future = [] # type: List[str] + deprecated_classes_in_future: List[str] = [] # external modules exposed in pandas namespace modules = ["np", "datetime"] @@ -173,10 +173,10 @@ class TestPDApi(Base): funcs_to = ["to_datetime", "to_msgpack", "to_numeric", "to_pickle", "to_timedelta"] # top-level to deprecate in the future - deprecated_funcs_in_future = [] # type: List[str] + deprecated_funcs_in_future: List[str] = [] # these are already deprecated; awaiting removal - deprecated_funcs = [] # type: List[str] + deprecated_funcs: List[str] = [] # private modules in pandas namespace private_modules = [ diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 3bacd560e75cf..5cab0c1fe6d59 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -57,7 +57,7 @@ def timedelta_index(request): class SharedTests: - index_cls = None # type: Type[Union[DatetimeIndex, PeriodIndex, TimedeltaIndex]] + index_cls: Type[Union[DatetimeIndex, PeriodIndex, TimedeltaIndex]] def test_compare_len1_raises(self): # make sure we raise when comparing with different lengths, specific diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index a075521b67561..c6ce08080314a 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1889,11 +1889,11 @@ def test_invalid_parser(): pd.eval("x + y", local_dict={"x": 1, "y": 2}, parser="asdf") -_parsers = { +_parsers: Dict[str, Type[BaseExprVisitor]] = { "python": PythonExprVisitor, "pytables": pytables.ExprVisitor, "pandas": PandasExprVisitor, -} # type: Dict[str, Type[BaseExprVisitor]] +} @pytest.mark.parametrize("engine", _engines) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index ae625ed8e389f..6d91d13027f69 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -290,7 +290,7 @@ def test_is_datetime_arraylike(): assert com.is_datetime_arraylike(pd.DatetimeIndex([1, 2, 3])) -integer_dtypes = [] # type: List +integer_dtypes: List = [] @pytest.mark.parametrize( @@ -322,7 +322,7 @@ def test_is_not_integer_dtype(dtype): assert not com.is_integer_dtype(dtype) -signed_integer_dtypes = [] # type: List +signed_integer_dtypes: List = [] @pytest.mark.parametrize( @@ -358,7 +358,7 @@ def test_is_not_signed_integer_dtype(dtype): assert not com.is_signed_integer_dtype(dtype) -unsigned_integer_dtypes = [] # type: List +unsigned_integer_dtypes: List = [] @pytest.mark.parametrize( diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index e968962caf0b7..5e4fb6d69e52c 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -62,10 +62,10 @@ class BaseArithmeticOpsTests(BaseOpsUtil): * divmod_exc = TypeError """ - series_scalar_exc = TypeError # type: Optional[Type[TypeError]] - frame_scalar_exc = TypeError # type: Optional[Type[TypeError]] - series_array_exc = TypeError # type: Optional[Type[TypeError]] - divmod_exc = TypeError # type: Optional[Type[TypeError]] + series_scalar_exc: Optional[Type[TypeError]] = TypeError + frame_scalar_exc: Optional[Type[TypeError]] = TypeError + series_array_exc: Optional[Type[TypeError]] = TypeError + divmod_exc: Optional[Type[TypeError]] = TypeError def test_arith_series_with_scalar(self, data, all_arithmetic_operators): # series & scalar diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 1ac6370860ba6..c35c4c3568f74 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -31,7 +31,7 @@ class Base: """ base class for index sub-class tests """ - _holder = None # type: Optional[Type[Index]] + _holder: Optional[Type[Index]] = None _compat_props = ["shape", "ndim", "size", "nbytes"] def test_pickle_compat_construction(self): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 469c011001467..8b29cf3813d13 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -927,7 +927,7 @@ class TestReplaceSeriesCoercion(CoercionBase): klasses = ["series"] method = "replace" - rep = {} # type: Dict[str, List] + rep: Dict[str, List] = {} rep["object"] = ["a", "b"] rep["int64"] = [4, 5] rep["float64"] = [1.1, 2.2] diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 183ad500b15f3..a87e1e796c194 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -7,9 +7,9 @@ class BaseParser: - engine = None # type: Optional[str] + engine: Optional[str] = None low_memory = True - float_precision_choices = [] # type: List[Optional[str]] + float_precision_choices: List[Optional[str]] = [] def update_kwargs(self, kwargs): kwargs = kwargs.copy() diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 1c80dd9e59164..fe65820a7c975 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -583,7 +583,7 @@ class _TestSQLApi(PandasSQLTest): """ flavor = "sqlite" - mode = None # type: str + mode: str def setup_connect(self): self.conn = self.connect() @@ -1234,7 +1234,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): """ - flavor = None # type: str + flavor: str @pytest.fixture(autouse=True, scope="class") def setup_class(cls): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index e443a7cc932be..d70780741aa88 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1,5 +1,5 @@ from datetime import date, datetime, time as dt_time, timedelta -from typing import Dict, List, Tuple, Type +from typing import Dict, List, Optional, Tuple, Type import numpy as np import pytest @@ -95,7 +95,7 @@ def test_to_M8(): class Base: - _offset = None # type: Type[DateOffset] + _offset: Optional[Type[DateOffset]] = None d = Timestamp(datetime(2008, 1, 2)) timezones = [ @@ -743,7 +743,7 @@ def test_onOffset(self): for offset, d, expected in tests: assert_onOffset(offset, d, expected) - apply_cases = [] # type: _ApplyCases + apply_cases: _ApplyCases = [] apply_cases.append( ( BDay(), @@ -2631,7 +2631,7 @@ def test_onOffset(self, case): offset, d, expected = case assert_onOffset(offset, d, expected) - apply_cases = [] # type: _ApplyCases + apply_cases: _ApplyCases = [] apply_cases.append( ( CDay(), @@ -2878,7 +2878,7 @@ def test_onOffset(self, case): offset, d, expected = case assert_onOffset(offset, d, expected) - apply_cases = [] # type: _ApplyCases + apply_cases: _ApplyCases = [] apply_cases.append( ( CBMonthEnd(), @@ -3027,7 +3027,7 @@ def test_onOffset(self, case): offset, dt, expected = case assert_onOffset(offset, dt, expected) - apply_cases = [] # type: _ApplyCases + apply_cases: _ApplyCases = [] apply_cases.append( ( CBMonthBegin(), diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 9ec0dce438099..898060d011372 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -49,7 +49,7 @@ # Offset names ("time rules") and related functions #: cache of previously seen offsets -_offset_map = {} # type: Dict[str, DateOffset] +_offset_map: Dict[str, DateOffset] = {} def get_period_alias(offset_str): diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index d4f02286ff8d6..9417dc4b48499 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -344,7 +344,7 @@ class AbstractHolidayCalendar(metaclass=HolidayCalendarMetaClass): Abstract interface to create holidays following certain rules. """ - rules = [] # type: List[Holiday] + rules: List[Holiday] = [] start_date = Timestamp(datetime(1970, 1, 1)) end_date = Timestamp(datetime(2200, 12, 31)) _cache = None diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index f5e40e712642e..e516d30d5490f 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1817,8 +1817,8 @@ class QuarterOffset(DateOffset): Quarter representation - doesn't call super. """ - _default_startingMonth = None # type: Optional[int] - _from_name_startingMonth = None # type: Optional[int] + _default_startingMonth: Optional[int] = None + _from_name_startingMonth: Optional[int] = None _adjust_dst = True _attributes = frozenset(["n", "normalize", "startingMonth"]) # TODO: Consider combining QuarterOffset and YearOffset __init__ at some diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index f8c08ed8c099f..b8f17cd848292 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -327,9 +327,11 @@ def my_dog(has='fleas'): pass """ + addendum: Optional[str] + def __init__(self, addendum: Optional[str], join: str = "", indents: int = 0): if indents > 0: - self.addendum = indent(addendum, indents=indents) # type: Optional[str] + self.addendum = indent(addendum, indents=indents) else: self.addendum = addendum self.join = join