Skip to content

Commit

Permalink
CI,STYLE: add spell check? (#38776)
Browse files Browse the repository at this point in the history
* run codespell on pandas/core

* fix underline
  • Loading branch information
MarcoGorelli authored Dec 29, 2020
1 parent e752928 commit d201fcc
Show file tree
Hide file tree
Showing 20 changed files with 46 additions and 37 deletions.
8 changes: 7 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
minimum_pre_commit_version: '2.9.2'
minimum_pre_commit_version: 2.9.2
repos:
- repo: https://github.com/python/black
rev: 20.8b1
Expand Down Expand Up @@ -168,3 +168,9 @@ repos:
exclude: ^LICENSES/|\.(html|csv|txt|svg|py)$
- id: trailing-whitespace
exclude: \.(html|svg)$
- repo: https://github.com/codespell-project/codespell
rev: v2.0.0
hooks:
- id: codespell
types_or: [python, rst, markdown]
files: ^pandas/core/
2 changes: 1 addition & 1 deletion pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def fillna(
if method is not None:
func = missing.get_fill_func(method)
new_values = func(self._ndarray.copy(), limit=limit, mask=mask)
# TODO: PandasArray didnt used to copy, need tests for this
# TODO: PandasArray didn't use to copy, need tests for this
new_values = self._from_backing_data(new_values)
else:
# fill with value
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,7 @@ def isin(self, values) -> np.ndarray:
return np.zeros(self.shape, dtype=bool)

if not isinstance(values, type(self)):
inferrable = [
inferable = [
"timedelta",
"timedelta64",
"datetime",
Expand All @@ -751,7 +751,7 @@ def isin(self, values) -> np.ndarray:
]
if values.dtype == object:
inferred = lib.infer_dtype(values, skipna=False)
if inferred not in inferrable:
if inferred not in inferable:
if inferred == "string":
pass

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ class FloatingArray(NumericArray):
.. warning::
FloatingArray is currently experimental, and its API or internal
implementation may change without warning. Expecially the behaviour
implementation may change without warning. Especially the behaviour
regarding NaN (distinct from NA missing values) is subject to change.
We represent a FloatingArray with 2 numpy arrays:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ def _concat_same_type(

else:
# when concatenating block indices, we don't claim that you'll
# get an identical index as concating the values and then
# get an identical index as concatenating the values and then
# creating a new index. We don't want to spend the time trying
# to merge blocks across arrays in `to_concat`, so the resulting
# BlockIndex may have more blocks.
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
fill_value = fill_values[0]

# np.nan isn't a singleton, so we may end up with multiple
# NaNs here, so we ignore tha all NA case too.
# NaNs here, so we ignore the all NA case too.
if not (len(set(fill_values)) == 1 or isna(fill_values).all()):
warnings.warn(
"Concatenating sparse arrays with multiple fill "
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
elif not isinstance(value, str):
raise ValueError("Scalar must be NA or str")

# Slice data and insert inbetween
# Slice data and insert in-between
new_data = [
*self._data[0:key].chunks,
pa.array([value], type=pa.string()),
Expand Down Expand Up @@ -616,7 +616,7 @@ def value_counts(self, dropna: bool = True) -> Series:

# Index cannot hold ExtensionArrays yet
index = Index(type(self)(vc.field(0)).astype(object))
# No missings, so we can adhere to the interface and return a numpy array.
# No missing values so we can adhere to the interface and return a numpy array.
counts = np.array(vc.field(1))

if dropna and self._data.null_count > 0:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/computation/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def create_valid_python_identifier(name: str) -> str:

# Create a dict with the special characters and their replacement string.
# EXACT_TOKEN_TYPES contains these special characters
# toke.tok_name contains a readable description of the replacement string.
# token.tok_name contains a readable description of the replacement string.
special_characters_replacements = {
char: f"_{token.tok_name[tokval]}_"
# The ignore here is because of a bug in mypy that is resolved in 0.740
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5560,7 +5560,7 @@ def _is_mixed_type(self) -> bool_t:
return False

if self._mgr.any_extension_types:
# Even if they have the same dtype, we cant consolidate them,
# Even if they have the same dtype, we can't consolidate them,
# so we pretend this is "mixed"
return True

Expand Down Expand Up @@ -10647,7 +10647,7 @@ def _add_numeric_operations(cls):
"""
Add the operations to the cls; evaluate the doc strings again
"""
axis_descr, name1, name2 = _doc_parms(cls)
axis_descr, name1, name2 = _doc_params(cls)

@doc(
_bool_doc,
Expand Down Expand Up @@ -11207,8 +11207,8 @@ def last_valid_index(self):
return self._find_valid_index("last")


def _doc_parms(cls):
"""Return a tuple of the doc parms."""
def _doc_params(cls):
"""Return a tuple of the doc params."""
axis_descr = (
f"{{{', '.join(f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS))}}}"
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ def _ea_wrap_cython_operation(
If we have an ExtensionArray, unwrap, call _cython_operation, and
re-wrap if appropriate.
"""
# TODO: general case implementation overrideable by EAs.
# TODO: general case implementation overridable by EAs.
orig_values = values

if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4842,7 +4842,7 @@ def argsort(self, *args, **kwargs) -> np.ndarray:
>>> idx[order]
Index(['a', 'b', 'c', 'd'], dtype='object')
"""
# This works for either ndarray or EA, is overriden
# This works for either ndarray or EA, is overridden
# by RangeIndex, MultiIndex
return self._data.argsort(*args, **kwargs)

Expand Down Expand Up @@ -4974,7 +4974,7 @@ def get_indexer_non_unique(self, target):
return self._get_indexer_non_comparable(target, method=None, unique=False)

if not is_dtype_equal(self.dtype, target.dtype):
# TODO: if object, could use infer_dtype to pre-empt costly
# TODO: if object, could use infer_dtype to preempt costly
# conversion if still non-comparable?
dtype = find_common_type([self.dtype, target.dtype])
if (
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,12 @@ def equals(self, other: object) -> bool:
return False
elif not isinstance(other, type(self)):
should_try = False
inferrable = self._data._infer_matches
inferable = self._data._infer_matches
if other.dtype == object:
should_try = other.inferred_type in inferrable
should_try = other.inferred_type in inferable
elif is_categorical_dtype(other.dtype):
other = cast("CategoricalIndex", other)
should_try = other.categories.inferred_type in inferrable
should_try = other.categories.inferred_type in inferable

if should_try:
try:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ def difference(self, other, sort=None):
if len(overlap) == len(self):
return self[:0].rename(res_name)
if not isinstance(overlap, RangeIndex):
# We wont end up with RangeIndex, so fall back
# We won't end up with RangeIndex, so fall back
return super().difference(other, sort=sort)
if overlap.step != first.step:
# In some cases we might be able to get a RangeIndex back,
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1807,7 +1807,7 @@ def _slice(self, slicer):
# return same dims as we currently have
if not isinstance(slicer, tuple) and self.ndim == 2:
# reached via getitem_block via _slice_take_blocks_ax0
# TODO(EA2D): wont be necessary with 2D EAs
# TODO(EA2D): won't be necessary with 2D EAs
slicer = (slicer, slice(None))

if isinstance(slicer, tuple) and len(slicer) == 2:
Expand All @@ -1817,7 +1817,7 @@ def _slice(self, slicer):
"invalid slicing for a 1-ndim ExtensionArray", first
)
# GH#32959 only full-slicers along fake-dim0 are valid
# TODO(EA2D): wont be necessary with 2D EAs
# TODO(EA2D): won't be necessary with 2D EAs
new_locs = self.mgr_locs[first]
if len(new_locs):
# effectively slice(None)
Expand Down Expand Up @@ -2280,7 +2280,7 @@ def _check_ndim(self, values, ndim):
"""
ndim inference and validation.
This is overriden by the DatetimeTZBlock to check the case of 2D
This is overridden by the DatetimeTZBlock to check the case of 2D
data (values.ndim == 2), which should only be allowed if ndim is
also 2.
The case of 1D array is still allowed with both ndim of 1 or 2, as
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ def _convert_object_array(
content: List[Scalar], dtype: Optional[DtypeObj] = None
) -> List[Scalar]:
"""
Internal function ot convert object array.
Internal function to convert object array.
Parameters
----------
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1355,7 +1355,7 @@ def _slice_take_blocks_ax0(
blk = self.blocks[0]

if sl_type in ("slice", "mask"):
# GH#32959 EABlock would fail since we cant make 0-width
# GH#32959 EABlock would fail since we can't make 0-width
# TODO(EA2D): special casing unnecessary with 2D EAs
if sllen == 0:
return []
Expand Down
16 changes: 8 additions & 8 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1221,33 +1221,33 @@ def nankurt(

with np.errstate(invalid="ignore", divide="ignore"):
adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
numer = count * (count + 1) * (count - 1) * m4
denom = (count - 2) * (count - 3) * m2 ** 2
numerator = count * (count + 1) * (count - 1) * m4
denominator = (count - 2) * (count - 3) * m2 ** 2

# floating point error
#
# #18044 in _libs/windows.pyx calc_kurt follow this behavior
# to fix the fperr to treat denom <1e-14 as zero
numer = _zero_out_fperr(numer)
denom = _zero_out_fperr(denom)
numerator = _zero_out_fperr(numerator)
denominator = _zero_out_fperr(denominator)

if not isinstance(denom, np.ndarray):
if not isinstance(denominator, np.ndarray):
# if ``denominator`` is a scalar, check these corner cases first before
# doing division
if count < 4:
return np.nan
if denom == 0:
if denominator == 0:
return 0

with np.errstate(invalid="ignore", divide="ignore"):
result = numer / denom - adj
result = numerator / denominator - adj

dtype = values.dtype
if is_float_dtype(dtype):
result = result.astype(dtype)

if isinstance(result, np.ndarray):
result = np.where(denom == 0, 0, result)
result = np.where(denominator == 0, 0, result)
result[count < 4] = np.nan

return result
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,8 +740,8 @@ def _build_names_mapper(
A row or column name is replaced if it is duplicate among the rows of the inputs,
among the columns of the inputs or between the rows and the columns.
Paramters
---------
Parameters
----------
rownames: list[str]
colnames: list[str]
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1858,7 +1858,7 @@ def _get_corr(a, b):
window=window, min_periods=self.min_periods, center=self.center
)
# GH 31286: Through using var instead of std we can avoid numerical
# issues when the result of var is withing floating proint precision
# issues when the result of var is within floating point precision
# while std is not.
return a.cov(b, **kwargs) / (a.var(**kwargs) * b.var(**kwargs)) ** 0.5

Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ filterwarnings =
error:The SparseArray:FutureWarning
junit_family=xunit2

[codespell]
ignore-words-list=ba,blocs,coo,datas,fo,hist,nd,ser

[coverage:run]
branch = False
omit =
Expand Down

0 comments on commit d201fcc

Please sign in to comment.