Skip to content

Commit

Permalink
Update docs (#77)
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev authored Apr 25, 2024
1 parent 3dc9b67 commit 6abdf24
Show file tree
Hide file tree
Showing 17 changed files with 490 additions and 128 deletions.
2 changes: 1 addition & 1 deletion docs/basics/column.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def my_func(df1, df2):
df1_s = df1.__dataframe_consortium_standard__(api_version='2023.11-beta')
df2_s = df2.__dataframe_consortium_standard__(api_version='2023.11-beta')
df1_s.filter(df2_s.col('a') > 0)
return df_s.dataframe
return df1_s.dataframe
```

However, if you tried passing two different dataframes to this function, you'd get
Expand Down
116 changes: 79 additions & 37 deletions docs/generate_members.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,84 @@
# ruff: noqa
import sys
import argparse

sys.path.append("..")

import pandas as pd
import polars as pl

# Build one sample object of every kind (column, dataframe, scalar, namespace)
# for each backend; their attribute names are what gets listed in the docs.
pd_series = pd.Series([1], name="a").__column_consortium_standard__()
pl_series = pl.Series("a", [1]).__column_consortium_standard__()
pd_df = pd.DataFrame({"a": [1]}).__dataframe_consortium_standard__()
pl_df = pl.DataFrame({"a": [1]}).__dataframe_consortium_standard__()
pd_scalar = pd_df.col("a").mean()
pl_scalar = pl_df.col("a").mean()
pd_namespace = pd_df.__dataframe_namespace__()
pl_namespace = pl_df.__dataframe_namespace__()

# Each docs page is paired with the object whose members it must list.
# The namespace pages use the namespace objects (they were previously fed the
# scalar objects by mistake, which duplicated the scalar member list).
for name, obj in [
    ("pandas-column.md", pd_series),
    ("polars-column.md", pl_series),
    ("pandas-dataframe.md", pd_df),
    ("polars-dataframe.md", pl_df),
    ("pandas-scalar.md", pd_scalar),
    ("polars-scalar.md", pl_scalar),
    ("pandas-namespace.md", pd_namespace),
    ("polars-namespace.md", pl_namespace),
]:
    # Keep public names and dunders; drop single-underscore private names.
    members = [
        attr
        for attr in obj.__dir__()
        if attr.startswith("__") or not attr.startswith("_")
    ]

    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(name, encoding="utf-8") as fd:
        content = fd.read()

    # NOTE(review): the spacing inside these literals has to match the nesting
    # of the ``options:`` block in the target pages - confirm against a page.
    members_txt = "\n - ".join(sorted(members)) + "\n "

    # Replace everything from the first "members:" up to (not including) the
    # first "show_signature" with the regenerated list.
    start = content.index("members:")
    end = content.index("show_signature")
    content = content[:start] + f"members:\n - {members_txt}" + content[end:]

    with open(name, "w", encoding="utf-8") as fd:
        fd.write(content)

def get_polars_objects():
    """Create sample standard-API objects backed by polars.

    Returns:
        A ``(dataframe, column, scalar, namespace)`` tuple, where the scalar
        is the mean of column ``"a"`` and the namespace comes from the
        dataframe's ``__dataframe_namespace__()``.
    """
    import polars as pl

    frame = pl.DataFrame({"a": [1]}).__dataframe_consortium_standard__()
    series = pl.Series("a", [1]).__column_consortium_standard__()
    return (
        frame,
        series,
        frame.col("a").mean(),
        frame.__dataframe_namespace__(),
    )


def get_pandas_objects():
    """Create sample standard-API objects backed by pandas.

    Returns:
        A ``(dataframe, column, scalar, namespace)`` tuple, where the scalar
        is the mean of column ``"a"`` and the namespace comes from the
        dataframe's ``__dataframe_namespace__()``.
    """
    import pandas as pd

    frame = pd.DataFrame({"a": [1]}).__dataframe_consortium_standard__()
    series = pd.Series([1], name="a").__column_consortium_standard__()
    return (
        frame,
        series,
        frame.col("a").mean(),
        frame.__dataframe_namespace__(),
    )


def get_modin_objects():
    """Create sample standard-API objects backed by Modin.

    Returns:
        A ``(dataframe, column, scalar, namespace)`` tuple, where the scalar
        is the mean of column ``"a"`` and the namespace comes from the
        dataframe's ``__dataframe_namespace__()``.
    """
    import modin.pandas as pd

    frame = pd.DataFrame({"a": [1]}).__dataframe_consortium_standard__()
    series = pd.Series([1], name="a").__column_consortium_standard__()
    return (
        frame,
        series,
        frame.col("a").mean(),
        frame.__dataframe_namespace__(),
    )


def generate_members(library: str):
    """Regenerate the ``members:`` list in the four docs pages of *library*.

    The pages ``{library}-dataframe.md``, ``{library}-column.md``,
    ``{library}-scalar.md`` and ``{library}-namespace.md`` are opened relative
    to the current working directory. In each one, the text from the first
    ``members:`` up to (not including) the first ``show_signature`` is
    replaced by the sorted attribute names of the matching sample object.

    Args:
        library: One of ``"pandas"``, ``"polars"`` or ``"modin"``.

    Raises:
        KeyError: If *library* is not one of the supported names.
        ValueError: If a page lacks the ``members:`` or ``show_signature``
            marker.
    """
    mapper = {
        "pandas": get_pandas_objects,
        "polars": get_polars_objects,
        "modin": get_modin_objects,
    }
    dataframe, column, scalar, namespace = mapper[library]()

    pages = [
        (f"{library}-dataframe.md", dataframe),
        (f"{library}-column.md", column),
        (f"{library}-scalar.md", scalar),
        (f"{library}-namespace.md", namespace),
    ]
    for name, obj in pages:
        # Keep public names and dunders; drop single-underscore private names.
        members = [
            attr
            for attr in obj.__dir__()
            if attr.startswith("__") or not attr.startswith("_")
        ]

        # Explicit UTF-8 so the result does not depend on the platform locale.
        with open(name, encoding="utf-8") as fd:
            content = fd.read()

        # NOTE(review): the spacing inside these literals has to match the
        # nesting of the ``options:`` block in the target pages - confirm.
        members_txt = "\n - ".join(sorted(members)) + "\n "

        start = content.index("members:")
        end = content.index("show_signature")
        content = content[:start] + f"members:\n - {members_txt}" + content[end:]

        with open(name, "w", encoding="utf-8") as fd:
            fd.write(content)


def main():
    """Parse the command line and regenerate the docs members lists.

    Accepts a single ``--library`` option (default ``"pandas"``) and exits the
    process with whatever ``generate_members`` returns.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--library",
        type=str,
        default="pandas",
        # Limit the value to the libraries generate_members can dispatch on,
        # so a typo becomes a usage error rather than an uncaught KeyError.
        choices=("pandas", "polars", "modin"),
        help="Library for which members will be generated.",
    )
    args = parser.parse_args()
    sys.exit(generate_members(args.library))


# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
4 changes: 2 additions & 2 deletions docs/index.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Dataframe API Compat

Extremely lightweight compatibility layer between pandas and Polars:
Extremely lightweight compatibility layer between pandas, Polars and Modin:

- ✅ No dependencies.
- ✅ Lightweight: wheel is smaller than 30 kB.
- ✅ Lightweight: wheel is smaller than 50 kB.
- ✅ Simple, minimal, and predictable.

No need to choose - support both with ease!
Expand Down
105 changes: 105 additions & 0 deletions docs/modin-column.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Modin Column
::: dataframe_api_compat.modin_standard.Column
handler: python
paths: []
options:
members:
- __add__
- __and__
- __class__
- __column_namespace__
- __delattr__
- __dict__
- __dir__
- __divmod__
- __doc__
- __eq__
- __floordiv__
- __format__
- __ge__
- __getattribute__
- __gt__
- __hash__
- __init__
- __init_subclass__
- __invert__
- __iter__
- __le__
- __lt__
- __mod__
- __module__
- __mul__
- __ne__
- __new__
- __or__
- __pow__
- __radd__
- __rand__
- __reduce__
- __reduce_ex__
- __repr__
- __rfloordiv__
- __rmod__
- __rmul__
- __ror__
- __rpow__
- __rsub__
- __rtruediv__
- __setattr__
- __sizeof__
- __str__
- __sub__
- __subclasshook__
- __truediv__
- __weakref__
- all
- any
- cast
- column
- cumulative_max
- cumulative_min
- cumulative_prod
- cumulative_sum
- day
- dtype
- fill_nan
- fill_null
- filter
- floor
- get_value
- hour
- is_in
- is_nan
- is_null
- iso_weekday
- len
- max
- mean
- median
- microsecond
- min
- minute
- month
- n_unique
- name
- nanosecond
- parent_dataframe
- persist
- prod
- rename
- second
- shift
- slice_rows
- sort
- sorted_indices
- std
- sum
- take
- to_array
- unique_indices
- unix_timestamp
- var
- year
show_signature_annotations: true
show_bases: false
show_source: false
90 changes: 90 additions & 0 deletions docs/modin-dataframe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Modin DataFrame
::: dataframe_api_compat.modin_standard.DataFrame
handler: python
paths: []
options:
members:
- __add__
- __and__
- __class__
- __dataframe_namespace__
- __delattr__
- __dict__
- __dir__
- __divmod__
- __doc__
- __eq__
- __floordiv__
- __format__
- __ge__
- __getattribute__
- __gt__
- __hash__
- __init__
- __init_subclass__
- __invert__
- __iter__
- __le__
- __lt__
- __mod__
- __module__
- __mul__
- __ne__
- __new__
- __or__
- __pow__
- __radd__
- __rand__
- __reduce__
- __reduce_ex__
- __repr__
- __rfloordiv__
- __rmod__
- __rmul__
- __ror__
- __rpow__
- __rsub__
- __rtruediv__
- __setattr__
- __sizeof__
- __str__
- __sub__
- __subclasshook__
- __truediv__
- __weakref__
- all
- any
- assign
- cast
- col
- column_names
- dataframe
- drop
- drop_nulls
- fill_nan
- fill_null
- filter
- get_column_names
- group_by
- is_nan
- is_null
- iter_columns
- join
- max
- mean
- median
- min
- persist
- prod
- rename
- schema
- select
- shape
- slice_rows
- sort
- std
- sum
- take
- to_array
- var
show_signature_annotations: true
Loading

0 comments on commit 6abdf24

Please sign in to comment.