Skip to content

Commit

Permalink
REFACTOR-#5262: Update the examples to the latest version of the omniscripts (#5263)
Browse files: browse the repository at this point in the history

Co-authored-by: Iaroslav Igoshev <Poolliver868@mail.ru>
Signed-off-by: Kirill Suvorov <kirill.suvorov@intel.com>
  • Loading branch information
Retribution98 and YarShev committed Nov 28, 2022
1 parent 9534478 commit a7b6066
Showing 1 changed file with 22 additions and 9 deletions.
31 changes: 22 additions & 9 deletions examples/docker/modin-ray/nyc-taxi.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,18 +87,31 @@ def q2(df):


def q3(df):
df["pickup_datetime"] = df["pickup_datetime"].dt.year
return df.groupby(["pickup_datetime", "passenger_count"]).size().reset_index()
transformed = pd.DataFrame(
{
"pickup_datetime": df["pickup_datetime"].dt.year,
"passenger_count": df["passenger_count"],
}
)
return transformed.groupby(
["pickup_datetime", "passenger_count"], as_index=False
).size()


def q4(df):
df["pickup_datetime"] = df["pickup_datetime"].dt.year
df["trip_distance"] = df["trip_distance"].astype("int64")
transformed = pd.DataFrame(
{
"passenger_count": df["passenger_count"],
"pickup_datetime": df["pickup_datetime"].dt.year,
"trip_distance": df["trip_distance"].astype("int64"),
}
)
return (
df.groupby(["passenger_count", "pickup_datetime", "trip_distance"])
transformed.groupby(
["passenger_count", "pickup_datetime", "trip_distance"], as_index=False
)
.size()
.reset_index()
.sort_values(by=["pickup_datetime", 0], ascending=[True, False])
.sort_values(by=["pickup_datetime", "size"], ascending=[True, False])
)


Expand All @@ -119,8 +132,8 @@ def main():
df = measure("Reading", read, sys.argv[1])
measure("Q1", q1, df)
measure("Q2", q2, df)
measure("Q3", q3, df.copy())
measure("Q4", q4, df.copy())
measure("Q3", q3, df)
measure("Q4", q4, df)


if __name__ == "__main__":
Expand Down

0 comments on commit a7b6066

Please sign in to comment.