Skip to content

Commit

Permalink
Remove cudf._lib.avro in favor of inlining pylibcudf (#17319)
Browse files Browse the repository at this point in the history
Contributes to #17317

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #17319
  • Loading branch information
mroeschke authored Nov 15, 2024
1 parent 8a9131a commit d67d017
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 42 deletions.
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

set(cython_sources
aggregation.pyx
avro.pyx
binaryop.pyx
column.pyx
concat.pyx
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import numpy as np

from . import (
avro,
binaryop,
concat,
copying,
Expand Down
33 changes: 0 additions & 33 deletions python/cudf/cudf/_lib/avro.pyx

This file was deleted.

2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/utils.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ from pylibcudf.libcudf.table.table cimport table, table_view
cdef data_from_unique_ptr(
unique_ptr[table] c_tbl, column_names, index_names=*)
cdef data_from_pylibcudf_table(tbl, column_names, index_names=*)
cdef data_from_pylibcudf_io(tbl_with_meta, column_names = *, index_names = *)
cpdef data_from_pylibcudf_io(tbl_with_meta, column_names = *, index_names = *)
cdef data_from_table_view(
table_view tv, object owner, object column_names, object index_names=*)
cdef table_view table_view_from_columns(columns) except *
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ cdef data_from_pylibcudf_table(tbl, column_names, index_names=None):
index_names
)

cdef data_from_pylibcudf_io(tbl_with_meta, column_names=None, index_names=None):
cpdef data_from_pylibcudf_io(tbl_with_meta, column_names=None, index_names=None):
"""
Unpacks the TableWithMetadata from libcudf I/O
into a dict of columns and an Index (cuDF format)
Expand Down
23 changes: 18 additions & 5 deletions python/cudf/cudf/io/avro.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Copyright (c) 2019-2024, NVIDIA CORPORATION.

import pylibcudf as plc

import cudf
from cudf import _lib as libcudf
from cudf._lib.utils import data_from_pylibcudf_io
from cudf.utils import ioutils


Expand All @@ -23,8 +25,19 @@ def read_avro(
filepath_or_buffer, "read_avro"
)

return cudf.DataFrame._from_data(
*libcudf.avro.read_avro(
filepath_or_buffer, columns, skiprows, num_rows
)
num_rows = -1 if num_rows is None else num_rows
skip_rows = 0 if skiprows is None else skiprows

if not isinstance(num_rows, int) or num_rows < -1:
raise TypeError("num_rows must be an int >= -1")
if not isinstance(skip_rows, int) or skip_rows < 0:
raise TypeError("skip_rows must be an int >= 0")

plc_result = plc.io.avro.read_avro(
plc.io.types.SourceInfo([filepath_or_buffer]),
columns,
skip_rows,
num_rows,
)

return cudf.DataFrame._from_data(*data_from_pylibcudf_io(plc_result))

0 comments on commit d67d017

Please sign in to comment.