Skip to content

Commit

Permalink
REFACTOR-#5332: define PQ_INDEX_REGEX as class variable (#5333)
Browse the repository at this point in the history
Signed-off-by: Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev authored Dec 3, 2022
1 parent a884c35 commit b847107
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 7 deletions.
9 changes: 5 additions & 4 deletions modin/core/io/column_stores/parquet_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
"""Module houses `ParquetDispatcher` class, that is used for reading `.parquet` files."""

import os

import re
import json

import fsspec
from fsspec.core import url_to_fs
from fsspec.spec import AbstractBufferedFile
Expand Down Expand Up @@ -292,6 +293,8 @@ def _get_fastparquet_files(self): # noqa: GL08
class ParquetDispatcher(ColumnStoreDispatcher):
"""Class handles utils for reading `.parquet` files."""

index_regex = re.compile(r"__index_level_\d+__")

@classmethod
def get_dataset(cls, path, engine, storage_options):
"""
Expand Down Expand Up @@ -603,8 +606,6 @@ def _read(cls, path, engine, columns, **kwargs):
ParquetFile API is used. Please refer to the documentation here
https://arrow.apache.org/docs/python/parquet.html
"""
from modin.pandas.io import PQ_INDEX_REGEX

if isinstance(path, str):
if os.path.isdir(path):
path_generator = os.walk(path)
Expand Down Expand Up @@ -650,7 +651,7 @@ def _read(cls, path, engine, columns, **kwargs):
columns = [
c
for c in column_names
- if c not in index_columns and not PQ_INDEX_REGEX.match(c)
+ if c not in index_columns and not cls.index_regex.match(c)
]

return cls.build_query_compiler(dataset, columns, index_columns, **kwargs)
3 changes: 0 additions & 3 deletions modin/pandas/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
)
import pathlib
import pickle
import re
from typing import (
Union,
IO,
Expand All @@ -69,8 +68,6 @@
from modin.utils import _inherit_docstrings, Engine
from . import _update_engine

PQ_INDEX_REGEX = re.compile(r"__index_level_\d+__")


def _read(**kwargs):
"""
Expand Down

0 comments on commit b847107

Please sign in to comment.