[SPARK-45390][PYTHON] Remove distutils usage
dongjoon-hyun committed Oct 1, 2023
1 parent 0b68e41 commit adfd926
Showing 41 changed files with 98 additions and 44 deletions.
1 change: 1 addition & 0 deletions LICENSE
@@ -221,6 +221,7 @@ connector/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaR
Python Software Foundation License
----------------------------------

python/pyspark/loose_version.py
python/docs/source/_static/copybutton.js

BSD 3-Clause
6 changes: 6 additions & 0 deletions LICENSE-binary
@@ -421,6 +421,12 @@ This section summarizes those components and their licenses. See licenses-binary
for text of these licenses.


Python Software Foundation License
----------------------------------

python/pyspark/loose_version.py


BSD 2-Clause
------------

1 change: 1 addition & 0 deletions dev/.rat-excludes
@@ -142,3 +142,4 @@ empty.proto
LimitedInputStream.java
TimSort.java
xml-resources/*
loose_version.py
4 changes: 0 additions & 4 deletions python/pyspark/__init__.py
@@ -51,10 +51,6 @@
from typing import cast, Any, Callable, Optional, TypeVar, Union
from warnings import filterwarnings

filterwarnings(
"ignore", message="distutils Version classes are deprecated. Use packaging.version instead."
)

from pyspark.conf import SparkConf
from pyspark.rdd import RDD, RDDBarrier
from pyspark.files import SparkFiles
78 changes: 78 additions & 0 deletions python/pyspark/loose_version.py
@@ -0,0 +1,78 @@
import re


class LooseVersion:
    # Reimplementation of distutils.version.LooseVersion, vendored so PySpark
    # no longer depends on the deprecated distutils module
    # (Python Software Foundation License; see LICENSE).

    component_re = re.compile(r'(\d+ | [a-z]+ | \.)', re.VERBOSE)

    def __init__(self, vstring=None):
        if vstring:
            self.parse(vstring)

    def parse(self, vstring):
        # I've given up on thinking I can reconstruct the version string
        # from the parsed tuple -- so I just store the string here for
        # use by __str__
        self.vstring = vstring
        components = [x for x in self.component_re.split(vstring)
                      if x and x != '.']
        for i, obj in enumerate(components):
            try:
                components[i] = int(obj)
            except ValueError:
                pass

        self.version = components

    def __str__(self):
        return self.vstring

    def __repr__(self):
        return "LooseVersion ('%s')" % str(self)

    def __eq__(self, other):
        c = self._cmp(other)
        if c is NotImplemented:
            return c
        return c == 0

    def __lt__(self, other):
        c = self._cmp(other)
        if c is NotImplemented:
            return c
        return c < 0

    def __le__(self, other):
        c = self._cmp(other)
        if c is NotImplemented:
            return c
        return c <= 0

    def __gt__(self, other):
        c = self._cmp(other)
        if c is NotImplemented:
            return c
        return c > 0

    def __ge__(self, other):
        c = self._cmp(other)
        if c is NotImplemented:
            return c
        return c >= 0

    def _cmp(self, other):
        if isinstance(other, str):
            other = LooseVersion(other)
        elif not isinstance(other, LooseVersion):
            return NotImplemented

        if self.version == other.version:
            return 0
        if self.version < other.version:
            return -1
        if self.version > other.version:
            return 1
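
For reference, a minimal usage sketch (not part of this commit) of the vendored class, assuming PySpark is importable; numeric components are parsed to ints, so version parts compare numerically rather than character by character:

from pyspark.loose_version import LooseVersion

# "1.10.0" parses to [1, 10, 0] and "1.4.0" to [1, 4, 0], so the numeric
# components compare as integers, not as strings.
assert LooseVersion("1.10.0") > LooseVersion("1.4.0")

# A plain string on the right-hand side is wrapped by _cmp().
assert LooseVersion("3.5.0") >= "3.4.1"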

2 changes: 1 addition & 1 deletion python/pyspark/pandas/plot/matplotlib.py
@@ -15,7 +15,7 @@
# limitations under the License.
#

from distutils.version import LooseVersion
from pyspark.loose_version import LooseVersion

import matplotlib as mat
import numpy as np
2 changes: 1 addition & 1 deletion python/pyspark/pandas/supported_api_gen.py
@@ -19,7 +19,7 @@
Generate 'Supported pandas APIs' documentation file
"""
import warnings
from distutils.version import LooseVersion
from pyspark.loose_version import LooseVersion
from enum import Enum, unique
from inspect import getmembers, isclass, isfunction, signature
from typing import Any, Callable, Dict, List, NamedTuple, Set, TextIO, Tuple
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/computation/test_any_all.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest

import numpy as np
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/computation/test_corrwith.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest


1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/computation/test_cov.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest
import decimal

1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
@@ -17,7 +17,6 @@

import datetime
import unittest
from distutils.version import LooseVersion

import pandas as pd
from pandas.api.types import CategoricalDtype
@@ -14,12 +14,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion

import pandas as pd
import numpy as np

from pyspark import pandas as ps
from pyspark.loose_version import LooseVersion
from pyspark.pandas.config import set_option, reset_option
from pyspark.testing.pandasutils import PandasOnSparkTestCase
from pyspark.testing.sqlutils import SQLTestUtils
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/groupby/test_aggregate.py
@@ -15,7 +15,6 @@
# limitations under the License.
#
import unittest
from distutils.version import LooseVersion

import pandas as pd

1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/groupby/test_apply_func.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest

import numpy as np
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/groupby/test_head_tail.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest

import numpy as np
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/groupby/test_index.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest

import pandas as pd
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/groupby/test_split_apply.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest

import pandas as pd
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/groupby/test_stat.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest

import numpy as np
2 changes: 1 addition & 1 deletion python/pyspark/pandas/tests/indexes/test_base.py
@@ -17,13 +17,13 @@

import inspect
import unittest
from distutils.version import LooseVersion
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

import pyspark.pandas as ps
from pyspark.loose_version import LooseVersion
from pyspark.pandas.exceptions import PandasNotImplementedError
from pyspark.pandas.missing.indexes import (
MissingPandasLikeDatetimeIndex,
2 changes: 1 addition & 1 deletion python/pyspark/pandas/tests/indexes/test_category.py
@@ -16,12 +16,12 @@
#

import unittest
from distutils.version import LooseVersion

import pandas as pd
from pandas.api.types import CategoricalDtype

import pyspark.pandas as ps
from pyspark.loose_version import LooseVersion
from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils


2 changes: 0 additions & 2 deletions python/pyspark/pandas/tests/indexes/test_datetime.py
@@ -17,8 +17,6 @@

import datetime

from distutils.version import LooseVersion

import numpy as np
import pandas as pd

1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/indexes/test_indexing.py
@@ -15,7 +15,6 @@
# limitations under the License.
#
import unittest
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/indexes/test_reindex.py
@@ -15,7 +15,6 @@
# limitations under the License.
#
import unittest
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/indexes/test_timedelta.py
@@ -17,7 +17,6 @@

import unittest
from datetime import timedelta
from distutils.version import LooseVersion

import pandas as pd

1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/series/test_compute.py
@@ -15,7 +15,6 @@
# limitations under the License.
#
import unittest
from distutils.version import LooseVersion
from itertools import product

import numpy as np
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_csv.py
@@ -20,7 +20,6 @@
import tempfile
import unittest
from contextlib import contextmanager
from distutils.version import LooseVersion

import pandas as pd
import numpy as np
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_dataframe_conversion.py
@@ -21,7 +21,6 @@
import tempfile
import unittest
import sys
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
2 changes: 1 addition & 1 deletion python/pyspark/pandas/tests/test_dataframe_spark_io.py
@@ -18,12 +18,12 @@
import unittest
import glob
import os
from distutils.version import LooseVersion

import numpy as np
import pandas as pd

from pyspark import pandas as ps
from pyspark.loose_version import LooseVersion
from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils


1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_ops_on_diff_frames.py
@@ -15,7 +15,6 @@
# limitations under the License.
#

from distutils.version import LooseVersion
from itertools import product
import unittest

@@ -16,7 +16,6 @@
#

import unittest
from distutils.version import LooseVersion

import pandas as pd

1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_rolling.py
@@ -15,7 +15,6 @@
# limitations under the License.
#
import unittest
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_series_conversion.py
@@ -17,7 +17,6 @@

import unittest
import sys
from distutils.version import LooseVersion

import pandas as pd

1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_series_datetime.py
@@ -17,7 +17,6 @@

import datetime
import unittest
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_series_string.py
@@ -19,7 +19,6 @@
import numpy as np
import re
import unittest
from distutils.version import LooseVersion

from pyspark import pandas as ps
from pyspark.testing.pandasutils import PandasOnSparkTestCase
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_stats.py
@@ -16,7 +16,6 @@
#

import unittest
from distutils.version import LooseVersion
import numpy as np
import pandas as pd

2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/client/core.py
@@ -19,6 +19,7 @@
"SparkConnectClient",
]

from pyspark.loose_version import LooseVersion
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__)
@@ -31,7 +32,6 @@
import urllib.parse
import uuid
import sys
from distutils.version import LooseVersion
from types import TracebackType
from typing import (
Iterable,