-
Notifications
You must be signed in to change notification settings - Fork 10
/
pandas_feature_union.py
49 lines (44 loc) · 1.61 KB
/
pandas_feature_union.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import numpy as np
import pandas as pd
from sklearn.externals.joblib import Parallel, delayed
from sklearn.pipeline import FeatureUnion, _fit_transform_one, _transform_one
from scipy import sparse
class PandasFeatureUnion(FeatureUnion):
def fit_transform(self, X, y=None, **fit_params):
self._validate_transformers()
result = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_transform_one)(
transformer=trans,
X=X,
y=y,
weight=weight,
**fit_params)
for name, trans, weight in self._iter())
if not result:
# All transformers are None
return np.zeros((X.shape[0], 0))
Xs, transformers = zip(*result)
self._update_transformer_list(transformers)
if any(sparse.issparse(f) for f in Xs):
Xs = sparse.hstack(Xs).tocsr()
else:
Xs = self.merge_dataframes_by_column(Xs)
return Xs
def merge_dataframes_by_column(self, Xs):
return pd.concat(Xs, axis="columns", copy=False)
def transform(self, X):
Xs = Parallel(n_jobs=self.n_jobs)(
delayed(_transform_one)(
transformer=trans,
X=X,
y=None,
weight=weight)
for name, trans, weight in self._iter())
if not Xs:
# All transformers are None
return np.zeros((X.shape[0], 0))
if any(sparse.issparse(f) for f in Xs):
Xs = sparse.hstack(Xs).tocsr()
else:
Xs = self.merge_dataframes_by_column(Xs)
return Xs