Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add XGBRanker #649

Merged
merged 9 commits into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion eland/ml/ml_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@
except ImportError:
pass
try:
from xgboost import XGBClassifier, XGBRegressor # type: ignore # noqa: F401
from xgboost import ( # type: ignore # noqa: F401
XGBClassifier,
XGBRanker,
XGBRegressor,
)
except ImportError:
pass
try:
Expand Down Expand Up @@ -252,6 +256,7 @@ def import_model(
"RandomForestRegressor",
"RandomForestClassifier",
"XGBClassifier",
"XGBRanker",
"XGBRegressor",
"LGBMRegressor",
"LGBMClassifier",
Expand Down Expand Up @@ -304,6 +309,11 @@ def import_model(
- "binary:logistic"
- "multi:softmax"
- "multi:softprob"
- xgboost.XGBRanker
- only the following objectives are supported:
- "rank:map"
- "rank:ndcg"
- "rank:pairwise"
- xgboost.XGBRegressor
- only the following objectives are supported:
- "reg:squarederror"
Expand Down
19 changes: 17 additions & 2 deletions eland/ml/transformers/xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,13 @@

import_optional_dependency("xgboost", on_version="warn")

from xgboost import Booster, XGBClassifier, XGBModel, XGBRegressor # type: ignore
from xgboost import ( # type: ignore
Booster,
XGBClassifier,
XGBModel,
XGBRanker,
XGBRegressor,
)


class XGBoostForestTransformer(ModelTransformer):
Expand Down Expand Up @@ -140,7 +146,7 @@ def build_forest(self) -> List[Tree]:
if len(tree_nodes) > 0:
transformed_trees.append(self.build_tree(tree_nodes))
# We add this stump as XGBoost adds the base_score to the regression outputs
if self._objective.partition(":")[0] == "reg":
if self._objective.partition(":")[0] in ["reg", "rank"]:
transformed_trees.append(self.build_base_score_stump())
return transformed_trees

Expand Down Expand Up @@ -184,6 +190,7 @@ def transform(self) -> Ensemble:

class XGBoostRegressorTransformer(XGBoostForestTransformer):
def __init__(self, model: XGBRegressor, feature_names: List[str]):
self._regressor_model = model
# XGBRegressor.base_score defaults to 0.5.
base_score = model.base_score
if base_score is None:
Expand All @@ -197,6 +204,13 @@ def determine_target_type(self) -> str:
return "regression"

def is_objective_supported(self) -> bool:
if isinstance(self._regressor_model, XGBRanker):
return self._objective in {
"rank:pairwise",
"rank:ndcg",
"rank:map",
}

return self._objective in {
"reg:squarederror",
"reg:squaredlogerror",
Expand Down Expand Up @@ -264,5 +278,6 @@ def model_type(self) -> str:

# Registry mapping each supported XGBoost model class to the transformer that
# converts it into the Elasticsearch inference-model representation.
# NOTE(review): XGBRanker is deliberately routed through the regressor
# transformer — presumably because a ranking ensemble's trees are scored the
# same way as a regression ensemble, with only the allowed rank:* objectives
# differing (handled inside the transformer); confirm against
# XGBoostRegressorTransformer.is_objective_supported.
_MODEL_TRANSFORMERS: Dict[type, Type[ModelTransformer]] = {
    XGBRegressor: XGBoostRegressorTransformer,
    XGBRanker: XGBoostRegressorTransformer,
    XGBClassifier: XGBoostClassifierTransformer,
}
41 changes: 40 additions & 1 deletion tests/ml/test_ml_model_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
HAS_SKLEARN = False

try:
from xgboost import XGBClassifier, XGBRegressor
from xgboost import XGBClassifier, XGBRanker, XGBRegressor

HAS_XGBOOST = True
except ImportError:
Expand Down Expand Up @@ -555,6 +555,45 @@ def test_xgb_classifier_objectives_and_booster(self, objective, booster):
# Clean up
es_model.delete_model()

@requires_xgboost
@pytest.mark.parametrize("compress_model_definition", [True, False])
@pytest.mark.parametrize(
    "objective",
    ["rank:ndcg", "rank:map", "rank:pairwise"],
)
def test_xgb_ranker(self, compress_model_definition, objective):
    """Round-trip an XGBRanker through Elasticsearch.

    For each supported rank:* objective (with and without compressed model
    definitions), train a small ranker locally, import it via
    MLModel.import_model, and check the Elasticsearch-side predictions
    match the local model's.
    """
    # Synthetic 5-feature dataset; each row is assigned one of three
    # query ids drawn uniformly at random.
    X, y = datasets.make_classification(n_features=5)
    rng = np.random.default_rng()
    qid = rng.integers(0, 3, size=X.shape[0])

    # Sort the inputs based on query index — XGBRanker.fit takes qid
    # alongside X/y (query groups are assumed to need to be contiguous;
    # TODO confirm against xgboost docs).
    sorted_idx = np.argsort(qid)
    X = X[sorted_idx, :]
    y = y[sorted_idx]
    qid = qid[sorted_idx]

    ranker = XGBRanker(objective=objective)
    ranker.fit(X, y, qid=qid)

    # Serialise the models to Elasticsearch, replacing any model left
    # over from a previous run under the same id.
    feature_names = ["f0", "f1", "f2", "f3", "f4"]
    model_id = "test_xgb_ranker"

    es_model = MLModel.import_model(
        ES_TEST_CLIENT,
        model_id,
        ranker,
        feature_names,
        es_if_exists="replace",
        es_compress_model_definition=compress_model_definition,
    )

    # Get some test results: local vs. Elasticsearch predictions on a
    # random sample of 20 rows must agree.
    check_prediction_equality(es_model, ranker, random_rows(X, 20))

    # Clean up
    es_model.delete_model()

@requires_xgboost
@pytest.mark.parametrize("compress_model_definition", [True, False])
@pytest.mark.parametrize(
Expand Down