Skip to content

Commit

Permalink
Cleanup configuration for constraints. (#7758)
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis authored Mar 28, 2022
1 parent 3c9b044 commit a50b842
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 42 deletions.
2 changes: 1 addition & 1 deletion doc/treemethod.rst
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ Following table summarizes some differences in supported features between 4 tree
+------------------+-----------+---------------------+---------------------+------------------------+
| categorical data | F | T | T | T |
+------------------+-----------+---------------------+---------------------+------------------------+
| External memory | F | T | P | P |
| External memory | F | T | T | P |
+------------------+-----------+---------------------+---------------------+------------------------+
| Distributed | F | T | T | T |
+------------------+-----------+---------------------+---------------------+------------------------+
Expand Down
8 changes: 8 additions & 0 deletions doc/tutorials/feature_interaction_constraint.rst
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,14 @@ parameter:
num_boost_round = 1000, evals = evallist,
early_stopping_rounds = 10)
**************************
Using feature name instead
**************************

XGBoost's Python package supports using feature names instead of feature indices for
specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
feature interaction constraint can be specified as ``[["f0", "f2"]]``.

**************
Advanced topic
**************
Expand Down
12 changes: 11 additions & 1 deletion doc/tutorials/monotonic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ Then fitting with monotonicity constraints only requires adding a single paramet
.. code-block:: python
params_constrained = params.copy()
params_constrained['monotone_constraints'] = "(1,-1)"
params_constrained['monotone_constraints'] = (1,-1)
model_with_constraints = xgb.train(params_constrained, dtrain,
num_boost_round = 1000, evals = evallist,
Expand All @@ -90,3 +90,13 @@ monotonic constraints may produce unnecessarily shallow trees. This is because t
split. Monotonic constraints may wipe out all available split candidates, in which case no
split is made. To reduce the effect, you may want to increase the ``max_bin`` parameter to
consider more split candidates.


*******************
Using feature names
*******************

XGBoost's Python package supports using feature names instead of feature indices for
specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
monotonic constraint can be specified as ``{"f0": 1, "f2": -1}``, and ``"f1"`` will
default to ``0`` (no constraint).
71 changes: 32 additions & 39 deletions python-package/xgboost/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1392,50 +1392,46 @@ def __init__(
raise TypeError('Unknown type:', model_file)

params = params or {}
params = _configure_metrics(params.copy())
params = self._configure_constraints(params)
if isinstance(params, list):
params.append(('validate_parameters', True))
params_processed = _configure_metrics(params.copy())
params_processed = self._configure_constraints(params_processed)
if isinstance(params_processed, list):
params_processed.append(("validate_parameters", True))
else:
params['validate_parameters'] = True
params_processed["validate_parameters"] = True

self.set_param(params or {})
if (params is not None) and ('booster' in params):
self.booster = params['booster']
else:
self.booster = 'gbtree'
self.set_param(params_processed or {})

def _transform_monotone_constrains(self, value: Union[Dict[str, int], str]) -> str:
def _transform_monotone_constrains(
self, value: Union[Dict[str, int], str]
) -> Union[Tuple[int, ...], str]:
if isinstance(value, str):
return value

constrained_features = set(value.keys())
if not constrained_features.issubset(set(self.feature_names or [])):
raise ValueError('Constrained features are not a subset of '
'training data feature names')
feature_names = self.feature_names or []
if not constrained_features.issubset(set(feature_names)):
raise ValueError(
"Constrained features are not a subset of training data feature names"
)

return '(' + ','.join([str(value.get(feature_name, 0))
for feature_name in self.feature_names]) + ')'
return tuple(value.get(name, 0) for name in feature_names)

def _transform_interaction_constraints(
self, value: Union[List[Tuple[str]], str]
) -> str:
self, value: Union[Sequence[Sequence[str]], str]
) -> Union[str, List[List[int]]]:
if isinstance(value, str):
return value

feature_idx_mapping = {k: str(v) for v, k in enumerate(self.feature_names or [])}
feature_idx_mapping = {
name: idx for idx, name in enumerate(self.feature_names or [])
}

try:
s = "["
result = []
for constraint in value:
s += (
"["
+ ",".join(
[feature_idx_mapping[feature_name] for feature_name in constraint]
)
+ "],"
result.append(
[feature_idx_mapping[feature_name] for feature_name in constraint]
)
return s[:-1] + "]"
return result
except KeyError as e:
raise ValueError(
"Constrained features are not a subset of training data feature names"
Expand All @@ -1444,17 +1440,16 @@ def _transform_interaction_constraints(
def _configure_constraints(self, params: Union[List, Dict]) -> Union[List, Dict]:
if isinstance(params, dict):
value = params.get("monotone_constraints")
if value:
params[
"monotone_constraints"
] = self._transform_monotone_constrains(value)
if value is not None:
params["monotone_constraints"] = self._transform_monotone_constrains(
value
)

value = params.get("interaction_constraints")
if value:
if value is not None:
params[
"interaction_constraints"
] = self._transform_interaction_constraints(value)

elif isinstance(params, list):
for idx, param in enumerate(params):
name, value = param
Expand Down Expand Up @@ -2462,11 +2457,9 @@ def trees_to_dataframe(self, fmap: Union[str, os.PathLike] = '') -> DataFrame:
if not PANDAS_INSTALLED:
raise ImportError(('pandas must be available to use this method.'
'Install pandas before calling again.'))

if getattr(self, 'booster', None) is not None and self.booster not in {'gbtree', 'dart'}:
raise ValueError(
f"This method is not defined for Booster type {self.booster}"
)
booster = json.loads(self.save_config())["learner"]["gradient_booster"]["name"]
if booster not in {"gbtree", "dart"}:
raise ValueError(f"This method is not defined for Booster type {booster}")

tree_ids = []
node_ids = []
Expand Down
2 changes: 1 addition & 1 deletion tests/python/test_monotone_constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_monotone_constraints_feature_names(self, format):

# next check monotonicity when initializing monotone_constraints by feature names
params = {
'tree_method': 'hist', 'verbosity': 1,
'tree_method': 'hist',
'grow_policy': 'lossguide',
'monotone_constraints': {'feature_0': 1, 'feature_1': -1}
}
Expand Down

0 comments on commit a50b842

Please sign in to comment.