diff --git a/doc/treemethod.rst b/doc/treemethod.rst
index fee68688ab55..8feba686c4f5 100644
--- a/doc/treemethod.rst
+++ b/doc/treemethod.rst
@@ -134,7 +134,7 @@ Following table summarizes some differences in supported features between 4 tree
 +------------------+-----------+---------------------+---------------------+------------------------+
 | categorical data | F         | T                   | T                   | T                      |
 +------------------+-----------+---------------------+---------------------+------------------------+
-| External memory  | F         | T                   | P                   | P                      |
+| External memory  | F         | T                   | T                   | P                      |
 +------------------+-----------+---------------------+---------------------+------------------------+
 | Distributed      | F         | T                   | T                   | T                      |
 +------------------+-----------+---------------------+---------------------+------------------------+
diff --git a/doc/tutorials/feature_interaction_constraint.rst b/doc/tutorials/feature_interaction_constraint.rst
index 994934bc4b5a..07e5f5676633 100644
--- a/doc/tutorials/feature_interaction_constraint.rst
+++ b/doc/tutorials/feature_interaction_constraint.rst
@@ -174,6 +174,14 @@ parameter:
                            num_boost_round = 1000, evals = evallist,
                            early_stopping_rounds = 10)
 
+*******************
+Using feature names
+*******************
+
+XGBoost's Python package supports using feature names instead of feature indices
+for specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``,
+the feature interaction constraint can be specified as ``[["f0", "f2"]]``.
+
 **************
 Advanced topic
 **************
diff --git a/doc/tutorials/monotonic.rst b/doc/tutorials/monotonic.rst
index a19229be9e0e..4ed7fa273c24 100644
--- a/doc/tutorials/monotonic.rst
+++ b/doc/tutorials/monotonic.rst
@@ -69,7 +69,7 @@ Then fitting with monotonicity constraints only requires adding a single paramet
 
 .. code-block:: python
 
     params_constrained = params.copy()
-    params_constrained['monotone_constraints'] = "(1,-1)"
+    params_constrained['monotone_constraints'] = (1, -1)
     model_with_constraints = xgb.train(params_constrained, dtrain,
                                        num_boost_round = 1000, evals = evallist,
@@ -90,3 +90,13 @@ monotonic constraints may produce unnecessarily shallow trees. This is because t
 split. Monotonic constraints may wipe out all available split candidates, in which case no
 split is made. To reduce the effect, you may want to increase the ``max_bin`` parameter to
 consider more split candidates.
+
+
+*******************
+Using feature names
+*******************
+
+XGBoost's Python package supports using feature names instead of feature indices
+for specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``,
+the monotonic constraint can be specified as ``{"f0": 1, "f2": -1}``, and ``"f1"``
+will default to ``0`` (no constraint).
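Taken together, the documentation hunks above describe specifying monotone and
interaction constraints by feature name. Below is a minimal end-to-end sketch of that
documented usage (not part of the patch itself), assuming a pandas DataFrame input so
the DMatrix carries feature names, and an XGBoost build that includes this change; the
data and column names are illustrative:

    import numpy as np
    import pandas as pd
    import xgboost as xgb

    rng = np.random.default_rng(seed=0)
    X = pd.DataFrame(rng.normal(size=(256, 3)), columns=["f0", "f1", "f2"])
    y = X["f0"] - X["f2"] + rng.normal(scale=0.1, size=256)
    dtrain = xgb.DMatrix(X, label=y)  # feature names are taken from the DataFrame

    params = {
        "tree_method": "hist",
        # Increasing in "f0", decreasing in "f2"; "f1" is omitted and
        # therefore defaults to 0 (unconstrained).
        "monotone_constraints": {"f0": 1, "f2": -1},
        # Only "f0" and "f2" are allowed to interact with each other.
        "interaction_constraints": [["f0", "f2"]],
    }
    booster = xgb.train(params, dtrain, num_boost_round=10)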
diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py
index 75f69cb13e7c..36548d81375f 100644
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -1392,50 +1392,46 @@ def __init__(
             raise TypeError('Unknown type:', model_file)
 
         params = params or {}
-        params = _configure_metrics(params.copy())
-        params = self._configure_constraints(params)
-        if isinstance(params, list):
-            params.append(('validate_parameters', True))
+        params_processed = _configure_metrics(params.copy())
+        params_processed = self._configure_constraints(params_processed)
+        if isinstance(params_processed, list):
+            params_processed.append(("validate_parameters", True))
         else:
-            params['validate_parameters'] = True
+            params_processed["validate_parameters"] = True
 
-        self.set_param(params or {})
-        if (params is not None) and ('booster' in params):
-            self.booster = params['booster']
-        else:
-            self.booster = 'gbtree'
+        self.set_param(params_processed or {})
 
-    def _transform_monotone_constrains(self, value: Union[Dict[str, int], str]) -> str:
+    def _transform_monotone_constrains(
+        self, value: Union[Dict[str, int], str]
+    ) -> Union[Tuple[int, ...], str]:
         if isinstance(value, str):
             return value
 
         constrained_features = set(value.keys())
-        if not constrained_features.issubset(set(self.feature_names or [])):
-            raise ValueError('Constrained features are not a subset of '
-                             'training data feature names')
+        feature_names = self.feature_names or []
+        if not constrained_features.issubset(set(feature_names)):
+            raise ValueError(
+                "Constrained features are not a subset of training data feature names"
+            )
 
-        return '(' + ','.join([str(value.get(feature_name, 0))
-                               for feature_name in self.feature_names]) + ')'
+        return tuple(value.get(name, 0) for name in feature_names)
 
     def _transform_interaction_constraints(
-        self, value: Union[List[Tuple[str]], str]
-    ) -> str:
+        self, value: Union[Sequence[Sequence[str]], str]
+    ) -> Union[str, List[List[int]]]:
         if isinstance(value, str):
             return value
-
-        feature_idx_mapping = {k: str(v) for v, k in enumerate(self.feature_names or [])}
+        feature_idx_mapping = {
+            name: idx for idx, name in enumerate(self.feature_names or [])
+        }
 
         try:
-            s = "["
+            result = []
             for constraint in value:
-                s += (
-                    "["
-                    + ",".join(
-                        [feature_idx_mapping[feature_name] for feature_name in constraint]
-                    )
-                    + "],"
+                result.append(
+                    [feature_idx_mapping[feature_name] for feature_name in constraint]
                 )
-            return s[:-1] + "]"
+            return result
         except KeyError as e:
             raise ValueError(
                 "Constrained features are not a subset of training data feature names"
@@ -1444,17 +1440,16 @@ def _transform_interaction_constraints(
     def _configure_constraints(self, params: Union[List, Dict]) -> Union[List, Dict]:
         if isinstance(params, dict):
            value = params.get("monotone_constraints")
-            if value:
-                params[
-                    "monotone_constraints"
-                ] = self._transform_monotone_constrains(value)
+            if value is not None:
+                params["monotone_constraints"] = self._transform_monotone_constrains(
+                    value
+                )
 
             value = params.get("interaction_constraints")
-            if value:
+            if value is not None:
                 params[
                     "interaction_constraints"
                 ] = self._transform_interaction_constraints(value)
-
         elif isinstance(params, list):
             for idx, param in enumerate(params):
                 name, value = param
@@ -2462,11 +2457,9 @@ def trees_to_dataframe(self, fmap: Union[str, os.PathLike] = '') -> DataFrame:
         if not PANDAS_INSTALLED:
             raise ImportError(('pandas must be available to use this method.'
                                'Install pandas before calling again.'))
-
-        if getattr(self, 'booster', None) is not None and self.booster not in {'gbtree', 'dart'}:
-            raise ValueError(
-                f"This method is not defined for Booster type {self.booster}"
-            )
+        booster = json.loads(self.save_config())["learner"]["gradient_booster"]["name"]
+        if booster not in {"gbtree", "dart"}:
+            raise ValueError(f"This method is not defined for Booster type {booster}")
 
         tree_ids = []
         node_ids = []
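For readers following the core.py hunks above, here is a standalone sketch (not the
library code) of the name-to-index translation that the two constraint helpers perform.
The helper names and feature names are hypothetical; the behaviour mirrors the
documented defaults:

    from typing import Dict, List, Sequence, Tuple

    def monotone_dict_to_tuple(
        value: Dict[str, int], feature_names: Sequence[str]
    ) -> Tuple[int, ...]:
        """Expand a {name: direction} dict into a per-feature tuple."""
        if not set(value).issubset(set(feature_names)):
            raise ValueError(
                "Constrained features are not a subset of training data feature names"
            )
        # Features that are not listed default to 0 (unconstrained).
        return tuple(value.get(name, 0) for name in feature_names)

    def interaction_names_to_indices(
        value: Sequence[Sequence[str]], feature_names: Sequence[str]
    ) -> List[List[int]]:
        """Translate groups of feature names into groups of column indices."""
        mapping = {name: idx for idx, name in enumerate(feature_names)}
        return [[mapping[name] for name in group] for group in value]

    assert monotone_dict_to_tuple({"f0": 1, "f2": -1}, ["f0", "f1", "f2"]) == (1, 0, -1)
    assert interaction_names_to_indices([["f0", "f2"]], ["f0", "f1", "f2"]) == [[0, 2]]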
diff --git a/tests/python/test_monotone_constraints.py b/tests/python/test_monotone_constraints.py
index 2c538bff989b..c46569f6a238 100644
--- a/tests/python/test_monotone_constraints.py
+++ b/tests/python/test_monotone_constraints.py
@@ -98,7 +98,7 @@ def test_monotone_constraints_feature_names(self, format):
 
         # next check monotonicity when initializing monotone_constraints by feature names
         params = {
-            'tree_method': 'hist', 'verbosity': 1,
+            'tree_method': 'hist',
             'grow_policy': 'lossguide',
             'monotone_constraints': {'feature_0': 1, 'feature_1': -1}
         }
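The trees_to_dataframe hunk above stops tracking the booster type in a Python-side
attribute and instead reads it back from the saved JSON configuration. A small sketch
of that lookup (assuming an already-trained Booster; the helper name is hypothetical):

    import json

    import xgboost as xgb

    def booster_name(bst: xgb.Booster) -> str:
        # save_config() serialises the learner configuration to a JSON string;
        # the booster's name lives under learner -> gradient_booster -> name.
        config = json.loads(bst.save_config())
        return config["learner"]["gradient_booster"]["name"]  # e.g. "gbtree"

Reading the name from the config keeps the check accurate even when the booster type
is changed after construction via set_param, which a constructor-time attribute would
not reflect.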