Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Supports XGBRFClassifier and XGBRFRegressor #665

Merged
merged 5 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 20 additions & 10 deletions .azure-pipelines/linux-conda-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,39 +15,49 @@ jobs:
strategy:
matrix:

Python311-1140-RT1160-xgb175-lgbm40:
Python311-1150-RT1160-xgb175-lgbm40:
python.version: '3.11'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNXRT_PATH: 'onnxruntime==1.16.0'
ONNX_PATH: 'onnx==1.15.0'
ONNXRT_PATH: 'onnxruntime==1.16.2'
COREML_PATH: NONE
lightgbm.version: '>=4.0'
xgboost.version: '>=1.7.5'
numpy.version: ''
scipy.version: ''

Python310-1140-RT1151-xgb175:
Python311-1141-RT1162-xgb175-lgbm40:
python.version: '3.11'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: 'onnxruntime==1.16.2'
COREML_PATH: NONE
lightgbm.version: '>=4.0'
xgboost.version: '>=1.7.5'
numpy.version: ''
scipy.version: ''

Python310-1141-RT1151-xgb175:
python.version: '3.10'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: 'onnxruntime==1.15.1'
COREML_PATH: NONE
lightgbm.version: '<4.0'
xgboost.version: '>=1.7.5'
numpy.version: ''
scipy.version: ''

Python310-1140-RT1140-xgb175:
Python310-1141-RT1140-xgb175:
python.version: '3.10'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNXRT_PATH: onnxruntime==1.14.0 #'-i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: 'onnxruntime==1.14.0'
COREML_PATH: NONE
lightgbm.version: '<4.0'
xgboost.version: '>=1.7.5'
numpy.version: ''
scipy.version: ''

Python39-1140-RT1151-xgb175-scipy180:
Python39-1141-RT1151-xgb175-scipy180:
python.version: '3.9'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: 'onnxruntime==1.15.1'
COREML_PATH: NONE
lightgbm.version: '>=4.0'
Expand Down
26 changes: 20 additions & 6 deletions .azure-pipelines/win32-conda-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,38 @@ jobs:
strategy:
matrix:

Python311-1141-RT1160:
Python311-1150-RT1162:
python.version: '3.11'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNXRT_PATH: 'onnxruntime==1.16.0'
ONNX_PATH: 'onnx==1.15.0'
ONNXRT_PATH: 'onnxruntime==1.16.2'
COREML_PATH: NONE
numpy.version: ''

Python311-1141-RT1162:
python.version: '3.11'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: 'onnxruntime==1.16.2'
COREML_PATH: NONE
numpy.version: ''

Python310-1141-RT1151:
python.version: '3.10'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: 'onnxruntime==1.15.1'
COREML_PATH: NONE
numpy.version: ''

Python310-1141-RT1140:
python.version: '3.10'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNXRT_PATH: onnxruntime==1.14.0 #'-i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: onnxruntime==1.14.0
COREML_PATH: NONE
numpy.version: ''

Python39-1141-RT1140:
python.version: '3.9'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: onnxruntime==1.14.0
COREML_PATH: NONE
numpy.version: ''

Expand Down
22 changes: 18 additions & 4 deletions onnxmltools/convert/xgboost/_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
from packaging.version import Version
import numpy as np
from xgboost import XGBRegressor, XGBClassifier, __version__

try:
from xgboost import XGBRFRegressor, XGBRFClassifier
except ImportError:
# old version of xgboost
XGBRFRegressor, XGBRFClassifier = None, None
from onnxconverter_common.data_types import FloatTensorType
from ..common._container import XGBoostModelContainer
from ..common._topology import Topology
Expand All @@ -19,6 +25,15 @@
XGBRegressor: "XGBRegressor",
}

if XGBRFClassifier:
xgboost_operator_name_map.update(
{
XGBRFClassifier: "XGBRFClassifier",
XGBRFRegressor: "XGBRFRegressor",
}
)
xgboost_classifier_list.append(XGBRFClassifier)


def _append_covers(node):
res = []
Expand Down Expand Up @@ -161,10 +176,9 @@ def _parse_xgboost_simple_model(scope, model, inputs):
)
this_operator.inputs = inputs

if (
type(model) in xgboost_classifier_list
or getattr(model, "operator_name", None) == "XGBClassifier"
):
if type(model) in xgboost_classifier_list or getattr(
model, "operator_name", None
) in ("XGBClassifier", "XGBRFClassifier"):
# For classifiers, we may have two outputs, one for label and
# the other one for probabilities of all classes.
# Notice that their types here are not necessarily correct
Expand Down
14 changes: 10 additions & 4 deletions onnxmltools/convert/xgboost/operator_converters/XGBoost.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
import numpy as np
from onnx import TensorProto
from xgboost import XGBClassifier

try:
from xgboost import XGBRFClassifier
except ImportError:
XGBRFClassifier = None
from ...common._registration import register_converter
from ..common import get_xgb_params

Expand Down Expand Up @@ -390,10 +395,9 @@ def convert(scope, operator, container):

def convert_xgboost(scope, operator, container):
xgb_node = operator.raw_operator
if (
isinstance(xgb_node, XGBClassifier)
or getattr(xgb_node, "operator_name", None) == "XGBClassifier"
):
if isinstance(xgb_node, (XGBClassifier, XGBRFClassifier)) or getattr(
xgb_node, "operator_name", None
) in ("XGBClassifier", "XGBRFClassifier"):
cls = XGBClassifierConverter
else:
cls = XGBRegressorConverter
Expand All @@ -402,4 +406,6 @@ def convert_xgboost(scope, operator, container):


register_converter("XGBClassifier", convert_xgboost)
register_converter("XGBRFClassifier", convert_xgboost)
register_converter("XGBRegressor", convert_xgboost)
register_converter("XGBRFRegressor", convert_xgboost)
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,4 @@ def calculate_xgboost_classifier_output_shapes(operator):


register_shape_calculator("XGBClassifier", calculate_xgboost_classifier_output_shapes)
register_shape_calculator("XGBRFClassifier", calculate_xgboost_classifier_output_shapes)
1 change: 1 addition & 0 deletions onnxmltools/convert/xgboost/shape_calculators/Regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from ...common.shape_calculator import calculate_linear_regressor_output_shapes

register_shape_calculator("XGBRegressor", calculate_linear_regressor_output_shapes)
register_shape_calculator("XGBRFRegressor", calculate_linear_regressor_output_shapes)
16 changes: 8 additions & 8 deletions tests/xgboost/test_xgboost_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def test_xgb_regressor(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(
x_test.astype("float32"),
xgb,
Expand All @@ -96,7 +96,7 @@ def test_xgb_classifier(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(x_test, xgb, conv_model, basename="SklearnXGBClassifier")

def test_xgb_classifier_uint8(self):
Expand All @@ -106,7 +106,7 @@ def test_xgb_classifier_uint8(self):
initial_types=[("input", FloatTensorType(shape=["None", "None"]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(x_test, xgb, conv_model, basename="SklearnXGBClassifier")

def test_xgb_classifier_multi(self):
Expand All @@ -116,7 +116,7 @@ def test_xgb_classifier_multi(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(
x_test, xgb, conv_model, basename="SklearnXGBClassifierMulti"
)
Expand All @@ -130,7 +130,7 @@ def test_xgb_classifier_multi_reglog(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(
x_test, xgb, conv_model, basename="SklearnXGBClassifierMultiRegLog"
)
Expand All @@ -144,7 +144,7 @@ def test_xgb_classifier_reglog(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(
x_test, xgb, conv_model, basename="SklearnXGBClassifierRegLog"
)
Expand All @@ -163,7 +163,7 @@ def test_xgb_classifier_multi_discrete_int_labels(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(
x_test.astype("float32"),
xgb,
Expand Down Expand Up @@ -631,7 +631,7 @@ def test_xgb_classifier_601(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(x_test, xgb, conv_model, basename="SklearnXGBClassifier601")

def test_xgb_classifier_hinge(self):
Expand Down
82 changes: 82 additions & 0 deletions tests/xgboost/test_xgboost_converters_rf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# SPDX-License-Identifier: Apache-2.0

import unittest
import numpy as np
from sklearn.datasets import load_diabetes, make_classification
from sklearn.model_selection import train_test_split
from xgboost import XGBRFRegressor, XGBRFClassifier
from onnx.defs import onnx_opset_version
from onnxconverter_common.onnx_ex import DEFAULT_OPSET_NUMBER
from onnxmltools.convert import convert_xgboost
from onnxmltools.convert.common.data_types import FloatTensorType
from onnxmltools.utils import dump_data_and_model


# Opset passed as ``target_opset`` to every conversion in this module: the
# smaller of DEFAULT_OPSET_NUMBER and the value returned by
# onnx_opset_version().
TARGET_OPSET = min(DEFAULT_OPSET_NUMBER, onnx_opset_version())


def fct_cl2(y):
    """Overwrite every entry of *y* equal to 2 with 0 and return *y*.

    The update is done in place through boolean-mask assignment, so the
    object returned is the very same object that was passed in.
    """
    is_two = y == 2
    y[is_two] = 0
    return y


def fct_cl3(y):
    """Overwrite every entry of *y* equal to 0 with 6 and return *y*.

    The update is done in place through boolean-mask assignment, so the
    object returned is the very same object that was passed in.
    """
    is_zero = y == 0
    y[is_zero] = 6
    return y


def fct_id(y):
    """Return *y* untouched (identity label transform)."""
    return y


def _fit_classification_model(model, n_classes, is_str=False, dtype=None):
    """Fit *model* on a synthetic classification dataset.

    The dataset comes from ``make_classification`` (1000 samples, 100
    features, 7 informative, ``random_state=42``) and is split in half
    with ``train_test_split`` (``random_state=42``).

    :param model: estimator exposing ``fit(X, y)``
    :param n_classes: number of classes to generate
    :param is_str: cast the labels to ``np.str_`` instead of ``np.int64``
    :param dtype: if not None, the training labels are additionally cast
        to this dtype right before fitting
    :return: tuple ``(model, x_test)`` where ``x_test`` is the held-out
        half of the features cast to ``np.float32``
    """
    features, labels = make_classification(
        n_classes=n_classes,
        n_features=100,
        n_samples=1000,
        random_state=42,
        n_informative=7,
    )
    if is_str:
        labels = labels.astype(np.str_)
    else:
        labels = labels.astype(np.int64)
    train_x, test_x, train_y, _ = train_test_split(
        features, labels, test_size=0.5, random_state=42
    )
    if dtype is not None:
        train_y = train_y.astype(dtype)
    model.fit(train_x, train_y)
    return model, test_x.astype(np.float32)


class TestXGBoostRFModels(unittest.TestCase):
    """Conversion tests for XGBRFRegressor and XGBRFClassifier."""

    def test_xgbrf_regressor(self):
        """Fit XGBRFRegressor on the diabetes data, convert it, dump it."""
        diabetes = load_diabetes()
        x_train, x_test, y_train, _ = train_test_split(
            diabetes.data, diabetes.target, test_size=0.5, random_state=42
        )
        regressor = XGBRFRegressor()
        regressor.fit(x_train, y_train)
        input_types = [("input", FloatTensorType(shape=[None, None]))]
        onnx_model = convert_xgboost(
            regressor,
            initial_types=input_types,
            target_opset=TARGET_OPSET,
        )
        # Hand the held-out inputs, the fitted model and its converted
        # counterpart to the shared dump helper.
        dump_data_and_model(
            x_test.astype("float32"),
            regressor,
            onnx_model,
            basename="SklearnXGBRFRegressor-Dec3",
        )

    def test_xgbrf_classifier(self):
        """Fit a binary XGBRFClassifier, convert it, dump it."""
        classifier, x_test = _fit_classification_model(XGBRFClassifier(), 2)
        input_types = [("input", FloatTensorType(shape=[None, None]))]
        onnx_model = convert_xgboost(
            classifier,
            initial_types=input_types,
            target_opset=TARGET_OPSET,
        )
        dump_data_and_model(
            x_test, classifier, onnx_model, basename="SklearnXGBRFClassifier"
        )


if __name__ == "__main__":
    # Run every test of this module with verbose output when the file is
    # executed directly as a script.
    unittest.main(verbosity=2)
Loading