From 7727ba8c25da7d4aa94951146b58cba7c5b30e11 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 25 Jun 2023 22:58:42 -0500 Subject: [PATCH] [python-package] remove 'handle' attribute from public API (fixes #5313) --- python-package/lightgbm/basic.py | 253 ++++++++++++------------ tests/python_package_test/test_basic.py | 6 +- 2 files changed, 130 insertions(+), 129 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index fd07283aa236..0cd69c64b240 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -852,7 +852,7 @@ def __init__( pred_parameter: dict or None, optional (default=None) Other parameters for the prediction. """ - self.handle = ctypes.c_void_p() + self._handle = ctypes.c_void_p() self.__is_manage_handle = True if model_file is not None: """Prediction task""" @@ -860,20 +860,20 @@ def __init__( _safe_call(_LIB.LGBM_BoosterCreateFromModelfile( _c_str(str(model_file)), ctypes.byref(out_num_iterations), - ctypes.byref(self.handle))) + ctypes.byref(self._handle))) out_num_class = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterGetNumClasses( - self.handle, + self._handle, ctypes.byref(out_num_class))) self.num_class = out_num_class.value self.num_total_iteration = out_num_iterations.value self.pandas_categorical = _load_pandas_categorical(file_name=model_file) elif booster_handle is not None: self.__is_manage_handle = False - self.handle = booster_handle + self._handle = booster_handle out_num_class = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterGetNumClasses( - self.handle, + self._handle, ctypes.byref(out_num_class))) self.num_class = out_num_class.value self.num_total_iteration = self.current_iteration() @@ -887,13 +887,14 @@ def __init__( def __del__(self) -> None: try: if self.__is_manage_handle: - _safe_call(_LIB.LGBM_BoosterFree(self.handle)) + _safe_call(_LIB.LGBM_BoosterFree(self._handle)) except AttributeError: pass def __getstate__(self) -> Dict[str, Any]: this = 
self.__dict__.copy() this.pop('handle', None) + this.pop('_handle', None) return this def predict( @@ -945,7 +946,7 @@ def predict( ptr_names[:] = [x.encode('utf-8') for x in data_names] _safe_call( _LIB.LGBM_BoosterValidateFeatureNames( - self.handle, + self._handle, ptr_names, ctypes.c_int(len(data_names)), ) @@ -963,7 +964,7 @@ def predict( if isinstance(data, (str, Path)): with _TempFile() as f: _safe_call(_LIB.LGBM_BoosterPredictForFile( - self.handle, + self._handle, _c_str(str(data)), ctypes.c_int(int_data_has_header), ctypes.c_int(predict_type), @@ -1049,7 +1050,7 @@ def __get_num_preds( 'and then concatenate predictions for them') n_preds = ctypes.c_int64(0) _safe_call(_LIB.LGBM_BoosterCalcNumPredict( - self.handle, + self._handle, ctypes.c_int(nrow), ctypes.c_int(predict_type), ctypes.c_int(start_iteration), @@ -1082,7 +1083,7 @@ def __inner_predict_np2d( raise ValueError("Wrong length of pre-allocated predict array") out_num_preds = ctypes.c_int64(0) _safe_call(_LIB.LGBM_BoosterPredictForMat( - self.handle, + self._handle, ptr_data, ctypes.c_int(type_ptr_data), ctypes.c_int32(mat.shape[0]), @@ -1219,7 +1220,7 @@ def __inner_predict_csr( csr_indices = csr.indices.astype(np.int32, copy=False) _safe_call(_LIB.LGBM_BoosterPredictForCSR( - self.handle, + self._handle, ptr_indptr, ctypes.c_int(type_ptr_indptr), csr_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), @@ -1262,7 +1263,7 @@ def __inner_predict_csr_sparse( out_ptr_data = ctypes.POINTER(ctypes.c_double)() out_shape = np.empty(2, dtype=np.int64) _safe_call(_LIB.LGBM_BoosterPredictSparseOutput( - self.handle, + self._handle, ptr_indptr, ctypes.c_int(type_ptr_indptr), csr_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), @@ -1359,7 +1360,7 @@ def __inner_predict_sparse_csc( out_ptr_data = ctypes.POINTER(ctypes.c_double)() out_shape = np.empty(2, dtype=np.int64) _safe_call(_LIB.LGBM_BoosterPredictSparseOutput( - self.handle, + self._handle, ptr_indptr, ctypes.c_int(type_ptr_indptr), 
csc_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), @@ -1429,7 +1430,7 @@ def __pred_for_csc( csc_indices = csc.indices.astype(np.int32, copy=False) _safe_call(_LIB.LGBM_BoosterPredictForCSC( - self.handle, + self._handle, ptr_indptr, ctypes.c_int(type_ptr_indptr), csc_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), @@ -1458,7 +1459,7 @@ def current_iteration(self) -> int: """ out_cur_iter = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterGetCurrentIteration( - self.handle, + self._handle, ctypes.byref(out_cur_iter))) return out_cur_iter.value @@ -1518,7 +1519,7 @@ def __init__( free_raw_data : bool, optional (default=True) If True, raw data is freed after constructing inner Dataset. """ - self.handle: Optional[_DatasetHandle] = None + self._handle: Optional[_DatasetHandle] = None self.data = data self.label = label self.reference = reference @@ -1594,11 +1595,11 @@ def _init_from_ref_dataset( self : Dataset Constructed Dataset object. """ - self.handle = ctypes.c_void_p() + self._handle = ctypes.c_void_p() _safe_call(_LIB.LGBM_DatasetCreateByReference( ref_dataset, ctypes.c_int64(total_nrow), - ctypes.byref(self.handle), + ctypes.byref(self._handle), )) return self @@ -1649,7 +1650,7 @@ def _init_from_sample( num_per_col = np.array([len(d) for d in sample_indices], dtype=np.int32) num_per_col_ptr, _, _ = _c_int_array(num_per_col) - self.handle = ctypes.c_void_p() + self._handle = ctypes.c_void_p() params_str = _param_dict_to_str(self.get_params()) _safe_call(_LIB.LGBM_DatasetCreateFromSampledColumn( ctypes.cast(sample_col_ptr, ctypes.POINTER(ctypes.POINTER(ctypes.c_double))), @@ -1660,7 +1661,7 @@ def _init_from_sample( ctypes.c_int32(total_nrow), ctypes.c_int64(total_nrow), _c_str(params_str), - ctypes.byref(self.handle), + ctypes.byref(self._handle), )) return self @@ -1682,7 +1683,7 @@ def _push_rows(self, data: np.ndarray) -> 'Dataset': data_ptr, data_type, _ = _c_float_array(data) _safe_call(_LIB.LGBM_DatasetPushRows( - self.handle, + 
self._handle, data_ptr, data_type, ctypes.c_int32(nrow), @@ -1728,9 +1729,9 @@ def get_params(self) -> Dict[str, Any]: return {} def _free_handle(self) -> "Dataset": - if self.handle is not None: - _safe_call(_LIB.LGBM_DatasetFree(self.handle)) - self.handle = None + if self._handle is not None: + _safe_call(_LIB.LGBM_DatasetFree(self._handle)) + self._handle = None self._need_slice = True if self.used_indices is not None: self.data = None @@ -1791,7 +1792,7 @@ def _lazy_init( params: Optional[Dict[str, Any]] ) -> "Dataset": if data is None: - self.handle = None + self._handle = None return self if reference is not None: self.pandas_categorical = reference.pandas_categorical @@ -1835,17 +1836,17 @@ def _lazy_init( # process for reference dataset ref_dataset = None if isinstance(reference, Dataset): - ref_dataset = reference.construct().handle + ref_dataset = reference.construct()._handle elif reference is not None: raise TypeError('Reference dataset should be None or dataset instance') # start construct data if isinstance(data, (str, Path)): - self.handle = ctypes.c_void_p() + self._handle = ctypes.c_void_p() _safe_call(_LIB.LGBM_DatasetCreateFromFile( _c_str(str(data)), _c_str(params_str), ref_dataset, - ctypes.byref(self.handle))) + ctypes.byref(self._handle))) elif isinstance(data, scipy.sparse.csr_matrix): self.__init_from_csr(data, params_str, ref_dataset) elif isinstance(data, scipy.sparse.csc_matrix): @@ -1978,7 +1979,7 @@ def __init_from_np2d( if len(mat.shape) != 2: raise ValueError('Input numpy.ndarray must be 2 dimensional') - self.handle = ctypes.c_void_p() + self._handle = ctypes.c_void_p() if mat.dtype == np.float32 or mat.dtype == np.float64: data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False) else: # change non-float data to float data, need to copy @@ -1993,7 +1994,7 @@ def __init_from_np2d( ctypes.c_int(_C_API_IS_ROW_MAJOR), _c_str(params_str), ref_dataset, - ctypes.byref(self.handle))) + ctypes.byref(self._handle))) return self 
def __init_from_list_np2d( @@ -2035,7 +2036,7 @@ def __init_from_list_np2d( type_ptr_data = chunk_type_ptr_data holders.append(holder) - self.handle = ctypes.c_void_p() + self._handle = ctypes.c_void_p() _safe_call(_LIB.LGBM_DatasetCreateFromMats( ctypes.c_int32(len(mats)), ctypes.cast(ptr_data, ctypes.POINTER(ctypes.POINTER(ctypes.c_double))), @@ -2045,7 +2046,7 @@ def __init_from_list_np2d( ctypes.c_int(_C_API_IS_ROW_MAJOR), _c_str(params_str), ref_dataset, - ctypes.byref(self.handle))) + ctypes.byref(self._handle))) return self def __init_from_csr( @@ -2057,7 +2058,7 @@ def __init_from_csr( """Initialize data from a CSR matrix.""" if len(csr.indices) != len(csr.data): raise ValueError(f'Length mismatch: {len(csr.indices)} vs {len(csr.data)}') - self.handle = ctypes.c_void_p() + self._handle = ctypes.c_void_p() ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr) ptr_data, type_ptr_data, _ = _c_float_array(csr.data) @@ -2076,7 +2077,7 @@ def __init_from_csr( ctypes.c_int64(csr.shape[1]), _c_str(params_str), ref_dataset, - ctypes.byref(self.handle))) + ctypes.byref(self._handle))) return self def __init_from_csc( @@ -2088,7 +2089,7 @@ def __init_from_csc( """Initialize data from a CSC matrix.""" if len(csc.indices) != len(csc.data): raise ValueError(f'Length mismatch: {len(csc.indices)} vs {len(csc.data)}') - self.handle = ctypes.c_void_p() + self._handle = ctypes.c_void_p() ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr) ptr_data, type_ptr_data, _ = _c_float_array(csc.data) @@ -2107,7 +2108,7 @@ def __init_from_csc( ctypes.c_int64(csc.shape[0]), _c_str(params_str), ref_dataset, - ctypes.byref(self.handle))) + ctypes.byref(self._handle))) return self @staticmethod @@ -2156,7 +2157,7 @@ def construct(self) -> "Dataset": self : Dataset Constructed Dataset object. 
""" - if self.handle is None: + if self._handle is None: if self.reference is not None: reference_params = self.reference.get_params() params = self.get_params() @@ -2182,14 +2183,14 @@ def construct(self) -> "Dataset": group_info = np.array(self.reference.group).astype(np.int32, copy=False) _, self.group = np.unique(np.repeat(range(len(group_info)), repeats=group_info)[self.used_indices], return_counts=True) - self.handle = ctypes.c_void_p() + self._handle = ctypes.c_void_p() params_str = _param_dict_to_str(self.params) _safe_call(_LIB.LGBM_DatasetGetSubset( - self.reference.construct().handle, + self.reference.construct()._handle, used_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), ctypes.c_int32(used_indices.shape[0]), _c_str(params_str), - ctypes.byref(self.handle))) + ctypes.byref(self._handle))) if not self.free_raw_data: self.get_data() if self.group is not None: @@ -2305,7 +2306,7 @@ def save_binary(self, filename: Union[str, Path]) -> "Dataset": Returns self. """ _safe_call(_LIB.LGBM_DatasetSaveBinary( - self.construct().handle, + self.construct()._handle, _c_str(str(filename)))) return self @@ -2321,7 +2322,7 @@ def update(): self._params_back_up = deepcopy(self.params) self.params.update(params) - if self.handle is None: + if self._handle is None: update() elif params is not None: ret = _LIB.LGBM_DatasetUpdateParamChecking( @@ -2337,7 +2338,7 @@ def update(): return self def _reverse_update_params(self) -> "Dataset": - if self.handle is None: + if self._handle is None: self.params = deepcopy(self._params_back_up) self._params_back_up = None return self @@ -2361,12 +2362,12 @@ def set_field( self : Dataset Dataset with set property. 
""" - if self.handle is None: + if self._handle is None: raise Exception(f"Cannot set {field_name} before construct dataset") if data is None: # set to None _safe_call(_LIB.LGBM_DatasetSetField( - self.handle, + self._handle, _c_str(field_name), None, ctypes.c_int(0), @@ -2399,7 +2400,7 @@ def set_field( if type_data != _FIELD_TYPE_MAPPER[field_name]: raise TypeError("Input type error for set_field") _safe_call(_LIB.LGBM_DatasetSetField( - self.handle, + self._handle, _c_str(field_name), ptr_data, ctypes.c_int(len(data)), @@ -2420,13 +2421,13 @@ def get_field(self, field_name: str) -> Optional[np.ndarray]: info : numpy array or None A numpy array with information from the Dataset. """ - if self.handle is None: + if self._handle is None: raise Exception(f"Cannot get {field_name} before construct Dataset") tmp_out_len = ctypes.c_int(0) out_type = ctypes.c_int(0) ret = ctypes.POINTER(ctypes.c_void_p)() _safe_call(_LIB.LGBM_DatasetGetField( - self.handle, + self._handle, _c_str(field_name), ctypes.byref(tmp_out_len), ctypes.byref(ret), @@ -2498,7 +2499,7 @@ def _set_predictor( elif isinstance(predictor, _InnerPredictor) and isinstance(self._predictor, _InnerPredictor): if (predictor == self._predictor) and (predictor.current_iteration() == self._predictor.current_iteration()): return self - if self.handle is None: + if self._handle is None: self._predictor = predictor elif self.data is not None: self._predictor = predictor @@ -2560,12 +2561,12 @@ def set_feature_name(self, feature_name: _LGBM_FeatureNameConfiguration) -> "Dat """ if feature_name != 'auto': self.feature_name = feature_name - if self.handle is not None and feature_name is not None and feature_name != 'auto': + if self._handle is not None and feature_name is not None and feature_name != 'auto': if len(feature_name) != self.num_feature(): raise ValueError(f"Length of feature_name({len(feature_name)}) and num_feature({self.num_feature()}) don't match") c_feature_name = [_c_str(name) for name in 
feature_name] _safe_call(_LIB.LGBM_DatasetSetFeatureNames( - self.handle, + self._handle, _c_array(ctypes.c_char_p, c_feature_name), ctypes.c_int(len(feature_name)))) return self @@ -2584,7 +2585,7 @@ def set_label(self, label: Optional[_LGBM_LabelType]) -> "Dataset": Dataset with set label. """ self.label = label - if self.handle is not None: + if self._handle is not None: if isinstance(label, pd_DataFrame): if len(label.columns) > 1: raise ValueError('DataFrame for label cannot have multiple columns') @@ -2625,7 +2626,7 @@ def set_weight( if weight is not None and np.all(weight == 1): weight = None self.weight = weight - if self.handle is not None and weight is not None: + if self._handle is not None and weight is not None: weight = _list_to_1d_numpy(weight, dtype=np.float32, name='weight') self.set_field('weight', weight) self.weight = self.get_field('weight') # original values can be modified at cpp side @@ -2648,7 +2649,7 @@ def set_init_score( Dataset with set init score. """ self.init_score = init_score - if self.handle is not None and init_score is not None: + if self._handle is not None and init_score is not None: self.set_field('init_score', init_score) self.init_score = self.get_field('init_score') # original values can be modified at cpp side return self @@ -2674,7 +2675,7 @@ def set_group( Dataset with set group. """ self.group = group - if self.handle is not None and group is not None: + if self._handle is not None and group is not None: group = _list_to_1d_numpy(group, dtype=np.int32, name='group') self.set_field('group', group) return self @@ -2687,7 +2688,7 @@ def get_feature_name(self) -> List[str]: feature_names : list of str The names of columns (features) in the Dataset. 
""" - if self.handle is None: + if self._handle is None: raise LightGBMError("Cannot get feature_name before construct dataset") num_feature = self.num_feature() tmp_out_len = ctypes.c_int(0) @@ -2696,7 +2697,7 @@ def get_feature_name(self) -> List[str]: string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for _ in range(num_feature)] ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) _safe_call(_LIB.LGBM_DatasetGetFeatureNames( - self.handle, + self._handle, ctypes.c_int(num_feature), ctypes.byref(tmp_out_len), ctypes.c_size_t(reserved_string_buffer_size), @@ -2710,7 +2711,7 @@ def get_feature_name(self) -> List[str]: string_buffers = [ctypes.create_string_buffer(actual_string_buffer_size) for _ in range(num_feature)] ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) _safe_call(_LIB.LGBM_DatasetGetFeatureNames( - self.handle, + self._handle, ctypes.c_int(num_feature), ctypes.byref(tmp_out_len), ctypes.c_size_t(actual_string_buffer_size), @@ -2762,7 +2763,7 @@ def get_data(self) -> Optional[_LGBM_TrainDataType]: data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequence or list of numpy array or None Raw data used in the Dataset construction. """ - if self.handle is None: + if self._handle is None: raise Exception("Cannot get data before construct Dataset") if self._need_slice and self.used_indices is not None and self.reference is not None: self.data = self.reference.data @@ -2813,9 +2814,9 @@ def num_data(self) -> int: number_of_rows : int The number of rows in the Dataset. 
""" - if self.handle is not None: + if self._handle is not None: ret = ctypes.c_int(0) - _safe_call(_LIB.LGBM_DatasetGetNumData(self.handle, + _safe_call(_LIB.LGBM_DatasetGetNumData(self._handle, ctypes.byref(ret))) return ret.value else: @@ -2829,9 +2830,9 @@ def num_feature(self) -> int: number_of_columns : int The number of columns (features) in the Dataset. """ - if self.handle is not None: + if self._handle is not None: ret = ctypes.c_int(0) - _safe_call(_LIB.LGBM_DatasetGetNumFeature(self.handle, + _safe_call(_LIB.LGBM_DatasetGetNumFeature(self._handle, ctypes.byref(ret))) return ret.value else: @@ -2850,13 +2851,13 @@ def feature_num_bin(self, feature: Union[int, str]) -> int: number_of_bins : int The number of constructed bins for the feature in the Dataset. """ - if self.handle is not None: + if self._handle is not None: if isinstance(feature, str): feature_index = self.feature_name.index(feature) else: feature_index = feature ret = ctypes.c_int(0) - _safe_call(_LIB.LGBM_DatasetGetFeatureNumBin(self.handle, + _safe_call(_LIB.LGBM_DatasetGetFeatureNumBin(self._handle, ctypes.c_int(feature_index), ctypes.byref(ret))) return ret.value @@ -2908,9 +2909,9 @@ def add_features_from(self, other: "Dataset") -> "Dataset": self : Dataset Dataset with the new features added. """ - if self.handle is None or other.handle is None: + if self._handle is None or other._handle is None: raise ValueError('Both source and target Datasets must be constructed before adding features') - _safe_call(_LIB.LGBM_DatasetAddFeaturesFrom(self.handle, other.handle)) + _safe_call(_LIB.LGBM_DatasetAddFeaturesFrom(self._handle, other._handle)) was_none = self.data is None old_self_data_type = type(self.data).__name__ if other.data is None: @@ -2998,7 +2999,7 @@ def _dump_text(self, filename: Union[str, Path]) -> "Dataset": Returns self. 
""" _safe_call(_LIB.LGBM_DatasetDumpText( - self.construct().handle, + self.construct()._handle, _c_str(str(filename)))) return self @@ -3042,7 +3043,7 @@ def __init__( model_str : str or None, optional (default=None) Model will be loaded from this string. """ - self.handle = None + self._handle = None self._network = False self.__need_reload_eval_info = True self._train_data_name = "training" @@ -3093,11 +3094,11 @@ def __init__( # copy the parameters from train_set params.update(train_set.get_params()) params_str = _param_dict_to_str(params) - self.handle = ctypes.c_void_p() + self._handle = ctypes.c_void_p() _safe_call(_LIB.LGBM_BoosterCreate( - train_set.handle, + train_set._handle, _c_str(params_str), - ctypes.byref(self.handle))) + ctypes.byref(self._handle))) # save reference to data self.train_set = train_set self.valid_sets: List[Dataset] = [] @@ -3106,11 +3107,11 @@ def __init__( self.__init_predictor = train_set._predictor if self.__init_predictor is not None: _safe_call(_LIB.LGBM_BoosterMerge( - self.handle, - self.__init_predictor.handle)) + self._handle, + self.__init_predictor._handle)) out_num_class = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterGetNumClasses( - self.handle, + self._handle, ctypes.byref(out_num_class))) self.__num_class = out_num_class.value # buffer for inner predict @@ -3122,14 +3123,14 @@ def __init__( elif model_file is not None: # Prediction task out_num_iterations = ctypes.c_int(0) - self.handle = ctypes.c_void_p() + self._handle = ctypes.c_void_p() _safe_call(_LIB.LGBM_BoosterCreateFromModelfile( _c_str(str(model_file)), ctypes.byref(out_num_iterations), - ctypes.byref(self.handle))) + ctypes.byref(self._handle))) out_num_class = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterGetNumClasses( - self.handle, + self._handle, ctypes.byref(out_num_class))) self.__num_class = out_num_class.value self.pandas_categorical = _load_pandas_categorical(file_name=model_file) @@ -3150,8 +3151,8 @@ def __del__(self) -> None: except 
AttributeError: pass try: - if self.handle is not None: - _safe_call(_LIB.LGBM_BoosterFree(self.handle)) + if self._handle is not None: + _safe_call(_LIB.LGBM_BoosterFree(self._handle)) except AttributeError: pass @@ -3165,15 +3166,15 @@ def __deepcopy__(self, _) -> "Booster": def __getstate__(self) -> Dict[str, Any]: this = self.__dict__.copy() - handle = this['handle'] + handle = this['_handle'] this.pop('train_set', None) this.pop('valid_sets', None) if handle is not None: - this["handle"] = self.model_to_string(num_iteration=-1) + this["_handle"] = self.model_to_string(num_iteration=-1) return this def __setstate__(self, state: Dict[str, Any]) -> None: - model_str = state.get('handle', None) + model_str = state.get('_handle', state.get('handle', None)) if model_str is not None: handle = ctypes.c_void_p() out_num_iterations = ctypes.c_int(0) @@ -3181,7 +3182,7 @@ def __setstate__(self, state: Dict[str, Any]) -> None: _c_str(model_str), ctypes.byref(out_num_iterations), ctypes.byref(handle))) - state['handle'] = handle + state['_handle'] = handle self.__dict__.update(state) def _get_loaded_param(self) -> Dict[str, Any]: @@ -3190,7 +3191,7 @@ def _get_loaded_param(self) -> Dict[str, Any]: string_buffer = ctypes.create_string_buffer(buffer_len) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) _safe_call(_LIB.LGBM_BoosterGetLoadedParam( - self.handle, + self._handle, ctypes.c_int64(buffer_len), ctypes.byref(tmp_out_len), ptr_string_buffer)) @@ -3200,7 +3201,7 @@ def _get_loaded_param(self) -> Dict[str, Any]: string_buffer = ctypes.create_string_buffer(actual_len) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) _safe_call(_LIB.LGBM_BoosterGetLoadedParam( - self.handle, + self._handle, ctypes.c_int64(actual_len), ctypes.byref(tmp_out_len), ptr_string_buffer)) @@ -3460,8 +3461,8 @@ def add_valid(self, data: Dataset, name: str) -> "Booster": raise LightGBMError("Add validation data failed, " "you should use same predictor 
for these data") _safe_call(_LIB.LGBM_BoosterAddValidData( - self.handle, - data.construct().handle)) + self._handle, + data.construct()._handle)) self.valid_sets.append(data) self.name_valid_sets.append(name) self.__num_dataset += 1 @@ -3485,7 +3486,7 @@ def reset_parameter(self, params: Dict[str, Any]) -> "Booster": params_str = _param_dict_to_str(params) if params_str: _safe_call(_LIB.LGBM_BoosterResetParameter( - self.handle, + self._handle, _c_str(params_str))) self.params.update(params) return self @@ -3542,8 +3543,8 @@ def update( "you should use same predictor for these data") self.train_set = train_set _safe_call(_LIB.LGBM_BoosterResetTrainingData( - self.handle, - self.train_set.construct().handle)) + self._handle, + self.train_set.construct()._handle)) self.__inner_predict_buffer[0] = None self.train_set_version = self.train_set.version is_finished = ctypes.c_int(0) @@ -3551,7 +3552,7 @@ def update( if self.__set_objective_to_none: raise LightGBMError('Cannot update due to null objective function.') _safe_call(_LIB.LGBM_BoosterUpdateOneIter( - self.handle, + self._handle, ctypes.byref(is_finished))) self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)] return is_finished.value == 1 @@ -3607,7 +3608,7 @@ def __boost( ) is_finished = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterUpdateOneIterCustom( - self.handle, + self._handle, grad.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), hess.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), ctypes.byref(is_finished))) @@ -3623,7 +3624,7 @@ def rollback_one_iter(self) -> "Booster": Booster with rolled back one iteration. 
""" _safe_call(_LIB.LGBM_BoosterRollbackOneIter( - self.handle)) + self._handle)) self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)] return self @@ -3637,7 +3638,7 @@ def current_iteration(self) -> int: """ out_cur_iter = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterGetCurrentIteration( - self.handle, + self._handle, ctypes.byref(out_cur_iter))) return out_cur_iter.value @@ -3651,7 +3652,7 @@ def num_model_per_iteration(self) -> int: """ model_per_iter = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterNumModelPerIteration( - self.handle, + self._handle, ctypes.byref(model_per_iter))) return model_per_iter.value @@ -3665,7 +3666,7 @@ def num_trees(self) -> int: """ num_trees = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterNumberOfTotalModel( - self.handle, + self._handle, ctypes.byref(num_trees))) return num_trees.value @@ -3679,7 +3680,7 @@ def upper_bound(self) -> float: """ ret = ctypes.c_double(0) _safe_call(_LIB.LGBM_BoosterGetUpperBoundValue( - self.handle, + self._handle, ctypes.byref(ret))) return ret.value @@ -3693,7 +3694,7 @@ def lower_bound(self) -> float: """ ret = ctypes.c_double(0) _safe_call(_LIB.LGBM_BoosterGetLowerBoundValue( - self.handle, + self._handle, ctypes.byref(ret))) return ret.value @@ -3854,7 +3855,7 @@ def save_model( num_iteration = self.best_iteration importance_type_int = _FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type] _safe_call(_LIB.LGBM_BoosterSaveModel( - self.handle, + self._handle, ctypes.c_int(start_iteration), ctypes.c_int(num_iteration), ctypes.c_int(importance_type_int), @@ -3883,7 +3884,7 @@ def shuffle_models( Booster with shuffled models. """ _safe_call(_LIB.LGBM_BoosterShuffleModels( - self.handle, + self._handle, ctypes.c_int(start_iteration), ctypes.c_int(end_iteration))) return self @@ -3901,18 +3902,18 @@ def model_from_string(self, model_str: str) -> "Booster": self : Booster Loaded Booster object. 
""" - if self.handle is not None: - _safe_call(_LIB.LGBM_BoosterFree(self.handle)) + if self._handle is not None: + _safe_call(_LIB.LGBM_BoosterFree(self._handle)) self._free_buffer() - self.handle = ctypes.c_void_p() + self._handle = ctypes.c_void_p() out_num_iterations = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterLoadModelFromString( _c_str(model_str), ctypes.byref(out_num_iterations), - ctypes.byref(self.handle))) + ctypes.byref(self._handle))) out_num_class = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterGetNumClasses( - self.handle, + self._handle, ctypes.byref(out_num_class))) self.__num_class = out_num_class.value self.pandas_categorical = _load_pandas_categorical(model_str=model_str) @@ -3952,7 +3953,7 @@ def model_to_string( string_buffer = ctypes.create_string_buffer(buffer_len) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) _safe_call(_LIB.LGBM_BoosterSaveModelToString( - self.handle, + self._handle, ctypes.c_int(start_iteration), ctypes.c_int(num_iteration), ctypes.c_int(importance_type_int), @@ -3965,7 +3966,7 @@ def model_to_string( string_buffer = ctypes.create_string_buffer(actual_len) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) _safe_call(_LIB.LGBM_BoosterSaveModelToString( - self.handle, + self._handle, ctypes.c_int(start_iteration), ctypes.c_int(num_iteration), ctypes.c_int(importance_type_int), @@ -4020,7 +4021,7 @@ def dump_model( string_buffer = ctypes.create_string_buffer(buffer_len) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) _safe_call(_LIB.LGBM_BoosterDumpModel( - self.handle, + self._handle, ctypes.c_int(start_iteration), ctypes.c_int(num_iteration), ctypes.c_int(importance_type_int), @@ -4033,7 +4034,7 @@ def dump_model( string_buffer = ctypes.create_string_buffer(actual_len) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) _safe_call(_LIB.LGBM_BoosterDumpModel( - self.handle, + self._handle, ctypes.c_int(start_iteration), 
ctypes.c_int(num_iteration), ctypes.c_int(importance_type_int), @@ -4202,7 +4203,7 @@ def refit( nrow, ncol = leaf_preds.shape out_is_linear = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterGetLinear( - self.handle, + self._handle, ctypes.byref(out_is_linear))) new_params = _choose_param_value( main_param_name="linear_tree", @@ -4227,12 +4228,12 @@ def refit( new_booster = Booster(new_params, train_set) # Copy models _safe_call(_LIB.LGBM_BoosterMerge( - new_booster.handle, - predictor.handle)) + new_booster._handle, + predictor._handle)) leaf_preds = leaf_preds.reshape(-1) ptr_data, _, _ = _c_int_array(leaf_preds) _safe_call(_LIB.LGBM_BoosterRefit( - new_booster.handle, + new_booster._handle, ptr_data, ctypes.c_int32(nrow), ctypes.c_int32(ncol))) @@ -4256,7 +4257,7 @@ def get_leaf_output(self, tree_id: int, leaf_id: int) -> float: """ ret = ctypes.c_double(0) _safe_call(_LIB.LGBM_BoosterGetLeafValue( - self.handle, + self._handle, ctypes.c_int(tree_id), ctypes.c_int(leaf_id), ctypes.byref(ret))) @@ -4286,7 +4287,7 @@ def set_leaf_output( """ _safe_call( _LIB.LGBM_BoosterSetLeafValue( - self.handle, + self._handle, ctypes.c_int(tree_id), ctypes.c_int(leaf_id), ctypes.c_double(value) @@ -4299,7 +4300,7 @@ def _to_predictor( pred_parameter: Dict[str, Any] ) -> _InnerPredictor: """Convert to predictor.""" - predictor = _InnerPredictor(booster_handle=self.handle, pred_parameter=pred_parameter) + predictor = _InnerPredictor(booster_handle=self._handle, pred_parameter=pred_parameter) predictor.pandas_categorical = self.pandas_categorical return predictor @@ -4313,7 +4314,7 @@ def num_feature(self) -> int: """ out_num_feature = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterGetNumFeature( - self.handle, + self._handle, ctypes.byref(out_num_feature))) return out_num_feature.value @@ -4333,7 +4334,7 @@ def feature_name(self) -> List[str]: string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for _ in range(num_feature)] ptr_string_buffers = (ctypes.c_char_p * 
num_feature)(*map(ctypes.addressof, string_buffers)) _safe_call(_LIB.LGBM_BoosterGetFeatureNames( - self.handle, + self._handle, ctypes.c_int(num_feature), ctypes.byref(tmp_out_len), ctypes.c_size_t(reserved_string_buffer_size), @@ -4347,7 +4348,7 @@ def feature_name(self) -> List[str]: string_buffers = [ctypes.create_string_buffer(actual_string_buffer_size) for _ in range(num_feature)] ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) _safe_call(_LIB.LGBM_BoosterGetFeatureNames( - self.handle, + self._handle, ctypes.c_int(num_feature), ctypes.byref(tmp_out_len), ctypes.c_size_t(actual_string_buffer_size), @@ -4383,7 +4384,7 @@ def feature_importance( importance_type_int = _FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type] result = np.empty(self.num_feature(), dtype=np.float64) _safe_call(_LIB.LGBM_BoosterFeatureImportance( - self.handle, + self._handle, ctypes.c_int(iteration), ctypes.c_int(importance_type_int), result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))) @@ -4481,7 +4482,7 @@ def __inner_eval( result = np.empty(self.__num_inner_eval, dtype=np.float64) tmp_out_len = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterGetEval( - self.handle, + self._handle, ctypes.c_int(data_idx), ctypes.byref(tmp_out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))) @@ -4524,7 +4525,7 @@ def __inner_predict(self, data_idx: int) -> np.ndarray: tmp_out_len = ctypes.c_int64(0) data_ptr = self.__inner_predict_buffer[data_idx].ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # type: ignore[union-attr] _safe_call(_LIB.LGBM_BoosterGetPredict( - self.handle, + self._handle, ctypes.c_int(data_idx), ctypes.byref(tmp_out_len), data_ptr)) @@ -4544,7 +4545,7 @@ def __get_eval_info(self) -> None: out_num_eval = ctypes.c_int(0) # Get num of inner evals _safe_call(_LIB.LGBM_BoosterGetEvalCounts( - self.handle, + self._handle, ctypes.byref(out_num_eval))) self.__num_inner_eval = out_num_eval.value if self.__num_inner_eval > 0: @@ -4557,7 
+4558,7 @@ def __get_eval_info(self) -> None: ] ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers)) _safe_call(_LIB.LGBM_BoosterGetEvalNames( - self.handle, + self._handle, ctypes.c_int(self.__num_inner_eval), ctypes.byref(tmp_out_len), ctypes.c_size_t(reserved_string_buffer_size), @@ -4573,7 +4574,7 @@ def __get_eval_info(self) -> None: ] ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers)) _safe_call(_LIB.LGBM_BoosterGetEvalNames( - self.handle, + self._handle, ctypes.c_int(self.__num_inner_eval), ctypes.byref(tmp_out_len), ctypes.c_size_t(actual_string_buffer_size), diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 5e237724ae85..62450053904d 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -25,7 +25,6 @@ def test_basic(tmp_path): feature_names[1] = "a" * 1000 # set one name to a value longer than default buffer size train_data = lgb.Dataset(X_train, label=y_train, feature_name=feature_names) valid_data = train_data.create_valid(X_test, label=y_test) - params = { "objective": "binary", "metric": "auc", "verbose": -1, "num_leaves": 15, "verbose_eval": False, "num_boost_round": 20, "max_bin": 255, "gpu_use_dp": True } bst = lgb.Booster(params, train_data) bst.add_valid(valid_data, "valid_1") @@ -393,11 +393,11 @@ def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_feat dataset_b = lgb.Dataset(arr_b).construct() - original_handle = dataset_a.handle.value + original_handle = dataset_a._handle.value dataset_a.add_features_from(dataset_b) assert dataset_a.num_feature() == 6 assert dataset_a.num_data() == 100 - assert dataset_a.handle.value == original_handle + assert dataset_a._handle.value == original_handle def test_cegb_affects_behavior(tmp_path):