diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index a82913819565..b267d7da62fe 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -23,6 +23,31 @@ #include "./xgboost_R.h" // Must follow other includes. namespace { + +struct ErrorWithUnwind : public std::exception {}; + +void ThrowExceptionFromRError(void *unused, Rboolean jump) { + if (jump) { + throw ErrorWithUnwind(); + } +} + +struct PtrToConstChar { + const char *ptr; +}; + +SEXP WrappedMkChar(void *void_ptr) { + return Rf_mkChar(static_cast(void_ptr)->ptr); +} + +SEXP SafeMkChar(const char *c_str, SEXP continuation_token) { + PtrToConstChar ptr_struct{c_str}; + return R_UnwindProtect( + WrappedMkChar, static_cast(&ptr_struct), + ThrowExceptionFromRError, nullptr, + continuation_token); +} + [[nodiscard]] std::string MakeArrayInterfaceFromRMat(SEXP R_mat) { SEXP mat_dims = Rf_getAttrib(R_mat, R_DimSymbol); const int *ptr_mat_dims = INTEGER(mat_dims); @@ -208,8 +233,8 @@ void CreateFromSparse(SEXP indptr, SEXP indices, SEXP data, std::string *indptr_ const int *p_indices = INTEGER(indices); const double *p_data = REAL(data); - auto nindptr = static_cast(length(indptr)); - auto ndata = static_cast(length(data)); + auto nindptr = static_cast(Rf_xlength(indptr)); + auto ndata = static_cast(Rf_xlength(data)); CHECK_EQ(ndata, p_indptr[nindptr - 1]); xgboost::detail::MakeSparseFromPtr(p_indptr, p_indices, p_data, nindptr, indptr_str, indices_str, data_str); @@ -221,24 +246,27 @@ XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); R_API_BEGIN(); std::int32_t threads = asInteger(n_threads); - - using xgboost::Integer; - using xgboost::Json; - using xgboost::Object; - - std::string sindptr, sindices, sdata; - CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata); - auto nrow = static_cast(INTEGER(num_row)[0]); - DMatrixHandle handle; - Json jconfig{Object{}}; - // Construct configuration - jconfig["nthread"] = Integer{threads}; - jconfig["missing"] = xgboost::Number{asReal(missing)}; - std::string config; - Json::Dump(jconfig, &config); - CHECK_CALL(XGDMatrixCreateFromCSC(sindptr.c_str(), sindices.c_str(), sdata.c_str(), nrow, - config.c_str(), &handle)); + + int res_code; + { + using xgboost::Integer; + using xgboost::Json; + using xgboost::Object; + std::string sindptr, sindices, sdata; + CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata); + auto nrow = static_cast(INTEGER(num_row)[0]); + + Json jconfig{Object{}}; + // Construct configuration + jconfig["nthread"] = Integer{threads}; + jconfig["missing"] = xgboost::Number{asReal(missing)}; + std::string config; + Json::Dump(jconfig, &config); + res_code = XGDMatrixCreateFromCSC(sindptr.c_str(), sindices.c_str(), sdata.c_str(), nrow, + config.c_str(), &handle); + } + CHECK_CALL(res_code); R_SetExternalPtrAddr(ret, handle); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); @@ -252,24 +280,27 @@ XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); R_API_BEGIN(); std::int32_t threads = asInteger(n_threads); - - using xgboost::Integer; - using xgboost::Json; - using xgboost::Object; - - std::string sindptr, sindices, sdata; - CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata); - auto ncol = static_cast(INTEGER(num_col)[0]); - DMatrixHandle handle; - Json jconfig{Object{}}; - // Construct configuration - jconfig["nthread"] = Integer{threads}; - jconfig["missing"] = xgboost::Number{asReal(missing)}; - std::string config; - Json::Dump(jconfig, &config); - CHECK_CALL(XGDMatrixCreateFromCSR(sindptr.c_str(), sindices.c_str(), sdata.c_str(), ncol, - config.c_str(), &handle)); + + int res_code; + { + using xgboost::Integer; + using xgboost::Json; + using xgboost::Object; + + std::string sindptr, sindices, sdata; + CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata); + auto ncol = static_cast(INTEGER(num_col)[0]); + + Json jconfig{Object{}}; + // Construct configuration + jconfig["nthread"] = Integer{threads}; + jconfig["missing"] = xgboost::Number{asReal(missing)}; + std::string config; + Json::Dump(jconfig, &config); + res_code = XGDMatrixCreateFromCSR(sindptr.c_str(), sindices.c_str(), sdata.c_str(), ncol, + config.c_str(), &handle); + } R_SetExternalPtrAddr(ret, handle); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); R_API_END(); @@ -280,16 +311,22 @@ XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) { SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); R_API_BEGIN(); - int len = length(idxset); - std::vector idxvec(len); - for (int i = 0; i < len; ++i) { - idxvec[i] = INTEGER(idxset)[i] - 1; - } + R_xlen_t len = Rf_xlength(idxset); + const int *idxset_ = INTEGER(idxset); DMatrixHandle res; - CHECK_CALL(XGDMatrixSliceDMatrixEx(R_ExternalPtrAddr(handle), - BeginPtr(idxvec), len, - &res, - 0)); + + int res_code; + { + std::vector idxvec(len); + for (R_xlen_t i = 0; i < len; ++i) { + idxvec[i] = idxset_[i] - 1; + } + res_code = XGDMatrixSliceDMatrixEx(R_ExternalPtrAddr(handle), + BeginPtr(idxvec), len, + &res, + 0); + } + CHECK_CALL(res_code); R_SetExternalPtrAddr(ret, res); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); R_API_END(); @@ -325,18 +362,29 @@ XGB_DLL SEXP XGDMatrixSetStrFeatureInfo_R(SEXP handle, SEXP field, SEXP array) { R_API_BEGIN(); size_t len{0}; if (!isNull(array)) { - len = length(array); + len = Rf_xlength(array); } - const char *name = CHAR(asChar(field)); - std::vector str_info; + SEXP str_info_holder = PROTECT(Rf_allocVector(VECSXP, len)); for (size_t i = 0; i < len; ++i) { - str_info.emplace_back(CHAR(asChar(VECTOR_ELT(array, i)))); + SET_VECTOR_ELT(str_info_holder, i, Rf_asChar(VECTOR_ELT(array, i))); } - std::vector vec(len); - std::transform(str_info.cbegin(), str_info.cend(), vec.begin(), - [](std::string const &str) { return str.c_str(); }); - CHECK_CALL(XGDMatrixSetStrFeatureInfo(R_ExternalPtrAddr(handle), name, vec.data(), len)); + + SEXP field_ = PROTECT(Rf_asChar(field)); + const char *name = CHAR(field_); + int res_code; + { + std::vector str_info; + for (size_t i = 0; i < len; ++i) { + str_info.emplace_back(CHAR(VECTOR_ELT(str_info_holder, i))); + } + std::vector vec(len); + std::transform(str_info.cbegin(), str_info.cend(), vec.begin(), + [](std::string const &str) { return str.c_str(); }); + res_code = XGDMatrixSetStrFeatureInfo(R_ExternalPtrAddr(handle), name, vec.data(), len); + } + CHECK_CALL(res_code); + UNPROTECT(2); R_API_END(); return R_NilValue; } @@ -369,8 +417,9 @@ XGB_DLL SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { const float *res; CHECK_CALL(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), CHAR(asChar(field)), &olen, &res)); ret = PROTECT(allocVector(REALSXP, olen)); + double *ret_ = REAL(ret); for (size_t i = 0; i < olen; ++i) { - REAL(ret)[i] = res[i]; + ret_[i] = res[i]; } R_API_END(); UNPROTECT(1); @@ -403,13 +452,18 @@ void _BoosterFinalizer(SEXP ext) { XGB_DLL SEXP XGBoosterCreate_R(SEXP dmats) { SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); R_API_BEGIN(); - int len = length(dmats); - std::vector dvec; - for (int i = 0; i < len; ++i) { - dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); - } + R_xlen_t len = Rf_xlength(dmats); BoosterHandle handle; - CHECK_CALL(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle)); + + int res_code; + { + std::vector dvec; + for (R_xlen_t i = 0; i < len; ++i) { + dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); + } + res_code = XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle); + } + CHECK_CALL(res_code); R_SetExternalPtrAddr(ret, handle); R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); R_API_END(); @@ -419,13 +473,18 @@ XGB_DLL SEXP XGBoosterCreate_R(SEXP dmats) { XGB_DLL SEXP XGBoosterCreateInEmptyObj_R(SEXP dmats, SEXP R_handle) { R_API_BEGIN(); - int len = length(dmats); - std::vector dvec; - for (int i = 0; i < len; ++i) { - dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); - } + R_xlen_t len = Rf_xlength(dmats); BoosterHandle handle; - CHECK_CALL(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle)); + + int res_code; + { + std::vector dvec; + for (R_xlen_t i = 0; i < len; ++i) { + dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); + } + res_code = XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle); + } + CHECK_CALL(res_code); R_SetExternalPtrAddr(R_handle, handle); R_RegisterCFinalizerEx(R_handle, _BoosterFinalizer, TRUE); R_API_END(); @@ -434,9 +493,12 @@ XGB_DLL SEXP XGBoosterCreateInEmptyObj_R(SEXP dmats, SEXP R_handle) { XGB_DLL SEXP XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) { R_API_BEGIN(); + SEXP name_ = PROTECT(Rf_asChar(name)); + SEXP val_ = PROTECT(Rf_asChar(val)); CHECK_CALL(XGBoosterSetParam(R_ExternalPtrAddr(handle), - CHAR(asChar(name)), - CHAR(asChar(val)))); + CHAR(name_), + CHAR(val_))); + UNPROTECT(2); R_API_END(); return R_NilValue; } @@ -452,7 +514,7 @@ XGB_DLL SEXP XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) { XGB_DLL SEXP XGBoosterTrainOneIter_R(SEXP handle, SEXP dtrain, SEXP iter, SEXP grad, SEXP hess) { R_API_BEGIN(); - CHECK_EQ(length(grad), length(hess)) << "gradient and hess must have same length."; + CHECK_EQ(Rf_xlength(grad), Rf_xlength(hess)) << "gradient and hess must have same length."; SEXP gdim = getAttrib(grad, R_DimSymbol); auto n_samples = static_cast(INTEGER(gdim)[0]); auto n_targets = static_cast(INTEGER(gdim)[1]); @@ -463,11 +525,15 @@ XGB_DLL SEXP XGBoosterTrainOneIter_R(SEXP handle, SEXP dtrain, SEXP iter, SEXP g double const *d_grad = REAL(grad); double const *d_hess = REAL(hess); - auto ctx = xgboost::detail::BoosterCtx(R_ExternalPtrAddr(handle)); - auto [s_grad, s_hess] = xgboost::detail::MakeGradientInterface( - ctx, d_grad, d_hess, xgboost::linalg::kF, n_samples, n_targets); - CHECK_CALL(XGBoosterTrainOneIter(R_ExternalPtrAddr(handle), R_ExternalPtrAddr(dtrain), - asInteger(iter), s_grad.c_str(), s_hess.c_str())); + int res_code; + { + auto ctx = xgboost::detail::BoosterCtx(R_ExternalPtrAddr(handle)); + auto [s_grad, s_hess] = xgboost::detail::MakeGradientInterface( + ctx, d_grad, d_hess, xgboost::linalg::kF, n_samples, n_targets); + res_code = XGBoosterTrainOneIter(R_ExternalPtrAddr(handle), R_ExternalPtrAddr(dtrain), + asInteger(iter), s_grad.c_str(), s_hess.c_str()); + } + CHECK_CALL(res_code); R_API_END(); return R_NilValue; @@ -476,24 +542,34 @@ XGB_DLL SEXP XGBoosterTrainOneIter_R(SEXP handle, SEXP dtrain, SEXP iter, SEXP g XGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) { const char *ret; R_API_BEGIN(); - CHECK_EQ(length(dmats), length(evnames)) + CHECK_EQ(Rf_xlength(dmats), Rf_xlength(evnames)) << "dmats and evnams must have same length"; - int len = length(dmats); - std::vector vec_dmats; - std::vector vec_names; - std::vector vec_sptr; - for (int i = 0; i < len; ++i) { - vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); - vec_names.emplace_back(CHAR(asChar(VECTOR_ELT(evnames, i)))); + R_xlen_t len = Rf_xlength(dmats); + SEXP evnames_lst = PROTECT(Rf_allocVector(VECSXP, len)); + for (R_xlen_t i = 0; i < len; i++) { + SET_VECTOR_ELT(evnames_lst, i, Rf_asChar(VECTOR_ELT(evnames, i))); } - for (int i = 0; i < len; ++i) { - vec_sptr.push_back(vec_names[i].c_str()); + + int res_code; + { + std::vector vec_dmats; + std::vector vec_names; + std::vector vec_sptr; + for (R_xlen_t i = 0; i < len; ++i) { + vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); + vec_names.emplace_back(CHAR(VECTOR_ELT(evnames_lst, i))); + } + for (R_xlen_t i = 0; i < len; ++i) { + vec_sptr.push_back(vec_names[i].c_str()); + } + res_code = XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), + asInteger(iter), + BeginPtr(vec_dmats), + BeginPtr(vec_sptr), + len, &ret); } - CHECK_CALL(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), - asInteger(iter), - BeginPtr(vec_dmats), - BeginPtr(vec_sptr), - len, &ret)); + CHECK_CALL(res_code); + UNPROTECT(1); R_API_END(); return mkString(ret); } @@ -501,10 +577,11 @@ XGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evn XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config) { SEXP r_out_shape; SEXP r_out_result; - SEXP r_out; + SEXP r_out = PROTECT(allocVector(VECSXP, 2)); + SEXP json_config_ = PROTECT(Rf_asChar(json_config)); R_API_BEGIN(); - char const *c_json_config = CHAR(asChar(json_config)); + char const *c_json_config = CHAR(json_config_); bst_ulong out_dim; bst_ulong const *out_shape; @@ -515,23 +592,23 @@ XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_con r_out_shape = PROTECT(allocVector(INTSXP, out_dim)); size_t len = 1; + int *r_out_shape_ = INTEGER(r_out_shape); for (size_t i = 0; i < out_dim; ++i) { - INTEGER(r_out_shape)[i] = out_shape[i]; + r_out_shape_[i] = out_shape[i]; len *= out_shape[i]; } r_out_result = PROTECT(allocVector(REALSXP, len)); auto ctx = xgboost::detail::BoosterCtx(R_ExternalPtrAddr(handle)); + double *r_out_result_ = REAL(r_out_result); xgboost::common::ParallelFor(len, ctx->Threads(), [&](xgboost::omp_ulong i) { - REAL(r_out_result)[i] = out_result[i]; + r_out_result_[i] = out_result[i]; }); - r_out = PROTECT(allocVector(VECSXP, 2)); - SET_VECTOR_ELT(r_out, 0, r_out_shape); SET_VECTOR_ELT(r_out, 1, r_out_result); R_API_END(); - UNPROTECT(3); + UNPROTECT(4); return r_out; } @@ -554,7 +631,7 @@ XGB_DLL SEXP XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) { R_API_BEGIN(); CHECK_CALL(XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle), RAW(raw), - length(raw))); + Rf_xlength(raw))); R_API_END(); return R_NilValue; } @@ -612,45 +689,54 @@ XGB_DLL SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw) { R_API_BEGIN(); CHECK_CALL(XGBoosterUnserializeFromBuffer(R_ExternalPtrAddr(handle), RAW(raw), - length(raw))); + Rf_xlength(raw))); R_API_END(); return R_NilValue; } XGB_DLL SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats, SEXP dump_format) { SEXP out; + SEXP continuation_token = PROTECT(R_MakeUnwindCont()); + SEXP dump_format_ = PROTECT(Rf_asChar(dump_format)); + SEXP fmap_ = PROTECT(Rf_asChar(fmap)); R_API_BEGIN(); bst_ulong olen; const char **res; - const char *fmt = CHAR(asChar(dump_format)); + const char *fmt = CHAR(dump_format_); CHECK_CALL(XGBoosterDumpModelEx(R_ExternalPtrAddr(handle), - CHAR(asChar(fmap)), + CHAR(fmap_), asInteger(with_stats), fmt, &olen, &res)); out = PROTECT(allocVector(STRSXP, olen)); - if (!strcmp("json", fmt)) { - std::stringstream stream; - stream << "[\n"; - for (size_t i = 0; i < olen; ++i) { - stream << res[i]; - if (i < olen - 1) { - stream << ",\n"; - } else { - stream << "\n"; - } - } - stream << "]"; - SET_STRING_ELT(out, 0, mkChar(stream.str().c_str())); - } else { - for (size_t i = 0; i < olen; ++i) { + try { + if (!strcmp("json", fmt)) { std::stringstream stream; - stream << "booster[" << i <<"]\n" << res[i]; - SET_STRING_ELT(out, i, mkChar(stream.str().c_str())); + stream << "[\n"; + for (size_t i = 0; i < olen; ++i) { + stream << res[i]; + if (i < olen - 1) { + stream << ",\n"; + } else { + stream << "\n"; + } + } + stream << "]"; + const std::string temp_str = stream.str(); + SET_STRING_ELT(out, 0, SafeMkChar(temp_str.c_str(), continuation_token)); + } else { + for (size_t i = 0; i < olen; ++i) { + std::stringstream stream; + stream << "booster[" << i <<"]\n" << res[i]; + const std::string temp_str = stream.str(); + SET_STRING_ELT(out, i, SafeMkChar(temp_str.c_str(), continuation_token)); + } } + } catch (ErrorWithUnwind &e) { + R_ContinueUnwind(continuation_token); } R_API_END(); - UNPROTECT(1); + UNPROTECT(4); return out; } @@ -676,9 +762,19 @@ XGB_DLL SEXP XGBoosterGetAttr_R(SEXP handle, SEXP name) { XGB_DLL SEXP XGBoosterSetAttr_R(SEXP handle, SEXP name, SEXP val) { R_API_BEGIN(); - const char *v = isNull(val) ? nullptr : CHAR(asChar(val)); + const char *v = nullptr; + SEXP name_ = PROTECT(Rf_asChar(name)); + SEXP val_; + int n_protected = 1; + if (!Rf_isNull(val)) { + val_ = PROTECT(Rf_asChar(val)); + n_protected++; + v = CHAR(val_); + } + CHECK_CALL(XGBoosterSetAttr(R_ExternalPtrAddr(handle), - CHAR(asChar(name)), v)); + CHAR(name_), v)); + UNPROTECT(n_protected); R_API_END(); return R_NilValue; } @@ -707,7 +803,7 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) { SEXP out_features_sexp; SEXP out_scores_sexp; SEXP out_shape_sexp; - SEXP r_out; + SEXP r_out = PROTECT(allocVector(VECSXP, 3)); R_API_BEGIN(); char const *c_json_config = CHAR(asChar(json_config)); @@ -723,23 +819,24 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) { &out_dim, &out_shape, &out_scores)); out_shape_sexp = PROTECT(allocVector(INTSXP, out_dim)); size_t len = 1; + int *out_shape_sexp_ = INTEGER(out_shape_sexp); for (size_t i = 0; i < out_dim; ++i) { - INTEGER(out_shape_sexp)[i] = out_shape[i]; + out_shape_sexp_[i] = out_shape[i]; len *= out_shape[i]; } - out_scores_sexp = PROTECT(allocVector(REALSXP, len)); - auto ctx = xgboost::detail::BoosterCtx(R_ExternalPtrAddr(handle)); - xgboost::common::ParallelFor(len, ctx->Threads(), [&](xgboost::omp_ulong i) { - REAL(out_scores_sexp)[i] = out_scores[i]; - }); - out_features_sexp = PROTECT(allocVector(STRSXP, out_n_features)); for (size_t i = 0; i < out_n_features; ++i) { SET_STRING_ELT(out_features_sexp, i, mkChar(out_features[i])); } - r_out = PROTECT(allocVector(VECSXP, 3)); + out_scores_sexp = PROTECT(allocVector(REALSXP, len)); + auto ctx = xgboost::detail::BoosterCtx(R_ExternalPtrAddr(handle)); + double *out_scores_sexp_ = REAL(out_scores_sexp); + xgboost::common::ParallelFor(len, ctx->Threads(), [&](xgboost::omp_ulong i) { + out_scores_sexp_[i] = out_scores[i]; + }); + SET_VECTOR_ELT(r_out, 0, out_features_sexp); SET_VECTOR_ELT(r_out, 1, out_shape_sexp); SET_VECTOR_ELT(r_out, 2, out_scores_sexp);