Skip to content

Commit

Permalink
[swig] Change Booster StringArray wrappers API (safety)
Browse files Browse the repository at this point in the history
Before if the user allocated char** and passed it to the booster
with insufficient size it could cause an out of bounds error
which is not recoverable from.

Added a method to extract the size of the largest feature name from
the Booster first and it is used to allocate the memory for the
char** so such kind of error is not possible.

It is also easier for the user as it has no parameters.
  • Loading branch information
alberto.ferreira committed Mar 8, 2020
1 parent 9e74b99 commit 02f2d28
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 45 deletions.
9 changes: 9 additions & 0 deletions include/LightGBM/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,15 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetFeatureNames(BoosterHandle handle,
int* out_len,
char** out_strs);

/*!
* \brief Get the size of the largest feature name.
* \param handle Handle of booster
* \param[out] out_len Number of characters in largest feature name (excluding null-terminator).
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_BoosterGetLargestFeatureNameSize(BoosterHandle handle,
int* out_len);

/*!
* \brief Get number of features.
* \param handle Handle of booster
Expand Down
15 changes: 15 additions & 0 deletions src/c_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,14 @@ class Booster {
return idx;
}

size_t GetLargestFeatureNameSize() const {
size_t max_len = 0;
for (const auto& name : boosting_->FeatureNames()) {
max_len = std::max(name.size(), max_len);
}
return max_len;
}

const Boosting* GetBoosting() const { return boosting_.get(); }

private:
Expand Down Expand Up @@ -1367,6 +1375,13 @@ int LGBM_BoosterGetFeatureNames(BoosterHandle handle, int* out_len, char** out_s
API_END();
}

int LGBM_BoosterGetLargestFeatureNameSize(BoosterHandle handle, int* out_len) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
*out_len = ref_booster->GetLargestFeatureNameSize();
API_END();
}

int LGBM_BoosterGetNumFeature(BoosterHandle handle, int* out_len) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
Expand Down
81 changes: 36 additions & 45 deletions swig/StringArray_API_extensions.i
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#include <iostream>
#include <memory>

#define API_OK_OR_VALUE(api_return, return_value) if (api_return == -1) return return_value
#define API_OK_OR_NULL(api_return) API_OK_OR_VALUE(api_return, nullptr)

%inline %{


Expand Down Expand Up @@ -207,84 +210,72 @@ class StringArray
return reinterpret_cast<StringArray *>(handle)->get_num_elements();
}


/**
* Wraps LGBM_BoosterGetEvalNames.
*
* @brief Wraps LGBM_BoosterGetEvalNames.
*
* In case of success a new StringArray is created and returned,
* which you're responsible for freeing,
* which you're responsible for freeing,
* @see StringArrayHandle_free().
* In case of failure such resource is automatically freed.
*
* In case of failure such resource is freed and nullptr is returned.
* Check for that case with null (lightgbmlib) or 0 (lightgbmlibJNI).
*
* @param handle Booster handle
* @param eval_counts number of evaluations
* @param *strings [out] Pointer to ArrayStringHandle.
*
* @return 0 in case of success or -1 in case of error.
* @return StringArrayHandle with the eval names (or nullptr in case of error)
*/
int LGBM_BoosterGetEvalNamesSWIG(BoosterHandle handle,
int eval_counts,
StringArrayHandle *strings)
StringArrayHandle LGBM_BoosterGetEvalNamesSWIG(BoosterHandle handle,
int eval_counts)
{
strings = nullptr;
std::unique_ptr<StringArray> strings_ptr(nullptr);
std::unique_ptr<StringArray> strings(nullptr);

try {
// TODO: @reviewer, 128 was the chosen size, any particular reason for this constraint?
strings_ptr.reset(new StringArray(eval_counts, 128));
} catch (std::bad_alloc &e) {
strings.reset(new StringArray(eval_counts, 128));
} catch (std::bad_alloc &e) {
LGBM_SetLastError("Failure to allocate memory."); // TODO: @reviewer - why is this not setting the message?
return -1;
}

int api_return = LGBM_BoosterGetEvalNames(handle, &eval_counts, strings_ptr->data());
if (api_return == 0) {
*strings = strings_ptr.release();
return nullptr;
}

return api_return;
API_OK_OR_NULL(LGBM_BoosterGetEvalNames(handle, &eval_counts, strings->data()));
return strings.release();
}


/**
* Wraps LGBM_BoosterGetFeatureNames.
*
* @brief Wraps LGBM_BoosterGetFeatureNames.
*
* Allocates a new StringArray. You must free it yourself if it succeeds.
* @see StringArrayHandle_free().
* If the underlying LGBM calls fail, memory is freed automatically,
* otherwise you're responsible for freeing it.
* In case of failure such resource is freed and nullptr is returned.
* Check for that case with null (lightgbmlib) or 0 (lightgbmlibJNI).
*
* @param handle Booster handle
* @return StringArrayHandle with the feature names (or nullptr in case of error)
*/
int LGBM_BoosterGetFeatureNamesSWIG(BoosterHandle handle,
//int num_features,
int max_feature_name_size,
StringArrayHandle * strings)
StringArrayHandle LGBM_BoosterGetFeatureNamesSWIG2(BoosterHandle handle)
{
std::unique_ptr<StringArray> strings_ptr(nullptr);
int api_return;
std::unique_ptr<StringArray> strings(nullptr);

// 1) To preallocate memory extract number of features first:
// 1) To preallocate memory extract number of features & required size first:
int num_features;
api_return = LGBM_BoosterGetNumFeature(handle, &num_features);
if (api_return == -1) {
return -1;
}
API_OK_OR_NULL(LGBM_BoosterGetNumFeature(handle, &num_features));

int max_feature_name_size;
API_OK_OR_NULL(LGBM_BoosterGetLargestFeatureNameSize(handle, &max_feature_name_size));

// 2) Allocate an array of strings:
try {
// TODO: @reviewer should we also figure out the size of the biggest string and remove parameter max_feature_name_size?
strings_ptr.reset(new StringArray(num_features, max_feature_name_size));
strings.reset(new StringArray(num_features, max_feature_name_size));
} catch (std::bad_alloc &e) {
LGBM_SetLastError("Failure to allocate memory."); // TODO: @reviewer - why is this not setting the message?
return -1;
return nullptr;
}

// 3) Extract feature names:
int _dummy_out_num_features; // already know how many there are (to allocate memory).
api_return = LGBM_BoosterGetFeatureNames(handle, &_dummy_out_num_features, strings_ptr->data());
if (api_return == 0) {
*strings = strings_ptr.release();
}
API_OK_OR_NULL(LGBM_BoosterGetFeatureNames(handle, &_dummy_out_num_features, strings->data()));

return api_return;
return strings.release();
}
%}

0 comments on commit 02f2d28

Please sign in to comment.