Skip to content

Commit

Permalink
Enables model ensembles (#450)
Browse files Browse the repository at this point in the history
* Enables model ensembles

Adds the ability to use ensembles of models. This supports ensembles of
binary- or npz-format models, as well as mixtures of both.

When all models in the ensemble are in binary format, the load-from-memory
path is used. Otherwise, all models are loaded via the file system.
Enable log-level debug to see output describing which loading path was used.

* Fix formatting

* Fix WASM bindings for MemoryBundle

For now, the WASM bindings do not support ensembles.

* Remove shared_ptr wrapping the AlignedMemory of models.

* Fix formatting
  • Loading branch information
graemenail authored Aug 1, 2023
1 parent 8011f9c commit 4b0da8d
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 32 deletions.
31 changes: 17 additions & 14 deletions src/translator/byte_array_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,21 +91,24 @@ AlignedMemory loadFileToMemory(const std::string& path, size_t alignment) {
return alignedMemory;
}

AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options) {
std::vector<AlignedMemory> getModelMemoryFromConfig(marian::Ptr<marian::Options> options) {
auto models = options->get<std::vector<std::string>>("models");
ABORT_IF(models.size() != 1, "Loading multiple binary models is not supported for now as it is not necessary.");

// If binary model we load into aligned memory. If .npz we leave it be to
// return empty aligned memory, thus allowing traditional file system loads.
if (marian::io::isBin(models[0])) {
AlignedMemory alignedMemory = loadFileToMemory(models[0], 256);
return alignedMemory;
} else if (marian::io::isNpz(models[0])) {
return AlignedMemory();
} else {
ABORT("Unknown extension for model: {}, should be one of `.bin` or `.npz`", models[0]);

std::vector<AlignedMemory> modelMemories(models.size());
for (size_t i = 0; i < models.size(); ++i) {
const auto model = models[i];
if (marian::io::isBin(model)) {
modelMemories[i] = loadFileToMemory(model, 256);
} else if (marian::io::isNpz(model)) {
// if any of the models are npz format, we revert to loading from file for all models.
LOG(debug, "Encountered an npz file {}; will use file loading for {} models", model, models.size());
return {};
} else {
ABORT("Unknown extension for model: {}, should be one of `.bin` or `.npz`", model);
}
}
return AlignedMemory();

return modelMemories;
}

AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options) {
Expand Down Expand Up @@ -153,7 +156,7 @@ AlignedMemory getQualityEstimatorModel(MemoryBundle& memoryBundle, const marian:

MemoryBundle getMemoryBundleFromConfig(marian::Ptr<marian::Options> options) {
MemoryBundle memoryBundle;
memoryBundle.model = getModelMemoryFromConfig(options);
memoryBundle.models = getModelMemoryFromConfig(options);
memoryBundle.shortlist = getShortlistMemoryFromConfig(options);
getVocabsMemoryFromConfig(options, memoryBundle.vocabs);
memoryBundle.ssplitPrefixFile = getSsplitPrefixFileMemoryFromConfig(options);
Expand Down
2 changes: 1 addition & 1 deletion src/translator/byte_array_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace marian {
namespace bergamot {

AlignedMemory loadFileToMemory(const std::string& path, size_t alignment);
AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options);
std::vector<AlignedMemory> getModelMemoryFromConfig(marian::Ptr<marian::Options> options);
AlignedMemory getQualityEstimatorModel(const marian::Ptr<marian::Options>& options);
AlignedMemory getQualityEstimatorModel(MemoryBundle& memoryBundle, const marian::Ptr<marian::Options>& options);
AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options);
Expand Down
4 changes: 2 additions & 2 deletions src/translator/definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ typedef AlignedVector<char> AlignedMemory;
/// Memory bundle for all byte-arrays.
/// Can be a set/subset of model, shortlist, vocabs and ssplitPrefixFile bytes.
struct MemoryBundle {
AlignedMemory model{}; ///< Byte-array of model (aligned to 256)
AlignedMemory shortlist{}; ///< Byte-array of shortlist (aligned to 64)
std::vector<AlignedMemory> models{}; ///< Byte-array of model (each element is aligned to 256)
AlignedMemory shortlist{}; ///< Byte-array of shortlist (aligned to 64)

/// Vector of vocabulary memories (aligned to 64).
/// If two vocabularies are the same (based on the filenames), two entries (shared
Expand Down
39 changes: 25 additions & 14 deletions src/translator/translation_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,24 +61,35 @@ void TranslationModel::loadBackend(size_t idx) {
graph->getBackend()->configureDevice(options_);
graph->reserveWorkspaceMB(options_->get<size_t>("workspace"));

// Marian Model: Load from memoryBundle or shortList
if (memory_.model.size() > 0 &&
memory_.model.begin() !=
nullptr) { // If we have provided a byte array that contains the model memory, we can initialise the
// model from there, as opposed to from reading in the config file
ABORT_IF((uintptr_t)memory_.model.begin() % 256 != 0,
"The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it.");
if (options_->get<bool>("check-bytearray", false)) {
ABORT_IF(!validateBinaryModel(memory_.model, memory_.model.size()),
"The binary file is invalid. Incomplete or corrupted download?");
}
const std::vector<const void *> container = {
memory_.model.begin()}; // Marian supports multiple models initialised in this manner hence std::vector.
// However we will only ever use 1 during decoding.
// if memory_.models is populated, then all models were of binary format
if (memory_.models.size() >= 1) {
const std::vector<const void *> container = std::invoke([&]() {
std::vector<const void *> model_ptrs(memory_.models.size());
for (size_t i = 0; i < memory_.models.size(); ++i) {
const AlignedMemory &model = memory_.models[i];

ABORT_IF(model.size() == 0 || model.begin() == nullptr, "The provided memory is empty. Cannot load the model.");
ABORT_IF(
(uintptr_t)model.begin() % 256 != 0,
"The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it.");
if (options_->get<bool>("check-bytearray", false)) {
ABORT_IF(!validateBinaryModel(model, model.size()),
"The binary file is invalid. Incomplete or corrupted download?");
}

model_ptrs[i] = model.begin();
LOG(debug, "Loaded model {} of {} from memory", (i + 1), model_ptrs.size());
}
return model_ptrs;
});

scorerEnsemble = createScorers(options_, container);
} else {
// load npz format models, or a mixture of binary/npz formats
scorerEnsemble = createScorers(options_);
LOG(debug, "Loaded {} model(s) from file", scorerEnsemble.size());
}

for (auto scorer : scorerEnsemble) {
scorer->init(graph);
if (shortlistGenerator_) {
Expand Down
2 changes: 1 addition & 1 deletion wasm/bindings/service_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ MemoryBundle prepareMemoryBundle(AlignedMemory* modelMemory, AlignedMemory* shor
std::vector<AlignedMemory*> uniqueVocabsMemories,
AlignedMemory* qualityEstimatorMemory) {
MemoryBundle memoryBundle;
memoryBundle.model = std::move(*modelMemory);
memoryBundle.models.emplace_back(std::move(*modelMemory));
memoryBundle.shortlist = std::move(*shortlistMemory);
memoryBundle.vocabs = std::move(prepareVocabsSmartMemories(uniqueVocabsMemories));
if (qualityEstimatorMemory != nullptr) {
Expand Down

0 comments on commit 4b0da8d

Please sign in to comment.