.. _program_listing_file_src_translator_vocabs.h:

Program Listing for File vocabs.h
=================================

|exhale_lsh| :ref:`Return to documentation for file <file_src_translator_vocabs.h>` (``src/translator/vocabs.h``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. NOTE(review): the angle-bracket template arguments were missing from this listing and
   have been restored from context (Options, Vocab, AlignedMemory are presumed names);
   confirm against src/translator/vocabs.h.

.. code-block:: cpp

   #pragma once

   namespace marian {
   namespace bergamot {

   /// Holds the source vocabularies and the target vocabulary.
   ///
   /// Vocabularies are supplied as an ordered list (in-memory buffers or file paths);
   /// the last entry becomes the target vocabulary and all preceding entries become
   /// source vocabularies. Entries that refer to the same buffer or the same path
   /// are loaded once and shared.
   class Vocabs {
    public:
     /// Loads the vocabularies from buffers when any are given, otherwise from files.
     ///
     /// @param options        Stored and passed to every vocabulary that is created; when
     ///                       `vocabMemories` is empty, its "vocabs" entry supplies the paths.
     /// @param vocabMemories  Serialized vocabularies, target last. An empty vector selects
     ///                       loading from the paths in `options`.
     ///
     /// Aborts (via ABORT_IF) when fewer than two vocabularies are provided.
     Vocabs(Ptr<Options> options, std::vector<std::shared_ptr<AlignedMemory>>&& vocabMemories)
         : options_(options) {
       if (!vocabMemories.empty()) {
         // load vocabs from buffer
         load(std::move(vocabMemories));
       } else {
         // load vocabs from file
         auto vocabPaths = options->get<std::vector<std::string>>("vocabs");
         load(vocabPaths);
       }
     }

     /// @return All source vocabularies, in the order they were supplied.
     const std::vector<Ptr<Vocab const>>& sources() const { return srcVocabs_; }

     /// @return The target vocabulary (the last vocabulary that was supplied).
     const Ptr<Vocab const>& target() const { return trgVocab_; }

    private:
     std::vector<Ptr<Vocab const>> srcVocabs_;  // source vocabularies
     Ptr<Vocab const> trgVocab_;                // target vocabulary
     Ptr<Options> options_;                     // handed to every vocabulary on creation

     /// Loads the vocabularies from serialized in-memory buffers.
     ///
     /// @param vocabMemories  One buffer per vocabulary, target last; at least two required.
     void load(std::vector<std::shared_ptr<AlignedMemory>>&& vocabMemories) {
       // At least two vocabs: src and trg
       ABORT_IF(vocabMemories.size() < 2, "Insufficient number of vocabularies.");
       // Temporarily sized to hold the target as well; it is popped off below.
       srcVocabs_.resize(vocabMemories.size());
       // hashMap is introduced to avoid double loading the same vocab
       // loading vocabs (either from buffers or files) is the biggest bottleneck of the speed
       // uintptr_t holds unique keys (address) for shared_ptr<AlignedMemory>
       std::unordered_map<uintptr_t, Ptr<Vocab>> vmap;
       for (size_t i = 0; i < srcVocabs_.size(); ++i) {
         auto m = vmap.emplace(
             std::make_pair(reinterpret_cast<uintptr_t>(vocabMemories[i].get()), Ptr<Vocab>()));
         if (m.second) {  // new: load the vocab
           m.first->second = New<Vocab>(options_, i);
           m.first->second->loadFromSerialized(
               absl::string_view(vocabMemories[i]->begin(), vocabMemories[i]->size()));
         }
         // Either the freshly loaded vocab or the one already loaded from the same buffer.
         srcVocabs_[i] = m.first->second;
       }
       // Initialize target vocab
       trgVocab_ = srcVocabs_.back();
       srcVocabs_.pop_back();
     }

     /// Loads the vocabularies from files.
     ///
     /// @param vocabPaths  One path per vocabulary, target last; at least two required.
     void load(const std::vector<std::string>& vocabPaths) {
       // with the current setup, we need at least two vocabs: src and trg
       ABORT_IF(vocabPaths.size() < 2, "Insufficient number of vocabularies.");
       // Temporarily sized to hold the target as well; it is popped off below.
       srcVocabs_.resize(vocabPaths.size());
       // Keyed by path so that a path listed more than once is only loaded once.
       std::unordered_map<std::string, Ptr<Vocab>> vmap;
       for (size_t i = 0; i < srcVocabs_.size(); ++i) {
         auto m = vmap.emplace(std::make_pair(vocabPaths[i], Ptr<Vocab>()));
         if (m.second) {  // new: load the vocab
           m.first->second = New<Vocab>(options_, i);
           m.first->second->load(vocabPaths[i]);
         }
         // Either the freshly loaded vocab or the one already loaded from the same path.
         srcVocabs_[i] = m.first->second;
       }
       // Initialize target vocab
       trgVocab_ = srcVocabs_.back();
       srcVocabs_.pop_back();
     }
   };

   }  // namespace bergamot
   }  // namespace marian