.. _program_listing_file_src_translator_annotation.cpp:

Program Listing for File annotation.cpp
=======================================

|exhale_lsh| :ref:`Return to documentation for file <file_src_translator_annotation.cpp>` (``src/translator/annotation.cpp``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #include "annotation.h"

   #include <cassert>

   namespace marian {
   namespace bergamot {

   // Builds an AnnotatedText that takes ownership of `t` (moved into `text`).
   // No sentence is recorded; only the trailing entry of token_begin_ is set.
   // NOTE(review): back() is called on annotation.token_begin_, so this relies
   // on Annotation seeding that vector with at least one entry -- confirm in
   // annotation.h.
   AnnotatedText::AnnotatedText(std::string &&t) : text(std::move(t)) {
     // Treat the entire text as a gap that recordExistingSentence will break.
     annotation.token_begin_.back() = text.size();
   }

   // Appends a new sentence to `text` and records its token boundaries.
   //
   //   prefix      Bytes placed before the sentence; they are appended through
   //               appendEndingWhitespace, i.e. they extend the previous gap.
   //   begin, end  Range of tokens forming the sentence. The tokens must be
   //               contiguous in memory, because the sentence is copied as the
   //               single byte range [begin->data(), end of the last token).
   //               An empty range appends no sentence text.
   //
   // Precondition (asserted): the last token_begin_ entry equals text.size().
   void AnnotatedText::appendSentence(string_view prefix, std::vector<string_view>::iterator begin,
                                      std::vector<string_view>::iterator end) {
     assert(annotation.token_begin_.back() == text.size());

     // prefix is just end of the previous one.
     appendEndingWhitespace(prefix);

     // Appending sentence text.
     // `offset` starts at the current end of `text`; after each token it holds
     // the offset just past that token, which is what gets recorded.
     std::size_t offset = text.size();
     for (std::vector<string_view>::iterator token = begin; token != end; ++token) {
       offset += token->size();
       annotation.token_begin_.push_back(offset);
     }
     if (begin != end) {
       // One append for the whole sentence: first byte of the first token up to
       // one past the last byte of the last token.
       text.append(begin->data(), (end - 1)->data() + (end - 1)->size());
       assert(offset == text.size());  // Tokens should be contiguous.
     }

     // Add the gap after the sentence. This is empty for now, but will be
     // extended with appendEndingWhitespace or another appendSentence.
     annotation.gap_.push_back(annotation.token_begin_.size() - 1);
     annotation.token_begin_.push_back(offset);
   }

   // Appends `whitespace` to `text` and moves the last token_begin_ entry to the
   // new end of `text`, so the trailing gap covers the appended bytes.
   void AnnotatedText::appendEndingWhitespace(string_view whitespace) {
     text.append(whitespace.data(), whitespace.size());
     annotation.token_begin_.back() = text.size();
   }

   // Records a sentence whose bytes are already part of `text`.
   //
   //   begin, end      Range of tokens; each must point inside `text` and be
   //                   immediately followed by the next one (all asserted).
   //   sentence_begin  Position of the sentence inside `text`. It must equal
   //                   begin->data() when the range is non-empty (asserted) and
   //                   is the only source of the position for an empty sentence.
   //
   // Precondition (asserted): token_begin_ is non-empty and its last entry
   // equals text.size().
   void AnnotatedText::recordExistingSentence(std::vector<string_view>::iterator begin,
                                              std::vector<string_view>::iterator end, const char *sentence_begin) {
     assert(sentence_begin >= text.data());
     assert(sentence_begin <= text.data() + text.size());
     assert(begin == end || sentence_begin == begin->data());
     assert(!annotation.token_begin_.empty());
     assert(annotation.token_begin_.back() == text.size());
     // Clip off size token ending.
     annotation.token_begin_.pop_back();
     for (std::vector<string_view>::iterator i = begin; i != end; ++i) {
       assert(i->data() >= text.data());                                  // In range.
       assert(i->data() + i->size() <= text.data() + text.size());        // In range
       assert(i + 1 == end || i->data() + i->size() == (i + 1)->data());  // Contiguous
       // Store the token's start as an offset from the beginning of `text`.
       annotation.token_begin_.push_back(i->data() - text.data());
     }
     // Gap token after sentence.
     annotation.gap_.push_back(annotation.token_begin_.size());
     if (begin != end) {
       // The gap starts one past the last byte of the last token.
       annotation.token_begin_.push_back((end - 1)->data() + (end - 1)->size() - text.data());
     } else {
       // empty sentence.
       annotation.token_begin_.push_back(sentence_begin - text.data());
     }
     // Add back size token ending.
     annotation.token_begin_.push_back(text.size());
   }

   }  // namespace bergamot
   }  // namespace marian