
.. _program_listing_file_src_translator_html.h:

Program Listing for File html.h
===============================

|exhale_lsh| :ref:`Return to documentation for file <file_src_translator_html.h>` (``src/translator/html.h``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #ifndef SRC_BERGAMOT_HTML_H_
   #define SRC_BERGAMOT_HTML_H_

   #include <forward_list>
   #include <set>
   #include <stdexcept>
   #include <string>
   #include <unordered_set>

   #include "annotation.h"
   #include "data/types.h"
   #include "definitions.h"

   namespace marian::bergamot {

   struct Response;

   class HTML {
    public:
     using TagNameSet = std::set<std::string, std::less<>>;

     struct Options {
       TagNameSet voidTags{"area",  "base",  "basefont", "bgsound", "br",    "col",
                           "embed", "frame", "hr",       "img",     "input", "keygen",
                           "link",  "meta",  "param",    "source",  "track", "wbr"};

       TagNameSet inlineTags{"abbr",   "a",   "b",   "em",   "i",   "kbd",  "mark", "math",
                             "output", "q",   "ruby", "small", "span", "strong", "sub", "sup",
                             "time",   "u",   "var", "wbr",  "ins", "del",  "img"};

       TagNameSet inWordTags{"wbr"};

       TagNameSet ignoredTags{"code", "kbd", "samp", "var", "dir", "acronym", "math"};

       std::string continuationDelimiters = "\n ,.(){}[]";

       bool substituteInlineTagsWithSpaces = true;
     };

     struct Tag {
       enum NodeType {
         ELEMENT,                 // <b>...</b>
         VOID_ELEMENT,            // <img>
         COMMENT,                 // <!-- ... -->
         PROCESSING_INSTRUCTION,  // <?...?>
         WHITESPACE,              // A \n\n we inserted to break a sentence.
       };

       NodeType type;           // Type of the node
       std::string name;        // Tag name (if type is ELEMENT or VOID_ELEMENT)
       std::string attributes;  // Tag attributes (as raw HTML string, including
                                // entities and prefix whitespace)
       std::string data;        // Raw data of an element that just needs to be
                                // copied as is, e.g.