.. _program_listing_file_src_translator_html.h:
Program Listing for File html.h
===============================
|exhale_lsh| :ref:`Return to documentation for file <file_src_translator_html.h>` (``src/translator/html.h``)
.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS
.. code-block:: cpp
#ifndef SRC_BERGAMOT_HTML_H_
#define SRC_BERGAMOT_HTML_H_
#include <forward_list>
#include <set>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include "annotation.h"
#include "data/types.h"
#include "definitions.h"
namespace marian::bergamot {
struct Response;
class HTML {
public:
// Ordered set of tag names. The transparent comparator std::less<> lets
// lookups (find/count) take anything comparable with std::string, such as a
// string literal or std::string_view, without building a temporary std::string.
using TagNameSet = std::set<std::string, std::less<>>;
// Settings bundle for HTML handling. Every member has a default value, so a
// default-constructed Options is usable as is.
struct Options {
  // Tag names classified as void elements.
  // NOTE(review): presumably elements that never take a closing tag or
  // content; confirm against the parser that consumes this set.
  TagNameSet voidTags = {"area",  "base",  "basefont", "bgsound", "br",    "col",
                         "embed", "frame", "hr",       "img",     "input", "keygen",
                         "link",  "meta",  "param",    "source",  "track", "wbr"};

  // Tag names classified as inline elements.
  // NOTE(review): presumably tags that do not split the surrounding text
  // into separate sentences; confirm with the implementation.
  TagNameSet inlineTags = {"abbr", "a",     "b",    "em",     "i",   "kbd",
                           "mark", "math",  "output", "q",    "ruby", "small",
                           "span", "strong", "sub", "sup",    "time", "u",
                           "var",  "wbr",   "ins",  "del",    "img"};

  // Tag names that may occur inside a word; only "wbr" by default.
  // NOTE(review): likely exempt from space substitution; verify.
  TagNameSet inWordTags = {"wbr"};

  // Tag names flagged as ignored.
  // NOTE(review): presumably their contents are passed through untouched;
  // confirm with the implementation.
  TagNameSet ignoredTags = {"code", "kbd", "samp", "var", "dir", "acronym", "math"};

  // Characters treated as continuation delimiters: newline, space, comma,
  // full stop, and the three bracket pairs.
  std::string continuationDelimiters{"\n ,.(){}[]"};

  // Enabled by default.
  // NOTE(review): by its name, replaces inline tags with spaces; confirm
  // the exact effect in the implementation.
  bool substituteInlineTagsWithSpaces{true};
};
struct Tag {
// Kinds of node a Tag can represent; the trailing comments show an example
// of the markup for each kind.
enum NodeType {
ELEMENT, // <b>...</b>
VOID_ELEMENT, // <img>
COMMENT, // <!-- ... -->
PROCESSING_INSTRUCTION, // <?...?>
WHITESPACE, // A \n\n we inserted to break a sentence.
};
NodeType type; // Type of the node
std::string name; // Tag name (if type is ELEMENT or VOID_ELEMENT)
std::string attributes; // Tag attributes (as raw HTML string, including
// entities and prefix whitespace)
std::string data; // Raw data of an element that just needs to be
// copied as is, e.g.