Program Listing for File quality_estimator.h
↰ Return to documentation for file (src/translator/quality_estimator.h)
#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

#include "annotation.h"
#include "response.h"
#include "translator/history.h"
namespace marian::bergamot {
/// Abstract interface implemented by every quality estimator in this header.
///
/// Instances are handed out as std::shared_ptr<QualityEstimator> by
/// createQualityEstimator(), so the type is used polymorphically.
class QualityEstimator {
 public:
  /// Virtual destructor: guarantees that destroying a derived estimator through
  /// a pointer or reference to this base class is well-defined.
  virtual ~QualityEstimator() = default;

  /// Computes quality scores for a translation.
  ///
  /// @param histories  Read-only translation histories.
  /// @param response   Taken by non-const reference; presumably the computed
  ///                   scores are stored here (confirm against the overrides).
  virtual void computeQualityScores(const Histories &histories, Response &response) const = 0;
};
// QualityEstimator implementation that needs no model parameters: it is the
// estimator createQualityEstimator() returns when no quality file is supplied.
class UnsupervisedQualityEstimator : public QualityEstimator {
public:
// Implements QualityEstimator::computeQualityScores for this estimator.
void computeQualityScores(const Histories &histories, Response &response) const override;
private:
// Builds the Response::SentenceQualityScore for one sentence.
// logProbs    - float values for the sentence (presumably per-subword log
//               probabilities, going by the name; confirm in the definition).
// target      - annotated target text.
// sentenceIdx - index of the sentence being scored (presumably into target).
Response::SentenceQualityScore computeSentenceScores(const std::vector<float> &logProbs, const AnnotatedText &target,
const size_t sentenceIdx) const;
};
// Signature for quality estimator binary files.
// ASCII and Unicode text files never start with these 64 bits.
// Declared `inline` so that every translation unit including this header
// refers to one and the same constant instead of a per-file copy.
inline constexpr std::uint64_t BINARY_QE_MODEL_MAGIC = 0x78cc336f1d54b180;
/// QualityEstimator implementation parameterised by a Scale (stds and means),
/// a coefficient array and a scalar intercept. Going by the class name these
/// are the parameters of a logistic regression model.
///
/// The class declares a move constructor and no copy operations, so instances
/// can be moved but not copied.
class LogisticRegressorQualityEstimator : public QualityEstimator {
 public:
  /// Fixed-size parameter vector; its length is the number of LR parameter
  /// dimensions (4).
  using Array = std::array<float, /*LRParamsDims = */ 4>;

  /// Pair of 64-bit header fields.
  /// NOTE(review): presumably the leading bytes of a serialized model, with
  /// `magic` expected to equal BINARY_QE_MODEL_MAGIC - confirm against the
  /// definition of fromAlignedMemory().
  struct Header {
    std::uint64_t magic;
    std::uint64_t lrParametersDims;
  };

  /// Scaling parameters: one standard deviation and one mean per dimension.
  struct Scale {
    Array stds;
    Array means;
  };

  /// Matrix of floats whose dimensions are fixed at construction
  /// (`rows` and `cols` are const members). Movable; the const members rule
  /// out assignment.
  class Matrix {
   public:
    const size_t rows;
    const size_t cols;

    /// Creates a matrix with the given number of rows and columns.
    Matrix(const size_t rowsParam, const size_t colsParam);
    Matrix(Matrix &&other);

    /// Read-only access to the element at (row, col).
    const float &at(const size_t row, const size_t col) const;
    /// Mutable access to the element at (row, col).
    float &at(const size_t row, const size_t col);

   private:
    // Backing storage for the elements; the layout is decided by the
    // out-of-line definitions of at().
    std::vector<float> data_;
  };

  /// Builds an estimator from its parameters.
  ///
  /// @param scale         Scaling parameters (stds and means).
  /// @param coefficients  One coefficient per dimension.
  /// @param intercept     Scalar intercept.
  LogisticRegressorQualityEstimator(Scale &&scale, Array &&coefficients, const float intercept);
  LogisticRegressorQualityEstimator(LogisticRegressorQualityEstimator &&other);

  /// Creates an estimator from an AlignedMemory buffer (presumably the
  /// contents of a quality estimator binary file - confirm in the definition).
  static LogisticRegressorQualityEstimator fromAlignedMemory(const AlignedMemory &alignedMemory);

  /// Produces an AlignedMemory representation of this estimator (presumably
  /// the inverse of fromAlignedMemory() - confirm in the definition).
  AlignedMemory toAlignedMemory() const;

  /// Implements QualityEstimator::computeQualityScores for this estimator.
  void computeQualityScores(const Histories &histories, Response &response) const override;

  /// Runs the model on a feature matrix and returns the resulting floats
  /// (presumably one prediction per row of `features` - confirm).
  std::vector<float> predict(const Matrix &features) const;

 private:
  // NOTE: member order is significant for initialisation; keep it unchanged.
  Scale scale_;
  Array coefficients_;
  float intercept_;
  // Presumably values precomputed from coefficients_, scale_ and intercept_
  // (confirm in the constructor definition).
  Array coefficientsByStds_;
  float constantFactor_ = 0.0f;

  // Number of parameters with dimension - Scale(stds, means) and coefficients
  static constexpr size_t numLrParamsWithDimension_ = 3;
  // Number of intercept values
  static constexpr size_t numIntercept_ = 1;

  /// Builds the Response::SentenceQualityScore for one sentence.
  Response::SentenceQualityScore computeSentenceScores(const std::vector<float> &logProbs, const AnnotatedText &target,
                                                       const size_t sentenceIdx) const;

  /// Builds the feature Matrix from word ranges and the float values in
  /// `logProbs`; the result has the type predict() accepts.
  Matrix extractFeatures(const std::vector<SubwordRange> &wordIndices, const std::vector<float> &logProbs) const;
};
/// Factory selecting the quality estimator implementation.
///
/// @param qualityFileMemory  Memory holding the quality file; may be empty.
/// @return A LogisticRegressorQualityEstimator built from the memory when it
///         is non-empty, otherwise an UnsupervisedQualityEstimator.
inline std::shared_ptr<QualityEstimator> createQualityEstimator(const AlignedMemory &qualityFileMemory) {
  const bool hasQualityFile = !(qualityFileMemory.size() == 0);

  if (hasQualityFile) {
    // Quality file present: build the logistic regressor model from it
    return std::make_shared<LogisticRegressorQualityEstimator>(
        LogisticRegressorQualityEstimator::fromAlignedMemory(qualityFileMemory));
  }

  // No quality file: fall back to the simple model
  return std::make_shared<UnsupervisedQualityEstimator>();
}
// Returns a list of SubwordRange values for one sentence of `target`.
// NOTE(review): presumably each range groups the subwords that make up one
// word (the result type matches the `wordIndices` parameter of
// LogisticRegressorQualityEstimator::extractFeatures) - confirm in the
// definition.
// logProbs    - float values for the sentence (presumably per-subword log
//               probabilities - confirm).
// target      - annotated target text.
// sentenceIdx - index of the sentence to process.
std::vector<SubwordRange> mapWords(const std::vector<float> &logProbs, const AnnotatedText &target,
const size_t sentenceIdx);
} // namespace marian::bergamot