Port raggedness-minimizing line breaking from gl-js.

mapbox · Dec 21, 2016 · a88357d · a88357d
1 parent 9a6f81a
commit a88357d
Show file tree

Hide file tree

Showing 7 changed files with 130 additions and 71 deletions.
diff --git a/package.json b/package.json
@@ -24,7 +24,7 @@
     "lodash": "^4.16.4",
     "mapbox-gl": "mapbox/mapbox-gl-js#ab836206d415ca3a74257a3066d11a54ab2838cb",
     "mapbox-gl-style-spec": "mapbox/mapbox-gl-style-spec#49e8b407bdbbe6f7c92dbcb56d3d51f425fc2653",
-    "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#e1ada02a706fd124fc3441fd3a2b3cda67960ff5",
+    "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#e1208969a7855e524bd174bddf4e46f0bb697cb6",
     "mkdirp": "^0.5.1",
     "node-cmake": "^1.2.1",
     "pixelmatch": "^4.0.2",

diff --git a/platform/default/bidi.cpp b/platform/default/bidi.cpp
@@ -19,12 +19,8 @@ class BiDiImpl {
     UBiDi* bidiLine = nullptr;
 };
 
-BiDi::BiDi() : impl(std::make_unique<BiDiImpl>())
-{
-}
-
-BiDi::~BiDi() {
-}
+BiDi::BiDi() : impl(std::make_unique<BiDiImpl>()) {}
+BiDi::~BiDi() = default;
 
 // Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining
 // logical order. Output won't be intelligible until the bidirectional algorithm is applied
@@ -53,7 +49,7 @@ std::u16string applyArabicShaping(const std::u16string& input) {
     return std::u16string(outputText.get(), outputLength);
 }
 
-void BiDi::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) {
+void BiDi::mergeParagraphLineBreaks(std::set<size_t>& lineBreakPoints) {
     int32_t paragraphCount = ubidi_countParagraphs(impl->bidiText);
     for (int32_t i = 0; i < paragraphCount; i++) {
         UErrorCode errorCode = U_ZERO_ERROR;
@@ -65,20 +61,20 @@ void BiDi::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) {
                                      u_errorName(errorCode));
         }
 
-        lineBreakPoints.insert(paragraphEndIndex);
+        lineBreakPoints.insert(static_cast<std::size_t>(paragraphEndIndex));
     }
 }
 
-std::vector<std::u16string> BiDi::applyLineBreaking(std::set<int32_t> lineBreakPoints) {
+std::vector<std::u16string> BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) {
     // BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all
     // paragraph boundaries are included in the set of line break points. The calling code might not
     // include the line break because it didn't need to wrap at that point, or because the text was
     // separated with a more exotic code point such as (U+001C)
     mergeParagraphLineBreaks(lineBreakPoints);
 
     std::vector<std::u16string> transformedLines;
-    int32_t start = 0;
-    for (int32_t lineBreakPoint : lineBreakPoints) {
+    std::size_t start = 0;
+    for (std::size_t lineBreakPoint : lineBreakPoints) {
         transformedLines.push_back(getLine(start, lineBreakPoint));
         start = lineBreakPoint;
     }
@@ -87,7 +83,7 @@ std::vector<std::u16string> BiDi::applyLineBreaking(std::set<int32_t> lineBreakP
 }
 
 std::vector<std::u16string> BiDi::processText(const std::u16string& input,
-                                              std::set<int32_t> lineBreakPoints) {
+                                              std::set<std::size_t> lineBreakPoints) {
     UErrorCode errorCode = U_ZERO_ERROR;
 
     ubidi_setPara(impl->bidiText, input.c_str(), static_cast<int32_t>(input.size()),
@@ -100,9 +96,9 @@ std::vector<std::u16string> BiDi::processText(const std::u16string& input,
     return applyLineBreaking(lineBreakPoints);
 }
 
-std::u16string BiDi::getLine(int32_t start, int32_t end) {
+std::u16string BiDi::getLine(std::size_t start, std::size_t end) {
     UErrorCode errorCode = U_ZERO_ERROR;
-    ubidi_setLine(impl->bidiText, start, end, impl->bidiLine, &errorCode);
+    ubidi_setLine(impl->bidiText, static_cast<int32_t>(start), static_cast<int32_t>(end), impl->bidiLine, &errorCode);
 
     if (U_FAILURE(errorCode)) {
         throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(errorCode));

diff --git a/platform/qt/src/bidi.cpp b/platform/qt/src/bidi.cpp
@@ -16,17 +16,17 @@ std::u16string applyArabicShaping(const std::u16string& input) {
     return utf16string.toStdU16String();
 }
 
-void BiDi::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) {
-    lineBreakPoints.insert(bidi.impl->string.length());
+void BiDi::mergeParagraphLineBreaks(std::set<std::size_t>& lineBreakPoints) {
+    lineBreakPoints.insert(static_cast<std::size_t>(bidi.impl->string.length()));
 }
 
 std::vector<std::u16string>
-BiDi::applyLineBreaking(std::set<int32_t> lineBreakPoints) {
+BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) {
     mergeParagraphLineBreaks(lineBreakPoints);
 
     std::vector<std::u16string> transformedLines;
-    int32_t start = 0;
-    for (int32_t lineBreakPoint : lineBreakPoints) {
+    std::size_t start = 0;
+    for (std::size_t lineBreakPoint : lineBreakPoints) {
         transformedLines.push_back(bidi.getLine(start, lineBreakPoint));
         start = lineBreakPoint;
     }
@@ -38,16 +38,15 @@ BiDi::BiDi() : impl(std::make_unique<BiDiImpl>())
 {
 }
 
-BiDi::~BiDi() {
-}
+BiDi::~BiDi() = default;
 
-std::vector<std::u16string> BiDi::processText(const std::u16string& input, std::set<int32_t> lineBreakPoints) {
+std::vector<std::u16string> BiDi::processText(const std::u16string& input, std::set<std::size_t> lineBreakPoints) {
     impl->string = QString::fromStdU16String(input);
     return applyLineBreaking(lineBreakPoints);
 }
 
-std::u16string BiDi::getLine(int32_t start, int32_t end) {
-    return impl->string.mid(start, end - start).toStdU16String();
+std::u16string BiDi::getLine(std::size_t start, std::size_t end) {
+    return impl->string.mid(static_cast<int32_t>(start), static_cast<int32_t>(end - start)).toStdU16String();
 }
 
 } // end namespace mbgl
diff --git a/src/mbgl/text/bidi.hpp b/src/mbgl/text/bidi.hpp
@@ -19,12 +19,12 @@ class BiDi : private util::noncopyable {
     BiDi();
     ~BiDi();
 
-    std::vector<std::u16string> processText(const std::u16string&, std::set<int32_t>);
+    std::vector<std::u16string> processText(const std::u16string&, std::set<std::size_t>);
 
 private:
-    void mergeParagraphLineBreaks(std::set<int32_t>&);
-    std::vector<std::u16string> applyLineBreaking(std::set<int32_t>);
-    std::u16string getLine(int32_t start, int32_t end);
+    void mergeParagraphLineBreaks(std::set<std::size_t>&);
+    std::vector<std::u16string> applyLineBreaking(std::set<std::size_t>);
+    std::u16string getLine(std::size_t start, std::size_t end);
 
     std::unique_ptr<BiDiImpl> impl;
 };

diff --git a/src/mbgl/text/glyph_set.cpp b/src/mbgl/text/glyph_set.cpp
@@ -64,7 +64,7 @@ void align(Shaping& shaping,
            const float verticalAlign,
            const float maxLineLength,
            const float lineHeight,
-           const uint32_t lineCount,
+           const std::size_t lineCount,
            const Point<float>& translate) {
     const float shiftX =
         (justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24 /* one em */);
@@ -80,8 +80,8 @@ void align(Shaping& shaping,
 // justify left = 0, right = 1, center = .5
 void justifyLine(std::vector<PositionedGlyph>& positionedGlyphs,
                  const std::map<uint32_t, SDFGlyph>& sdfs,
-                 uint32_t start,
-                 uint32_t end,
+                 std::size_t start,
+                 std::size_t end,
                  float justify) {
     if (!justify) {
         return;
@@ -93,34 +93,104 @@ void justifyLine(std::vector<PositionedGlyph>& positionedGlyphs,
         const uint32_t lastAdvance = it->second.metrics.advance;
         const float lineIndent = float(glyph.x + lastAdvance) * justify;
 
-        for (uint32_t j = start; j <= end; j++) {
+        for (std::size_t j = start; j <= end; j++) {
             positionedGlyphs[j].x -= lineIndent;
         }
     }
 }
 
-float GlyphSet::determineIdeographicLineWidth(const std::u16string& logicalInput,
+float GlyphSet::determineAverageLineWidth(const std::u16string& logicalInput,
                                               const float spacing,
                                               float maxWidth) const {
     float totalWidth = 0;
 
-    // totalWidth doesn't include the last character for magical tuning reasons. This makes the
-    // algorithm a little more agressive about trying to fit the text into fewer lines, taking
-    // advantage of the tolerance for going a little over maxWidth
-    for (uint32_t i = 0; i < logicalInput.size() - 1; i++) {
-        auto it = sdfs.find(logicalInput[i]);
+    for (char16_t chr : logicalInput) {
+        auto it = sdfs.find(chr);
         if (it != sdfs.end()) {
             totalWidth += it->second.metrics.advance + spacing;
         }
     }
 
-    int32_t lineCount = std::fmax(1, std::ceil(totalWidth / maxWidth));
-    return totalWidth / lineCount;
+    int32_t targetLineCount = std::fmax(1, std::ceil(totalWidth / maxWidth));
+    return totalWidth / targetLineCount;
+}
+
+float calculateBadness(const float lineWidth, const float targetWidth, const float penalty, const bool isLastBreak) {
+    const float raggedness = std::pow(lineWidth - targetWidth, 2);
+    if (isLastBreak && lineWidth < targetWidth) {
+        // Be more tolerant of short final lines
+        return std::fmax(0, raggedness - 150);
+    }
+    if (penalty < 0) {
+        return raggedness - std::pow(penalty, 2);
+    }
+    return raggedness + std::pow(penalty, 2);
+}
+
+float calculatePenalty(char16_t codePoint, char16_t previousCodePoint) {
+    float penalty = 0;
+    // Force break on newline
+    if (codePoint == 0x0a) {
+        penalty -= 10000;
+    }
+    // Penalize open parenthesis at end of line
+    if (previousCodePoint && (previousCodePoint == 0x28 || previousCodePoint == 0xff08)) {
+        penalty += 50;
+    }
+    // Penalize close parenthesis at beginning of line
+    if (codePoint == 0x29 || codePoint == 0xff09) {
+        penalty += 50;
+    }
+    return penalty;
+}
+
+struct PotentialBreak {
+    PotentialBreak(const std::size_t p_index, const float p_x, const PotentialBreak* p_priorBreak, const float p_badness)
+        : index(p_index), x(p_x), priorBreak(p_priorBreak), badness(p_badness)
+    {}
+
+    const std::size_t index;
+    const float x;
+    const PotentialBreak* priorBreak;
+    const float badness;
+};
+
+
+PotentialBreak evaluateBreak(const std::size_t breakIndex, const float breakX, const float targetWidth, const std::list<PotentialBreak>& potentialBreaks, const float penalty, const bool isLastBreak) {
+    // We could skip evaluating breaks where the line length (breakX - priorBreak.x) > maxWidth
+    //  ...but in fact we allow lines longer than maxWidth (if there's no break points)
+    //  ...and when targetWidth and maxWidth are close, strictly enforcing maxWidth can give
+    //     more lopsided results.
+
+    const PotentialBreak* bestPriorBreak = nullptr;
+    float bestBreakBadness = calculateBadness(breakX, targetWidth, penalty, isLastBreak);
+
+    for (const auto& potentialBreak : potentialBreaks) {
+        const float lineWidth = breakX - potentialBreak.x;
+        float breakBadness = calculateBadness(lineWidth, targetWidth, penalty, isLastBreak) + potentialBreak.badness;
+        if (breakBadness <= bestBreakBadness) {
+            bestPriorBreak = &potentialBreak;
+            bestBreakBadness = breakBadness;
+        }
+    }
+
+    return PotentialBreak(breakIndex, breakX, bestPriorBreak, bestBreakBadness);
+}
+
+std::set<std::size_t> leastBadBreaks(const PotentialBreak& lastLineBreak) {
+    std::set<std::size_t> leastBadBreaks = { lastLineBreak.index };
+    const PotentialBreak* priorBreak = lastLineBreak.priorBreak;
+    while (priorBreak) {
+        leastBadBreaks.insert(priorBreak->index);
+        priorBreak = priorBreak->priorBreak;
+    }
+    return leastBadBreaks;
 }
 
+
 // We determine line breaks based on shaped text in logical order. Working in visual order would be
 //  more intuitive, but we can't do that because the visual order may be changed by line breaks!
-std::set<int32_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInput,
+std::set<std::size_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInput,
                                                 const float spacing,
                                                 float maxWidth) const {
     if (!maxWidth) {
@@ -130,42 +200,34 @@ std::set<int32_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInp
     if (logicalInput.empty()) {
         return {};
     }
-
-    if (util::i18n::allowsIdeographicBreaking(logicalInput)) {
-        maxWidth = determineIdeographicLineWidth(logicalInput, spacing, maxWidth);
-    }
-
-    std::set<int32_t> lineBreakPoints;
+
+    const float targetWidth = determineAverageLineWidth(logicalInput, spacing, maxWidth);
+
+    std::list<PotentialBreak> potentialBreaks;
     float currentX = 0;
-    uint32_t lastSafeBreak = 0;
-    float lastSafeBreakX = 0;
 
-    for (uint32_t i = 0; i < logicalInput.size(); i++) {
-        auto it = sdfs.find(logicalInput[i]);
+    for (std::size_t i = 0; i < logicalInput.size(); i++) {
+        const char16_t codePoint = logicalInput[i];
+        auto it = sdfs.find(codePoint);
         if (it == sdfs.end()) {
             continue;
         }
 
-        const SDFGlyph& glyph = it->second;
-
         // Ideographic characters, spaces, and word-breaking punctuation that often appear without
         // surrounding spaces.
-        if (util::i18n::allowsWordBreaking(glyph.id) ||
-            util::i18n::allowsIdeographicBreaking(glyph.id)) {
-            lastSafeBreak = i;
-            lastSafeBreakX = currentX;
-        }
+        if (util::i18n::allowsWordBreaking(codePoint) ||
+            util::i18n::allowsIdeographicBreaking(codePoint)) {
+            const char16_t previousCodePoint = i > 0 ? logicalInput[i-1] : 0;
 
-        if (currentX > maxWidth && lastSafeBreak > 0) {
-            lineBreakPoints.insert(lastSafeBreak);
-            currentX -= lastSafeBreakX;
-            lastSafeBreakX = 0;
+            potentialBreaks.push_back(evaluateBreak(i, currentX, targetWidth, potentialBreaks,
+                                                     calculatePenalty(codePoint, previousCodePoint),
+                                                     false));
         }
 
-        currentX += glyph.metrics.advance + spacing;
+        currentX += it->second.metrics.advance + spacing;
     }
 
-    return lineBreakPoints;
+    return leastBadBreaks(evaluateBreak(logicalInput.size(), currentX, targetWidth, potentialBreaks, 0, true));
 }
 
 void GlyphSet::shapeLines(Shaping& shaping,
@@ -194,7 +256,7 @@ void GlyphSet::shapeLines(Shaping& shaping,
             continue;
         }
 
-        uint32_t lineStartIndex = static_cast<uint32_t>(shaping.positionedGlyphs.size());
+        std::size_t lineStartIndex = shaping.positionedGlyphs.size();
         for (char16_t chr : line) {
             auto it = sdfs.find(chr);
             if (it == sdfs.end()) {
@@ -207,20 +269,20 @@ void GlyphSet::shapeLines(Shaping& shaping,
         }
 
         // Only justify if we placed at least one glyph
-        if (static_cast<uint32_t>(shaping.positionedGlyphs.size()) != lineStartIndex) {
+        if (shaping.positionedGlyphs.size() != lineStartIndex) {
             float lineLength = x - spacing; // Don't count trailing spacing
             maxLineLength = util::max(lineLength, maxLineLength);
 
             justifyLine(shaping.positionedGlyphs, sdfs, lineStartIndex,
-                        static_cast<uint32_t>(shaping.positionedGlyphs.size()) - 1, justify);
+                        shaping.positionedGlyphs.size() - 1, justify);
         }
 
         x = 0;
         y += lineHeight;
     }
 
     align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight,
-          static_cast<uint32_t>(lines.size()), translate);
+          lines.size(), translate);
     const uint32_t height = lines.size() * lineHeight;
 
     // Calculate the bounding box

diff --git a/src/mbgl/text/glyph_set.hpp b/src/mbgl/text/glyph_set.hpp
@@ -21,10 +21,10 @@ class GlyphSet {
                              BiDi& bidi) const;
 
 private:
-    float determineIdeographicLineWidth(const std::u16string& logicalInput,
+    float determineAverageLineWidth(const std::u16string& logicalInput,
                                         const float spacing,
                                         float maxWidth) const;
-    std::set<int32_t> determineLineBreaks(const std::u16string& logicalInput,
+    std::set<std::size_t> determineLineBreaks(const std::u16string& logicalInput,
                                           const float spacing,
                                           float maxWidth) const;
 

diff --git a/src/mbgl/util/i18n.cpp b/src/mbgl/util/i18n.cpp
@@ -298,6 +298,8 @@ bool allowsWordBreaking(uint16_t chr) {
     return (chr == 0x0a      /* newline */
             || chr == 0x20   /* space */
             || chr == 0x26   /* ampersand */
+            || chr == 0x28   /* open parenthesis */
+            || chr == 0x29   /* close parenthesis */
             || chr == 0x2b   /* plus sign */
             || chr == 0x2d   /* hyphen-minus */
             || chr == 0x2f   /* solidus */