Fix broken ISO labels when using non-ASCII characters (#222)

This commit fixes the broken ISO labels produced when a label contains characters outside the character set supported by ISO-8859-1. Every dstring written to the UDF headers is now inspected to determine whether it fits the limited 8-bit encoding or must be encoded as 16-bit. This has the advantage of leaving every dstring that doesn't need 16-bit encoding, such as folder and file names, unmodified in the file structure.
2020-03-04 17:26:15 +01:00 · 2020-03-04 17:26:15 +01:00 · b4a5668234
commit b4a5668234
parent 39ddf3cc6a
4 changed files with 101 additions and 16 deletions
--- a/tsMuxer/convertUTF.cpp
+++ b/tsMuxer/convertUTF.cpp
@ -103,6 +103,20 @@ ConversionResult ConvertUTF32toUTF16(const UTF32** sourceStart, const UTF32* sou
    return result;
 }

+std::tuple<UTF16, UTF16> ConvertUTF32toUTF16(UTF32 ch)  // Encodes one code point as UTF-16: returns (unit, 0) or (high surrogate, low surrogate).
+{
+    if (ch <= UNI_MAX_BMP)
+    {
+        return std::make_tuple(static_cast<UTF16>(ch), 0);  // one code unit is enough; 0 in the second slot means "no low surrogate"
+    }
+    else
+    {
+        ch -= halfBase;  // subtract halfBase before splitting the value into its high and low parts
+        return std::make_tuple((UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START),
+                               (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));  // NOTE(review): no range check on ch is done here - confirm callers only pass valid code points
+    }
+}
+
 /* --------------------------------------------------------------------- */

 ConversionResult ConvertUTF16toUTF32(const UTF16** sourceStart, const UTF16* sourceEnd, UTF32** targetStart,
--- a/tsMuxer/convertUTF.h
+++ b/tsMuxer/convertUTF.h
@ -89,6 +89,7 @@

 #include <cstdint>
 #include <string>
+#include <tuple>

 namespace convertUTF
 {
@ -136,6 +137,8 @@ ConversionResult ConvertUTF16toUTF32(const UTF16** sourceStart, const UTF16* sou
 ConversionResult ConvertUTF32toUTF16(const UTF32** sourceStart, const UTF32* sourceEnd, UTF16** targetStart,
                                     UTF16* targetEnd, ConversionFlags flags);

+std::tuple<UTF16, UTF16> ConvertUTF32toUTF16(UTF32);
+
 Boolean isLegalUTF8Sequence(const UTF8* source, const UTF8* sourceEnd);

 Boolean isLegalUTF8String(const UTF8* string, int length);
@ -167,7 +170,8 @@ template <typename Fn>
 void IterateUTF8Chars(const std::string& utf8String, Fn f)
 {
    auto it = std::begin(utf8String);
-    while (it != std::end(utf8String))
+    bool keep_going = true;
+    while (keep_going && it != std::end(utf8String))
    {
        UTF32 ch = 0;
        unsigned short extraBytesToRead = trailingBytesForUTF8[static_cast<unsigned char>(*it)];
@ -193,7 +197,7 @@ void IterateUTF8Chars(const std::string& utf8String, Fn f)
            ch += get_as_uchar();
        }
        ch -= offsetsFromUTF8[extraBytesToRead];
-        f(ch);
+        keep_going = f(ch);
    }
 }

--- a/tsMuxer/iso_writer.cpp
+++ b/tsMuxer/iso_writer.cpp
@ -1,17 +1,19 @@
 #include "iso_writer.h"

-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
+#include <algorithm>
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>

+#include "convertUTF.h"
+#include "utf8Converter.h"
 #include "vod_common.h"

-#ifdef _WIN32
-#include <time.h>
-#endif
-
 // ----------- routines --------------

+namespace
+{
 /*
  Name  : CRC-16 CCITT
  Poly  : 0x1021    x^16 + x^12 + x^5 + 1
@ -108,14 +110,75 @@ void writeTimestamp(uint8_t* buffer, time_t time)
    buffer[11] = 0;
 }

-void writeDString(uint8_t* buffer, const char* value, int len)
+bool canUse8BitUnicode(const std::string& utf8Str)  // True when every code point decoded from utf8Str is below 0x100 (also true for an empty string).
 {
-    int realLen = FFMIN(strlen(value), len - 2);
-    buffer[len - 1] = realLen + 1;
-    buffer[0] = 8;  // 8 bit per character string
-    memcpy(buffer + 1, value, realLen + 1);
-    int restLen = len - realLen - 2;
-    memset(buffer + 1 + realLen, 0, restLen);
+    bool rv = true;
+    convertUTF::IterateUTF8Chars(utf8Str, [&](auto c) {
+        rv = (c < 0x100);
+        return rv;  // returning false ends the iteration at the first code point that does not fit in one byte
+    });
+    return rv;
+}
+
+std::vector<std::uint8_t> serializeDString(const std::string& str, int fieldLen)  // Builds a fieldLen-byte dstring: [8 or 16][encoded characters][zero padding][used length].
+{
+    if (str.empty())
+    {
+        return std::vector<std::uint8_t>(fieldLen, 0);  // empty input: the whole field is zero-filled
+    }
+    std::vector<std::uint8_t> rv;
+#ifdef _WIN32
+    auto str_u8 = reinterpret_cast<const std::uint8_t*>(str.c_str());
+    auto utf8Str = convertUTF::isLegalUTF8String(str_u8, str.length())
+                       ? str
+                       : UtfConverter::toUtf8(str_u8, str.length(), UtfConverter::sfANSI);  // input that is not legal UTF-8 is converted from ANSI
+#else
+    auto& utf8Str = str;  // on other platforms the input is used as UTF-8 without conversion
+#endif
+    using namespace convertUTF;
+    const auto maxHeaderAndContentLength = fieldLen - 1;  // the last byte of the field is reserved for the length
+    rv.reserve(fieldLen);
+    if (canUse8BitUnicode(utf8Str))
+    {
+        rv.push_back(8);  // leading byte 8: one byte per character
+        IterateUTF8Chars(utf8Str, [&](auto c) {
+            rv.push_back(c);  // c < 0x100 was established by canUse8BitUnicode, so the low byte holds the whole value
+            return rv.size() < maxHeaderAndContentLength;  // stop once the header plus content area is full
+        });
+    }
+    else
+    {
+        rv.push_back(16);  // leading byte 16: two bytes per UTF-16 code unit
+        IterateUTF8Chars(utf8Str, [&](auto c) {
+            UTF16 high_surrogate, low_surrogate;
+            std::tie(high_surrogate, low_surrogate) = ConvertUTF32toUTF16(c);  // low_surrogate is 0 when c fits in a single code unit
+            auto spaceLeft = maxHeaderAndContentLength - rv.size();
+            if ((spaceLeft < 2) || (low_surrogate && spaceLeft < 4))
+            {
+                return false;  // not enough room for this character (2 bytes, or 4 for a surrogate pair): truncate here
+            }
+            rv.push_back(high_surrogate >> 8);  // most significant byte is written first
+            rv.push_back(high_surrogate);
+            if (low_surrogate)
+            {
+                rv.push_back(low_surrogate >> 8);
+                rv.push_back(low_surrogate);
+            }
+            return true;
+        });
+    }
+    auto contentLength = rv.size();  // bytes used so far, including the leading 8/16 byte
+    auto paddingSize = maxHeaderAndContentLength - rv.size();  // NOTE(review): unsigned arithmetic - with fieldLen <= 2 the 8-bit path can leave rv.size() above the limit and this wraps; confirm callers always pass larger fields
+    std::fill_n(std::back_inserter(rv), paddingSize, 0);
+    rv.push_back(contentLength);  // final byte of the field stores the used length
+    return rv;
+}
+
+void writeDString(uint8_t* buffer, const char* value, int fieldLen)  // Serializes value as a dstring of exactly fieldLen bytes and copies it to buffer.
+{
+    auto content = serializeDString(value, fieldLen);  // value is converted to std::string here, so it must not be a null pointer
+    assert(content.size() == fieldLen);
+    std::copy(std::begin(content), std::end(content), buffer);  // writes content.size() bytes starting at buffer
 }

 void writeUDFString(uint8_t* buffer, const char* str, int len)
@ -136,6 +199,8 @@ void writeLongAD(uint8_t* buffer, uint32_t lenBytes, uint32_t pos, uint16_t part
    buff32[3] = id;
 }

+}  // namespace
+
 // --------------------- ByteFileWriter ---------------------

 ByteFileWriter::ByteFileWriter() : m_buffer(0), m_bufferEnd(0), m_curPos(0), m_tagPos(0) {}
--- a/tsMuxer/osdep/textSubtitlesRenderFT.cpp
+++ b/tsMuxer/osdep/textSubtitlesRenderFT.cpp
@ -537,6 +537,7 @@ void TextSubtitlesRenderFT::drawText(const string& text, RECT* rect)
        if (m_emulateBold || m_emulateItalic)
            pen.x += m_line_thickness - 1;
        maxX = pen.x + face->glyph->bitmap_left;
+        return true;
    });
    if ((m_font.m_opts & m_font.UNDERLINE) || (m_font.m_opts & m_font.STRIKE_OUT))
    {
@ -589,6 +590,7 @@ void TextSubtitlesRenderFT::getTextSize(const string& text, SIZE* mSize)
        pen.x += m_font.m_borderWidth / 2;
        mSize->cy = face->size->metrics.height >> 6;
        mSize->cx = pen.x + face->glyph->bitmap_left;
+        return true;
    });
 }