Fix broken ISO labels when using non-ASCII characters (#222)

This commit fixes broken ISO labels that contain characters outside the character set supported by ISO-8859-1. Every dstring written to the UDF headers is now inspected to determine whether it fits the limited 8-bit encoding or has to be encoded as 16-bit. This has the advantage that all dstrings which don't need 16-bit encoding, like folder and file names, are left without any modification in the file structure.
2020-03-04 17:26:15 +01:00 · 2020-03-04 17:26:15 +01:00 · b4a5668234
commit b4a5668234
parent 39ddf3cc6a
4 changed files with 101 additions and 16 deletions
--- a/tsMuxer/convertUTF.cpp
+++ b/tsMuxer/convertUTF.cpp
@ -103,6 +103,20 @@ ConversionResult ConvertUTF32toUTF16(const UTF32** sourceStart, const UTF32* sou
    return result;
 }
 // Encodes a single UTF-32 code point as UTF-16 code units.
 //
 // Returns a (first, second) tuple:
 //   - ch <= UNI_MAX_BMP: first is ch narrowed to UTF16 and second is 0;
 //   - otherwise: first/second are the high/low surrogate units derived from ch.
 // The input is not validated here (no range or surrogate checks are performed).
 std::tuple<UTF16, UTF16> ConvertUTF32toUTF16(UTF32 ch)
 {
    if (ch > UNI_MAX_BMP)
    {
        // Split the offset above the BMP into its upper and lower halves.
        const UTF32 offset = ch - halfBase;
        const auto lead = static_cast<UTF16>((offset >> halfShift) + UNI_SUR_HIGH_START);
        const auto trail = static_cast<UTF16>((offset & halfMask) + UNI_SUR_LOW_START);
        return std::make_tuple(lead, trail);
    }
    // A single code unit is enough; 0 in the second slot means "no second unit".
    return std::make_tuple(static_cast<UTF16>(ch), 0);
 }
 /* --------------------------------------------------------------------- */
 ConversionResult ConvertUTF16toUTF32(const UTF16** sourceStart, const UTF16* sourceEnd, UTF32** targetStart,
--- a/tsMuxer/convertUTF.h
+++ b/tsMuxer/convertUTF.h
@ -89,6 +89,7 @@
 #include <cstdint>
 #include <string>
 #include <tuple>
 namespace convertUTF
 {
@ -136,6 +137,8 @@ ConversionResult ConvertUTF16toUTF32(const UTF16** sourceStart, const UTF16* sou
 ConversionResult ConvertUTF32toUTF16(const UTF32** sourceStart, const UTF32* sourceEnd, UTF16** targetStart,
                                     UTF16* targetEnd, ConversionFlags flags);
 std::tuple<UTF16, UTF16> ConvertUTF32toUTF16(UTF32);
 Boolean isLegalUTF8Sequence(const UTF8* source, const UTF8* sourceEnd);
 Boolean isLegalUTF8String(const UTF8* string, int length);
@ -167,7 +170,8 @@ template <typename Fn>
 void IterateUTF8Chars(const std::string& utf8String, Fn f)
 {
    auto it = std::begin(utf8String);
-    while (it != std::end(utf8String))
+    bool keep_going = true;
    while (keep_going && it != std::end(utf8String))
    {
        UTF32 ch = 0;
        unsigned short extraBytesToRead = trailingBytesForUTF8[static_cast<unsigned char>(*it)];
@ -193,7 +197,7 @@ void IterateUTF8Chars(const std::string& utf8String, Fn f)
            ch += get_as_uchar();
        }
        ch -= offsetsFromUTF8[extraBytesToRead];
-        f(ch);
+        keep_going = f(ch);
    }
 }
--- a/tsMuxer/iso_writer.cpp
+++ b/tsMuxer/iso_writer.cpp
@ -1,17 +1,19 @@
 #include "iso_writer.h"
-#include <assert.h>
+#include <algorithm>
-#include <stdlib.h>
+#include <cassert>
-#include <string.h>
+#include <cstdlib>
 #include <cstring>
 #include <ctime>
 #include "convertUTF.h"
 #include "utf8Converter.h"
 #include "vod_common.h"
 #ifdef _WIN32
 #include <time.h>
 #endif
 // ----------- routines --------------
 namespace
 {
 /*
  Name  : CRC-16 CCITT
  Poly  : 0x1021    x^16 + x^12 + x^5 + 1
@ -108,14 +110,75 @@ void writeTimestamp(uint8_t* buffer, time_t time)
    buffer[11] = 0;
 }
-void writeDString(uint8_t* buffer, const char* value, int len)
+bool canUse8BitUnicode(const std::string& utf8Str)
 {
-    int realLen = FFMIN(strlen(value), len - 2);
+    bool rv = true;
-    buffer[len - 1] = realLen + 1;
+    convertUTF::IterateUTF8Chars(utf8Str, [&](auto c) {
-    buffer[0] = 8;  // 8 bit per character string
+        rv = (c < 0x100);
-    memcpy(buffer + 1, value, realLen + 1);
+        return rv;
-    int restLen = len - realLen - 2;
+    });
-    memset(buffer + 1 + realLen, 0, restLen);
+    return rv;
 }
 // Serializes `str` into a fixed-size dstring field of `fieldLen` bytes.
 //
 // Layout of the returned buffer:
 //   byte 0            compression ID: 8 (one byte per character) or 16 (two bytes
 //                     per UTF-16 code unit, most significant byte first)
 //   bytes 1..n        encoded characters, truncated at a character boundary so
 //                     they never overrun the field
 //   padding           zero bytes
 //   last byte         number of bytes used by the compression ID plus the characters
 //
 // The 8-bit form is used when every code point of the string is below 0x100,
 // otherwise the 16-bit form is used.
 //
 // An all-zero field is returned when `str` is empty, when the field is too small
 // to hold any character, or when not even the first character fits. A
 // non-positive `fieldLen` yields an empty vector.
 //
 // On Windows, input that is not legal UTF-8 is first converted with
 // UtfConverter::toUtf8(..., sfANSI); elsewhere `str` is assumed to be UTF-8 already.
 std::vector<std::uint8_t> serializeDString(const std::string& str, int fieldLen)
 {
    const std::size_t fieldSize = fieldLen > 0 ? static_cast<std::size_t>(fieldLen) : 0;
    // Compression ID + at least one character byte + length byte = 3 bytes minimum.
    // Anything smaller cannot hold text, so emit the same all-zero field as for "".
    if (str.empty() || fieldSize < 3)
    {
        return std::vector<std::uint8_t>(fieldSize, 0);
    }
 #ifdef _WIN32
    auto str_u8 = reinterpret_cast<const std::uint8_t*>(str.c_str());
    auto utf8Str = convertUTF::isLegalUTF8String(str_u8, str.length())
                       ? str
                       : UtfConverter::toUtf8(str_u8, str.length(), UtfConverter::sfANSI);
 #else
    auto& utf8Str = str;
 #endif
    using namespace convertUTF;
    // Everything except the trailing length byte. The length is stored in a single
    // byte, so it can never describe more than 255 bytes even in a larger field.
    const std::size_t maxHeaderAndContentLength = std::min<std::size_t>(fieldSize - 1, 255);
    std::vector<std::uint8_t> rv;
    rv.reserve(fieldSize);
    if (canUse8BitUnicode(utf8Str))
    {
        rv.push_back(8);
        IterateUTF8Chars(utf8Str, [&](auto c) {
            // The size guard above guarantees room for the first character;
            // afterwards keep going only while there is still space left.
            rv.push_back(static_cast<std::uint8_t>(c));
            return rv.size() < maxHeaderAndContentLength;
        });
    }
    else
    {
        rv.push_back(16);
        IterateUTF8Chars(utf8Str, [&](auto c) {
            UTF16 high_surrogate, low_surrogate;
            std::tie(high_surrogate, low_surrogate) = ConvertUTF32toUTF16(c);
            const std::size_t spaceLeft = maxHeaderAndContentLength - rv.size();
            // A non-zero second unit means the character needs two code units;
            // never write only half of such a pair.
            const std::size_t spaceNeeded = low_surrogate ? 4 : 2;
            if (spaceLeft < spaceNeeded)
            {
                return false;
            }
            rv.push_back(static_cast<std::uint8_t>(high_surrogate >> 8));
            rv.push_back(static_cast<std::uint8_t>(high_surrogate & 0xFF));
            if (low_surrogate)
            {
                rv.push_back(static_cast<std::uint8_t>(low_surrogate >> 8));
                rv.push_back(static_cast<std::uint8_t>(low_surrogate & 0xFF));
            }
            return true;
        });
    }
    if (rv.size() < 2)
    {
        // Only the compression ID was written (no character fit): represent this
        // like the empty string instead of emitting a text-less header.
        return std::vector<std::uint8_t>(fieldSize, 0);
    }
    // rv.size() <= 255 here, so the narrowing below cannot lose information.
    const auto contentLength = static_cast<std::uint8_t>(rv.size());
    rv.resize(fieldSize - 1, 0);  // zero padding up to the length byte
    rv.push_back(contentLength);
    return rv;
 }
 // Writes `value` as a dstring into the `fieldLen` bytes starting at `buffer`.
 //
 // The encoding itself is delegated to serializeDString(); this function only
 // copies the serialized bytes into place. `buffer` must provide at least
 // `fieldLen` writable bytes. A null `value` is treated like an empty string.
 void writeDString(uint8_t* buffer, const char* value, int fieldLen)
 {
    // std::string must not be constructed from a null pointer.
    const auto content = serializeDString(value != nullptr ? value : "", fieldLen);
    // The serialized form is expected to fill the field exactly.
    assert(content.size() == static_cast<std::size_t>(fieldLen));
    std::copy(content.begin(), content.end(), buffer);
 }
 void writeUDFString(uint8_t* buffer, const char* str, int len)
@ -136,6 +199,8 @@ void writeLongAD(uint8_t* buffer, uint32_t lenBytes, uint32_t pos, uint16_t part
    buff32[3] = id;
 }
 }  // namespace
 // --------------------- ByteFileWriter ---------------------
 // Default constructor: initialises m_buffer, m_bufferEnd, m_curPos and m_tagPos to 0.
 ByteFileWriter::ByteFileWriter() : m_buffer(0), m_bufferEnd(0), m_curPos(0), m_tagPos(0) {}
--- a/tsMuxer/osdep/textSubtitlesRenderFT.cpp
+++ b/tsMuxer/osdep/textSubtitlesRenderFT.cpp
@ -537,6 +537,7 @@ void TextSubtitlesRenderFT::drawText(const string& text, RECT* rect)
        if (m_emulateBold || m_emulateItalic)
            pen.x += m_line_thickness - 1;
        maxX = pen.x + face->glyph->bitmap_left;
        return true;
    });
    if ((m_font.m_opts & m_font.UNDERLINE) || (m_font.m_opts & m_font.STRIKE_OUT))
    {
@ -589,6 +590,7 @@ void TextSubtitlesRenderFT::getTextSize(const string& text, SIZE* mSize)
        pen.x += m_font.m_borderWidth / 2;
        mSize->cy = face->size->metrics.height >> 6;
        mSize->cx = pen.x + face->glyph->bitmap_left;
        return true;
    });
 }