Fix broken ISO labels when using non-ASCII characters (#222)

This commit fixes broken ISO labels that contain characters outside the character set supported by ISO-8859-1. Every dstring written to the UDF headers is now inspected to determine whether it fits the limited 8-bit encoding or must be encoded as 16-bit. The advantage is that all dstrings which don't need 16-bit encoding, such as folder and file names, are left without any modification in the file structure.
This commit is contained in:
Daniel Kamil Kozar 2020-03-04 17:26:15 +01:00 committed by GitHub
parent 39ddf3cc6a
commit b4a5668234
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 101 additions and 16 deletions

View File

@ -103,6 +103,20 @@ ConversionResult ConvertUTF32toUTF16(const UTF32** sourceStart, const UTF32* sou
return result;
}
/*
 * Converts one UTF-32 code point into UTF-16 code units.
 *
 * Returns a pair of units: for values up to UNI_MAX_BMP the first unit is the
 * value itself and the second unit is 0; for larger values the pair is
 * (high surrogate, low surrogate). The input is not validated here.
 */
std::tuple<UTF16, UTF16> ConvertUTF32toUTF16(UTF32 ch)
{
    if (ch > UNI_MAX_BMP)
    {
        // Supplementary plane: split the offset above halfBase into two surrogate halves.
        const UTF32 offset = ch - halfBase;
        const auto highSurrogate = static_cast<UTF16>((offset >> halfShift) + UNI_SUR_HIGH_START);
        const auto lowSurrogate = static_cast<UTF16>((offset & halfMask) + UNI_SUR_LOW_START);
        return std::make_tuple(highSurrogate, lowSurrogate);
    }

    // Fits in a single 16-bit unit; the second slot stays 0 to signal "no low surrogate".
    return std::make_tuple(static_cast<UTF16>(ch), static_cast<UTF16>(0));
}
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF16toUTF32(const UTF16** sourceStart, const UTF16* sourceEnd, UTF32** targetStart,

View File

@ -89,6 +89,7 @@
#include <cstdint>
#include <string>
#include <tuple>
namespace convertUTF
{
@ -136,6 +137,8 @@ ConversionResult ConvertUTF16toUTF32(const UTF16** sourceStart, const UTF16* sou
ConversionResult ConvertUTF32toUTF16(const UTF32** sourceStart, const UTF32* sourceEnd, UTF16** targetStart,
UTF16* targetEnd, ConversionFlags flags);
std::tuple<UTF16, UTF16> ConvertUTF32toUTF16(UTF32);
Boolean isLegalUTF8Sequence(const UTF8* source, const UTF8* sourceEnd);
Boolean isLegalUTF8String(const UTF8* string, int length);
@ -167,7 +170,8 @@ template <typename Fn>
void IterateUTF8Chars(const std::string& utf8String, Fn f)
{
auto it = std::begin(utf8String);
while (it != std::end(utf8String))
bool keep_going = true;
while (keep_going && it != std::end(utf8String))
{
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[static_cast<unsigned char>(*it)];
@ -193,7 +197,7 @@ void IterateUTF8Chars(const std::string& utf8String, Fn f)
ch += get_as_uchar();
}
ch -= offsetsFromUTF8[extraBytesToRead];
f(ch);
keep_going = f(ch);
}
}

View File

@ -1,17 +1,19 @@
#include "iso_writer.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include "convertUTF.h"
#include "utf8Converter.h"
#include "vod_common.h"
#ifdef _WIN32
#include <time.h>
#endif
// ----------- routines --------------
namespace
{
/*
Name : CRC-16 CCITT
Poly : 0x1021 x^16 + x^12 + x^5 + 1
@ -108,14 +110,75 @@ void writeTimestamp(uint8_t* buffer, time_t time)
buffer[11] = 0;
}
void writeDString(uint8_t* buffer, const char* value, int len)
// Reports whether every code point decoded from utf8Str is below 0x100.
// The callback returns false at the first code point that is not, which ends the
// iteration early, so rv then holds false.
// NOTE(review): the statements below that reference buffer/value/len/realLen/restLen use
// names that are not parameters of this function - presumably they are the removed body of
// the previous writeDString, interleaved here by the diff rendering. Confirm against the
// actual file before relying on this span.
bool canUse8BitUnicode(const std::string& utf8Str)
{
int realLen = FFMIN(strlen(value), len - 2);
buffer[len - 1] = realLen + 1;
buffer[0] = 8; // 8 bit per character string
memcpy(buffer + 1, value, realLen + 1);
int restLen = len - realLen - 2;
memset(buffer + 1 + realLen, 0, restLen);
bool rv = true;
convertUTF::IterateUTF8Chars(utf8Str, [&](auto c) {
// Remember the verdict for the current code point and use it as the "keep going" flag.
rv = (c < 0x100);
return rv;
});
return rv;
}
std::vector<std::uint8_t> serializeDString(const std::string& str, int fieldLen)
{
if (str.empty())
{
return std::vector<std::uint8_t>(fieldLen, 0);
}
std::vector<std::uint8_t> rv;
#ifdef _WIN32
auto str_u8 = reinterpret_cast<const std::uint8_t*>(str.c_str());
auto utf8Str = convertUTF::isLegalUTF8String(str_u8, str.length())
? str
: UtfConverter::toUtf8(str_u8, str.length(), UtfConverter::sfANSI);
#else
auto& utf8Str = str;
#endif
using namespace convertUTF;
const auto maxHeaderAndContentLength = fieldLen - 1;
rv.reserve(fieldLen);
if (canUse8BitUnicode(utf8Str))
{
rv.push_back(8);
IterateUTF8Chars(utf8Str, [&](auto c) {
rv.push_back(c);
return rv.size() < maxHeaderAndContentLength;
});
}
else
{
rv.push_back(16);
IterateUTF8Chars(utf8Str, [&](auto c) {
UTF16 high_surrogate, low_surrogate;
std::tie(high_surrogate, low_surrogate) = ConvertUTF32toUTF16(c);
auto spaceLeft = maxHeaderAndContentLength - rv.size();
if ((spaceLeft < 2) || (low_surrogate && spaceLeft < 4))
{
return false;
}
rv.push_back(high_surrogate >> 8);
rv.push_back(high_surrogate);
if (low_surrogate)
{
rv.push_back(low_surrogate >> 8);
rv.push_back(low_surrogate);
}
return true;
});
}
auto contentLength = rv.size();
auto paddingSize = maxHeaderAndContentLength - rv.size();
std::fill_n(std::back_inserter(rv), paddingSize, 0);
rv.push_back(contentLength);
return rv;
}
// Serializes value as a dstring of fieldLen bytes and copies the result to buffer.
// The serialized size is asserted to match fieldLen before the copy.
void writeDString(uint8_t* buffer, const char* value, int fieldLen)
{
    const auto encoded = serializeDString(value, fieldLen);
    assert(encoded.size() == fieldLen);
    std::copy(encoded.cbegin(), encoded.cend(), buffer);
}
void writeUDFString(uint8_t* buffer, const char* str, int len)
@ -136,6 +199,8 @@ void writeLongAD(uint8_t* buffer, uint32_t lenBytes, uint32_t pos, uint16_t part
buff32[3] = id;
}
} // namespace
// --------------------- ByteFileWriter ---------------------
ByteFileWriter::ByteFileWriter() : m_buffer(0), m_bufferEnd(0), m_curPos(0), m_tagPos(0) {}

View File

@ -537,6 +537,7 @@ void TextSubtitlesRenderFT::drawText(const string& text, RECT* rect)
if (m_emulateBold || m_emulateItalic)
pen.x += m_line_thickness - 1;
maxX = pen.x + face->glyph->bitmap_left;
return true;
});
if ((m_font.m_opts & m_font.UNDERLINE) || (m_font.m_opts & m_font.STRIKE_OUT))
{
@ -589,6 +590,7 @@ void TextSubtitlesRenderFT::getTextSize(const string& text, SIZE* mSize)
pen.x += m_font.m_borderWidth / 2;
mSize->cy = face->size->metrics.height >> 6;
mSize->cx = pen.x + face->glyph->bitmap_left;
return true;
});
}