Fix broken ISO labels when using non-ASCII characters (#222)
This commit fixes broken ISO labels that contain characters outside the character set supported by ISO-8859-1. Every dstring written to the UDF headers is now inspected to determine whether it fits the 8-bit encoding or has to be encoded as 16-bit. This has the advantage that all dstrings which don't need 16-bit encoding, such as folder and file names, are left without any modification in the file structure.
This commit is contained in:
parent
39ddf3cc6a
commit
b4a5668234
@ -103,6 +103,20 @@ ConversionResult ConvertUTF32toUTF16(const UTF32** sourceStart, const UTF32* sou
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Encodes a single UTF-32 code point as UTF-16.
 *
 * @param ch  code point to encode
 * @return    (first unit, second unit). For code points up to UNI_MAX_BMP the
 *            first unit is the code point itself and the second unit is 0;
 *            otherwise the pair is (high surrogate, low surrogate).
 *
 * Code points that cannot be represented in well-formed UTF-16 - values inside
 * the surrogate range and values above U+10FFFF - are replaced by U+FFFD
 * (REPLACEMENT CHARACTER) instead of producing a lone or out-of-range surrogate.
 */
std::tuple<UTF16, UTF16> ConvertUTF32toUTF16(UTF32 ch)
{
    // Unicode limits needed only for input validation in this overload.
    constexpr UTF32 kLastSurrogate = 0xDFFF;
    constexpr UTF32 kMaxLegalCodePoint = 0x10FFFF;
    constexpr UTF32 kReplacementChar = 0xFFFD;

    const bool isSurrogate = (ch >= UNI_SUR_HIGH_START && ch <= kLastSurrogate);
    if (isSurrogate || ch > kMaxLegalCodePoint)
    {
        ch = kReplacementChar;
    }

    if (ch <= UNI_MAX_BMP)
    {
        // Single code unit; a second unit of 0 tells the caller there is no low surrogate.
        return std::make_tuple(static_cast<UTF16>(ch), static_cast<UTF16>(0));
    }

    // Supplementary plane: split the 20 significant bits into two 10-bit halves.
    ch -= halfBase;
    return std::make_tuple(static_cast<UTF16>((ch >> halfShift) + UNI_SUR_HIGH_START),
                           static_cast<UTF16>((ch & halfMask) + UNI_SUR_LOW_START));
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF16toUTF32(const UTF16** sourceStart, const UTF16* sourceEnd, UTF32** targetStart,
|
||||
|
@ -89,6 +89,7 @@
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
|
||||
namespace convertUTF
|
||||
{
|
||||
@ -136,6 +137,8 @@ ConversionResult ConvertUTF16toUTF32(const UTF16** sourceStart, const UTF16* sou
|
||||
ConversionResult ConvertUTF32toUTF16(const UTF32** sourceStart, const UTF32* sourceEnd, UTF16** targetStart,
|
||||
UTF16* targetEnd, ConversionFlags flags);
|
||||
|
||||
std::tuple<UTF16, UTF16> ConvertUTF32toUTF16(UTF32);
|
||||
|
||||
Boolean isLegalUTF8Sequence(const UTF8* source, const UTF8* sourceEnd);
|
||||
|
||||
Boolean isLegalUTF8String(const UTF8* string, int length);
|
||||
@ -167,7 +170,8 @@ template <typename Fn>
|
||||
void IterateUTF8Chars(const std::string& utf8String, Fn f)
|
||||
{
|
||||
auto it = std::begin(utf8String);
|
||||
while (it != std::end(utf8String))
|
||||
bool keep_going = true;
|
||||
while (keep_going && it != std::end(utf8String))
|
||||
{
|
||||
UTF32 ch = 0;
|
||||
unsigned short extraBytesToRead = trailingBytesForUTF8[static_cast<unsigned char>(*it)];
|
||||
@ -193,7 +197,7 @@ void IterateUTF8Chars(const std::string& utf8String, Fn f)
|
||||
ch += get_as_uchar();
|
||||
}
|
||||
ch -= offsetsFromUTF8[extraBytesToRead];
|
||||
f(ch);
|
||||
keep_going = f(ch);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,17 +1,19 @@
|
||||
#include "iso_writer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
|
||||
#include "convertUTF.h"
|
||||
#include "utf8Converter.h"
|
||||
#include "vod_common.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
// ----------- routines --------------
|
||||
|
||||
namespace
|
||||
{
|
||||
/*
|
||||
Name : CRC-16 CCITT
|
||||
Poly : 0x1021 x^16 + x^12 + x^5 + 1
|
||||
@ -108,14 +110,75 @@ void writeTimestamp(uint8_t* buffer, time_t time)
|
||||
buffer[11] = 0;
|
||||
}
|
||||
|
||||
void writeDString(uint8_t* buffer, const char* value, int len)
|
||||
// Reports whether every code point of the given UTF-8 string is below 0x100.
// Returns true for an empty string (the flag starts out true and is only
// changed by the per-character callback).
bool canUse8BitUnicode(const std::string& utf8Str)
|
||||
{
|
||||
// NOTE(review): the statements from here down to the memset reference value/len/buffer,
// which are not parameters of this function; they look like the removed body of the
// previous writeDString as rendered by the diff view - confirm before relying on them.
int realLen = FFMIN(strlen(value), len - 2);
|
||||
buffer[len - 1] = realLen + 1;
|
||||
buffer[0] = 8; // 8 bit per character string
|
||||
memcpy(buffer + 1, value, realLen + 1);
|
||||
int restLen = len - realLen - 2;
|
||||
memset(buffer + 1 + realLen, 0, restLen);
|
||||
// Result of the most recent character check.
bool rv = true;
|
||||
convertUTF::IterateUTF8Chars(utf8Str, [&](auto c) {
|
||||
rv = (c < 0x100);
|
||||
// The callback's return value is the iterator's keep-going flag, so the first
// code point of 0x100 or above ends the scan with rv == false.
return rv;
|
||||
});
|
||||
return rv;
|
||||
}
|
||||
|
||||
std::vector<std::uint8_t> serializeDString(const std::string& str, int fieldLen)
|
||||
{
|
||||
if (str.empty())
|
||||
{
|
||||
return std::vector<std::uint8_t>(fieldLen, 0);
|
||||
}
|
||||
std::vector<std::uint8_t> rv;
|
||||
#ifdef _WIN32
|
||||
auto str_u8 = reinterpret_cast<const std::uint8_t*>(str.c_str());
|
||||
auto utf8Str = convertUTF::isLegalUTF8String(str_u8, str.length())
|
||||
? str
|
||||
: UtfConverter::toUtf8(str_u8, str.length(), UtfConverter::sfANSI);
|
||||
#else
|
||||
auto& utf8Str = str;
|
||||
#endif
|
||||
using namespace convertUTF;
|
||||
const auto maxHeaderAndContentLength = fieldLen - 1;
|
||||
rv.reserve(fieldLen);
|
||||
if (canUse8BitUnicode(utf8Str))
|
||||
{
|
||||
rv.push_back(8);
|
||||
IterateUTF8Chars(utf8Str, [&](auto c) {
|
||||
rv.push_back(c);
|
||||
return rv.size() < maxHeaderAndContentLength;
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
rv.push_back(16);
|
||||
IterateUTF8Chars(utf8Str, [&](auto c) {
|
||||
UTF16 high_surrogate, low_surrogate;
|
||||
std::tie(high_surrogate, low_surrogate) = ConvertUTF32toUTF16(c);
|
||||
auto spaceLeft = maxHeaderAndContentLength - rv.size();
|
||||
if ((spaceLeft < 2) || (low_surrogate && spaceLeft < 4))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
rv.push_back(high_surrogate >> 8);
|
||||
rv.push_back(high_surrogate);
|
||||
if (low_surrogate)
|
||||
{
|
||||
rv.push_back(low_surrogate >> 8);
|
||||
rv.push_back(low_surrogate);
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
auto contentLength = rv.size();
|
||||
auto paddingSize = maxHeaderAndContentLength - rv.size();
|
||||
std::fill_n(std::back_inserter(rv), paddingSize, 0);
|
||||
rv.push_back(contentLength);
|
||||
return rv;
|
||||
}
|
||||
|
||||
void writeDString(uint8_t* buffer, const char* value, int fieldLen)
|
||||
{
|
||||
auto content = serializeDString(value, fieldLen);
|
||||
assert(content.size() == fieldLen);
|
||||
std::copy(std::begin(content), std::end(content), buffer);
|
||||
}
|
||||
|
||||
void writeUDFString(uint8_t* buffer, const char* str, int len)
|
||||
@ -136,6 +199,8 @@ void writeLongAD(uint8_t* buffer, uint32_t lenBytes, uint32_t pos, uint16_t part
|
||||
buff32[3] = id;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// --------------------- ByteFileWriter ---------------------
|
||||
|
||||
// Default constructor: initialises m_buffer, m_bufferEnd, m_curPos and m_tagPos to 0.
ByteFileWriter::ByteFileWriter() : m_buffer(0), m_bufferEnd(0), m_curPos(0), m_tagPos(0) {}
|
||||
|
@ -537,6 +537,7 @@ void TextSubtitlesRenderFT::drawText(const string& text, RECT* rect)
|
||||
if (m_emulateBold || m_emulateItalic)
|
||||
pen.x += m_line_thickness - 1;
|
||||
maxX = pen.x + face->glyph->bitmap_left;
|
||||
return true;
|
||||
});
|
||||
if ((m_font.m_opts & m_font.UNDERLINE) || (m_font.m_opts & m_font.STRIKE_OUT))
|
||||
{
|
||||
@ -589,6 +590,7 @@ void TextSubtitlesRenderFT::getTextSize(const string& text, SIZE* mSize)
|
||||
pen.x += m_font.m_borderWidth / 2;
|
||||
mSize->cy = face->size->metrics.height >> 6;
|
||||
mSize->cx = pen.x + face->glyph->bitmap_left;
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user