Fix broken ISO labels when using non-ASCII characters (#222)
This commit fixes broken ISO labels that contain characters outside the character set supported by ISO-8859-1. Every dstring written to the UDF headers is now inspected to determine whether it fits the limited 8-bit encoding or must be encoded as 16-bit. This has the advantage that all dstrings which don't need 16-bit encoding, such as folder and file names, are written to the file structure without any modification.
This commit is contained in:
parent
39ddf3cc6a
commit
b4a5668234
@ -103,6 +103,20 @@ ConversionResult ConvertUTF32toUTF16(const UTF32** sourceStart, const UTF32* sou
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Converts a single UTF-32 code point into UTF-16 code units.
 *
 * @param ch  The code point to convert.
 * @return    A (first, second) tuple of UTF-16 code units. For code points up to
 *            UNI_MAX_BMP the first element is the code unit itself and the second
 *            element is 0. For larger code points the tuple holds the high and the
 *            low surrogate, in that order.
 *
 * Values that are not Unicode scalar values (anything inside the surrogate range
 * U+D800..U+DFFF, or above U+10FFFF) cannot be represented in well-formed UTF-16.
 * They are replaced with U+FFFD (REPLACEMENT CHARACTER) instead of being turned
 * into a lone or out-of-range surrogate.
 */
std::tuple<UTF16, UTF16> ConvertUTF32toUTF16(UTF32 ch)
{
    const UTF32 replacementChar = 0xFFFD;
    const UTF32 lastSurrogate = 0xDFFF;
    const UTF32 maxLegalCodePoint = 0x10FFFF;

    const bool isSurrogate = (ch >= UNI_SUR_HIGH_START && ch <= lastSurrogate);
    if (isSurrogate || ch > maxLegalCodePoint)
    {
        ch = replacementChar;
    }

    if (ch <= UNI_MAX_BMP)
    {
        // Single code unit; a zero second element tells the caller there is no low surrogate.
        return std::make_tuple(static_cast<UTF16>(ch), static_cast<UTF16>(0));
    }

    // Supplementary plane: split the offset from halfBase into two surrogate halves.
    ch -= halfBase;
    return std::make_tuple(static_cast<UTF16>((ch >> halfShift) + UNI_SUR_HIGH_START),
                           static_cast<UTF16>((ch & halfMask) + UNI_SUR_LOW_START));
}
|
||||||
|
|
||||||
/* --------------------------------------------------------------------- */
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
ConversionResult ConvertUTF16toUTF32(const UTF16** sourceStart, const UTF16* sourceEnd, UTF32** targetStart,
|
ConversionResult ConvertUTF16toUTF32(const UTF16** sourceStart, const UTF16* sourceEnd, UTF32** targetStart,
|
||||||
|
@ -89,6 +89,7 @@
|
|||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <tuple>
|
||||||
|
|
||||||
namespace convertUTF
|
namespace convertUTF
|
||||||
{
|
{
|
||||||
@ -136,6 +137,8 @@ ConversionResult ConvertUTF16toUTF32(const UTF16** sourceStart, const UTF16* sou
|
|||||||
ConversionResult ConvertUTF32toUTF16(const UTF32** sourceStart, const UTF32* sourceEnd, UTF16** targetStart,
|
ConversionResult ConvertUTF32toUTF16(const UTF32** sourceStart, const UTF32* sourceEnd, UTF16** targetStart,
|
||||||
UTF16* targetEnd, ConversionFlags flags);
|
UTF16* targetEnd, ConversionFlags flags);
|
||||||
|
|
||||||
|
std::tuple<UTF16, UTF16> ConvertUTF32toUTF16(UTF32);
|
||||||
|
|
||||||
Boolean isLegalUTF8Sequence(const UTF8* source, const UTF8* sourceEnd);
|
Boolean isLegalUTF8Sequence(const UTF8* source, const UTF8* sourceEnd);
|
||||||
|
|
||||||
Boolean isLegalUTF8String(const UTF8* string, int length);
|
Boolean isLegalUTF8String(const UTF8* string, int length);
|
||||||
@ -167,7 +170,8 @@ template <typename Fn>
|
|||||||
void IterateUTF8Chars(const std::string& utf8String, Fn f)
|
void IterateUTF8Chars(const std::string& utf8String, Fn f)
|
||||||
{
|
{
|
||||||
auto it = std::begin(utf8String);
|
auto it = std::begin(utf8String);
|
||||||
while (it != std::end(utf8String))
|
bool keep_going = true;
|
||||||
|
while (keep_going && it != std::end(utf8String))
|
||||||
{
|
{
|
||||||
UTF32 ch = 0;
|
UTF32 ch = 0;
|
||||||
unsigned short extraBytesToRead = trailingBytesForUTF8[static_cast<unsigned char>(*it)];
|
unsigned short extraBytesToRead = trailingBytesForUTF8[static_cast<unsigned char>(*it)];
|
||||||
@ -193,7 +197,7 @@ void IterateUTF8Chars(const std::string& utf8String, Fn f)
|
|||||||
ch += get_as_uchar();
|
ch += get_as_uchar();
|
||||||
}
|
}
|
||||||
ch -= offsetsFromUTF8[extraBytesToRead];
|
ch -= offsetsFromUTF8[extraBytesToRead];
|
||||||
f(ch);
|
keep_going = f(ch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,17 +1,19 @@
|
|||||||
#include "iso_writer.h"
|
#include "iso_writer.h"
|
||||||
|
|
||||||
#include <assert.h>
|
#include <algorithm>
|
||||||
#include <stdlib.h>
|
#include <cassert>
|
||||||
#include <string.h>
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
|
#include "convertUTF.h"
|
||||||
|
#include "utf8Converter.h"
|
||||||
#include "vod_common.h"
|
#include "vod_common.h"
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
#include <time.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// ----------- routines --------------
|
// ----------- routines --------------
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
/*
|
/*
|
||||||
Name : CRC-16 CCITT
|
Name : CRC-16 CCITT
|
||||||
Poly : 0x1021 x^16 + x^12 + x^5 + 1
|
Poly : 0x1021 x^16 + x^12 + x^5 + 1
|
||||||
@ -108,14 +110,75 @@ void writeTimestamp(uint8_t* buffer, time_t time)
|
|||||||
buffer[11] = 0;
|
buffer[11] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void writeDString(uint8_t* buffer, const char* value, int len)
|
bool canUse8BitUnicode(const std::string& utf8Str)
|
||||||
{
|
{
|
||||||
int realLen = FFMIN(strlen(value), len - 2);
|
bool rv = true;
|
||||||
buffer[len - 1] = realLen + 1;
|
convertUTF::IterateUTF8Chars(utf8Str, [&](auto c) {
|
||||||
buffer[0] = 8; // 8 bit per character string
|
rv = (c < 0x100);
|
||||||
memcpy(buffer + 1, value, realLen + 1);
|
return rv;
|
||||||
int restLen = len - realLen - 2;
|
});
|
||||||
memset(buffer + 1 + realLen, 0, restLen);
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::uint8_t> serializeDString(const std::string& str, int fieldLen)
|
||||||
|
{
|
||||||
|
if (str.empty())
|
||||||
|
{
|
||||||
|
return std::vector<std::uint8_t>(fieldLen, 0);
|
||||||
|
}
|
||||||
|
std::vector<std::uint8_t> rv;
|
||||||
|
#ifdef _WIN32
|
||||||
|
auto str_u8 = reinterpret_cast<const std::uint8_t*>(str.c_str());
|
||||||
|
auto utf8Str = convertUTF::isLegalUTF8String(str_u8, str.length())
|
||||||
|
? str
|
||||||
|
: UtfConverter::toUtf8(str_u8, str.length(), UtfConverter::sfANSI);
|
||||||
|
#else
|
||||||
|
auto& utf8Str = str;
|
||||||
|
#endif
|
||||||
|
using namespace convertUTF;
|
||||||
|
const auto maxHeaderAndContentLength = fieldLen - 1;
|
||||||
|
rv.reserve(fieldLen);
|
||||||
|
if (canUse8BitUnicode(utf8Str))
|
||||||
|
{
|
||||||
|
rv.push_back(8);
|
||||||
|
IterateUTF8Chars(utf8Str, [&](auto c) {
|
||||||
|
rv.push_back(c);
|
||||||
|
return rv.size() < maxHeaderAndContentLength;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
rv.push_back(16);
|
||||||
|
IterateUTF8Chars(utf8Str, [&](auto c) {
|
||||||
|
UTF16 high_surrogate, low_surrogate;
|
||||||
|
std::tie(high_surrogate, low_surrogate) = ConvertUTF32toUTF16(c);
|
||||||
|
auto spaceLeft = maxHeaderAndContentLength - rv.size();
|
||||||
|
if ((spaceLeft < 2) || (low_surrogate && spaceLeft < 4))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
rv.push_back(high_surrogate >> 8);
|
||||||
|
rv.push_back(high_surrogate);
|
||||||
|
if (low_surrogate)
|
||||||
|
{
|
||||||
|
rv.push_back(low_surrogate >> 8);
|
||||||
|
rv.push_back(low_surrogate);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
auto contentLength = rv.size();
|
||||||
|
auto paddingSize = maxHeaderAndContentLength - rv.size();
|
||||||
|
std::fill_n(std::back_inserter(rv), paddingSize, 0);
|
||||||
|
rv.push_back(contentLength);
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
void writeDString(uint8_t* buffer, const char* value, int fieldLen)
|
||||||
|
{
|
||||||
|
auto content = serializeDString(value, fieldLen);
|
||||||
|
assert(content.size() == fieldLen);
|
||||||
|
std::copy(std::begin(content), std::end(content), buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
void writeUDFString(uint8_t* buffer, const char* str, int len)
|
void writeUDFString(uint8_t* buffer, const char* str, int len)
|
||||||
@ -136,6 +199,8 @@ void writeLongAD(uint8_t* buffer, uint32_t lenBytes, uint32_t pos, uint16_t part
|
|||||||
buff32[3] = id;
|
buff32[3] = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
// --------------------- ByteFileWriter ---------------------
|
// --------------------- ByteFileWriter ---------------------
|
||||||
|
|
||||||
ByteFileWriter::ByteFileWriter() : m_buffer(0), m_bufferEnd(0), m_curPos(0), m_tagPos(0) {}
|
ByteFileWriter::ByteFileWriter() : m_buffer(0), m_bufferEnd(0), m_curPos(0), m_tagPos(0) {}
|
||||||
|
@ -537,6 +537,7 @@ void TextSubtitlesRenderFT::drawText(const string& text, RECT* rect)
|
|||||||
if (m_emulateBold || m_emulateItalic)
|
if (m_emulateBold || m_emulateItalic)
|
||||||
pen.x += m_line_thickness - 1;
|
pen.x += m_line_thickness - 1;
|
||||||
maxX = pen.x + face->glyph->bitmap_left;
|
maxX = pen.x + face->glyph->bitmap_left;
|
||||||
|
return true;
|
||||||
});
|
});
|
||||||
if ((m_font.m_opts & m_font.UNDERLINE) || (m_font.m_opts & m_font.STRIKE_OUT))
|
if ((m_font.m_opts & m_font.UNDERLINE) || (m_font.m_opts & m_font.STRIKE_OUT))
|
||||||
{
|
{
|
||||||
@ -589,6 +590,7 @@ void TextSubtitlesRenderFT::getTextSize(const string& text, SIZE* mSize)
|
|||||||
pen.x += m_font.m_borderWidth / 2;
|
pen.x += m_font.m_borderWidth / 2;
|
||||||
mSize->cy = face->size->metrics.height >> 6;
|
mSize->cy = face->size->metrics.height >> 6;
|
||||||
mSize->cx = pen.x + face->glyph->bitmap_left;
|
mSize->cx = pen.x + face->glyph->bitmap_left;
|
||||||
|
return true;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user