obs-studio/deps/libcaption/src/eia608.c.re2c
2016-12-23 10:37:06 -08:00

422 lines
18 KiB
Plaintext

/**********************************************************************************************/
/* The MIT License */
/* */
/* Copyright 2016-2016 Twitch Interactive, Inc. or its affiliates. All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining a copy */
/* of this software and associated documentation files (the "Software"), to deal */
/* in the Software without restriction, including without limitation the rights */
/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */
/* copies of the Software, and to permit persons to whom the Software is */
/* furnished to do so, subject to the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be included in */
/* all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */
/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */
/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */
/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */
/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */
/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN */
/* THE SOFTWARE. */
/**********************************************************************************************/
#include "eia608.h"
#include <string.h>
#include <stdio.h>
////////////////////////////////////////////////////////////////////////////////
// Indexed by the 4-bit row code extracted from a preamble; yields the caption
// row. Code 1 has no row assigned and maps to -1.
int eia608_row_map[] = {
    10, -1,  0,  1,
     2,  3, 11, 12,
    13, 14,  4,  5,
     6,  7,  8,  9,
};
// Inverse table: indexed by caption row, yields the 4-bit preamble row code.
// Rows 0-14 round-trip through eia608_row_map; index 15 holds the unused code 1.
int eia608_reverse_row_map[] = {
     2,  3,  4,  5,
    10, 11, 12, 13,
    14, 15,  0,  6,
     7,  8,  9,  1,
};
// Printable names for caption modes, addressed by numeric mode index.
const char* eia608_mode_map[] = {
    [0] = "clear",
    [1] = "loading",
    [2] = "popOn",
    [3] = "paintOn",
    [4] = "rollUp",
};
// Printable names for text styles, addressed by the 3-bit style value that
// the preamble and mid-row parsers extract.
const char* eia608_style_map[] = {
    [0] = "white",
    [1] = "green",
    [2] = "blue",
    [3] = "cyan",
    [4] = "red",
    [5] = "yellow",
    [6] = "magenta",
    [7] = "italics",
};
/**
 * Builds a preamble word for a caption row.
 *
 * @param row        caption row; only the low 4 bits are used, and they are
 *                   translated through eia608_reverse_row_map
 * @param chan       non-zero sets bit 0x0800
 * @param x          4-bit attribute field placed in bits 0x001E (bit 3 of x
 *                   lands on 0x0010, the flag eia608_parse_preamble tests to
 *                   choose between column and style)
 * @param underline  non-zero sets bit 0x0001
 * @return the complete preamble word after eia608_parity has been applied
 *
 * Parity is applied to the fully assembled word. Previously the attribute and
 * underline bits were OR-ed in after eia608_parity had already run, so the
 * parity of the low byte did not cover them.
 */
static inline uint16_t eia608_row_pramble (int row, int chan, int x, int underline)
{
    // Row code is split across the word: bit 0 goes to 0x0020, bits 1-3 to 0x0700.
    const int row_code = eia608_reverse_row_map[row & 0x0F];
    const int address = 0x1040 | (chan ? 0x0800 : 0x0000) | ( (row_code << 7) & 0x0700) | ( (row_code << 5) & 0x0020);
    const int attributes = ( (x << 1) & 0x001E) | (underline ? 0x0001 : 0x0000);
    return eia608_parity (address | attributes);
}
/**
 * Builds a preamble that positions the cursor at a row and column.
 *
 * @param row        caption row
 * @param col        column; encoded in steps of four (col / 4)
 * @param chan       channel selector forwarded to eia608_row_pramble
 * @param underline  non-zero requests underline
 * @return the preamble word produced by eia608_row_pramble
 *
 * eia608_row_pramble keeps only (x << 1) & 0x001E, and eia608_parse_preamble
 * recognises a column preamble by bit 0x0010 of the word. The column flag
 * therefore has to be bit 3 (0x08) of the attribute field; the previous 0x10
 * was shifted to 0x20 and masked away, so the column was emitted as a style.
 */
uint16_t eia608_row_column_pramble (int row, int col, int chan, int underline)
{
    return eia608_row_pramble (row, chan, 0x08 | (col / 4), underline);
}
/**
 * Builds a preamble that selects a row and a text style.
 * The style value is handed to eia608_row_pramble unchanged as its
 * attribute field.
 */
uint16_t eia608_row_style_pramble (int row, eia608_style_t style, int chan, int underline)
{
    const int attribute = style;
    return eia608_row_pramble (row, chan, attribute, underline);
}
/**
 * Splits a preamble word into its fields.
 *
 * @param cc_data    the preamble word
 * @param row        out: caption row looked up in eia608_row_map
 * @param col        out: 4 * (3-bit field) when bit 0x0010 is set, else 0
 * @param style      out: eia608_style_white when bit 0x0010 is set, else the
 *                   3-bit field taken as a style
 * @param chan       out: 1 if bit 0x0800 is set, otherwise 0
 * @param underline  out: bit 0x0001
 * @return always 1
 */
int eia608_parse_preamble (uint16_t cc_data, int* row, int* col, eia608_style_t* style, int* chan, int* underline)
{
    // The row code has its low bit at 0x0020 and its upper three bits at 0x0700.
    const int row_code = ( (cc_data & 0x0700) >> 7) | ( (cc_data & 0x0020) >> 5);
    const int field = (cc_data & 0x000E) >> 1;
    const int has_column = (cc_data & 0x0010) != 0;

    *row = eia608_row_map[row_code];
    *chan = (cc_data & 0x0800) ? 1 : 0;
    *underline = cc_data & 0x0001;

    if (has_column) {
        *style = eia608_style_white;
        *col = 4 * field;
        return 1;
    }

    *style = field;
    *col = 0;
    return 1;
}
/**
 * Extracts channel, style and underline from a mid-row change word.
 *
 * @param cc_data    the control word
 * @param chan       out: 1 if bit 0x0800 is set, otherwise 0 (always written)
 * @param style      out: 3-bit style field; written only when
 *                   (cc_data & 0x7770) == 0x1120
 * @param underline  out: bit 0x0001; written only under the same condition
 * @return always 1, including when the word does not match and style and
 *         underline are left untouched
 */
int eia608_parse_midrowchange (uint16_t cc_data, int* chan, eia608_style_t* style, int* underline)
{
    *chan = (cc_data & 0x0800) ? 1 : 0;

    if (0x1120 != (cc_data & 0x7770)) {
        return 1;
    }

    *style = (cc_data & 0x000E) >> 1;
    *underline = cc_data & 0x0001;
    return 1;
}
////////////////////////////////////////////////////////////////////////////////
// control command
/**
 * Decodes a control word into a command and a selector value.
 *
 * @param cc_data  the control word
 * @param cc       out: bit 0 mirrors 0x0800 of cc_data; when 0x0200 is clear,
 *                 bit 1 additionally mirrors 0x0100
 * @return cc_data masked with 0x177F when 0x0200 is set, otherwise masked
 *         with 0x167F (0x0100 removed because it was reported through cc)
 */
eia608_control_t eia608_parse_control (uint16_t cc_data, int* cc)
{
    const int from_0800 = (cc_data & 0x0800) ? 0x01 : 0x00;

    if (! (cc_data & 0x0200)) {
        const int from_0100 = (cc_data & 0x0100) ? 0x02 : 0x00;
        *cc = from_0800 | from_0100;
        return (eia608_control_t) (cc_data & 0x167F);
    }

    *cc = from_0800;
    return (eia608_control_t) (cc_data & 0x177F);
}
/**
 * Encodes a control command together with a selector value.
 *
 * @param cmd  the command to send
 * @param cc   bit 0 sets 0x0800 in the result; bit 1 sets 0x0100, except when
 *             (cmd & 0xFFC0) equals eia608_tab_offset_0, where bit 1 is ignored
 * @return the combined word after eia608_parity has been applied
 */
uint16_t eia608_control_command (eia608_control_t cmd, int cc)
{
    uint16_t selector = (cc & 0x01) ? 0x0800 : 0x0000;

    // Commands in the tab-offset group never receive the 0x0100 bit.
    if (eia608_tab_offset_0 != (eia608_control_t) (cmd & 0xFFC0)) {
        selector |= (cc & 0x02) ? 0x0100 : 0x0000;
    }

    return eia608_parity (cmd | selector);
}
////////////////////////////////////////////////////////////////////////////////
// text
// Returns the eia608_char_map entry for idx, or an empty string when idx is
// negative or not below EIA608_CHAR_COUNT.
static const char* utf8_from_index (int idx)
{
    if (0 > idx || EIA608_CHAR_COUNT <= idx) {
        return "";
    }

    return eia608_char_map[idx];
}
/**
 * Converts a character word into character-table indices.
 *
 * @param cc_data  the word; parity bits (0x8080) are ignored
 * @param chan     out: 0 for basic characters; otherwise cc_data & 0x0800,
 *                 i.e. 0 or 0x0800 (not normalised to 0/1)
 * @param c1       out: index of the first character, or -1
 * @param c2       out: index of the second character, or -1
 * @return number of characters decoded: 2, 1 or 0
 */
static int eia608_to_index (uint16_t cc_data, int* chan, int* c1, int* c2)
{
    uint16_t word = cc_data & 0x7F7F; // drop the parity bits

    *c2 = -1;
    *c1 = -1;
    *chan = 0;

    // Basic characters are tested while bit 0x0800 is still in place.
    if (eia608_is_basicna (word)) {
        const uint16_t second = word & 0x00FF;
        *c1 = (word >> 8) - 0x20;

        // The low byte is a second character only if it is printable.
        if (0x0020 > second || 0x0080 <= second) {
            return 1;
        }

        *c2 = second - 0x20;
        return 2;
    }

    // Report bit 0x0800, then clear it so the range checks below see one set.
    *chan = word & 0x0800;
    word &= 0xF7FF;

    if (eia608_is_specialna (word)) {
        // Special North American characters: table slots from 0x60.
        *c1 = word - 0x1130 + 0x60;
        return 1;
    }

    if (0x1220 <= word && 0x1240 > word) {
        // Extended Western European, Spanish/Miscellaneous/French: slots from 0x70.
        *c1 = word - 0x1220 + 0x70;
        return 1;
    }

    if (0x1320 <= word && 0x1340 > word) {
        // Extended Western European, Portuguese/German/Danish: slots from 0x90.
        *c1 = word - 0x1320 + 0x90;
        return 1;
    }

    return 0;
}
/**
 * Converts a character word into up to two UTF-8 strings.
 *
 * @param c     the word to decode
 * @param chan  out: channel value as reported by eia608_to_index
 * @param str1  out: first character; must have room for 5 bytes
 * @param str2  out: second character, or an empty string; must have room
 *              for 5 bytes
 * @return number of characters decoded (0, 1 or 2)
 *
 * All 5 bytes of each output buffer are written. At most 4 bytes of character
 * data are copied and the fifth byte is always set to NUL, because strncpy on
 * its own leaves the destination unterminated when the source fills the
 * whole count.
 */
int eia608_to_utf8 (uint16_t c, int* chan, char* str1, char* str2)
{
    int c1, c2;
    int size = eia608_to_index (c, chan, &c1, &c2);

    strncpy (str1, utf8_from_index (c1), 4);
    str1[4] = '\0';
    strncpy (str2, utf8_from_index (c2), 4);
    str2[4] = '\0';

    return size;
}
/**
 * Packs two basic characters into a single word.
 *
 * @param bna1  first character, carried in its high byte
 * @param bna2  second character, carried in its high byte
 * @return the high byte of bna1 over the high byte of bna2, after
 *         eia608_parity; 0 if either argument fails eia608_is_basicna
 */
uint16_t eia608_from_basicna (uint16_t bna1, uint16_t bna2)
{
    if (eia608_is_basicna (bna1) && eia608_is_basicna (bna2)) {
        const uint16_t upper = bna1 & 0xFF00;
        const uint16_t lower = (bna2 & 0xFF00) >> 8;
        return eia608_parity (upper | lower);
    }

    return 0;
}
// Prototype for the re2c-generated function defined at the bottom of this
// file. It maps the UTF-8 character at the start of s to a 16-bit code and
// returns 0 when s is null or no rule matches.
uint16_t _eia608_from_utf8 (const utf8_char_t* s);
/**
 * Encodes one UTF-8 character as a word.
 *
 * @param c     UTF-8 character to encode
 * @param chan  non-zero sets bit 0x0800, but only on words that fail
 *              eia608_is_basicna
 * @return the word after eia608_parity, or 0 when the character has no mapping
 */
uint16_t eia608_from_utf8_1 (const utf8_char_t* c, int chan)
{
    uint16_t word = _eia608_from_utf8 (c);

    if (0 != word) {
        if (chan && ! eia608_is_basicna (word)) {
            word |= 0x0800;
        }

        word = eia608_parity (word);
    }

    return word;
}
/**
 * Encodes two UTF-8 characters as a single word.
 * Each character is looked up with _eia608_from_utf8 and the pair is combined
 * by eia608_from_basicna, which returns 0 unless both are basic characters.
 */
uint16_t eia608_from_utf8_2 (const utf8_char_t* c1, const utf8_char_t* c2)
{
    const uint16_t first = _eia608_from_utf8 (c1);
    const uint16_t second = _eia608_from_utf8 (c2);

    return eia608_from_basicna (first, second);
}
////////////////////////////////////////////////////////////////////////////////
void eia608_dump (uint16_t cc_data)
{
eia608_style_t style;
const char* text = 0;
char char1[5], char2[5];
char1[0] = char2[0] = 0;
int row, col, chan, underline;
if (!eia608_parity_varify (cc_data)) {
text = "parity failed";
} else if (0 == eia608_parity_strip (cc_data)) {
text = "pad";
} else if (eia608_is_basicna (cc_data)) {
text = "basicna";
eia608_to_utf8 (cc_data,&chan,&char1[0],&char2[0]);
} else if (eia608_is_specialna (cc_data)) {
text = "specialna";
eia608_to_utf8 (cc_data,&chan,&char1[0],&char2[0]);
} else if (eia608_is_westeu (cc_data)) {
text = "westeu";
eia608_to_utf8 (cc_data,&chan,&char1[0],&char2[0]);
} else if (eia608_is_xds (cc_data)) {
text = "xds";
} else if (eia608_is_midrowchange (cc_data)) {
text = "midrowchange";
} else if (eia608_is_norpak (cc_data)) {
text = "norpak";
} else if (eia608_is_preamble (cc_data)) {
text = "preamble";
eia608_parse_preamble (cc_data, &row, &col, &style, &chan, &underline);
fprintf (stderr,"preamble %d %d %d %d %d\n", row, col, style, chan, underline);
} else if (eia608_is_control (cc_data)) {
switch (eia608_parse_control (cc_data,&chan)) {
default: text = "unknown_control"; break;
case eia608_tab_offset_0: text = "eia608_tab_offset_0"; break;
case eia608_tab_offset_1: text = "eia608_tab_offset_1"; break;
case eia608_tab_offset_2:text = "eia608_tab_offset_2"; break;
case eia608_tab_offset_3: text = "eia608_tab_offset_3"; break;
case eia608_control_resume_caption_loading: text = "eia608_control_resume_caption_loading"; break;
case eia608_control_backspace: text = "eia608_control_backspace"; break;
case eia608_control_alarm_off: text = "eia608_control_alarm_off"; break;
case eia608_control_alarm_on: text = "eia608_control_alarm_on"; break;
case eia608_control_delete_to_end_of_row: text = "eia608_control_delete_to_end_of_row"; break;
case eia608_control_roll_up_2: text = "eia608_control_roll_up_2"; break;
case eia608_control_roll_up_3: text = "eia608_control_roll_up_3"; break;
case eia608_control_roll_up_4: text = "eia608_control_roll_up_4"; break;
case eia608_control_resume_direct_captioning: text = "eia608_control_resume_direct_captioning"; break;
case eia608_control_text_restart: text = "eia608_control_text_restart"; break;
case eia608_control_text_resume_text_display: text = "eia608_control_text_resume_text_display"; break;
case eia608_control_erase_display_memory: text = "eia608_control_erase_display_memory"; break;
case eia608_control_carriage_return: text = "eia608_control_carriage_return"; break;
case eia608_control_erase_non_displayed_memory:text = "eia608_control_erase_non_displayed_memory"; break;
case eia608_control_end_of_caption: text = "eia608_control_end_of_caption"; break;
}
} else {
text = "unhandled";
}
fprintf (stderr,"cc %04X (%04X) '%s' '%s' (%s)\n", cc_data, eia608_parity_strip (cc_data), char1, char2, text);
}
////////////////////////////////////////////////////////////////////////////////
// below this line is re2c
// Maps the UTF-8 character at the start of s to a 16-bit code.
// The body is an re2c specification: each rule pairs a byte sequence with the
// code to return. Returns 0x0000 when s is null, for the NUL byte, for DEL
// (0x7F), and for any input that matches no rule. Printable ASCII bytes that
// are not listed as exceptions are returned in the high byte with a zero low
// byte. No parity is applied here; the callers in this file do that.
uint16_t _eia608_from_utf8 (const utf8_char_t* s)
{
const unsigned char* YYMARKER; // needed by default rule
const unsigned char* YYCURSOR = (const unsigned char*) s;
if (0==s) { return 0x0000;}
/*!re2c
re2c:yyfill:enable = 0;
re2c:indent:string = " ";
re2c:define:YYCTYPE = "unsigned char";
/*ASCII exceptions*/
"\x00" { /*NULL*/ return 0x0000; }
"\x27" { /*APOSTROPHE -> RIGHT_SINGLE_QUOTATION_MARK*/ return 0x1229; }
"\x2A" { /*ASTERISK*/ return 0x1228; }
"\x5C" { /*REVERSE_SOLIDUS*/ return 0x132B; }
"\x5E" { /*CIRCUMFLEX_ACCENT*/ return 0x132C; }
"\x5F" { /*LOW_LINE*/ return 0x132D; }
/*Let's map this to a LEFT_SINGLE_QUOTATION_MARK, just so we have a cc_data for every printable ASCII value*/
"\x60" { /*GRAVE_ACCENT, No equivalent return 0x0000; return 1;*/ /*LEFT_SINGLE_QUOTATION_MARK*/ return 0x1226; }
"\x7B" { /*LEFT_CURLY_BRACKET*/ return 0x1329; }
"\x7C" { /*VERTICAL_LINE*/ return 0x132E; }
"\x7D" { /*RIGHT_CURLY_BRACKET*/ return 0x132A; }
"\x7E" { /*TILDE*/ return 0x132F; }
/*There is a control equivalent. Haven't decided if we want to handle that here, or not*/
"\x7F" { /*DEL/BACKSPACE. Need to set bits 9 and 12! return 0x1421;*/ return 0x0000; }
/* Rules are processed top to bottom, so all single-byte chars MUST be above this line!*/
[\x20-\x7F] { /*ASCII range*/ return (s[0]<<8) &0xFF00; } /* Should we use yych instead of s[0]?*/
/*This is the second half of the ASCII exceptions*/
"\xC3\xA1" { /*LATIN_SMALL_LETTER_A_WITH_ACUTE*/ return 0x2A00; }
"\xC3\xA9" { /*LATIN_SMALL_LETTER_E_WITH_ACUTE*/ return 0x5C00; }
"\xC3\xAD" { /*LATIN_SMALL_LETTER_I_WITH_ACUTE*/ return 0x5E00; }
"\xC3\xB3" { /*LATIN_SMALL_LETTER_O_WITH_ACUTE*/ return 0x5F00; }
"\xC3\xBA" { /*LATIN_SMALL_LETTER_U_WITH_ACUTE*/ return 0x6000; }
"\xC3\xA7" { /*LATIN_SMALL_LETTER_C_WITH_CEDILLA*/ return 0x7B00; }
"\xC3\xB7" { /*DIVISION_SIGN*/ return 0x7C00; }
"\xC3\x91" { /*LATIN_CAPITAL_LETTER_N_WITH_TILDE*/ return 0x7D00; }
"\xC3\xB1" { /*LATIN_SMALL_LETTER_N_WITH_TILDE*/ return 0x7E00; }
"\xE2\x96\x88" { /*FULL_BLOCK*/ return 0x7F00; }
/*Special North American character set*/
"\xC2\xAE" { /*REGISTERED_SIGN*/ return 0x1130; }
"\xC2\xB0" { /*DEGREE_SIGN*/ return 0x1131; }
"\xC2\xBD" { /*VULGAR_FRACTION_ONE_HALF*/ return 0x1132; }
"\xC2\xBF" { /*INVERTED_QUESTION_MARK*/ return 0x1133; }
"\xE2\x84\xA2" { /*TRADE_MARK_SIGN*/ return 0x1134; }
"\xC2\xA2" { /*CENT_SIGN*/ return 0x1135; }
"\xC2\xA3" { /*POUND_SIGN*/ return 0x1136; }
"\xE2\x99\xAA" { /*EIGHTH_NOTE*/ return 0x1137; }
"\xC3\xA0" { /*LATIN_SMALL_LETTER_A_WITH_GRAVE*/ return 0x1138; }
"\xC2\xA0" { /*NO_BREAK_SPACE*/ return 0x1139; }
"\xC3\xA8" { /*LATIN_SMALL_LETTER_E_WITH_GRAVE*/ return 0x113A; }
"\xC3\xA2" { /*LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX*/ return 0x113B; }
"\xC3\xAA" { /*LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX*/ return 0x113C; }
"\xC3\xAE" { /*LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX*/ return 0x113D; }
"\xC3\xB4" { /*LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX*/ return 0x113E; }
"\xC3\xBB" { /*LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX*/ return 0x113F; }
/*Extended Spanish/Miscellaneous*/
"\xC3\x81" { /*LATIN_CAPITAL_LETTER_A_WITH_ACUTE*/ return 0x1220; }
"\xC3\x89" { /*LATIN_CAPITAL_LETTER_E_WITH_ACUTE*/ return 0x1221; }
"\xC3\x93" { /*LATIN_CAPITAL_LETTER_O_WITH_ACUTE*/ return 0x1222; }
"\xC3\x9A" { /*LATIN_CAPITAL_LETTER_U_WITH_ACUTE*/ return 0x1223; }
"\xC3\x9C" { /*LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS*/ return 0x1224; }
"\xC3\xBC" { /*LATIN_SMALL_LETTER_U_WITH_DIAERESIS*/ return 0x1225; }
"\xE2\x80\x98" { /*LEFT_SINGLE_QUOTATION_MARK*/ return 0x1226; }
"\xC2\xA1" { /*INVERTED_EXCLAMATION_MARK*/ return 0x1227; }
/*ASTERISK handled in ASCII mapping*/
"\xE2\x80\x99" { /*RIGHT_SINGLE_QUOTATION_MARK -> APOSTROPHE*/ return 0x2700; }
"\xE2\x80\x94" { /*EM_DASH*/ return 0x122A; }
"\xC2\xA9" { /*COPYRIGHT_SIGN*/ return 0x122B; }
"\xE2\x84\xA0" { /*SERVICE_MARK*/ return 0x122C; }
"\xE2\x80\xA2" { /*BULLET*/ return 0x122D; }
"\xE2\x80\x9C" { /*LEFT_DOUBLE_QUOTATION_MARK*/ return 0x122E; }
"\xE2\x80\x9D" { /*RIGHT_DOUBLE_QUOTATION_MARK*/ return 0x122F; }
/*Extended French*/
"\xC3\x80" { /*LATIN_CAPITAL_LETTER_A_WITH_GRAVE*/ return 0x1230; }
"\xC3\x82" { /*LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX*/ return 0x1231; }
"\xC3\x87" { /*LATIN_CAPITAL_LETTER_C_WITH_CEDILLA*/ return 0x1232; }
"\xC3\x88" { /*LATIN_CAPITAL_LETTER_E_WITH_GRAVE*/ return 0x1233; }
"\xC3\x8A" { /*LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX*/ return 0x1234; }
"\xC3\x8B" { /*LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS*/ return 0x1235; }
"\xC3\xAB" { /*LATIN_SMALL_LETTER_E_WITH_DIAERESIS*/ return 0x1236; }
"\xC3\x8E" { /*LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX*/ return 0x1237; }
"\xC3\x8F" { /*LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS*/ return 0x1238; }
"\xC3\xAF" { /*LATIN_SMALL_LETTER_I_WITH_DIAERESIS*/ return 0x1239; }
"\xC3\x94" { /*LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX*/ return 0x123A; }
"\xC3\x99" { /*LATIN_CAPITAL_LETTER_U_WITH_GRAVE*/ return 0x123B; }
"\xC3\xB9" { /*LATIN_SMALL_LETTER_U_WITH_GRAVE*/ return 0x123C; }
"\xC3\x9B" { /*LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX*/ return 0x123D; }
"\xC2\xAB" { /*LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK*/ return 0x123E; }
"\xC2\xBB" { /*RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK*/ return 0x123F; }
/*Portuguese*/
"\xC3\x83" { /*LATIN_CAPITAL_LETTER_A_WITH_TILDE*/ return 0x1320; }
"\xC3\xA3" { /*LATIN_SMALL_LETTER_A_WITH_TILDE*/ return 0x1321; }
"\xC3\x8D" { /*LATIN_CAPITAL_LETTER_I_WITH_ACUTE*/ return 0x1322; }
"\xC3\x8C" { /*LATIN_CAPITAL_LETTER_I_WITH_GRAVE*/ return 0x1323; }
"\xC3\xAC" { /*LATIN_SMALL_LETTER_I_WITH_GRAVE*/ return 0x1324; }
"\xC3\x92" { /*LATIN_CAPITAL_LETTER_O_WITH_GRAVE*/ return 0x1325; }
"\xC3\xB2" { /*LATIN_SMALL_LETTER_O_WITH_GRAVE*/ return 0x1326; }
"\xC3\x95" { /*LATIN_CAPITAL_LETTER_O_WITH_TILDE*/ return 0x1327; }
"\xC3\xB5" { /*LATIN_SMALL_LETTER_O_WITH_TILDE*/ return 0x1328; }
/*LEFT_CURLY_BRACKET handled in ASCII mapping*/
/*RIGHT_CURLY_BRACKET handled in ASCII mapping*/
/*REVERSE_SOLIDUS handled in ASCII mapping*/
/*CIRCUMFLEX_ACCENT handled in ASCII mapping*/
/*LOW_LINE handled in ASCII mapping*/
/*VERTICAL_LINE handled in ASCII mapping*/
/*TILDE handled in ASCII mapping*/
/*German/Danish*/
"\xC3\x84" { /*LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS*/ return 0x1330; }
"\xC3\xA4" { /*LATIN_SMALL_LETTER_A_WITH_DIAERESIS*/ return 0x1331; }
"\xC3\x96" { /*LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS*/ return 0x1332; }
"\xC3\xB6" { /*LATIN_SMALL_LETTER_O_WITH_DIAERESIS*/ return 0x1333; }
"\xC3\x9F" { /*LATIN_SMALL_LETTER_SHARP_S*/ return 0x1334; }
"\xC2\xA5" { /*YEN_SIGN*/ return 0x1335; }
"\xC2\xA4" { /*CURRENCY_SIGN*/ return 0x1336; }
"\xC2\xA6" { /*BROKEN_BAR*/ return 0x1337; }
"\xC3\x85" { /*LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE*/ return 0x1338; }
"\xC3\xA5" { /*LATIN_SMALL_LETTER_A_WITH_RING_ABOVE*/ return 0x1339; }
"\xC3\x98" { /*LATIN_CAPITAL_LETTER_O_WITH_STROKE*/ return 0x133A; }
"\xC3\xB8" { /*LATIN_SMALL_LETTER_O_WITH_STROKE*/ return 0x133B; }
"\xE2\x94\x8C" { /*EIA608_CHAR_BOX_DRAWINGS_LIGHT_DOWN_AND_RIGHT*/ return 0x133C; }
"\xE2\x94\x90" { /*EIA608_CHAR_BOX_DRAWINGS_LIGHT_DOWN_AND_LEFT*/ return 0x133D; }
"\xE2\x94\x94" { /*EIA608_CHAR_BOX_DRAWINGS_LIGHT_UP_AND_RIGHT*/ return 0x133E; }
"\xE2\x94\x98" { /*EIA608_CHAR_BOX_DRAWINGS_LIGHT_UP_AND_LEFT*/ return 0x133F; }
/*Default rule*/
[^] { /*DEFAULT_RULE*/ return 0x0000; }
*/
}