/* gdkanji.c (Kanji code converter) */
/* written by Masahito Yamaga (ma@yama-ga.com) */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gd.h"
#include "gdhelpers.h"
#include "gd_errors.h"
#include <errno.h>
#include <stdarg.h>
#if defined(HAVE_ICONV_H)
#include <iconv.h>
#endif

#ifndef HAVE_ICONV_T_DEF
typedef void *iconv_t;
#endif

#ifndef HAVE_ICONV
#define ICONV_CONST /**/

/*
 * Fallback stubs used when the platform provides no iconv.  They keep the
 * iconv entry points linkable; no conversion is ever performed.
 */
iconv_t iconv_open (const char *tocode, const char *fromcode);
size_t iconv (iconv_t cd, ICONV_CONST char **inbuf, size_t *inbytesleft,
              char **outbuf, size_t *outbytesleft);
int iconv_close (iconv_t cd);

/*
 * Always fails: returns (iconv_t)-1 and sets errno to EINVAL, so callers
 * that inspect errno after a failed open see "conversion not supported"
 * instead of a stale value.
 */
iconv_t
iconv_open (const char *tocode, const char *fromcode)
{
	(void)tocode;
	(void)fromcode;
	errno = EINVAL;
	return (iconv_t) (-1);
}

/* Does nothing and returns 0; all arguments are ignored. */
size_t
iconv (iconv_t cd, ICONV_CONST char **inbuf, size_t *inbytesleft,
       char **outbuf, size_t *outbytesleft)
{
	(void)cd;
	(void)inbuf;
	(void)inbytesleft;
	(void)outbuf;
	(void)outbytesleft;
	return 0;
}

/* Does nothing and returns 0. */
int
iconv_close (iconv_t cd)
{
	(void)cd;
	return 0;
}

#endif /* !HAVE_ICONV */
#define LIBNAME "any2eucjp()"
#if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
#ifndef SJISPRE
#define SJISPRE 1
#endif
#endif
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif
#define TRUE 1
#define FALSE 0
#define NEW 1
#define OLD 2
#define ESCI 3
#define NEC 4
#define EUC 5
#define SJIS 6
#define EUCORSJIS 7
#define ASCII 8
#define NEWJISSTR "JIS7"
#define OLDJISSTR "jis"
#define EUCSTR "eucJP"
#define SJISSTR "SJIS"
#define ESC 27
#define SS2 142
/* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */
2006-04-05 08:42:56 -07:00
static int
DetectKanjiCode (const unsigned char *str)
2006-04-05 08:38:05 -07:00
{
2013-04-03 05:23:11 -07:00
static int whatcode = ASCII;
int oldcode = ASCII;
int c, i;
char *lang = NULL;
c = '\1';
i = 0;
if (whatcode != EUCORSJIS && whatcode != ASCII) {
oldcode = whatcode;
whatcode = ASCII;
}
while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0') {
if ((c = str[i++]) != '\0') {
if (c == ESC) {
c = str[i++];
if (c == '$') {
c = str[i++];
if (c == 'B')
whatcode = NEW;
else if (c == '@')
whatcode = OLD;
} else if (c == '(') {
c = str[i++];
if (c == 'I')
whatcode = ESCI;
} else if (c == 'K')
whatcode = NEC;
} else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
whatcode = SJIS;
else if (c == SS2) {
c = str[i++];
if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160)
|| (c >= 224 && c <= 252))
whatcode = SJIS;
else if (c >= 161 && c <= 223)
whatcode = EUCORSJIS;
} else if (c >= 161 && c <= 223) {
c = str[i++];
if (c >= 240 && c <= 254)
whatcode = EUC;
else if (c >= 161 && c <= 223)
whatcode = EUCORSJIS;
else if (c >= 224 && c <= 239) {
whatcode = EUCORSJIS;
while (c >= 64 && whatcode == EUCORSJIS) {
2013-04-03 05:23:11 -07:00
if (c >= 129) {
if (c <= 141 || (c >= 143 && c <= 159))
whatcode = SJIS;
else if (c >= 253 && c <= 254)
whatcode = EUC;
}
c = str[i++];
}
} else if (c <= 159)
whatcode = SJIS;
} else if (c >= 240 && c <= 254)
whatcode = EUC;
else if (c >= 224 && c <= 239) {
c = str[i++];
if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
whatcode = SJIS;
else if (c >= 253 && c <= 254)
whatcode = EUC;
else if (c >= 161 && c <= 252)
whatcode = EUCORSJIS;
2006-04-05 08:38:05 -07:00
}
}
}
#ifdef DEBUG
2013-04-03 05:23:11 -07:00
if (whatcode == ASCII)
gd_error_ex(GD_DEBUG, "Kanji code not included.");
2013-04-03 05:23:11 -07:00
else if (whatcode == EUCORSJIS)
gd_error_ex(GD_DEBUG, "Kanji code not detected.");
2013-04-03 05:23:11 -07:00
else
gd_error_ex(GD_DEBUG, "Kanji code detected at %d byte.", i);
2006-04-05 08:38:05 -07:00
#endif
2013-04-03 05:23:11 -07:00
if (whatcode == EUCORSJIS && oldcode != ASCII)
whatcode = oldcode;
if (whatcode == EUCORSJIS) {
if (getenv ("LC_ALL"))
lang = getenv ("LC_ALL");
else if (getenv ("LC_CTYPE"))
lang = getenv ("LC_CTYPE");
else if (getenv ("LANG"))
lang = getenv ("LANG");
if (lang) {
if (strcmp (lang, "ja_JP.SJIS") == 0 ||
2006-04-05 08:38:05 -07:00
#ifdef hpux
2013-04-03 05:23:11 -07:00
strcmp (lang, "japanese") == 0 ||
2006-04-05 08:38:05 -07:00
#endif
2013-04-03 05:23:11 -07:00
strcmp (lang, "ja_JP.mscode") == 0 ||
strcmp (lang, "ja_JP.PCK") == 0)
whatcode = SJIS;
else if (strncmp (lang, "ja", 2) == 0)
2006-04-05 08:38:05 -07:00
#ifdef SJISPRE
2013-04-03 05:23:11 -07:00
whatcode = SJIS;
2006-04-05 08:38:05 -07:00
#else
2013-04-03 05:23:11 -07:00
whatcode = EUC;
2006-04-05 08:38:05 -07:00
#endif
2013-04-03 05:23:11 -07:00
}
2006-04-05 08:38:05 -07:00
}
2013-04-03 05:23:11 -07:00
if (whatcode == EUCORSJIS)
2006-04-05 08:38:05 -07:00
#ifdef SJISPRE
2013-04-03 05:23:11 -07:00
whatcode = SJIS;
2006-04-05 08:38:05 -07:00
#else
2013-04-03 05:23:11 -07:00
whatcode = EUC;
2006-04-05 08:38:05 -07:00
#endif
2013-04-03 05:23:11 -07:00
return whatcode;
2006-04-05 08:38:05 -07:00
}
/* SJIStoJIS() is sjis2jis() by Ken Lunde. */

/*
 * Convert a two-byte Shift_JIS character to its JIS row/cell pair in place.
 *
 * On entry *p1 and *p2 hold the first and second Shift_JIS byte; on return
 * they hold the first and second JIS byte.  Only the low 8 bits of each
 * input take part in the range tests.
 */
static void
SJIStoJIS (int *p1, int *p2)
{
	unsigned char c1 = *p1;
	unsigned char c2 = *p2;
	/* Second bytes below 159 belong to the first of the two JIS rows that
	   share one Shift_JIS lead byte. */
	int adjust = c2 < 159;
	int rowOffset = c1 < 160 ? 112 : 176;
	int cellOffset = adjust ? (31 + (c2 > 127)) : 126;

	*p1 = ((c1 - rowOffset) << 1) - adjust;
	*p2 -= cellOffset;
}
/* han2zen() was derived from han2zen() written by Ken Lunde. */
#define IS_DAKU(c) (((c) >= 182 && (c) <= 196) || ((c) >= 202 && (c) <= 206) || ((c) == 179))
#define IS_HANDAKU(c) ((c) >= 202 && (c) <= 206)

/*
 * Convert a Hankaku (half-width) kana byte to a Zenkaku (full-width)
 * two-byte code.
 *
 * On entry *p1 is the half-width byte (161..223) and *p2 is the byte that
 * follows it: 222 (Daku-ten), 223 (Han-daku-ten) or anything else for
 * "no sound mark".  On return *p1 / *p2 hold the two bytes looked up in
 * the table, adjusted for the sound mark when the kana accepts one.
 *
 * If *p1 is outside 161..223 there is no table entry; both values are left
 * untouched rather than indexing outside the table.
 */
static void
han2zen (int *p1, int *p2)
{
	/* One entry per half-width code, starting at 161. */
	static const unsigned char mtable[][2] = {
		{129, 66}, {129, 117}, {129, 118}, {129, 65}, {129, 69},
		{131, 146}, {131, 64}, {131, 66}, {131, 68}, {131, 70},
		{131, 72}, {131, 131}, {131, 133}, {131, 135}, {131, 98},
		{129, 91}, {131, 65}, {131, 67}, {131, 69}, {131, 71},
		{131, 73}, {131, 74}, {131, 76}, {131, 78}, {131, 80},
		{131, 82}, {131, 84}, {131, 86}, {131, 88}, {131, 90},
		{131, 92}, {131, 94}, {131, 96}, {131, 99}, {131, 101},
		{131, 103}, {131, 105}, {131, 106}, {131, 107}, {131, 108},
		{131, 109}, {131, 110}, {131, 113}, {131, 116}, {131, 119},
		{131, 122}, {131, 125}, {131, 126}, {131, 128}, {131, 129},
		{131, 130}, {131, 132}, {131, 134}, {131, 136}, {131, 137},
		{131, 138}, {131, 139}, {131, 140}, {131, 141}, {131, 143},
		{131, 147}, {129, 74}, {129, 75}
	};
	const int first = 161;
	const int count = (int) (sizeof (mtable) / sizeof (mtable[0]));
	int c = *p1;
	int daku;
	int handaku;

	if (c < first || c >= first + count) {
		return;		/* not a half-width kana: nothing to look up */
	}

	daku = (*p2 == 222 && IS_DAKU (c));		/* Daku-ten */
	handaku = (!daku && *p2 == 223 && IS_HANDAKU (c));	/* Han-daku-ten */

	*p1 = mtable[c - first][0];
	*p2 = mtable[c - first][1];

	if (daku) {
		if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122)) {
			(*p2)++;
		} else if (*p2 == 131 || *p2 == 69) {
			*p2 = 148;
		}
	} else if (handaku && *p2 >= 110 && *p2 <= 122) {
		(*p2) += 2;
	}
}
2006-04-05 08:41:55 -07:00
/* Recast strcpy to handle unsigned chars used below. */
2006-04-05 08:41:55 -07:00
#define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
#define ustrncpy(A,B, maxsize) (strncpy((char*)(A),(const char*)(B), maxsize))
2006-04-05 08:42:56 -07:00
static void
do_convert (unsigned char **to_p, const unsigned char **from_p, const char *code)
2006-04-05 08:38:05 -07:00
{
2013-04-03 05:23:11 -07:00
unsigned char *to = *to_p;
const unsigned char *from = *from_p;
2006-04-05 08:38:05 -07:00
#ifdef HAVE_ICONV
2013-04-03 05:23:11 -07:00
iconv_t cd;
size_t from_len, to_len;
2006-04-05 08:38:05 -07:00
2013-04-03 05:23:11 -07:00
if ((cd = iconv_open (EUCSTR, code)) == (iconv_t) - 1) {
gd_error ("iconv_open() error");
2013-04-03 05:23:11 -07:00
if (errno == EINVAL)
gd_error ("invalid code specification: \"%s\" or \"%s\"", EUCSTR, code);
2013-04-03 05:23:11 -07:00
ustrcpy (to, from);
return;
}
2006-04-05 08:38:05 -07:00
2013-04-03 05:23:11 -07:00
from_len = strlen ((const char *)from) + 1;
to_len = BUFSIZ;
2006-04-05 08:38:05 -07:00
2013-04-03 05:23:11 -07:00
if ((int) (iconv (cd, (char **)from_p, &from_len, (char **)to_p, &to_len))
== -1) {
2013-04-03 05:23:11 -07:00
if (errno == EINVAL)
gd_error ("invalid end of input string");
2013-04-03 05:23:11 -07:00
else if (errno == EILSEQ)
gd_error ("invalid code in input string");
2013-04-03 05:23:11 -07:00
else if (errno == E2BIG)
gd_error ("output buffer overflow at do_convert()");
2013-04-03 05:23:11 -07:00
else
gd_error ("something happen");
2013-04-03 05:23:11 -07:00
ustrcpy (to, from);
return;
}
if (iconv_close (cd) != 0) {
gd_error ("iconv_close() error");
2013-04-03 05:23:11 -07:00
}
2006-04-05 08:38:05 -07:00
#else
2013-04-03 05:23:11 -07:00
int p1, p2, i, j;
int jisx0208 = FALSE;
int hankaku = FALSE;
j = 0;
if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0) {
for (i = 0; from[i] != '\0' && j < BUFSIZ; i++) {
if (from[i] == ESC) {
i++;
if (from[i] == '$') {
jisx0208 = TRUE;
hankaku = FALSE;
i++;
} else if (from[i] == '(') {
jisx0208 = FALSE;
i++;
if (from[i] == 'I') /* Hankaku Kana */
hankaku = TRUE;
else
hankaku = FALSE;
}
} else {
if (jisx0208)
to[j++] = from[i] + 128;
else if (hankaku) {
to[j++] = SS2;
to[j++] = from[i] + 128;
} else
to[j++] = from[i];
}
2006-04-05 08:38:05 -07:00
}
2013-04-03 05:23:11 -07:00
} else if (strcmp (code, SJISSTR) == 0) {
for (i = 0; from[i] != '\0' && j < BUFSIZ; i++) {
p1 = from[i];
if (p1 < 127)
to[j++] = p1;
else if ((p1 >= 161) && (p1 <= 223)) {
/* Hankaku Kana */
to[j++] = SS2;
to[j++] = p1;
} else {
p2 = from[++i];
SJIStoJIS (&p1, &p2);
to[j++] = p1 + 128;
to[j++] = p2 + 128;
}
2006-04-05 08:42:56 -07:00
}
2013-04-03 05:23:11 -07:00
} else {
gd_error ("invalid code specification: \"%s\"", code);
2013-04-03 05:23:11 -07:00
return;
2006-04-05 08:38:05 -07:00
}
2013-04-03 05:23:11 -07:00
if (j >= BUFSIZ) {
gd_error ("output buffer overflow at do_convert()");
2013-04-03 05:23:11 -07:00
ustrcpy (to, from);
} else
to[j] = '\0';
2006-04-05 08:38:05 -07:00
#endif /* HAVE_ICONV */
}
2006-04-05 08:42:56 -07:00
static int
do_check_and_conv (unsigned char *to, const unsigned char *from)
2006-04-05 08:38:05 -07:00
{
2013-04-03 05:23:11 -07:00
static unsigned char tmp[BUFSIZ];
unsigned char *tmp_p = &tmp[0];
int p1, p2, i, j;
int kanji = TRUE;
int copy_string = FALSE;
2013-04-03 05:23:11 -07:00
switch (DetectKanjiCode (from)) {
case NEW:
gd_error_ex(GD_DEBUG, "Kanji code is New JIS.");
2013-04-03 05:23:11 -07:00
do_convert (&tmp_p, &from, NEWJISSTR);
break;
case OLD:
gd_error_ex(GD_DEBUG, "Kanji code is Old JIS.");
2013-04-03 05:23:11 -07:00
do_convert (&tmp_p, &from, OLDJISSTR);
break;
case ESCI:
gd_error_ex(GD_DEBUG, "This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
2013-04-03 05:23:11 -07:00
do_convert (&tmp_p, &from, NEWJISSTR);
break;
case NEC:
gd_error_ex(GD_DEBUG, "Kanji code is NEC Kanji.");
gd_error("cannot convert NEC Kanji.");
copy_string = TRUE;
2013-04-03 05:23:11 -07:00
kanji = FALSE;
break;
case EUC:
gd_error_ex(GD_DEBUG, "Kanji code is EUC.");
copy_string = TRUE;
2013-04-03 05:23:11 -07:00
break;
case SJIS:
gd_error_ex(GD_DEBUG, "Kanji code is SJIS.");
2013-04-03 05:23:11 -07:00
do_convert (&tmp_p, &from, SJISSTR);
break;
case EUCORSJIS:
gd_error_ex(GD_DEBUG, "Kanji code is EUC or SJIS.");
copy_string = TRUE;
2013-04-03 05:23:11 -07:00
kanji = FALSE;
break;
case ASCII:
gd_error_ex(GD_DEBUG, "This is ASCII string.");
copy_string = TRUE;
2013-04-03 05:23:11 -07:00
kanji = FALSE;
break;
default:
gd_error_ex(GD_DEBUG, "This string includes unknown code.");
copy_string = TRUE;
2013-04-03 05:23:11 -07:00
kanji = FALSE;
break;
2006-04-05 08:42:56 -07:00
}
2006-04-05 08:38:05 -07:00
if (copy_string) {
ustrncpy (tmp, from, BUFSIZ);
2016-06-20 20:40:06 -07:00
tmp[BUFSIZ-1] = '\0';
}
2013-04-03 05:23:11 -07:00
/* Hankaku Kana ---> Zenkaku Kana */
if (kanji) {
j = 0;
for (i = 0; tmp[i] != '\0' && j < BUFSIZ; i++) {
if (tmp[i] == SS2) {
p1 = tmp[++i];
if (tmp[i + 1] == SS2) {
p2 = tmp[i + 2];
if (p2 == 222 || p2 == 223)
i += 2;
else
p2 = 0;
} else
p2 = 0;
han2zen (&p1, &p2);
SJIStoJIS (&p1, &p2);
to[j++] = p1 + 128;
to[j++] = p2 + 128;
} else
to[j++] = tmp[i];
}
if (j >= BUFSIZ) {
gd_error("output buffer overflow at Hankaku --> Zenkaku");
2013-04-03 05:23:11 -07:00
ustrcpy (to, tmp);
} else
to[j] = '\0';
} else
ustrcpy (to, tmp);
2006-04-05 08:38:05 -07:00
2013-04-03 05:23:11 -07:00
return kanji;
2006-04-05 08:38:05 -07:00
}
/*
 * Convert the NUL-terminated string `src` (JIS, Shift_JIS, EUC-JP or
 * ASCII, auto-detected) to EUC-JP and store it in `dest`.
 *
 * dest      output buffer of `dest_max` bytes
 * src       input string; must be shorter than BUFSIZ
 * dest_max  size of `dest` in bytes; must not exceed BUFSIZ
 *
 * Returns the value of do_check_and_conv() (TRUE if the text was handled
 * as kanji, FALSE otherwise), or -1 on error.  When the converted text
 * does not fit, -1 is returned and `dest` receives as much of the
 * unconverted `src` as fits, always NUL-terminated; such a truncated copy
 * may end in the middle of a multi-byte character.
 */
int
any2eucjp (unsigned char *dest, const unsigned char *src, unsigned int dest_max)
{
	static unsigned char tmp_dest[BUFSIZ];
	int ret;

	if (dest == NULL || src == NULL) {
		gd_error("null string pointer");
		return -1;
	}
	if (strlen ((const char *) src) >= BUFSIZ) {
		gd_error("input string too large");
		return -1;
	}
	if (dest_max > BUFSIZ) {
		gd_error
		("invalid maximum size of destination\nit should be less than %d.",
		 BUFSIZ);
		return -1;
	}

	ret = do_check_and_conv (tmp_dest, src);

	if (strlen ((const char *) tmp_dest) >= dest_max) {
		gd_error("output buffer overflow");
		/* Fall back to the raw input, but never write past dest_max. */
		if (dest_max > 0) {
			size_t n = strlen ((const char *) src);

			if (n >= dest_max) {
				n = dest_max - 1;
			}
			memcpy (dest, src, n);
			dest[n] = '\0';
		}
		return -1;
	}

	ustrcpy (dest, tmp_dest);
	return ret;
}
#if 0
/*
 * Length in bytes of `s` after conversion with any2eucjp().
 * Returns 0 when the scratch buffer cannot be allocated.
 */
unsigned int
strwidth (unsigned char *s)
{
	unsigned char *t;
	unsigned int i;

	t = (unsigned char *) gdMalloc (BUFSIZ);
	if (t == NULL) {
		return 0;
	}
	/* any2eucjp() can fail before writing anything. */
	t[0] = '\0';
	any2eucjp (t, s, BUFSIZ);
	i = (unsigned int) strlen ((const char *) t);
	gdFree (t);
	return i;
}

#ifdef DEBUG
/* Debug driver: read one line from stdin, print its sizes and its
   converted form. */
int
main (void)
{
	unsigned char input[BUFSIZ];
	unsigned char *output;
	unsigned char *str;
	int c, i = 0;

	/* Stop at end of line, end of file, or when only the terminator fits. */
	while ((c = fgetc (stdin)) != '\n' && c != EOF && i < BUFSIZ - 1)
		input[i++] = (unsigned char) c;
	input[i] = '\0';

	printf ("input : %lu bytes\n",
	        (unsigned long) strlen ((const char *) input));
	printf ("output: %u bytes\n", strwidth (input));

	output = (unsigned char *) gdMalloc (BUFSIZ);
	if (output == NULL) {
		return 1;
	}
	output[0] = '\0';
	any2eucjp (output, input, BUFSIZ);
	str = output;
	while (*str != '\0')
		putchar (*(str++));
	putchar ('\n');
	gdFree (output);

	return 0;
}
#endif
#endif