libgd/src/gdkanji.c

593 lines
12 KiB
C

/* gdkanji.c (Kanji code converter) */
/* written by Masahito Yamaga (ma@yama-ga.com) */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gd.h"
#include "gdhelpers.h"
#include "gd_errors.h"
#include <errno.h>
#include <stdarg.h>
#if defined(HAVE_ICONV_H)
#include <iconv.h>
#endif
#ifndef HAVE_ICONV_T_DEF
typedef void *iconv_t;
#endif
#ifndef HAVE_ICONV
#define ICONV_CONST /**/
iconv_t iconv_open (const char *, const char *);
size_t iconv (iconv_t, ICONV_CONST char **, size_t *, char **, size_t *);
int iconv_close (iconv_t);
iconv_t
iconv_open (const char *tocode, const char *fromcode)
{
(void)tocode;
(void)fromcode;
return (iconv_t) (-1);
}
size_t
iconv (iconv_t cd, ICONV_CONST char **inbuf, size_t * inbytesleft,
char **outbuf, size_t * outbytesleft)
{
(void)cd;
(void)inbuf;
(void)inbytesleft;
(void)outbuf;
(void)outbytesleft;
return 0;
}
int
iconv_close (iconv_t cd)
{
(void)cd;
return 0;
}
#endif /* !HAVE_ICONV */
#define LIBNAME "any2eucjp()"
#if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
#ifndef SJISPRE
#define SJISPRE 1
#endif
#endif
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif
#define TRUE 1
#define FALSE 0
#define NEW 1
#define OLD 2
#define ESCI 3
#define NEC 4
#define EUC 5
#define SJIS 6
#define EUCORSJIS 7
#define ASCII 8
#define NEWJISSTR "JIS7"
#define OLDJISSTR "jis"
#define EUCSTR "eucJP"
#define SJISSTR "SJIS"
#define ESC 27
#define SS2 142
/* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */
static int
DetectKanjiCode (const unsigned char *str)
{
static int whatcode = ASCII;
int oldcode = ASCII;
int c, i;
char *lang = NULL;
c = '\1';
i = 0;
if (whatcode != EUCORSJIS && whatcode != ASCII) {
oldcode = whatcode;
whatcode = ASCII;
}
while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0') {
if ((c = str[i++]) != '\0') {
if (c == ESC) {
c = str[i++];
if (c == '$') {
c = str[i++];
if (c == 'B')
whatcode = NEW;
else if (c == '@')
whatcode = OLD;
} else if (c == '(') {
c = str[i++];
if (c == 'I')
whatcode = ESCI;
} else if (c == 'K')
whatcode = NEC;
} else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
whatcode = SJIS;
else if (c == SS2) {
c = str[i++];
if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160)
|| (c >= 224 && c <= 252))
whatcode = SJIS;
else if (c >= 161 && c <= 223)
whatcode = EUCORSJIS;
} else if (c >= 161 && c <= 223) {
c = str[i++];
if (c >= 240 && c <= 254)
whatcode = EUC;
else if (c >= 161 && c <= 223)
whatcode = EUCORSJIS;
else if (c >= 224 && c <= 239) {
whatcode = EUCORSJIS;
while (c >= 64 && whatcode == EUCORSJIS) {
if (c >= 129) {
if (c <= 141 || (c >= 143 && c <= 159))
whatcode = SJIS;
else if (c >= 253 && c <= 254)
whatcode = EUC;
}
c = str[i++];
}
} else if (c <= 159)
whatcode = SJIS;
} else if (c >= 240 && c <= 254)
whatcode = EUC;
else if (c >= 224 && c <= 239) {
c = str[i++];
if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
whatcode = SJIS;
else if (c >= 253 && c <= 254)
whatcode = EUC;
else if (c >= 161 && c <= 252)
whatcode = EUCORSJIS;
}
}
}
#ifdef DEBUG
if (whatcode == ASCII)
gd_error_ex(GD_DEBUG, "Kanji code not included.");
else if (whatcode == EUCORSJIS)
gd_error_ex(GD_DEBUG, "Kanji code not detected.");
else
gd_error_ex(GD_DEBUG, "Kanji code detected at %d byte.", i);
#endif
if (whatcode == EUCORSJIS && oldcode != ASCII)
whatcode = oldcode;
if (whatcode == EUCORSJIS) {
if (getenv ("LC_ALL"))
lang = getenv ("LC_ALL");
else if (getenv ("LC_CTYPE"))
lang = getenv ("LC_CTYPE");
else if (getenv ("LANG"))
lang = getenv ("LANG");
if (lang) {
if (strcmp (lang, "ja_JP.SJIS") == 0 ||
#ifdef hpux
strcmp (lang, "japanese") == 0 ||
#endif
strcmp (lang, "ja_JP.mscode") == 0 ||
strcmp (lang, "ja_JP.PCK") == 0)
whatcode = SJIS;
else if (strncmp (lang, "ja", 2) == 0)
#ifdef SJISPRE
whatcode = SJIS;
#else
whatcode = EUC;
#endif
}
}
if (whatcode == EUCORSJIS)
#ifdef SJISPRE
whatcode = SJIS;
#else
whatcode = EUC;
#endif
return whatcode;
}
/* SJIStoJIS() is sjis2jis() by Ken Lunde. */
static void
SJIStoJIS (int *p1, int *p2)
{
register unsigned char c1 = *p1;
register unsigned char c2 = *p2;
register int adjust = c2 < 159;
register int rowOffset = c1 < 160 ? 112 : 176;
register int cellOffset = adjust ? (31 + (c2 > 127)) : 126;
*p1 = ((c1 - rowOffset) << 1) - adjust;
*p2 -= cellOffset;
}
/* han2zen() was derived from han2zen() written by Ken Lunde. */
#define IS_DAKU(c) ((c >= 182 && c <= 196) || (c >= 202 && c <= 206) || (c == 179))
#define IS_HANDAKU(c) (c >= 202 && c <= 206)
static void
han2zen (int *p1, int *p2)
{
int c = *p1;
int daku = FALSE;
int handaku = FALSE;
int mtable[][2] = {
{129, 66},
{129, 117},
{129, 118},
{129, 65},
{129, 69},
{131, 146},
{131, 64},
{131, 66},
{131, 68},
{131, 70},
{131, 72},
{131, 131},
{131, 133},
{131, 135},
{131, 98},
{129, 91},
{131, 65},
{131, 67},
{131, 69},
{131, 71},
{131, 73},
{131, 74},
{131, 76},
{131, 78},
{131, 80},
{131, 82},
{131, 84},
{131, 86},
{131, 88},
{131, 90},
{131, 92},
{131, 94},
{131, 96},
{131, 99},
{131, 101},
{131, 103},
{131, 105},
{131, 106},
{131, 107},
{131, 108},
{131, 109},
{131, 110},
{131, 113},
{131, 116},
{131, 119},
{131, 122},
{131, 125},
{131, 126},
{131, 128},
{131, 129},
{131, 130},
{131, 132},
{131, 134},
{131, 136},
{131, 137},
{131, 138},
{131, 139},
{131, 140},
{131, 141},
{131, 143},
{131, 147},
{129, 74},
{129, 75}
};
if (*p2 == 222 && IS_DAKU (*p1))
daku = TRUE; /* Daku-ten */
else if (*p2 == 223 && IS_HANDAKU (*p1))
handaku = TRUE; /* Han-daku-ten */
*p1 = mtable[c - 161][0];
*p2 = mtable[c - 161][1];
if (daku) {
if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122))
(*p2)++;
else if (*p2 == 131 || *p2 == 69)
*p2 = 148;
} else if (handaku && *p2 >= 110 && *p2 <= 122)
(*p2) += 2;
}
/* Recast strcpy to handle unsigned chars used below. */
#define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
#define ustrncpy(A,B, maxsize) (strncpy((char*)(A),(const char*)(B), maxsize))
static void
do_convert (unsigned char **to_p, const unsigned char **from_p, const char *code)
{
unsigned char *to = *to_p;
const unsigned char *from = *from_p;
#ifdef HAVE_ICONV
iconv_t cd;
size_t from_len, to_len;
if ((cd = iconv_open (EUCSTR, code)) == (iconv_t) - 1) {
gd_error ("iconv_open() error");
if (errno == EINVAL)
gd_error ("invalid code specification: \"%s\" or \"%s\"", EUCSTR, code);
ustrcpy (to, from);
return;
}
from_len = strlen ((const char *)from) + 1;
to_len = BUFSIZ;
if ((int) (iconv (cd, (char **)from_p, &from_len, (char **)to_p, &to_len))
== -1) {
if (errno == EINVAL)
gd_error ("invalid end of input string");
else if (errno == EILSEQ)
gd_error ("invalid code in input string");
else if (errno == E2BIG)
gd_error ("output buffer overflow at do_convert()");
else
gd_error ("something happen");
ustrcpy (to, from);
return;
}
if (iconv_close (cd) != 0) {
gd_error ("iconv_close() error");
}
#else
int p1, p2, i, j;
int jisx0208 = FALSE;
int hankaku = FALSE;
j = 0;
if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0) {
for (i = 0; from[i] != '\0' && j < BUFSIZ; i++) {
if (from[i] == ESC) {
i++;
if (from[i] == '$') {
jisx0208 = TRUE;
hankaku = FALSE;
i++;
} else if (from[i] == '(') {
jisx0208 = FALSE;
i++;
if (from[i] == 'I') /* Hankaku Kana */
hankaku = TRUE;
else
hankaku = FALSE;
}
} else {
if (jisx0208)
to[j++] = from[i] + 128;
else if (hankaku) {
to[j++] = SS2;
to[j++] = from[i] + 128;
} else
to[j++] = from[i];
}
}
} else if (strcmp (code, SJISSTR) == 0) {
for (i = 0; from[i] != '\0' && j < BUFSIZ; i++) {
p1 = from[i];
if (p1 < 127)
to[j++] = p1;
else if ((p1 >= 161) && (p1 <= 223)) {
/* Hankaku Kana */
to[j++] = SS2;
to[j++] = p1;
} else {
p2 = from[++i];
SJIStoJIS (&p1, &p2);
to[j++] = p1 + 128;
to[j++] = p2 + 128;
}
}
} else {
gd_error ("invalid code specification: \"%s\"", code);
return;
}
if (j >= BUFSIZ) {
gd_error ("output buffer overflow at do_convert()");
ustrcpy (to, from);
} else
to[j] = '\0';
#endif /* HAVE_ICONV */
}
static int
do_check_and_conv (unsigned char *to, const unsigned char *from)
{
static unsigned char tmp[BUFSIZ];
unsigned char *tmp_p = &tmp[0];
int p1, p2, i, j;
int kanji = TRUE;
int copy_string = FALSE;
switch (DetectKanjiCode (from)) {
case NEW:
gd_error_ex(GD_DEBUG, "Kanji code is New JIS.");
do_convert (&tmp_p, &from, NEWJISSTR);
break;
case OLD:
gd_error_ex(GD_DEBUG, "Kanji code is Old JIS.");
do_convert (&tmp_p, &from, OLDJISSTR);
break;
case ESCI:
gd_error_ex(GD_DEBUG, "This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
do_convert (&tmp_p, &from, NEWJISSTR);
break;
case NEC:
gd_error_ex(GD_DEBUG, "Kanji code is NEC Kanji.");
gd_error("cannot convert NEC Kanji.");
copy_string = TRUE;
kanji = FALSE;
break;
case EUC:
gd_error_ex(GD_DEBUG, "Kanji code is EUC.");
copy_string = TRUE;
break;
case SJIS:
gd_error_ex(GD_DEBUG, "Kanji code is SJIS.");
do_convert (&tmp_p, &from, SJISSTR);
break;
case EUCORSJIS:
gd_error_ex(GD_DEBUG, "Kanji code is EUC or SJIS.");
copy_string = TRUE;
kanji = FALSE;
break;
case ASCII:
gd_error_ex(GD_DEBUG, "This is ASCII string.");
copy_string = TRUE;
kanji = FALSE;
break;
default:
gd_error_ex(GD_DEBUG, "This string includes unknown code.");
copy_string = TRUE;
kanji = FALSE;
break;
}
if (copy_string) {
ustrncpy (tmp, from, BUFSIZ);
tmp[BUFSIZ-1] = '\0';
}
/* Hankaku Kana ---> Zenkaku Kana */
if (kanji) {
j = 0;
for (i = 0; tmp[i] != '\0' && j < BUFSIZ; i++) {
if (tmp[i] == SS2) {
p1 = tmp[++i];
if (tmp[i + 1] == SS2) {
p2 = tmp[i + 2];
if (p2 == 222 || p2 == 223)
i += 2;
else
p2 = 0;
} else
p2 = 0;
han2zen (&p1, &p2);
SJIStoJIS (&p1, &p2);
to[j++] = p1 + 128;
to[j++] = p2 + 128;
} else
to[j++] = tmp[i];
}
if (j >= BUFSIZ) {
gd_error("output buffer overflow at Hankaku --> Zenkaku");
ustrcpy (to, tmp);
} else
to[j] = '\0';
} else
ustrcpy (to, tmp);
return kanji;
}
int
any2eucjp (unsigned char *dest, const unsigned char *src, unsigned int dest_max)
{
static unsigned char tmp_dest[BUFSIZ];
int ret;
if (strlen ((const char *) src) >= BUFSIZ) {
gd_error("input string too large");
return -1;
}
if (dest_max > BUFSIZ) {
gd_error
("invalid maximum size of destination\nit should be less than %d.",
BUFSIZ);
return -1;
}
ret = do_check_and_conv (tmp_dest, src);
if (strlen ((const char *) tmp_dest) >= dest_max) {
gd_error("output buffer overflow");
ustrcpy (dest, src);
return -1;
}
ustrcpy (dest, tmp_dest);
return ret;
}
#if 0
unsigned int
strwidth (unsigned char *s)
{
unsigned char *t;
unsigned int i;
t = (unsigned char *) gdMalloc (BUFSIZ);
any2eucjp (t, s, BUFSIZ);
i = strlen (t);
gdFree (t);
return i;
}
#ifdef DEBUG
int
main ()
{
unsigned char input[BUFSIZ];
unsigned char *output;
unsigned char *str;
int c, i = 0;
while ((c = fgetc (stdin)) != '\n' && i < BUFSIZ)
input[i++] = c;
input[i] = '\0';
printf ("input : %d bytes\n", strlen ((const char *) input));
printf ("output: %d bytes\n", strwidth (input));
output = (unsigned char *) gdMalloc (BUFSIZ);
any2eucjp (output, input, BUFSIZ);
str = output;
while (*str != '\0')
putchar (*(str++));
putchar ('\n');
gdFree (output);
return 0;
}
#endif
#endif