diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index d456309..1148eb5 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -37,6 +37,31 @@ typedef unsigned int pg_wchar; #define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc)) /* + * Currently PostgreSQL supports 5 types of mule internal encodings: + * + * 1) 1-byte ASCII characters. Each byte is below 0x80. + * + * 2) "Official" single byte charsets such as ISO 8859 latin1. Each + * mule character consists of 2 bytes: LC1 + C1, where LC1 + * corresponds to each charset and is in the range 0x81 to 0x8d, and C1 + * is in the range 0xa0 to 0xff (ISO 8859-1 for example, with each + * high bit on). + * + * 3) "Private" single byte charsets such as SISHENG. Each mule + * character consists of 3 bytes: LCPRV1 + LC12 + C1, where LCPRV1 + * is either 0x9a (if LC12 is in the range 0xa0 to 0xdf) or 0x9b (if + * LC12 is in the range 0xe0 to 0xef). + * + * 4) "Official" multibyte charsets such as JIS X0208. Each mule + * character consists of 3 bytes: LC2 + C1 + C2, where LC2 + * corresponds to each charset and is in the range 0x90 to 0x99. C1 + * and C2 are in the range 0xa0 to 0xff (each high bit is on). + * + * 5) "Private" multibyte charsets such as CNS 11643-1992 Plane 3. + * Each mule character consists of 4 bytes: LCPRV2 + LC22 + C1 + + * C2, where LCPRV2 is either 0x9c (if LC22 is in the range 0xf0 to + * 0xf4) or 0x9d (if LC22 is in the range 0xf5 to 0xfe). + * * Leading byte types or leading prefix byte for MULE internal code. * See http://www.xemacs.org for more details. (there is a doc titled * "XEmacs Internals Manual", "MULE Character Sets and Encodings"