From 8888ac2ce29c83600faed2a1968a5fdd833667be Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 1 Oct 2007 05:06:36 +0000 Subject: updates to decomposition data: - support single-replacement decompositions (legacy chars like angstrom) - correct for mandatory replacements wrongly marked in ucd - include newly added characters in the tables - omit presentation forms/cjk compat block for the time being --- decomp.c | 3 +- decomp.h | 189 ++++++++++++++++++++++++++++++--------------------------------- 2 files changed, 90 insertions(+), 102 deletions(-) diff --git a/decomp.c b/decomp.c index 3b8d79d..7595509 100644 --- a/decomp.c +++ b/decomp.c @@ -22,8 +22,7 @@ int uu_decompose_char(unsigned c, unsigned *d, unsigned max) } p = c>>8; - if (p == 0xfb) page = page_0fb; - else if (p <= 0x30) page = pages[p]; + if (p < sizeof(pages)/sizeof(pages[0])) page = pages[p]; /* strict <: p must be a valid index into pages[] */ else page = NULL; if (page && (page[c>>5 & 7] & 1<<(c&31))) { diff --git a/decomp.h b/decomp.h index 25fdd94..7991bd8 100644 --- a/decomp.h +++ b/decomp.h @@ -251,9 +251,15 @@ static const unsigned short decomp[] = { 0x231,0x22f,0x304, 0x232,0x59,0x304, 0x233,0x79,0x304, + 0x340,0x300,0, + 0x341,0x301,0, + 0x343,0x313,0, 0x344,0x308,0x301, + 0x374,0x2b9,0, + 0x37e,0x3b,0, 0x385,0xa8,0x301, 0x386,0x391,0x301, + 0x387,0xb7,0, 0x388,0x395,0x301, 0x389,0x397,0x301, 0x38a,0x399,0x301, @@ -388,7 +394,9 @@ static const unsigned short decomp[] = { 0xf73,0xf71,0xf72, 0xf75,0xf71,0xf74, 0xf76,0xfb2,0xf80, + 0xf77,0xfb2,0xf81, 0xf78,0xfb3,0xf80, + 0xf79,0xfb3,0xf81, 0xf81,0xf71,0xf80, 0xf93,0xf92,0xfb7, 0xf9d,0xf9c,0xfb7, @@ -397,6 +405,17 @@ static const unsigned short decomp[] = { 0xfac,0xfab,0xfb7, 0xfb9,0xf90,0xfb5, 0x1026,0x1025,0x102e, + 0x1b06,0x1b05,0x1b35, + 0x1b08,0x1b07,0x1b35, + 0x1b0a,0x1b09,0x1b35, + 0x1b0c,0x1b0b,0x1b35, + 0x1b0e,0x1b0d,0x1b35, + 0x1b12,0x1b11,0x1b35, + 0x1b3b,0x1b3a,0x1b35, + 0x1b3d,0x1b3c,0x1b35, + 0x1b40,0x1b3e,0x1b35, + 0x1b41,0x1b3f,0x1b35, + 0x1b43,0x1b42,0x1b35, 
0x1e00,0x41,0x325, 0x1e01,0x61,0x325, 0x1e02,0x42,0x307, @@ -816,6 +835,7 @@ static const unsigned short decomp[] = { 0x1fba,0x391,0x300, 0x1fbb,0x391,0x301, 0x1fbc,0x391,0x345, + 0x1fbe,0x3b9,0, 0x1fc1,0xa8,0x342, 0x1fc2,0x1f74,0x345, 0x1fc3,0x3b7,0x345, @@ -857,6 +877,8 @@ static const unsigned short decomp[] = { 0x1feb,0x3a5,0x301, 0x1fec,0x3a1,0x314, 0x1fed,0xa8,0x300, + 0x1fee,0xa8,0x301, + 0x1fef,0x60,0, 0x1ff2,0x1f7c,0x345, 0x1ff3,0x3c9,0x345, 0x1ff4,0x3ce,0x345, @@ -867,6 +889,12 @@ static const unsigned short decomp[] = { 0x1ffa,0x3a9,0x300, 0x1ffb,0x3a9,0x301, 0x1ffc,0x3a9,0x345, + 0x1ffd,0xb4,0, + 0x2000,0x2002,0, + 0x2001,0x2003,0, + 0x2126,0x3a9,0, + 0x212a,0x4b,0, + 0x212b,0x41,0x30a, 0x219a,0x2190,0x338, 0x219b,0x2192,0x338, 0x21ae,0x2194,0x338, @@ -911,6 +939,8 @@ static const unsigned short decomp[] = { 0x22eb,0x22b3,0x338, 0x22ec,0x22b4,0x338, 0x22ed,0x22b5,0x338, + 0x2329,0x3008,0, + 0x232a,0x3009,0, 0x2adc,0x2add,0x338, 0x304c,0x304b,0x3099, 0x304e,0x304d,0x3099, @@ -970,40 +1000,6 @@ static const unsigned short decomp[] = { 0x30f9,0x30f1,0x3099, 0x30fa,0x30f2,0x3099, 0x30fe,0x30fd,0x3099, - 0xfb1d,0x5d9,0x5b4, - 0xfb1f,0x5f2,0x5b7, - 0xfb2a,0x5e9,0x5c1, - 0xfb2b,0x5e9,0x5c2, - 0xfb2c,0xfb49,0x5c1, - 0xfb2d,0xfb49,0x5c2, - 0xfb2e,0x5d0,0x5b7, - 0xfb2f,0x5d0,0x5b8, - 0xfb30,0x5d0,0x5bc, - 0xfb31,0x5d1,0x5bc, - 0xfb32,0x5d2,0x5bc, - 0xfb33,0x5d3,0x5bc, - 0xfb34,0x5d4,0x5bc, - 0xfb35,0x5d5,0x5bc, - 0xfb36,0x5d6,0x5bc, - 0xfb38,0x5d8,0x5bc, - 0xfb39,0x5d9,0x5bc, - 0xfb3a,0x5da,0x5bc, - 0xfb3b,0x5db,0x5bc, - 0xfb3c,0x5dc,0x5bc, - 0xfb3e,0x5de,0x5bc, - 0xfb40,0x5e0,0x5bc, - 0xfb41,0x5e1,0x5bc, - 0xfb43,0x5e3,0x5bc, - 0xfb44,0x5e4,0x5bc, - 0xfb46,0x5e6,0x5bc, - 0xfb47,0x5e7,0x5bc, - 0xfb48,0x5e8,0x5bc, - 0xfb49,0x5e9,0x5bc, - 0xfb4a,0x5ea,0x5bc, - 0xfb4b,0x5d5,0x5b9, - 0xfb4c,0x5d1,0x5bf, - 0xfb4d,0x5db,0x5bf, - 0xfb4e,0x5e4,0x5bf, 0 }; static const uint32_t page_000[] = { @@ -1039,9 +1035,9 @@ static const uint32_t page_002[] = { static const uint32_t 
page_003[] = { 0, 0, - 0x10, - 0, - 0x1d760, + 0x1b, + 0x40100000, + 0x1d7e0, 0x1fc00, 0x187c00, 0, @@ -1120,7 +1116,7 @@ static const uint32_t page_00f[] = { 0, 0, 0x10842008, - 0x1680200, + 0x3e80200, 0x20080002, 0x2001084, 0, @@ -1136,6 +1132,16 @@ static const uint32_t page_010[] = { 0, 0, }; +static const uint32_t page_01b[] = { + 0x45540, + 0x28000000, + 0xb, + 0, + 0, + 0, + 0, + 0, +}; static const uint32_t page_01e[] = { 0xffffffff, 0xffffffff, @@ -1152,15 +1158,25 @@ static const uint32_t page_01f[] = { 0xaaff3f3f, 0x3fffffff, 0xffffffff, - 0x1fdfffff, + 0x5fdfffff, 0xefcfffde, - 0x1fdc3fff, + 0x3fdcffff, }; -static const uint32_t page_021[] = { +static const uint32_t page_020[] = { + 0x3, + 0, + 0, + 0, 0, 0, 0, 0, +}; +static const uint32_t page_021[] = { + 0, + 0xc40, + 0, + 0, 0xc000000, 0x4000, 0xe000, @@ -1176,6 +1192,16 @@ static const uint32_t page_022[] = { 0, 0x3c0f, }; +static const uint32_t page_023[] = { + 0, + 0x600, + 0, + 0, + 0, + 0, + 0, + 0, +}; static const uint32_t page_02a[] = { 0, 0, @@ -1196,64 +1222,27 @@ static const uint32_t page_030[] = { 0x36db02a5, 0x47900000, }; -static const uint32_t page_0fb[] = { - 0xa0000000, - 0x5f7ffc00, - 0x7fdb, - 0, - 0, - 0, - 0, - 0, -}; static const uint32_t *pages[] = { - page_000, - page_001, - page_002, - page_003, - page_004, - NULL, - page_006, - NULL, - NULL, - page_009, - page_00a, - page_00b, - page_00c, - page_00d, - NULL, - page_00f, - page_010, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - page_01e, - page_01f, - NULL, - page_021, - page_022, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - page_02a, - NULL, - NULL, - NULL, - NULL, - NULL, - page_030, + [0] = page_000, + [0x1] = page_001, + [0x2] = page_002, + [0x3] = page_003, + [0x4] = page_004, + [0x6] = page_006, + [0x9] = page_009, + [0xa] = page_00a, + [0xb] = page_00b, + [0xc] = page_00c, + [0xd] = page_00d, + [0xf] = page_00f, + [0x10] = page_010, + [0x1b] = 
page_01b, + [0x1e] = page_01e, + [0x1f] = page_01f, + [0x20] = page_020, + [0x21] = page_021, + [0x22] = page_022, + [0x23] = page_023, + [0x2a] = page_02a, + [0x30] = page_030, }; -- cgit v1.2.3