summary refs log tree commit diff
path: root/comb.c
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2006-10-29 08:07:51 +0000
committer Rich Felker <dalias@aerifal.cx> 2006-10-29 08:07:51 +0000
commit 89fd3b76518cf3004053331c580a349afaaf2dab (patch)
tree 61897409524c883266942c744ca58df69eb3970b /comb.c
parent b97a486c34c343ab18bf16aef507478992af0625 (diff)
major internal changes in representation of character cells.
we now use 12 bytes per cell instead of 10. however, this allows us to support 256-color mode (not yet implemented but the framework is in place) and to mix scripts when using combining characters. while the latter sounds ridiculous at first, being able to visibly see a combining letter attached to a [, ", or ' is extremely useful in scripting and regular expressions with some languages. some code is left slightly messy, but overall it's much cleaner now since struct uucell is now properly encapsulated.
Diffstat (limited to 'comb.c')
-rw-r--r-- comb.c 198
1 files changed, 0 insertions, 198 deletions
diff --git a/comb.c b/comb.c
deleted file mode 100644
index 6bf7f7d..0000000
--- a/comb.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/* uuterm, Copyright (C) 2006 Rich Felker; licensed under GNU GPL v2 only */
-
-#define R(a,b) { (a), (b)-(a) }
-
-static const unsigned short common[][2] = {
- R( 0x300, 0x341 ),
- R( 0x346, 0x362 ),
- R( 0x200B, 0x200F ),
- R( 0x202A, 0x202E ),
- R( 0x2060, 0x206F ),
- R( 0x20D0, 0x20EA ),
- { 0, 0 }
-};
-
-static const unsigned short latin[][2] = {
- R( 0x363, 0x36F ),
- { 0, 0 }
-};
-
-static const unsigned short greek[][2] = {
- R( 0x342, 0x345 ),
- { 0, 0 }
-};
-
-static const unsigned short cyrillic[][2] = {
- R( 0x483, 0x489 ),
- { 0, 0 }
-};
-
-static const unsigned short hebrew[][2] = {
- R( 0x591, 0x5C4 ),
- { 0, 0 }
-};
-
-static const unsigned short arabic[][2] = {
- R( 0x600, 0x603 ),
- R( 0x610, 0x615 ),
- R( 0x64B, 0x658 ),
- R( 0x670, 0x670 ),
- R( 0x6D6, 0x6ED ),
- { 0, 0 }
-};
-
-static const unsigned short syriac[][2] = {
- R( 0x70F, 0x711 ),
- R( 0x730, 0x74A ),
- { 0, 0 }
-};
-
-static const unsigned short thaana[][2] = {
- R( 0x7A6, 0x7B0 ),
- { 0, 0 }
-};
-
-static const unsigned short devanagari[][2] = {
- R( 0x901, 0x902 ),
- R( 0x93C, 0x963 ),
- { 0, 0 }
-};
-
-static const unsigned short bengali[][2] = {
- R( 0x981, 0x981 ),
- R( 0x9BC, 0x9E3 ),
- { 0, 0 }
-};
-
-static const unsigned short gurmukhi[][2] = {
- R( 0xA01, 0xA02 ),
- R( 0xA3C, 0xA4D ),
- R( 0xA70, 0xA71 ),
- { 0, 0 }
-};
-
-static const unsigned short gujarati[][2] = {
- R( 0xA81, 0xA82 ),
- R( 0xABC, 0xAE3 ),
- { 0, 0 }
-};
-
-static const unsigned short oriya[][2] = {
- R( 0xB01, 0xB01 ),
- R( 0xB3C, 0xB4D ),
- R( 0xB56, 0xB56 ),
- { 0, 0 }
-};
-
-static const unsigned short tamil[][2] = {
- R( 0xB82, 0xB82 ),
- R( 0xBC0, 0xBCD ),
- { 0, 0 }
-};
-
-static const unsigned short telugu[][2] = {
- R( 0xC3E, 0xC56 ),
- { 0, 0 }
-};
-
-static const unsigned short kannada[][2] = {
- R( 0xCBC, 0xCCD ),
- { 0, 0 }
-};
-
-static const unsigned short malayalam[][2] = {
- R( 0xD41, 0xD4D ),
- { 0, 0 }
-};
-
-static const unsigned short sinhala[][2] = {
- R( 0xDCA, 0xDD6 ),
- { 0, 0 }
-};
-
-static const unsigned short thai[][2] = {
- R( 0xE31, 0xE3A ),
- R( 0xE47, 0xE4E ),
- { 0, 0 }
-};
-
-static const unsigned short lao[][2] = {
- R( 0xEB1, 0xECD ),
- { 0, 0 }
-};
-
-static const unsigned short tibetan[][2] = {
- R( 0xF18, 0xF19 ),
- R( 0xF35, 0xF35 ),
- R( 0xF39, 0xF39 ),
- R( 0xF71, 0xF84 ),
- R( 0xF90, 0xFBC ),
- R( 0xFC6, 0xFC6 ),
- { 0, 0 }
-};
-
-static const unsigned short burmese[][2] = {
- R( 0x102D, 0x1039 ),
- R( 0x1058, 0x1059 ),
- { 0, 0 }
-};
-
-static const unsigned short misc_scripts[][2] = {
- R( 0x1732, 0x1734 ), /* hanunoo */
- R( 0x1752, 0x1753 ), /* buhid */
- R( 0x17B4, 0x17BD ), /* khmer */
- R( 0x17C6, 0x17D3 ),
- R( 0x17DD, 0x17DD ),
- R( 0x18A9, 0x18A9 ), /* mongolian */
- R( 0x1920, 0x193B ), /* limbu (can be broken down more) */
- { 0, 0 }
-};
-
-#undef R
-#define R(a,b,s) { (a), (b)-(a), (s) }
-
-static const struct {
- unsigned a, l;
- const unsigned short (*r)[2];
-} scripts[] = {
- R( 0x400, 0x52F, cyrillic ),
- R( 0x590, 0x5FF, hebrew ),
- R( 0x600, 0x6FF, arabic ),
- R( 0x700, 0x74F, syriac ),
- R( 0x780, 0x7B1, thaana ),
- R( 0x900, 0x97F, devanagari ),
- R( 0x980, 0x9FF, bengali ),
- R( 0xA00, 0xA7F, gurmukhi ),
- R( 0xA80, 0xAFF, gujarati ),
- R( 0xB00, 0xB7F, oriya ),
- R( 0xB80, 0xBFF, tamil ),
- R( 0xC00, 0xC7F, telugu ),
- R( 0xC80, 0xCFF, kannada ),
- R( 0xD00, 0xD7F, malayalam ),
- R( 0xD80, 0xDFF, sinhala ),
- R( 0xE00, 0xE7F, thai ),
- R( 0xE80, 0xEFF, lao ),
- R( 0xF00, 0xFFF, tibetan ),
- R( 0x1000, 0x108F, burmese ),
- R( 0x1720, 0x19FF, misc_scripts ),
- R( 0x1D2B, 0x1D2B, cyrillic ),
- R( 0x0000, 0x10FFFF, common ),
- { }
-};
-
-#undef R
-
-int uu_combine_involution(unsigned b, unsigned c)
-{
- int i;
- unsigned code = 1;
- const unsigned short (*r)[2];
- for (i=0; scripts[i].l; i++)
- if (b - scripts[i].a <= scripts[i].l)
- for (r = scripts[i].r; r[0][0]; code += r++[0][1]+1)
- if (c - r[0][0] <= r[0][1])
- return c - r[0][0] + code;
- else if (c - code <= r[0][1])
- return c + r[0][0] - code;
- return 0;
-}