From 66681b296084bc3683cab50986402e5da27f5912 Mon Sep 17 00:00:00 2001 From: simonmar Date: Mon, 14 Mar 2005 15:57:57 +0000 Subject: [PATCH] [project @ 2005-03-14 15:57:57 by simonmar] Add the script used to generate WCsubst.c --- cbits/ubconfc | 339 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 339 insertions(+) create mode 100644 cbits/ubconfc diff --git a/cbits/ubconfc b/cbits/ubconfc new file mode 100644 index 0000000..6344ecc --- /dev/null +++ b/cbits/ubconfc @@ -0,0 +1,339 @@ +#!/bin/sh + +# -------------------------------------------------------------------------- +# This is the script to create the unicode chars property table +# Written by Dimitry Golubovsky (dimitry@golubovsky.org) as part +# of the Partial Unicode Support patch +# +# Adopted for use with GHC. +# License: see libraries/base/LICENSE +# +# ------------------------------------------------------------------------- + +# The script reads the file from the standard input, +# and outputs C code into the standard output. +# The C code contains the chars property table, and basic functions +# to access properties. 
+ +# Output the file header: a C comment banner naming this script and the +# generation time, followed by the include of WCsubst.h. + +echo "/*-------------------------------------------------------------------------" +echo "This is an automatically generated file: do not edit" +echo "Generated by `basename $0` at `date`" +echo "-------------------------------------------------------------------------*/" +echo +echo "#include \"WCsubst.h\"" + +# Define structures +# NOTE(review): in this copy the text following "cat <" is cut off; the +# here-document and the start of what appears to be an awk program are +# missing. Restore them from the upstream script before running it. +# NOTE(review): the C emitted further down defines blkcmp without static, +# although the visible code only passes it to bsearch -- consider giving +# it internal linkage. + +cat <")!=0) + { + dumpblock() + } + else if (index(name,"Last>")!=0) + { + # name contains "Last>": extend the current block by the distance + # between self and the block start blockb + blockl+=(self-blockb) + } + else if((self==blockb+blockl)&&(rule==blockr)) blockl++ # self directly follows the block and has the same rule: lengthen the block by one + else + { + # in every other case call dumpblock(), which is defined in the part + # of the script missing from this copy (presumably it records the + # finished block and starts a new one -- confirm) + dumpblock() + } + } +} +END { + # After the last input record: call dumpblock() once more, then print + # the GENCAT_ and size #define lines, one _convrule_ initializer per + # entry of rules, and the opening of the allchars table. + # NOTE(review): the text after "for(i=0;i" is cut off in this copy. + dumpblock() + for(c in cats) print "#define GENCAT_"c" "cats[c] + print "#define MAX_UNI_CHAR " self + print "#define NUM_BLOCKS " blockidx + print "#define NUM_CONVBLOCKS " cblckidx + print "#define NUM_SPACEBLOCKS " sblckidx + print "#define NUM_LAT1BLOCKS " lat1idx + print "#define NUM_RULES " rulidx + for(r in rules) + { + printf "static const struct _convrule_ rule" rules[r] "={" r "};\n" + } + print "static const struct _charblock_ allchars[]={" + for(i=0;istart is within the block. Otherwise + result of comparison of key->start and start of the + current block is returned as usual. +*/ + +static const struct _convrule_ nullrule={0,NUMCAT_CN,0,0,0,0}; + +int blkcmp(const void *vk,const void *vb) +{ + const struct _charblock_ *key,*cur; + key=vk; + cur=vb; + if((key->start>=cur->start)&&(key->start<(cur->start+cur->length))) + { + return 0; + } + if(key->start>cur->start) return 1; + return -1; +} + +static const struct _convrule_ *getrule( + const struct _charblock_ *blocks, + int numblocks, + int unichar) +{ + struct _charblock_ key={unichar,1,(void *)0}; + struct _charblock_ *cb=bsearch(&key,blocks,numblocks,sizeof(key),blkcmp); + if(cb==(void *)0) return &nullrule; + return cb->rule; +} + + + +/* + Check whether a character (internal code) has certain attributes. + Attributes (category flags) may be ORed. The function ANDs + character category flags and the mask and returns the result. 
+ If the character belongs to one of the categories requested, + the result will be nonzero. +*/ + +inline static int checkattr(int c,unsigned int catmask) +{ + return (catmask & (getrule(allchars,(c<256)?NUM_LAT1BLOCKS:NUM_BLOCKS,c)->category)); +} + +inline static int checkattr_s(int c,unsigned int catmask) +{ + return (catmask & (getrule(spacechars,NUM_SPACEBLOCKS,c)->category)); +} + +/* + Define predicate functions for some combinations of categories. +*/ + +#define unipred(p,m) \\ +int p(int c) \\ +{ \\ + return checkattr(c,m); \\ +} + +#define unipred_s(p,m) \\ +int p(int c) \\ +{ \\ + return checkattr_s(c,m); \\ +} + +/* + Make these rules as close to Hugs as possible. +*/ + +unipred(u_iswcntrl,GENCAT_CC) +unipred(u_iswprint,~(GENCAT_ZL|GENCAT_ZP|GENCAT_CC|GENCAT_CF|GENCAT_CS|GENCAT_CO)) +unipred_s(u_iswspace,GENCAT_ZS) +unipred(u_iswupper,(GENCAT_LU|GENCAT_LT)) +unipred(u_iswlower,GENCAT_LL) +unipred(u_iswalpha,(GENCAT_LL|GENCAT_LU|GENCAT_LT|GENCAT_LM|GENCAT_LO)) +unipred(u_iswdigit,GENCAT_ND) + +unipred(u_iswalnum,(GENCAT_LT|GENCAT_LU|GENCAT_LL|GENCAT_LM|GENCAT_LO| + GENCAT_MC|GENCAT_ME|GENCAT_MN| + GENCAT_NO|GENCAT_ND|GENCAT_NL)) + +#define caseconv(p,to) \\ +int p(int c) \\ +{ \\ + const struct _convrule_ *rule=getrule(convchars,NUM_CONVBLOCKS,c);\\ + if(rule==&nullrule) return c;\\ + return c+rule->##to;\\ +} + +caseconv(u_towupper,updist) +caseconv(u_towlower,lowdist) +caseconv(u_towtitle,titledist) + +int u_gencat(int c) +{ + return getrule(allchars,NUM_BLOCKS,c)->catnumber; +} + +EOF -- 1.7.10.4