#!/bin/sh # -------------------------------------------------------------------------- # This is the script to create the unicode chars property table # Written by Dimitry Golubovsky (dimitry@golubovsky.org) as part # of the Partial Unicode Support patch # # Adopted for use with GHC. # License: see libraries/base/LICENSE # # ------------------------------------------------------------------------- # The script reads the file from the standard input, # and outputs C code into the standard output. # The C code contains the chars property table, and basic functions # to access properties. # Output the file header echo "/*-------------------------------------------------------------------------" echo "This is an automatically generated file: do not edit" echo "Generated by `basename $0` at `date`" echo "-------------------------------------------------------------------------*/" echo echo "#include \"WCsubst.h\"" # Define structures cat <")!=0) { dumpblock() } else if (index(name,"Last>")!=0) { blockl+=(self-blockb) } else if((self==blockb+blockl)&&(rule==blockr)) blockl++ else { dumpblock() } } } END { dumpblock() for(c in cats) print "#define GENCAT_"c" "cats[c] print "#define MAX_UNI_CHAR " self print "#define NUM_BLOCKS " blockidx print "#define NUM_CONVBLOCKS " cblckidx print "#define NUM_SPACEBLOCKS " sblckidx print "#define NUM_LAT1BLOCKS " lat1idx print "#define NUM_RULES " rulidx for(r in rules) { printf "static const struct _convrule_ rule" rules[r] "={" r "};\n" } print "static const struct _charblock_ allchars[]={" for(i=0;istart is within the block. Otherwise result of comparison of key->start and start of the current block is returned as usual. */ static const struct _convrule_ nullrule={0,NUMCAT_CN,0,0,0,0}; int blkcmp(const void *vk,const void *vb) { const struct _charblock_ *key,*cur; key=vk; cur=vb; if((key->start>=cur->start)&&(key->start<(cur->start+cur->length))) { return 0; } if(key->start>cur->start) return 1; return -1; } static const struct _convrule_ *getrule( const struct _charblock_ *blocks, int numblocks, int unichar) { struct _charblock_ key={unichar,1,(void *)0}; struct _charblock_ *cb=bsearch(&key,blocks,numblocks,sizeof(key),blkcmp); if(cb==(void *)0) return &nullrule; return cb->rule; } /* Check whether a character (internal code) has certain attributes. Attributes (category flags) may be ORed. The function ANDs character category flags and the mask and returns the result. If the character belongs to one of the categories requested, the result will be nonzero. */ inline static int checkattr(int c,unsigned int catmask) { return (catmask & (getrule(allchars,(c<256)?NUM_LAT1BLOCKS:NUM_BLOCKS,c)->category)); } inline static int checkattr_s(int c,unsigned int catmask) { return (catmask & (getrule(spacechars,NUM_SPACEBLOCKS,c)->category)); } /* Define predicate functions for some combinations of categories. */ #define unipred(p,m) \\ int p(int c) \\ { \\ return checkattr(c,m); \\ } #define unipred_s(p,m) \\ int p(int c) \\ { \\ return checkattr_s(c,m); \\ } /* Make these rules as close to Hugs as possible. */ unipred(u_iswcntrl,GENCAT_CC) unipred(u_iswprint, \ (GENCAT_MC | GENCAT_NO | GENCAT_SK | GENCAT_ME | GENCAT_ND | \ GENCAT_PO | GENCAT_LT | GENCAT_PC | GENCAT_SM | GENCAT_ZS | \ GENCAT_LU | GENCAT_PD | GENCAT_SO | GENCAT_PE | GENCAT_PF | \ GENCAT_PS | GENCAT_SC | GENCAT_LL | GENCAT_LM | GENCAT_PI | \ GENCAT_NL | GENCAT_MN | GENCAT_LO)) unipred_s(u_iswspace,GENCAT_ZS) unipred(u_iswupper,(GENCAT_LU|GENCAT_LT)) unipred(u_iswlower,GENCAT_LL) unipred(u_iswalpha,(GENCAT_LL|GENCAT_LU|GENCAT_LT|GENCAT_LM|GENCAT_LO)) unipred(u_iswdigit,GENCAT_ND) unipred(u_iswalnum,(GENCAT_LT|GENCAT_LU|GENCAT_LL|GENCAT_LM|GENCAT_LO| GENCAT_MC|GENCAT_ME|GENCAT_MN| GENCAT_NO|GENCAT_ND|GENCAT_NL)) #define caseconv(p,to) \\ int p(int c) \\ { \\ const struct _convrule_ *rule=getrule(convchars,NUM_CONVBLOCKS,c);\\ if(rule==&nullrule) return c;\\ return c+rule->to;\\ } caseconv(u_towupper,updist) caseconv(u_towlower,lowdist) caseconv(u_towtitle,titledist) int u_gencat(int c) { return getrule(allchars,NUM_BLOCKS,c)->catnumber; } EOF