Logo Search packages:      
Sourcecode: hanzim version File versions  Download package

hutil.c

/* hutil.c: Contains utility functions for Hanzi Master - mostly various
   character lookup and format conversion functions. */

/* This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.  Please see the file "COPYING"
   for details.  If you have not received it along with this program,
   please write to the Free Software Foundation, Inc., 59 Temple Place,
   Suite 330, Boston, MA 02111-1307, USA. */

#include "hanzim.h"
#include "hgb_utf.h"    /* gb->utf8 conversion array */


/****************************************************************************/
/* radical functions */

/* radnum() pulls the number off the front of a radical string (e.g. "85S",
   "214") and returns one less than that number, for use as an array index.
   Any trailing non-digit suffix is ignored.  A string with no leading
   number (including the empty string) parses as 0 and so yields -1. */
int
radnum(char *s)
{
  /* strtol() stops at the first non-digit, so the type suffix never needs
     to be stripped into a fixed-size scratch buffer (which could overflow
     on long input, and read before the string when it was empty). */
  return((int) strtol(s, NULL, 10) - 1);
}

/* radtype() returns the type code of a radical string, decided by its last
   character: 'S' -> 0 (simplified), 'F' -> 2 (full), 'A' -> 3 (alternate),
   anything else -> 1 (normal).  An empty string is treated as normal. */
char
radtype(char *s)
{
  size_t len = strlen(s);

  /* no last character to inspect; avoid reading s[-1] */
  if (len == 0)
    return(1);

  switch (s[len - 1]) {
  case 'S': return(0);
  case 'F': return(2);
  case 'A': return(3);
  default:  return(1);
  }
}

/****************************************************************************/
/* pinyin functions */

/* pin_init() returns the index of the initial + 1 for a character given its
   pinyin string:
     - 0 if the string does not begin with a lowercase ASCII letter
       (this includes the empty string);
     - 1 if asc2fidx[] marks the first letter as starting a final
       (i.e. the syllable has no initial);
     - 22, 23 or 24 when the second letter is 'h' ("ch", "sh", and any other
       first letter, respectively);
     - otherwise asc2iidx[first letter] + 1. */
int
pin_init(char *s)
{
  /* Both lookup tables are indexed by (letter - 'a'); anything outside
     'a'..'z' would index outside them.
     NOTE(review): assumes the tables have 26 entries -- confirm. */
  if ((s[0] < 'a') || (s[0] > 'z')) return(0);

  if (asc2fidx[s[0] - 'a']) return(1);  /* final only */

  if (s[1] == 'h') {
    if (s[0] == 'c') return(22);
    return((s[0] == 's') ? 23 : 24);
  }

  return(asc2iidx[s[0] - 'a'] + 1);
}

/* pin_fin() returns the index of the final + 1 for a character given its
   pinyin string (without tone digit), or zero if it is not found.

   The leading letters that asc2iidx[] marks as initials are skipped; the
   remainder is then compared against fin_str[], starting at the slot
   asc2fidx[] gives for its first letter and scanning forward. */
int
pin_fin(char *s)
{
  int i;
  char *tpos;

  /* first strip off initial; stop at the terminator or at any character
     that is not a valid table index ('a'..'z') */
  for (tpos = s;
       (*tpos >= 'a') && (*tpos <= 'z') && asc2iidx[*tpos - 'a'];
       tpos++)
    ;

  /* nothing left, or the remainder cannot start a final */
  if ((*tpos < 'a') || (*tpos > 'z')) return(0);
  if (!asc2fidx[*tpos - 'a']) return(0);   /* would start the scan at -1 */

  /* NOTE(review): assumes fin_str[] holds N_FINALS entries (valid indices
     0..N_FINALS-1), as pin_index()'s arithmetic suggests -- confirm. */
  for (i = asc2fidx[*tpos - 'a'] - 1; i < N_FINALS; i++)
    if (!strcmp(tpos, fin_str[i])) return(i + 1);

  return(0);
}

/* pin_index() returns the character index given a pinyin string with an
   optional trailing tone digit '1'..'5'.  If the string does not end in
   such a digit, tone 5 (neutral, stored as 4) is assumed.

   The index is (init-1) * (N_FINALS * N_TONES) + (fin-1) * N_TONES + tone,
   where init and fin come from pin_init() and pin_fin().

   Returns -1, after printing a diagnostic to stdout, if the initial or
   final is not recognized, or if the syllable is empty or too long to be
   valid pinyin. */
int
pin_index(char *s)
{
  int init = 0,
      fin = 0;
  uchar tone;
  char      locstr[10];
  size_t    len = strlen(s);

  /* pull off tone */
  if ((len > 0) && (s[len-1] > '0') && (s[len-1] < '6')) {
    /* string ends in reasonable tone number */
    tone = s[len-1] - '1';
    len--;
  } else {
    /* either we got an out of range number or no number; assume neutral */
    tone = 4;
  }

  /* only look the syllable up when it is non-empty and fits the scratch
     buffer; otherwise fall through with init == fin == 0 */
  if ((len > 0) && (len < sizeof(locstr))) {
    memcpy(locstr,s,len);
    locstr[len] = '\0';
    init = pin_init(locstr);
    if (init) fin = pin_fin(locstr);
  }

  if (!init || !fin) {
    printf("hanzim: unrecognized pinyin (pi=%d, pf=%d\t%s)\n",init,fin,s);
    return(-1);         /* unrecognized initial or final */
  }

  return((init-1) * (N_FINALS * N_TONES) + (fin-1) * N_TONES + tone);
}

/****************************************************************************/

/* g2u(): until tcl's gb to utf-8 conversion function works for gb2312-1980-1
   (doesn't in release 8.1a1), we use this code from a program by:

      Ross Paterson <rap@doc.ic.ac.uk>
      Department of Computing, Imperial College, London SW7

   to convert internally from gb to utf-8 within hanzim.

   gb   - NUL-terminated string of 2-byte GB characters.
   utf8 - output buffer supplied by the caller; at most 3 bytes are written
          per input character, plus a terminating NUL.

   Returns 0 on success.  Returns -1 if a byte pair is not a valid GB code
   (lead byte outside 0xa1..0xfe, trail byte outside 0x21..0x7e after
   masking the high bit, or a dangling odd byte), or if the table value
   would need more than 3 bytes of utf-8.  On error, utf8 holds the
   NUL-terminated conversion of the characters before the bad one. */
int
g2u(char *gb, char *utf8)
{
  size_t    len = strlen(gb);   /* hoisted out of the loop condition */
  size_t    i;
  char      *out = utf8;        /* write position; replaces repeated strcat */

  *out = '\0';

  for (i = 0; i < len; i += 2) {
    int     c1 = (unsigned char) gb[i];
    /* for odd len, gb[i+1] is the terminator and is rejected below */
    int     c2 = ((unsigned char) gb[i+1]) & 0x7f;
    long    wc, tmp;
    int     extras;

    /* Reject anything that would index outside a 94x94 table.
       NOTE(review): assumes gb_in[] covers all 94 rows -- confirm. */
    if ((c1 < 0xa1) || (c1 > 0xfe) || (c2 < 0x21) || (c2 > 0x7e))
      return(-1);

    wc = gb_in[(c1 - 0xa1)*94 + c2 - 0x21] & 0x7fffffffL;

    /* how many continuation bytes are required? */
    extras = 1;
    for (tmp = wc >> 11; tmp != 0; tmp >>= 5) extras++;
    if (extras > 2) return(-1);   /* more than 3 bytes: not supported */

    /* first byte */
    *out++ = (char) ((0xff&(0x1f80 >> extras)) | (int)(wc >> (extras*6)));
    /* remaining bytes */
    while (extras-- != 0)
      *out++ = (char) (0x80|(0x3f&(int)(wc >> (extras*6))));
    *out = '\0';
  }

  return(0);
}


/****************************************************************************/
/* file and string functions */

/* Dopen() opens the database file "<datadir>/<datafile>" for reading and
   returns the file pointer.  If the path is too long for the local buffer
   or the file cannot be opened, badfile() is called to report it; NULL is
   returned should badfile() return. */
FILE *
Dopen(char *datadir, char *datafile)
{
  char            fname[FILENAME_MAX];
  FILE            *fp;

  /* directory + '/' + file + terminator must fit before we format it */
  if (strlen(datadir) + strlen(datafile) + 2 > sizeof(fname)) {
    badfile(datafile,"");
    return(NULL);
  }

  sprintf(fname,"%s/%s",datadir,datafile);

  fp = fopen(fname,"r");
  if (!fp) {
    badfile(fname,"");
    return(NULL);
  }

  return(fp);
}

/* getnextline() gets the next line of input from the file, discarding
   blank lines and comment lines (which begin with a number sign, "#").
   Leading whitespace is skipped and the trailing newline is not stored.

   fp     - stream to read from.
   line   - destination buffer; always NUL-terminated when 1 is returned.
   maxlen - size of line; used to prevent buffer overrun.  If the line does
            not fit, it is truncated and a warning is printed to stdout.

   Returns 1 on success, 0 if end of file is reached before any data.  A
   final line with no trailing newline is still returned.

   Note: the indentation skip uses isspace(), which also matches '\n', so a
   whitespace-only line is skipped here and the text that follows it is
   read without a comment check. */
char
getnextline(FILE *fp, char *line, int maxlen)
{
    int         c;              /* int, so EOF stays distinct from data */
    char        *tpos;
    int         numRead = 1;    /* number of bytes read */

    /* get to next line of data */
    for (;;) {
        c = getc(fp);
        if (c == '#')
            while ((c != '\n') && (c != EOF))
                c = getc(fp);   /* go to end of line */
        if (c == EOF) return(0);
        if (c != '\n') break;
    }

    /* skip any indentation; isspace(EOF) is false, so this stops at EOF */
    while (isspace(c)) c = getc(fp);
    if (c == EOF) return(0);

    /* read in line, stopping at newline, end of file, or a full buffer */
    for (tpos = line;
         (c != '\n') && (c != EOF) && (++numRead < maxlen);
         c = getc(fp))
        *tpos++ = (char) c;

    *tpos = '\0';

    if (numRead == maxlen) {
      printf("Warning: String too long for buffer variable.  Started with\n");
      printf("         \"%s\"", line);
    }

    return(1);
}

/* skip() skips over n whitespace-separated arguments in a string and
   returns a pointer to the beginning of the argument after them, assuming
   s starts at the beginning of an argument.  If the string holds fewer
   than n further arguments, a pointer to its terminating NUL is returned. */
char *
skip(int n, char *s)
{
  int i;
  char *tpos = s;

  for (i = 0; (i < n) && (*tpos != '\0'); i++) {
    /* step over the argument itself, but never past the terminator */
    while ((*tpos != '\0') && !isspace((unsigned char) *tpos)) tpos++;
    /* then over the whitespace that follows it */
    while (isspace((unsigned char) *tpos)) tpos++;
  }

  return(tpos);
}
  
/* itoa() formats the given integer as a decimal string and returns a
   pointer to it.  The string lives in a static buffer inside this function,
   so each call overwrites the result of the previous one. */
char *
itoa(int n)
{
  static char     digits[25];

  (void) sprintf(digits, "%d", n);
  return digits;
}

/****************************************************************************/
/* error messages */

/* memfail() reports on stderr that memory could not be allocated for the
   named item, then terminates the program with exit status 1. */
void
memfail(char *item)
{
  fprintf(stderr, "\n***Memory allocation failed for %s.\n", item);
  fputs("Exiting...\n\n", stderr);

  exit(1);
}

/* badfile() reports a problem with a data file on stderr and terminates
   the program with exit status 1.

   fname  - name of the file concerned.
   reason - empty string if the file could not be found; otherwise the name
            of the data item that could not be read from it. */
void
badfile(char *fname, char *reason)
{
  if (reason[0] != '\0') {
    /* the file was there, but an item in it could not be read */
    fprintf(stderr, "\nCouldn't swallow data item %s in file %s.\n",
            reason, fname);
    fputs("Please check that the file was transferred by the \"binary\" method\n",
          stderr);
    fputs("if obtained by ftp and that it hasn't been garbled or truncated\n",
          stderr);
    fputs("somehow in the installation process.\n\n", stderr);
  } else {
    /* no reason given: the file itself is missing */
    fprintf(stderr, "\nI can't find the file \"%s\".\n", fname);
    fputs("Please consult the README on the matter of the \"library\"\n",
          stderr);
    fputs("directory (LIBDIR) and make sure that Hanzim was installed\n",
          stderr);
    fputs("properly with all of the character database files.\n\n", stderr);
  }

  exit(1);
}

/* i_error() reports an internal program error on stderr, with the given
   message and a request to notify the author, then terminates the program
   with exit status 1. */
void
i_error(char *msg)
{
  fprintf(stderr, "\nInternal program error: %s -- exiting.\n", msg);
  fputs("Please notify author with text of error and description\n", stderr);
  fputs("of the circumstances if possible, thanks!\n\n", stderr);

  exit(1);
}


/****************************************************************************/
/* other miscellaneous */

/* dr() returns a single-precision random number uniform on [0,1), derived
   from rand() (because not all systems seem to implement such a function).

   The division is done in double with RAND_MAX + 1 as the divisor so that
   rand() == RAND_MAX does not map to 1.  Converting the quotient to float
   can still round values just below 1 up to exactly 1.0f, so the result is
   clamped to the largest float below 1 to keep the interval half-open. */
float
dr(void)
{
  float r = (float) (rand() / ((double) RAND_MAX + 1.0));

  if (r >= 1.0f)
    r = 0.99999994f;    /* 1 - 2^-24, the largest float below 1.0f */

  return(r);
}

Generated by  Doxygen 1.6.0   Back to index