2005-06-22 18:20:32 +00:00

209 lines
5.2 KiB
C

/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: hash
* Author name: Zach Smith
* Create date: 01 Sep 00
* Purpose: Word-hash management. Words are put into a hash and an
* identifier is returned. This is used to save us from
* doing multiple mallocs for recurring strings such as
* 'the' and \par. This is not a big issue under Unix,
* but it is under other OSes and anyway, waste not want not.
*----------------------------------------------------------------------
* Changes:
* 08 Apr 01, tuorfa@yahoo.com: check for out of memory after malloc.
* 21 Apr 01, tuorfa@yahoo.com: signed to unsigned conversion bug
* 03 Aug 01, tuorfa@yahoo.com: fixes for using 16-bit compiler
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
* 08 Oct 03, daved@physiol.usyd.edu.au: some type fixes
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include "error.h"
#include "main.h"
#include "malloc.h"
/* One node in a bucket's singly linked list of stored words. */
typedef struct _hi HashItem;

struct _hi {
	HashItem *next;		/* following node in the same bucket, or NULL */
	char *str;		/* the word itself (a copy made by my_strdup) */
	unsigned long value;	/* identifier returned to callers for this word */
};
/*
 * Bucket heads. The bucket number is one byte of the word: its first
 * character, or its second character when the word begins with a
 * backslash (see hash_get_index).
 */
static HashItem *hash[256];

/* Number of items currently chained in each bucket. */
static unsigned long hash_length[256];

/* Running counter; its low 24 bits become the low part of each new identifier. */
static unsigned long hash_value = 0;
/*========================================================================
 * Name:	hash_init
 * Purpose:	Marks every bucket as empty and zeroes its item count.
 *		Items already in the table are not freed, and the
 *		identifier counter is left as it is.
 * Args:	None.
 * Returns:	None.
 *=======================================================================*/

void
hash_init ()
{
	int slot = 256;

	/* Walk the buckets from the last one down to the first. */
	while (slot-- > 0) {
		hash_length[slot] = 0;
		hash[slot] = NULL;
	}
}
/*========================================================================
 * Name:	hash_stats
 * Purpose:	Adds up the per-bucket item counts and reports the sum
 *		on stderr.
 * Args:	None.
 * Returns:	None.
 *=======================================================================*/

void
hash_stats ()
{
	const unsigned long *count;
	unsigned long word_count = 0;

	for (count = hash_length; count != hash_length + 256; ++count)
		word_count += *count;

	fprintf (stderr,"%lu words were hashed.\n", word_count);
}
/*========================================================================
 * Name:	hashitem_new
 * Purpose:	Creates a new linked list item for the hash table and
 *		assigns it a fresh identifier.
 * Args:	String (must not be NULL). The item stores a copy made
 *		with my_strdup, not the caller's pointer.
 * Returns:	HashItem with str and value filled in and next set to
 *		NULL. The caller links it into a bucket.
 * Notes:	The identifier is (bucket byte << 24) | 24-bit counter.
 *		The bucket byte is chosen exactly as in hash_get_index,
 *		so that hash_get_string can find the item again from
 *		the identifier alone.
 *=======================================================================*/

static HashItem *
hashitem_new (char *str)
{
	HashItem *hi;
	unsigned long bucket;

	hi = (HashItem*) my_malloc (sizeof (HashItem));
	if (!hi)
		error_handler ("out of memory");
	/* NOTE(review): error_handler presumably does not return; hi is
	 * dereferenced unconditionally below -- confirm.
	 */
	memset ((void*) hi, 0, sizeof (HashItem));

	hi->str = my_strdup (str);

	/* Read the character as unsigned char: a plain char >= 0x80 may be
	 * negative, and would sign-extend into the upper bits of the
	 * identifier where unsigned long is wider than 32 bits.
	 */
	bucket = (unsigned char) str[0];

	/* Control words are filed under the character after the backslash,
	 * but only when there is one -- the same rule hash_get_index uses.
	 * Without the check a lone "\" would be tagged with bucket 0 while
	 * being stored in the backslash bucket.
	 */
	if (bucket == '\\' && str[1] != '\0')
		bucket = (unsigned char) str[1];

	hi->value = (bucket << 24) | (hash_value++ & 0xffffff);
	hi->next = NULL;

#if 0
	if (debug_mode) {
		printf ("<!-- storing val %08lx str %s -->\n",
			hi->value, hi->str);
	}
#endif

	return hi;
}
/*========================================================================
 * Name:	hash_get_index
 * Purpose:	Looks a word up and returns its identifier ("index").
 *		A word that is not yet stored is added first.
 * Args:	String.
 * Returns:	Index.
 *=======================================================================*/

unsigned long
hash_get_index (char *str)
{
	unsigned char key;
	unsigned short bucket;
	HashItem *node;

	/* The bucket is chosen by the first character, or by the second
	 * one when the word starts with a backslash and has more to it.
	 */
	key = (unsigned char) str[0];
	if (key == '\\' && str[1] != '\0')
		key = (unsigned char) str[1];
	bucket = key;

	/* Already stored? */
	for (node = hash[bucket]; node != NULL; node = node->next) {
		if (strcmp (node->str, str) == 0)
			return node->value;
	}

	/* Not stored yet: create an item and put it at the bucket's head. */
	node = hashitem_new (str);
	node->next = hash[bucket];
	hash[bucket] = node;
	hash_length[bucket]++;

	return node->value;
}
/*========================================================================
 * Name:	hash_get_string
 * Purpose:	Given the index (word identifier) returns the word string.
 * Args:	Index, as returned by hash_get_index. Bits 24 and up
 *		select the bucket.
 * Returns:	String, or NULL if not found. The pointer is the one
 *		held by the table, not a copy. A warning is raised
 *		through warning_handler when nothing matches.
 *=======================================================================*/

char*
hash_get_string (unsigned long value)
{
	unsigned long index;
	HashItem *hi;

	index = value >> 24;

	/* An identifier with bits set above bit 31 (possible where
	 * unsigned long is wider than 32 bits) would name a bucket that
	 * does not exist; treat it as "not found" rather than reading
	 * past the end of the table.
	 */
	if (index < sizeof hash / sizeof hash[0]) {
		for (hi = hash[index]; hi; hi = hi->next) {
			if (hi->value == value)
				return hi->str;
		}
	}

	warning_handler ("word not in hash");
	return NULL;
}