//
// aegis - project change supervisor
// Copyright (C) 1991-1995, 1998, 1999, 2001-2006, 2008 Peter Miller
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see
// .
//
//
// A literal pool is maintained. Each string has a reference count. The
// string stays in the literal pool for as long as it has a positive
// reference count. To determine if a string is already in the literal pool,
// linear dynamic hashing is used to guarantee an O(1) search. Making all equal
// strings the same item in the literal pool means that string equality is
// a pointer test, and thus very fast.
//
#include
#include
#include
#include
#include
//
// maximum conversion width for numbers
//
#define MAX_WIDTH 509
static string_ty **hash_table;
static str_hash_ty hash_modulus;
static str_hash_ty hash_mask;
static str_hash_ty hash_load;
#define MAX_HASH_LEN 20
//
// NAME
// hash_generate - hash string to number
//
// SYNOPSIS
// str_hash_ty hash_generate(char *s, size_t n);
//
// DESCRIPTION
// The hash_generate function is used to make a number from a string.
//
// RETURNS
// str_hash_ty - the magic number
//
// CAVEAT
// Only the last MAX_HASH_LEN characters are used.
// It is important that str_hash_ty be unsigned (int or long).
//
static str_hash_ty
hash_generate(const char *s, size_t n)
{
str_hash_ty retval;
if (n > MAX_HASH_LEN)
{
s += n - MAX_HASH_LEN;
n = MAX_HASH_LEN;
}
retval = 0;
while (n > 0)
{
retval = (retval + (retval << 1)) ^ *s++;
--n;
}
return retval;
}
static void
str_initialize(void)
{
str_hash_ty j;
hash_modulus = 1 << 8; // MUST be a power of 2
hash_mask = hash_modulus - 1;
hash_load = 0;
hash_table = (string_ty **)mem_alloc(hash_modulus * sizeof(string_ty *));
for (j = 0; j < hash_modulus; ++j)
hash_table[j] = 0;
}
void
str_release(void)
{
mem_free(hash_table);
hash_table = 0;
}
//
// NAME
// split - reduce table loading
//
// SYNOPSIS
// void split(void);
//
// DESCRIPTION
// The split function is used to reduce the load factor on the hash table.
//
// RETURNS
// void
//
// CAVEAT
// A load factor of about 80% is suggested.
//
static void
split(void)
{
string_ty **new_hash_table;
str_hash_ty new_hash_modulus;
str_hash_ty new_hash_mask;
str_hash_ty idx;
//
// double the modulus
//
// This is subtle. If we only increase the modulus by one, the
// load always hovers around 80%, so we have to do a split for
// every insert. I.e. thr malloc burden os O(n) for the lifetime of
// the program. BUT if we double the modulus, the length of time
// until the next split also doubles, making the probablity of a
// split halve, and sigma(2**-n)=1, so the malloc burden becomes O(1)
// for the lifetime of the program.
//
new_hash_modulus = hash_modulus * 2;
new_hash_table =
(string_ty **)mem_alloc(new_hash_modulus * sizeof(string_ty *));
new_hash_mask = new_hash_modulus - 1;
//
// now redistribute the list elements
//
for (idx = 0; idx < hash_modulus; ++idx)
{
string_ty *p;
new_hash_table[idx] = 0;
new_hash_table[idx + hash_modulus] = 0;
p = hash_table[idx];
while (p)
{
string_ty *p2;
str_hash_ty new_idx;
p2 = p;
p = p->str_next;
assert((p2->str_hash & hash_mask) == idx);
new_idx = p2->str_hash & new_hash_mask;
p2->str_next = new_hash_table[new_idx];
new_hash_table[new_idx] = p2;
}
}
mem_free(hash_table);
hash_table = new_hash_table;
hash_modulus = new_hash_modulus;
hash_mask = new_hash_mask;
}
string_ty *
str_from_c(const char *s)
{
return str_n_from_c(s, strlen(s));
}
string_ty *
str_n_from_c(const char *s, size_t length)
{
str_hash_ty hash;
str_hash_ty idx;
string_ty *p;
hash = hash_generate(s, length);
if (!hash_table)
str_initialize();
idx = hash & hash_mask;
for (p = hash_table[idx]; p; p = p->str_next)
{
if
(
p->str_hash == hash
&&
p->str_length == length
&&
!memcmp(p->str_text, s, length)
)
{
p->str_references++;
return p;
}
}
p = (string_ty *)mem_alloc(sizeof(string_ty) + length);
p->str_hash = hash;
p->str_length = length;
p->str_references = 1;
p->str_next = hash_table[idx];
hash_table[idx] = p;
memcpy(p->str_text, s, length);
p->str_text[length] = 0;
hash_load++;
if (hash_load * 10 > hash_modulus * 8)
split();
return p;
}
string_ty *
str_copy(string_ty *s)
{
s->str_references++;
return s;
}
void
str_free(string_ty *s)
{
str_hash_ty idx;
string_ty **spp;
if (!s)
return;
if (s->str_references > 1)
{
s->str_references--;
return;
}
//
// find the hash bucket it was in,
// and remove it
//
idx = s->str_hash & hash_mask;
for (spp = &hash_table[idx]; *spp; spp = &(*spp)->str_next)
{
if (*spp == s)
{
*spp = s->str_next;
mem_free(s);
--hash_load;
return;
}
}
//
// should never reach here!
//
#ifdef DEBUG
error_raw("attempted to free non-existent string (bug)");
abort();
#else
fatal_raw("attempted to free non-existent string (bug)");
#endif
}
int
str_validate(const string_ty *s)
{
str_hash_ty idx;
string_ty **spp;
if (!s)
return 0;
if (s->str_references == 0)
return 0;
idx = s->str_hash & hash_mask;
for (spp = &hash_table[idx]; *spp; spp = &(*spp)->str_next)
if (*spp == s)
return 1;
return 0;
}
void
slow_to_fast(const char *const *in, string_ty **out, size_t length)
{
size_t j;
if (out[0])
return;
for (j = 0; j < length; ++j)
out[j] = str_from_c(in[j]);
}