// // aegis - project change supervisor // Copyright (C) 1991-1995, 1998, 1999, 2001-2006, 2008, 2012, 2014 Peter Miller // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or (at // your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . // // // A literal pool is maintained. Each string has a reference count. The // string stays in the literal pool for as long as it has a positive // reference count. To determine if a string is already in the literal pool, // linear dynamic hashing is used to guarantee an O(1) search. Making all equal // strings the same item in the literal pool means that string equality is // a pointer test, and thus very fast. // #include #include #include #include #include #include #include // // maximum conversion width for numbers // #define MAX_WIDTH 509 static string_ty **hash_table; static str_hash_ty hash_modulus; static str_hash_ty hash_mask; static str_hash_ty hash_load; #define MAX_HASH_LEN 20 // // NAME // hash_generate - hash string to number // // SYNOPSIS // str_hash_ty hash_generate(char *s, size_t n); // // DESCRIPTION // The hash_generate function is used to make a number from a string. // // RETURNS // str_hash_ty - the magic number // // CAVEAT // Only the last MAX_HASH_LEN characters are used. // It is important that str_hash_ty be unsigned (int or long). // static str_hash_ty hash_generate(const char *s, size_t n) { str_hash_ty retval; if (n > MAX_HASH_LEN) { s += n - MAX_HASH_LEN; n = MAX_HASH_LEN; } retval = 0; while (n > 0) { retval = (retval + (retval << 1)) ^ *s++; --n; } return retval; } static void str_initialize(void) { str_hash_ty j; hash_modulus = 1 << 8; // MUST be a power of 2 hash_mask = hash_modulus - 1; hash_load = 0; hash_table = (string_ty **)mem_alloc(hash_modulus * sizeof(string_ty *)); for (j = 0; j < hash_modulus; ++j) hash_table[j] = 0; } void str_release(void) { mem_free(hash_table); hash_table = 0; } // // NAME // split - reduce table loading // // SYNOPSIS // void split(void); // // DESCRIPTION // The split function is used to reduce the load factor on the hash table. // // RETURNS // void // // CAVEAT // A load factor of about 80% is suggested. // static void split(void) { string_ty **new_hash_table; str_hash_ty new_hash_modulus; str_hash_ty new_hash_mask; str_hash_ty idx; // // double the modulus // // This is subtle. If we only increase the modulus by one, the // load always hovers around 80%, so we have to do a split for // every insert. I.e. thr malloc burden os O(n) for the lifetime of // the program. BUT if we double the modulus, the length of time // until the next split also doubles, making the probablity of a // split halve, and sigma(2**-n)=1, so the malloc burden becomes O(1) // for the lifetime of the program. // new_hash_modulus = hash_modulus * 2; new_hash_table = (string_ty **)mem_alloc(new_hash_modulus * sizeof(string_ty *)); new_hash_mask = new_hash_modulus - 1; // // now redistribute the list elements // for (idx = 0; idx < hash_modulus; ++idx) { string_ty *p; new_hash_table[idx] = 0; new_hash_table[idx + hash_modulus] = 0; p = hash_table[idx]; while (p) { string_ty *p2; str_hash_ty new_idx; p2 = p; p = p->str_next; assert((p2->str_hash & hash_mask) == idx); new_idx = p2->str_hash & new_hash_mask; p2->str_next = new_hash_table[new_idx]; new_hash_table[new_idx] = p2; } } mem_free(hash_table); hash_table = new_hash_table; hash_modulus = new_hash_modulus; hash_mask = new_hash_mask; } string_ty * str_from_c(const char *s) { return str_n_from_c(s, strlen(s)); } string_ty * str_n_from_c(const char *s, size_t length) { str_hash_ty hash; str_hash_ty idx; string_ty *p; hash = hash_generate(s, length); if (!hash_table) str_initialize(); idx = hash & hash_mask; for (p = hash_table[idx]; p; p = p->str_next) { if ( p->str_hash == hash && p->str_length == length && !memcmp(p->str_text, s, length) ) { p->str_references++; return p; } } p = (string_ty *)mem_alloc(sizeof(string_ty) + length); p->str_hash = hash; p->str_length = length; p->str_references = 1; p->str_next = hash_table[idx]; hash_table[idx] = p; memcpy(p->str_text, s, length); p->str_text[length] = 0; hash_load++; if (hash_load * 10 > hash_modulus * 8) split(); return p; } string_ty * str_copy(string_ty *s) { assert(s->valid()); s->str_references++; return s; } void str_free(string_ty *s) { str_hash_ty idx; string_ty **spp; if (!s) return; s->str_references--; if (s->str_references > 0) { return; } // // find the hash bucket it was in, // and remove it // idx = s->str_hash & hash_mask; for (spp = &hash_table[idx]; *spp; spp = &(*spp)->str_next) { if (*spp == s) { *spp = s->str_next; s->str_next = 0; --hash_load; mem_free(s); return; } } // // should never reach here! // assert(!"attempted to free non-existent string (bug)"); } void string_ty::validate(void) const { assert(valid()); } bool string_ty::valid(void) const { if (!this) return false; if (str_references == 0) return false; str_hash_ty idx = str_hash & hash_mask; for (string_ty **spp = &hash_table[idx]; *spp; spp = &(*spp)->str_next) if (*spp == this) return true; return false; } void slow_to_fast(const char *const *in, string_ty **out, size_t length) { size_t j; if (out[0]) return; for (j = 0; j < length; ++j) out[j] = str_from_c(in[j]); } // vim: set ts=8 sw=4 et :