//
// aegis - project change supervisor
// Copyright (C) 1991-1995, 1998, 1999, 2001-2006, 2008, 2012, 2014 Peter Miller
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
//
// A literal pool is maintained.  Each string has a reference count.  The
// string stays in the literal pool for as long as it has a positive
// reference count.  To determine if a string is already in the literal pool,
// linear dynamic hashing is used to guarantee an O(1) search.  Making all equal
// strings the same item in the literal pool means that string equality is
// a pointer test, and thus very fast.
//

#include <common/ac/assert.h>
#include <common/ac/stdlib.h>
#include <common/ac/string.h>

#include <common/debug.h>
#include <common/error.h>
#include <common/mem.h>
#include <common/str.h>


//
// maximum conversion width for numbers
//
#define MAX_WIDTH 509

static string_ty **hash_table;
static str_hash_ty hash_modulus;
static str_hash_ty hash_mask;
static str_hash_ty hash_load;

#define MAX_HASH_LEN 20


//
// NAME
//      hash_generate - hash string to number
//
// SYNOPSIS
//      str_hash_ty hash_generate(char *s, size_t n);
//
// DESCRIPTION
//      The hash_generate function is used to make a number from a string.
//
// RETURNS
//      str_hash_ty - the magic number
//
// CAVEAT
//      Only the last MAX_HASH_LEN characters are used.
//      It is important that str_hash_ty be unsigned (int or long).
//

static str_hash_ty
hash_generate(const char *s, size_t n)
{
    str_hash_ty     retval;

    if (n > MAX_HASH_LEN)
    {
        s += n - MAX_HASH_LEN;
        n = MAX_HASH_LEN;
    }

    retval = 0;
    while (n > 0)
    {
        retval = (retval + (retval << 1)) ^ *s++;
        --n;
    }
    return retval;
}


static void
str_initialize(void)
{
    str_hash_ty     j;

    hash_modulus = 1 << 8;      // MUST be a power of 2
    hash_mask = hash_modulus - 1;
    hash_load = 0;
    hash_table = (string_ty **)mem_alloc(hash_modulus * sizeof(string_ty *));
    for (j = 0; j < hash_modulus; ++j)
        hash_table[j] = 0;
}

void
str_release(void)
{
    mem_free(hash_table);
    hash_table = 0;
}

//
// NAME
//      split - reduce table loading
//
// SYNOPSIS
//      void split(void);
//
// DESCRIPTION
//      The split function is used to reduce the load factor on the hash table.
//
// RETURNS
//      void
//
// CAVEAT
//      A load factor of about 80% is suggested.
//

static void
split(void)
{
    string_ty       **new_hash_table;
    str_hash_ty     new_hash_modulus;
    str_hash_ty     new_hash_mask;
    str_hash_ty     idx;

    //
    // double the modulus
    //
    // This is subtle.  If we only increase the modulus by one, the
    // load always hovers around 80%, so we have to do a split for
    // every insert.  I.e. thr malloc burden os O(n) for the lifetime of
    // the program.  BUT if we double the modulus, the length of time
    // until the next split also doubles, making the probablity of a
    // split halve, and sigma(2**-n)=1, so the malloc burden becomes O(1)
    // for the lifetime of the program.
    //
    new_hash_modulus = hash_modulus * 2;
    new_hash_table =
        (string_ty **)mem_alloc(new_hash_modulus * sizeof(string_ty *));
    new_hash_mask = new_hash_modulus - 1;

    //
    // now redistribute the list elements
    //
    for (idx = 0; idx < hash_modulus; ++idx)
    {
        string_ty       *p;

        new_hash_table[idx] = 0;
        new_hash_table[idx + hash_modulus] = 0;
        p = hash_table[idx];
        while (p)
        {
            string_ty       *p2;
            str_hash_ty     new_idx;

            p2 = p;
            p = p->str_next;

            assert((p2->str_hash & hash_mask) == idx);
            new_idx = p2->str_hash & new_hash_mask;
            p2->str_next = new_hash_table[new_idx];
            new_hash_table[new_idx] = p2;
        }
    }
    mem_free(hash_table);
    hash_table = new_hash_table;
    hash_modulus = new_hash_modulus;
    hash_mask = new_hash_mask;
}


string_ty *
str_from_c(const char *s)
{
    return str_n_from_c(s, strlen(s));
}


string_ty *
str_n_from_c(const char *s, size_t length)
{
    str_hash_ty     hash;
    str_hash_ty     idx;
    string_ty       *p;

    hash = hash_generate(s, length);

    if (!hash_table)
        str_initialize();
    idx = hash & hash_mask;

    for (p = hash_table[idx]; p; p = p->str_next)
    {
        if
        (
            p->str_hash == hash
        &&
            p->str_length == length
        &&
            !memcmp(p->str_text, s, length)
        )
        {
            p->str_references++;
            return p;
        }
    }

    p = (string_ty *)mem_alloc(sizeof(string_ty) + length);
    p->str_hash = hash;
    p->str_length = length;
    p->str_references = 1;
    p->str_next = hash_table[idx];
    hash_table[idx] = p;
    memcpy(p->str_text, s, length);
    p->str_text[length] = 0;

    hash_load++;
    if (hash_load * 10 > hash_modulus * 8)
        split();
    return p;
}


string_ty *
str_copy(string_ty *s)
{
    assert(s->valid());
    s->str_references++;
    return s;
}


void
str_free(string_ty *s)
{
    str_hash_ty     idx;
    string_ty       **spp;

    if (!s)
        return;
    s->str_references--;
    if (s->str_references > 0)
    {
        return;
    }

    //
    // find the hash bucket it was in,
    // and remove it
    //
    idx = s->str_hash & hash_mask;
    for (spp = &hash_table[idx]; *spp; spp = &(*spp)->str_next)
    {
        if (*spp == s)
        {
            *spp = s->str_next;
            s->str_next = 0;
            --hash_load;
            mem_free(s);
            return;
        }
    }

    //
    // should never reach here!
    //
    assert(!"attempted to free non-existent string (bug)");
}


void
string_ty::validate(void)
    const
{
    assert(valid());
}


bool
string_ty::valid(void)
    const
{
    if (!this)
        return false;
    if (str_references == 0)
        return false;
    str_hash_ty idx = str_hash & hash_mask;
    for (string_ty **spp = &hash_table[idx]; *spp; spp = &(*spp)->str_next)
        if (*spp == this)
            return true;
    return false;
}


void
slow_to_fast(const char *const *in, string_ty **out, size_t length)
{
    size_t          j;

    if (out[0])
        return;
    for (j = 0; j < length; ++j)
        out[j] = str_from_c(in[j]);
}


// vim: set ts=8 sw=4 et :