/*
 * Hash.c -- a simple string hashtable and utility allocation functions
 *
 * Morgan N. Price, March-May 2008
 *
 *  Copyright (C) 2008 The Regents of the University of California
 *  All rights reserved.
 * 
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 *  Disclaimer
 *
 *  NEITHER THE UNITED STATES NOR THE UNITED STATES DEPARTMENT OF ENERGY,
 *  NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED,
 *  OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
 *  COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
 *  OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
 *  PRIVATELY OWNED RIGHTS.
 */

#include "Hash.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * mymalloc -- allocate memory or terminate the program.
 *
 * size: number of bytes requested.
 *
 * Returns a non-NULL pointer to at least size bytes; release it with free().
 * If the allocation fails, a message is written to stderr and the program
 * aborts.  The check is an explicit test rather than an assert() so that it
 * is still performed when the file is compiled with NDEBUG.
 */
void *mymalloc(size_t size) {
  /* malloc(0) is allowed to return NULL even when memory is available;
     ask for at least one byte so that NULL always means failure. */
  void *p = malloc(size > 0 ? size : 1);
  if (p == NULL) {
    fprintf(stderr, "mymalloc: out of memory allocating %lu bytes\n",
            (unsigned long)size);
    abort();
  }
  return p;
}

/* Running count of occupied, non-matching buckets stepped over by
   FindMatch(); reset to 0 by MakeHashtable() before it inserts anything. */
int nHashCollisions;
hashstrings_t *MakeHashtable(char **strings, int nStrings) {
  hashstrings_t *hash = (hashstrings_t*)mymalloc(sizeof(hashstrings_t));
  hash->nBuckets = 8*nStrings+1;
  hash->buckets = (hashbucket_t*)mymalloc(sizeof(hashbucket_t) * hash->nBuckets);
  int i;
  for (i=0; i < hash->nBuckets; i++) {
    hash->buckets[i].string = NULL;
    hash->buckets[i].nCount = 0;
    hash->buckets[i].first = -1;
  }
  nHashCollisions = 0;
  for (i=0; i < nStrings; i++) {
    hashiterator_t hi = FindMatch(hash, strings[i]);
    if (hash->buckets[hi].string == NULL) {
      /* save a unique entry */
      assert(hash->buckets[hi].nCount == 0);
      hash->buckets[hi].string = strings[i];
      hash->buckets[hi].nCount = 1;
      hash->buckets[hi].first = i;
    } else {
      /* record a duplicate entry */
      assert(hash->buckets[hi].string != NULL);
      assert(strcmp(hash->buckets[hi].string, strings[i]) == 0);
      assert(hash->buckets[hi].first >= 0);
      hash->buckets[hi].nCount++;
    }
  }
  /*fprintf(stderr,"Hash %d items in %d buckets with %d collisions\n", nStrings, hash->nBuckets, nHashCollisions);*/
  return(hash);
}

/*
 * DeleteHashtable -- release a table created by MakeHashtable().
 *
 * Frees the bucket array and the table structure itself; the strings that
 * the buckets point to are not freed.  A NULL argument is accepted and
 * ignored.
 *
 * Always returns NULL, so a caller can write
 *   hash = DeleteHashtable(hash);
 */
hashstrings_t *DeleteHashtable(hashstrings_t* hash) {
  if (hash == NULL)
    return NULL;

  free(hash->buckets);
  free(hash);
  return NULL;
}

/* Polynomial XORed into the running CRC by FindMatch() */
#define ANDMASK 0x04c11db7

/*
 * FindMatch -- locate the bucket for a string.
 *
 * hash:   table to search.
 * string: NUL-terminated key.
 *
 * The key is hashed with a bitwise, most-significant-bit-first CRC32 and the
 * complemented CRC is reduced modulo the number of buckets.  From that
 * starting bucket the table is probed linearly (wrapping around at the end)
 * until a bucket is found that is either empty or holds an equal string.
 *
 * Returns the index of that bucket; use GetHashString() to tell a hit
 * (non-NULL) from a miss (NULL).
 *
 * Side effect: nHashCollisions is incremented once for every occupied,
 * non-matching bucket that is stepped over.
 *
 * Note: the probe only stops at an empty or matching bucket, so the table
 * must contain at least one empty bucket when the string is absent.
 */
hashiterator_t FindMatch(hashstrings_t *hash, char *string) {
  unsigned int crc = ~0u;
  char *cursor = string;

  /* CRC32, one input bit at a time */
  while (*cursor != '\0') {
    unsigned char byte = (unsigned char)*cursor;
    int remaining = 8;
    while (remaining-- > 0) {
      /* non-zero when the top bit of the input differs from the top bit of the CRC */
      unsigned int feedback = (byte >> 7) ^ (crc >> 31);
      crc <<= 1;
      if (feedback)
        crc ^= ANDMASK;
      byte <<= 1;
    }
    cursor++;
  }

  hashiterator_t slot = (~crc) % hash->nBuckets;

  /* linear probing with wrap-around */
  for (;;) {
    if (hash->buckets[slot].string == NULL)
      break;
    if (strcmp(hash->buckets[slot].string, string) == 0)
      break;
    nHashCollisions++;
    slot++;
    if (slot >= hash->nBuckets)
      slot = 0;
  }
  return slot;
}

/*
 * GetHashString -- string stored in bucket hi of hash.
 *
 * Returns the stored string pointer; MakeHashtable() leaves it NULL for
 * buckets that no string was placed in.  No bounds check is made on hi.
 */
char *GetHashString(hashstrings_t *hash, hashiterator_t hi) {
  return hash->buckets[hi].string;
}

/*
 * HashCount -- occurrence count recorded in bucket hi of hash.
 *
 * Returns the bucket's nCount field: MakeHashtable() sets it to 0 for an
 * empty bucket and otherwise to the number of input strings equal to the
 * one stored there.  No bounds check is made on hi.
 */
int HashCount(hashstrings_t *hash, hashiterator_t hi) {
  return hash->buckets[hi].nCount;
}

/*
 * HashFirst -- index of the first occurrence recorded in bucket hi of hash.
 *
 * Returns the bucket's first field: MakeHashtable() sets it to -1 for an
 * empty bucket and otherwise to the index, in the strings array it was given,
 * of the first string that landed in this bucket.  No bounds check is made
 * on hi.
 */
int HashFirst(hashstrings_t *hash, hashiterator_t hi) {
  return hash->buckets[hi].first;
}
