X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Flib%2Fhtable.c;h=a42ec022d0c82e5d30156a1ae70deeeb90cc3dd7;hb=897707854a8240d026e933215009f931bb9c5762;hp=7bd134ca23622425dbe02a248059f15a387d8bc9;hpb=ce6f02dc344d30a148017aace7f4bb630f0e7418;p=bacula%2Fbacula diff --git a/bacula/src/lib/htable.c b/bacula/src/lib/htable.c index 7bd134ca23..a42ec022d0 100644 --- a/bacula/src/lib/htable.c +++ b/bacula/src/lib/htable.c @@ -1,3 +1,30 @@ +/* + Bacula® - The Network Backup Solution + + Copyright (C) 2003-2010 Free Software Foundation Europe e.V. + + The main author of Bacula is Kern Sibbald, with contributions from + many others, a complete list can be found in the file AUTHORS. + This program is Free Software; you can redistribute it and/or + modify it under the terms of version three of the GNU Affero General Public + License as published by the Free Software Foundation and included + in the file LICENSE. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + Bacula® is a registered trademark of Kern Sibbald. + The licensor of Bacula is the Free Software Foundation Europe + (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich, + Switzerland, email:ftf@fsfeurope.org. +*/ /* * Bacula hash table routines * @@ -8,47 +35,97 @@ * the randomness. In this program, the hash table can grow when * it gets too full, so the table size here is a binary number. The * hashing is provided using an idea from Tcl where the initial - * hash code is then "randomized" using a simple calculation from + * hash code is "randomized" using a simple calculation from * a random number generator that multiplies by a big number * (I multiply by a prime number, while Tcl did not) - * then shifts the results down and does the binary division + * then shifts the result down and does the binary division * by masking. Increasing the size of the hash table is simple. * Just create a new larger table, walk the old table and * re-hash insert each entry into the new table. * - * - * Kern Sibbald, July MMIII * - * Version $Id$ + * Kern Sibbald, July MMIII * */ -/* - Copyright (C) 2000-2003 Kern Sibbald and John Walker - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. +#include "bacula.h" +#include "htable.h" + +#define PAGE_SIZE 4096 +#define MAX_PAGES 2400 +#define MAX_BUF_SIZE (MAX_PAGES * PAGE_SIZE) /* approx 10MB */ - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. +static const int dbglvl = 500; - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, - MA 02111-1307, USA. +/* =================================================================== + * htable + */ +#ifdef BIG_MALLOC +/* + * This subroutine gets a big buffer. */ +void htable::malloc_big_buf(int size) +{ + struct h_mem *hmem; + + hmem = (struct h_mem *)malloc(size); + total_size += size; + blocks++; + hmem->next = mem_block; + mem_block = hmem; + hmem->mem = mem_block->first; + hmem->rem = (char *)hmem + size - hmem->mem; + Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem); +} -#include "bacula.h" +/* This routine frees the whole tree */ +void htable::hash_big_free() +{ + struct h_mem *hmem, *rel; -#include "htable.h" + for (hmem=mem_block; hmem; ) { + rel = hmem; + hmem = hmem->next; + Dmsg1(100, "free malloc buf=%p\n", rel); + free(rel); + } +} -/* =================================================================== - * htable +#endif + +/* + * Normal hash malloc routine that gets a + * "small" buffer from the big buffer */ +char *htable::hash_malloc(int size) +{ +#ifdef BIG_MALLOC + char *buf; + int asize = BALIGN(size); + + if (mem_block->rem < asize) { + uint32_t mb_size; + if (total_size >= (MAX_BUF_SIZE / 2)) { + mb_size = MAX_BUF_SIZE; + } else { + mb_size = MAX_BUF_SIZE / 2; + } + malloc_big_buf(mb_size); + } + mem_block->rem -= asize; + buf = mem_block->mem; + mem_block->mem += asize; + return buf; +#else + total_size += size; + blocks++; + return (char *)malloc(size); +#endif +} + + + /* * Create hash of key, stored in hash then @@ -58,14 +135,17 @@ void htable::hash_index(char *key) { hash = 0; for (char *p=key; *p; p++) { - hash += (hash << 3) + (uint32_t)*p; +// hash += (hash << 3) + (uint32_t)*p; + hash += ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p; } /* Multiply by large prime number, take top bits, mask for remainder */ index = ((hash * 1103515249) >> rshift) & mask; - Dmsg2(100, "Leave hash_index hash=0x%x index=%d\n", hash, index); - return; + Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index); } +/* + * tsize is the estimated number of entries in the hash table + */ htable::htable(void *item, void *link, int tsize) { init(item, link, tsize); @@ -74,31 +154,25 @@ htable::htable(void *item, void *link, int tsize) void htable::init(void *item, void *link, int tsize) { int pwr; + + memset(this, 0, sizeof(htable)); + if (tsize < 31) { + tsize = 31; + } tsize >>= 2; for (pwr=0; tsize; pwr++) { tsize >>= 1; } loffset = (char *)link - (char *)item; - mask = ~((~0)< table size = 8 */ - rshift = 30 - pwr; /* start using bits 28, 29, 30 */ - num_items = 0; /* number of entries in table */ - buckets = 1< table size = 8 */ + rshift = 30 - pwr; /* start using bits 28, 29, 30 */ + buckets = 1<destroy(); - free(tbl); +#ifdef BIG_MALLOC + malloc_big_buf(MAX_BUF_SIZE); /* ***FIXME*** need variable or some estimate */ +#endif } uint32_t htable::size() @@ -106,34 +180,49 @@ uint32_t htable::size() return num_items; } -void htable::stats() +/* + * Take each hash link and walk down the chain of items + * that hash there counting them (i.e. the hits), + * then report that number. + * Obiously, the more hits in a chain, the more time + * it takes to reference them. Empty chains are not so + * hot either -- as it means unused or wasted space. + */ +#define MAX_COUNT 20 +void htable::stats() { - int count[10]; + int hits[MAX_COUNT]; int max = 0; int i, j; hlink *p; - printf("\n\nNumItems=%d\nBuckets=%d\n", num_items, buckets); - for (i=0; i<10; i++) { - count[i] = 0; + printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets); + printf("Hits/bucket: buckets\n"); + for (i=0; i < MAX_COUNT; i++) { + hits[i] = 0; } for (i=0; i<(int)buckets; i++) { p = table[i]; - j = 0; + j = 0; while (p) { - p = (hlink *)(p->next); - j++; + p = (hlink *)(p->next); + j++; } if (j > max) { - max = j; + max = j; } - if (j < 10) { - count[j]++; + if (j < MAX_COUNT) { + hits[j]++; } } - for (i=0; i<10; i++) { - printf("%2d: %d\n",i, count[i]); + for (i=0; i < MAX_COUNT; i++) { + printf("%2d: %d\n",i, hits[i]); } - printf("max = %d\n", max); + printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items); + printf("max hits in a bucket = %d\n", max); +#ifdef BIG_MALLOC + printf("total bytes malloced = %d\n", total_size); + printf("total blocks malloced = %d\n", blocks); +#endif } void htable::grow_table() @@ -141,19 +230,21 @@ void htable::grow_table() Dmsg1(100, "Grow called old size = %d\n", buckets); /* Setup a bigger table */ htable *big = (htable *)malloc(sizeof(htable)); + memcpy(big, this, sizeof(htable)); /* start with original class data */ big->loffset = loffset; big->mask = mask<<1 | 1; big->rshift = rshift - 1; big->num_items = 0; big->buckets = buckets * 2; big->max_items = big->buckets * 4; + /* Create a bigger hash table */ big->table = (hlink **)malloc(big->buckets * sizeof(hlink *)); memset(big->table, 0, big->buckets * sizeof(hlink *)); big->walkptr = NULL; big->walk_index = 0; /* Insert all the items in the new hash table */ Dmsg1(100, "Before copy num_items=%d\n", num_items); - /* + /* * We walk through the old smaller tree getting items, * but since we are overwriting the colision links, we must * explicitly save the item->next pointer and walk each @@ -165,10 +256,10 @@ void htable::grow_table() Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key); big->insert(((hlink *)((char *)item+loffset))->key, item); if (ni) { - item = (void *)((char *)ni-loffset); + item = (void *)((char *)ni-loffset); } else { - walkptr = NULL; - item = next(); + walkptr = NULL; + item = next(); } } Dmsg1(100, "After copy new num_items=%d\n", big->num_items); @@ -185,27 +276,25 @@ bool htable::insert(char *key, void *item) { hlink *hp; if (lookup(key)) { - return false; /* already exists */ + return false; /* already exists */ } - sm_check(__FILE__, __LINE__, False); ASSERT(index < buckets); - Dmsg2(100, "Insert: hash=0x%x index=%d\n", (unsigned)hash, index); + Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index); hp = (hlink *)(((char *)item)+loffset); - Dmsg4(100, "Insert hp=0x%x index=%d item=0x%x offset=%u\n", (unsigned)hp, - index, (unsigned)item, loffset); + Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp, + index, item, loffset); hp->next = table[index]; hp->hash = hash; hp->key = key; table[index] = hp; - Dmsg3(100, "Insert hp->next=0x%x hp->hash=0x%x hp->key=%s\n", - (unsigned)hp->next, hp->hash, hp->key); + Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->key=%s\n", + hp->next, hp->hash, hp->key); if (++num_items >= max_items) { - Dmsg2(100, "num_items=%d max_items=%d\n", num_items, max_items); + Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items); grow_table(); } - sm_check(__FILE__, __LINE__, False); - Dmsg3(100, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key); + Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key); return true; } @@ -213,10 +302,10 @@ void *htable::lookup(char *key) { hash_index(key); for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) { -// Dmsg2(100, "hp=0x%x key=%s\n", (long)hp, hp->key); +// Dmsg2(100, "hp=%p key=%s\n", hp, hp->key); if (hash == hp->hash && strcmp(key, hp->key) == 0) { - Dmsg1(100, "lookup return %x\n", ((char *)hp)-loffset); - return ((char *)hp)-loffset; + Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset); + return ((char *)hp)-loffset; } } return NULL; @@ -224,56 +313,61 @@ void *htable::lookup(char *key) void *htable::next() { - Dmsg1(100, "Enter next: walkptr=0x%x\n", (unsigned)walkptr); + Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr); if (walkptr) { walkptr = (hlink *)(walkptr->next); } while (!walkptr && walk_index < buckets) { walkptr = table[walk_index++]; if (walkptr) { - Dmsg3(100, "new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr, - (unsigned)(walkptr->next), walk_index-1); + Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr, + walkptr->next, walk_index-1); } } if (walkptr) { - Dmsg2(100, "next: rtn 0x%x walk_index=%d\n", - (unsigned)(((char *)walkptr)-loffset), walk_index); + Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n", + ((char *)walkptr)-loffset, walk_index); return ((char *)walkptr)-loffset; - } - Dmsg0(100, "next: return NULL\n"); + } + Dmsg0(dbglvl, "next: return NULL\n"); return NULL; } void *htable::first() { - Dmsg0(100, "Enter first\n"); - walkptr = table[0]; /* get first bucket */ - walk_index = 1; /* Point to next index */ + Dmsg0(dbglvl, "Enter first\n"); + walkptr = table[0]; /* get first bucket */ + walk_index = 1; /* Point to next index */ while (!walkptr && walk_index < buckets) { walkptr = table[walk_index++]; /* go to next bucket */ if (walkptr) { - Dmsg3(100, "first new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr, - (unsigned)(walkptr->next), walk_index-1); + Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr, + walkptr->next, walk_index-1); } } if (walkptr) { - Dmsg1(100, "Leave first walkptr=0x%x\n", (unsigned)walkptr); + Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr); return ((char *)walkptr)-loffset; - } - Dmsg0(100, "Leave first walkptr=NULL\n"); + } + Dmsg0(dbglvl, "Leave first walkptr=NULL\n"); return NULL; } /* Destroy the table and its contents */ void htable::destroy() { +#ifdef BIG_MALLOC + hash_big_free(); +#else void *ni; void *li = first(); - do { + + while (li) { ni = next(); free(li); - li = ni; - } while (ni); + li=ni; + } +#endif free(table); table = NULL; @@ -289,7 +383,7 @@ struct MYJCR { hlink link; }; -#define NITEMS 10000 +#define NITEMS 5000000 int main() { @@ -298,20 +392,23 @@ int main() MYJCR *save_jcr = NULL, *item; MYJCR *jcr = NULL; int count = 0; - + jcrtbl = (htable *)malloc(sizeof(htable)); jcrtbl->init(jcr, &jcr->link, NITEMS); - + Dmsg1(000, "Inserting %d items\n", NITEMS); for (int i=0; ilink, (unsigned)jcr); - jcr->key = bstrdup(mkey); + int len; + len = sprintf(mkey, "This is htable item %d", i) + 1; + + jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR)); + jcr->key = (char *)jcrtbl->hash_malloc(len); + memcpy(jcr->key, mkey, len); + Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr); jcrtbl->insert(jcr->key, jcr); if (i == 10) { - save_jcr = jcr; + save_jcr = jcr; } } if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) { @@ -322,9 +419,11 @@ int main() jcrtbl->stats(); printf("Walk the hash table:\n"); - for (MYJCR *jcr=(MYJCR *)jcrtbl->first(); jcr; jcr=(MYJCR *)jcrtbl->next() ) { + foreach_htable (jcr, jcrtbl) { // printf("htable item = %s\n", jcr->key); +#ifndef BIG_MALLOC free(jcr->key); +#endif count++; } printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***"); @@ -334,7 +433,7 @@ int main() free(jcrtbl); printf("Freed jcrtbl\n"); - sm_dump(False); + sm_dump(false); /* unit test */ } #endif