2 Bacula® - The Network Backup Solution
4 Copyright (C) 2003-2008 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Bacula hash table routines
31 * htable is a hash table of items (pointers). This code is
32 * adapted and enhanced from code I wrote in 1982 for a
33 * relocatable linker. At that time, the hash table size
34 * was fixed and a primary number, which essentially provides
35 * the randomness. In this program, the hash table can grow when
36 * it gets too full, so the table size here is a binary number. The
37 * hashing is provided using an idea from Tcl where the initial
38 * hash code is "randomized" using a simple calculation from
39 * a random number generator that multiplies by a big number
40 * (I multiply by a prime number, while Tcl did not)
41 * then shifts the result down and does the binary division
42 * by masking. Increasing the size of the hash table is simple.
43 * Just create a new larger table, walk the old table and
44 * re-hash insert each entry into the new table.
47 * Kern Sibbald, July MMIII
57 static const int dbglvl = 500;
59 /* ===================================================================
65 * This subroutine gets a big buffer.
67 void htable::malloc_big_buf(int size)
71 hmem = (struct h_mem *)malloc(size);
74 hmem->next = mem_block;
76 hmem->mem = mem_block->first;
77 hmem->rem = (char *)hmem + size - hmem->mem;
78 Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
81 /* This routine frees the whole tree */
82 void htable::hash_big_free()
84 struct h_mem *hmem, *rel;
86 for (hmem=mem_block; hmem; ) {
89 Dmsg1(100, "free malloc buf=%p\n", rel);
97 * Normal hash malloc routine that gets a
98 * "small" buffer from the big buffer
100 char *htable::hash_malloc(int size)
104 int asize = BALIGN(size);
106 if (mem_block->rem < asize) {
108 if (total_size >= 1000000) {
113 malloc_big_buf(mb_size);
115 mem_block->rem -= asize;
116 buf = mem_block->mem;
117 mem_block->mem += asize;
122 return (char *)malloc(size);
130 * Create hash of key, stored in hash then
131 * create and return the pseudo random bucket index
133 void htable::hash_index(char *key)
136 for (char *p=key; *p; p++) {
137 // hash += (hash << 3) + (uint32_t)*p;
138 hash += ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
140 /* Multiply by large prime number, take top bits, mask for remainder */
141 index = ((hash * 1103515249) >> rshift) & mask;
142 Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index);
146 * tsize is the estimated number of entries in the hash table
148 htable::htable(void *item, void *link, int tsize)
150 init(item, link, tsize);
153 void htable::init(void *item, void *link, int tsize)
157 memset(this, 0, sizeof(htable));
162 for (pwr=0; tsize; pwr++) {
165 loffset = (char *)link - (char *)item;
166 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
167 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
168 buckets = 1<<pwr; /* hash table size -- power of two */
169 max_items = buckets * 4; /* allow average 4 entries per chain */
170 table = (hlink **)malloc(buckets * sizeof(hlink *));
171 memset(table, 0, buckets * sizeof(hlink *));
173 malloc_big_buf(1000000); /* ***FIXME*** need variable or some estimate */
177 uint32_t htable::size()
183 * Take each hash link and walk down the chain of items
184 * that hash there counting them (i.e. the hits),
185 * then report that number.
186 * Obiously, the more hits in a chain, the more time
187 * it takes to reference them. Empty chains are not so
188 * hot either -- as it means unused or wasted space.
197 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
198 printf("Hits/bucket: buckets\n");
199 for (i=0; i < MAX_COUNT; i++) {
202 for (i=0; i<(int)buckets; i++) {
206 p = (hlink *)(p->next);
216 for (i=0; i < MAX_COUNT; i++) {
217 printf("%2d: %d\n",i, hits[i]);
219 printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
220 printf("max hits in a bucket = %d\n", max);
222 printf("total bytes malloced = %d\n", total_size);
223 printf("total blocks malloced = %d\n", blocks);
227 void htable::grow_table()
229 Dmsg1(100, "Grow called old size = %d\n", buckets);
230 /* Setup a bigger table */
231 htable *big = (htable *)malloc(sizeof(htable));
232 memcpy(big, this, sizeof(htable)); /* start with original class data */
233 big->loffset = loffset;
234 big->mask = mask<<1 | 1;
235 big->rshift = rshift - 1;
237 big->buckets = buckets * 2;
238 big->max_items = big->buckets * 4;
239 /* Create a bigger hash table */
240 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
241 memset(big->table, 0, big->buckets * sizeof(hlink *));
244 /* Insert all the items in the new hash table */
245 Dmsg1(100, "Before copy num_items=%d\n", num_items);
247 * We walk through the old smaller tree getting items,
248 * but since we are overwriting the colision links, we must
249 * explicitly save the item->next pointer and walk each
250 * colision chain ourselves. We do use next() for getting
251 * to the next bucket.
253 for (void *item=first(); item; ) {
254 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
255 Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
256 big->insert(((hlink *)((char *)item+loffset))->key, item);
258 item = (void *)((char *)ni-loffset);
264 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
265 if (num_items != big->num_items) {
266 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
269 memcpy(this, big, sizeof(htable)); /* move everything across */
271 Dmsg0(100, "Exit grow.\n");
274 bool htable::insert(char *key, void *item)
278 return false; /* already exists */
280 ASSERT(index < buckets);
281 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
282 hp = (hlink *)(((char *)item)+loffset);
283 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
284 index, item, loffset);
285 hp->next = table[index];
289 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->key=%s\n",
290 hp->next, hp->hash, hp->key);
292 if (++num_items >= max_items) {
293 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
296 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
300 void *htable::lookup(char *key)
303 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
304 // Dmsg2(100, "hp=%p key=%s\n", hp, hp->key);
305 if (hash == hp->hash && strcmp(key, hp->key) == 0) {
306 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
307 return ((char *)hp)-loffset;
315 Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr);
317 walkptr = (hlink *)(walkptr->next);
319 while (!walkptr && walk_index < buckets) {
320 walkptr = table[walk_index++];
322 Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr,
323 walkptr->next, walk_index-1);
327 Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n",
328 ((char *)walkptr)-loffset, walk_index);
329 return ((char *)walkptr)-loffset;
331 Dmsg0(dbglvl, "next: return NULL\n");
335 void *htable::first()
337 Dmsg0(dbglvl, "Enter first\n");
338 walkptr = table[0]; /* get first bucket */
339 walk_index = 1; /* Point to next index */
340 while (!walkptr && walk_index < buckets) {
341 walkptr = table[walk_index++]; /* go to next bucket */
343 Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr,
344 walkptr->next, walk_index-1);
348 Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr);
349 return ((char *)walkptr)-loffset;
351 Dmsg0(dbglvl, "Leave first walkptr=NULL\n");
355 /* Destroy the table and its contents */
356 void htable::destroy()
373 Dmsg0(100, "Done destroy.\n");
385 #define NITEMS 5000000
391 MYJCR *save_jcr = NULL, *item;
395 jcrtbl = (htable *)malloc(sizeof(htable));
396 jcrtbl->init(jcr, &jcr->link, NITEMS);
398 Dmsg1(000, "Inserting %d items\n", NITEMS);
399 for (int i=0; i<NITEMS; i++) {
401 len = sprintf(mkey, "This is htable item %d", i) + 1;
403 jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
404 jcr->key = (char *)jcrtbl->hash_malloc(len);
405 memcpy(jcr->key, mkey, len);
406 Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
408 jcrtbl->insert(jcr->key, jcr);
413 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
414 printf("Bad news: %s not found.\n", save_jcr->key);
416 printf("Item 10's key is: %s\n", item->key);
420 printf("Walk the hash table:\n");
421 foreach_htable (jcr, jcrtbl) {
422 // printf("htable item = %s\n", jcr->key);
428 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
429 printf("Calling destroy\n");
433 printf("Freed jcrtbl\n");