2 Bacula® - The Network Backup Solution
4 Copyright (C) 2003-2010 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Bacula hash table routines
31 * htable is a hash table of items (pointers). This code is
32 * adapted and enhanced from code I wrote in 1982 for a
33 * relocatable linker. At that time, the hash table size
34 * was fixed and a primary number, which essentially provides
35 * the randomness. In this program, the hash table can grow when
36 * it gets too full, so the table size here is a binary number. The
37 * hashing is provided using an idea from Tcl where the initial
38 * hash code is "randomized" using a simple calculation from
39 * a random number generator that multiplies by a big number
40 * (I multiply by a prime number, while Tcl did not)
41 * then shifts the result down and does the binary division
42 * by masking. Increasing the size of the hash table is simple.
43 * Just create a new larger table, walk the old table and
44 * re-hash insert each entry into the new table.
47 * Kern Sibbald, July MMIII
55 static const int dbglvl = 500;
57 /* ===================================================================
63 * This subroutine gets a big buffer.
65 void htable::malloc_big_buf(int size)
69 hmem = (struct h_mem *)malloc(size);
72 hmem->next = mem_block;
74 hmem->mem = mem_block->first;
75 hmem->rem = (char *)hmem + size - hmem->mem;
76 Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
79 /* This routine frees the whole tree */
80 void htable::hash_big_free()
82 struct h_mem *hmem, *rel;
84 for (hmem=mem_block; hmem; ) {
87 Dmsg1(100, "free malloc buf=%p\n", rel);
95 * Normal hash malloc routine that gets a
96 * "small" buffer from the big buffer
98 char *htable::hash_malloc(int size)
102 int asize = BALIGN(size);
104 if (mem_block->rem < asize) {
106 if (total_size >= 1000000) {
111 malloc_big_buf(mb_size);
113 mem_block->rem -= asize;
114 buf = mem_block->mem;
115 mem_block->mem += asize;
120 return (char *)malloc(size);
128 * Create hash of key, stored in hash then
129 * create and return the pseudo random bucket index
131 void htable::hash_index(char *key)
134 for (char *p=key; *p; p++) {
135 // hash += (hash << 3) + (uint32_t)*p;
136 hash += ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
138 /* Multiply by large prime number, take top bits, mask for remainder */
139 index = ((hash * 1103515249) >> rshift) & mask;
140 Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index);
144 * tsize is the estimated number of entries in the hash table
146 htable::htable(void *item, void *link, int tsize)
148 init(item, link, tsize);
151 void htable::init(void *item, void *link, int tsize)
155 memset(this, 0, sizeof(htable));
160 for (pwr=0; tsize; pwr++) {
163 loffset = (char *)link - (char *)item;
164 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
165 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
166 buckets = 1<<pwr; /* hash table size -- power of two */
167 max_items = buckets * 4; /* allow average 4 entries per chain */
168 table = (hlink **)malloc(buckets * sizeof(hlink *));
169 memset(table, 0, buckets * sizeof(hlink *));
171 malloc_big_buf(1000000); /* ***FIXME*** need variable or some estimate */
175 uint32_t htable::size()
181 * Take each hash link and walk down the chain of items
182 * that hash there counting them (i.e. the hits),
183 * then report that number.
184 * Obiously, the more hits in a chain, the more time
185 * it takes to reference them. Empty chains are not so
186 * hot either -- as it means unused or wasted space.
195 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
196 printf("Hits/bucket: buckets\n");
197 for (i=0; i < MAX_COUNT; i++) {
200 for (i=0; i<(int)buckets; i++) {
204 p = (hlink *)(p->next);
214 for (i=0; i < MAX_COUNT; i++) {
215 printf("%2d: %d\n",i, hits[i]);
217 printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
218 printf("max hits in a bucket = %d\n", max);
220 printf("total bytes malloced = %d\n", total_size);
221 printf("total blocks malloced = %d\n", blocks);
225 void htable::grow_table()
227 Dmsg1(100, "Grow called old size = %d\n", buckets);
228 /* Setup a bigger table */
229 htable *big = (htable *)malloc(sizeof(htable));
230 memcpy(big, this, sizeof(htable)); /* start with original class data */
231 big->loffset = loffset;
232 big->mask = mask<<1 | 1;
233 big->rshift = rshift - 1;
235 big->buckets = buckets * 2;
236 big->max_items = big->buckets * 4;
237 /* Create a bigger hash table */
238 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
239 memset(big->table, 0, big->buckets * sizeof(hlink *));
242 /* Insert all the items in the new hash table */
243 Dmsg1(100, "Before copy num_items=%d\n", num_items);
245 * We walk through the old smaller tree getting items,
246 * but since we are overwriting the colision links, we must
247 * explicitly save the item->next pointer and walk each
248 * colision chain ourselves. We do use next() for getting
249 * to the next bucket.
251 for (void *item=first(); item; ) {
252 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
253 Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
254 big->insert(((hlink *)((char *)item+loffset))->key, item);
256 item = (void *)((char *)ni-loffset);
262 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
263 if (num_items != big->num_items) {
264 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
267 memcpy(this, big, sizeof(htable)); /* move everything across */
269 Dmsg0(100, "Exit grow.\n");
272 bool htable::insert(char *key, void *item)
276 return false; /* already exists */
278 ASSERT(index < buckets);
279 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
280 hp = (hlink *)(((char *)item)+loffset);
281 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
282 index, item, loffset);
283 hp->next = table[index];
287 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->key=%s\n",
288 hp->next, hp->hash, hp->key);
290 if (++num_items >= max_items) {
291 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
294 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
298 void *htable::lookup(char *key)
301 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
302 // Dmsg2(100, "hp=%p key=%s\n", hp, hp->key);
303 if (hash == hp->hash && strcmp(key, hp->key) == 0) {
304 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
305 return ((char *)hp)-loffset;
313 Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr);
315 walkptr = (hlink *)(walkptr->next);
317 while (!walkptr && walk_index < buckets) {
318 walkptr = table[walk_index++];
320 Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr,
321 walkptr->next, walk_index-1);
325 Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n",
326 ((char *)walkptr)-loffset, walk_index);
327 return ((char *)walkptr)-loffset;
329 Dmsg0(dbglvl, "next: return NULL\n");
333 void *htable::first()
335 Dmsg0(dbglvl, "Enter first\n");
336 walkptr = table[0]; /* get first bucket */
337 walk_index = 1; /* Point to next index */
338 while (!walkptr && walk_index < buckets) {
339 walkptr = table[walk_index++]; /* go to next bucket */
341 Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr,
342 walkptr->next, walk_index-1);
346 Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr);
347 return ((char *)walkptr)-loffset;
349 Dmsg0(dbglvl, "Leave first walkptr=NULL\n");
353 /* Destroy the table and its contents */
354 void htable::destroy()
371 Dmsg0(100, "Done destroy.\n");
383 #define NITEMS 5000000
389 MYJCR *save_jcr = NULL, *item;
393 jcrtbl = (htable *)malloc(sizeof(htable));
394 jcrtbl->init(jcr, &jcr->link, NITEMS);
396 Dmsg1(000, "Inserting %d items\n", NITEMS);
397 for (int i=0; i<NITEMS; i++) {
399 len = sprintf(mkey, "This is htable item %d", i) + 1;
401 jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
402 jcr->key = (char *)jcrtbl->hash_malloc(len);
403 memcpy(jcr->key, mkey, len);
404 Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
406 jcrtbl->insert(jcr->key, jcr);
411 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
412 printf("Bad news: %s not found.\n", save_jcr->key);
414 printf("Item 10's key is: %s\n", item->key);
418 printf("Walk the hash table:\n");
419 foreach_htable (jcr, jcrtbl) {
420 // printf("htable item = %s\n", jcr->key);
426 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
427 printf("Calling destroy\n");
431 printf("Freed jcrtbl\n");
433 sm_dump(false); /* unit test */