2 Bacula® - The Network Backup Solution
4 Copyright (C) 2003-2008 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Bacula hash table routines
31 * htable is a hash table of items (pointers). This code is
32 * adapted and enhanced from code I wrote in 1982 for a
33 * relocatable linker. At that time, the hash table size
34 * was fixed and a primary number, which essentially provides
35 * the randomness. In this program, the hash table can grow when
36 * it gets too full, so the table size here is a binary number. The
37 * hashing is provided using an idea from Tcl where the initial
38 * hash code is "randomized" using a simple calculation from
39 * a random number generator that multiplies by a big number
40 * (I multiply by a prime number, while Tcl did not)
41 * then shifts the result down and does the binary division
42 * by masking. Increasing the size of the hash table is simple.
43 * Just create a new larger table, walk the old table and
44 * re-hash insert each entry into the new table.
47 * Kern Sibbald, July MMIII
57 /* ===================================================================
63 * This subroutine gets a big buffer.
65 void htable::malloc_buf(int size)
69 hmem = (struct h_mem *)malloc(size);
72 hmem->next = this->mem;
74 hmem->mem = mem->first;
75 hmem->rem = (char *)hmem + size - hmem->mem;
76 Dmsg2(200, "malloc buf size=%d rem=%d\n", size, hmem->rem);
79 /* This routine frees the whole tree */
80 void htable::hash_free()
82 struct h_mem *hmem, *rel;
84 for (hmem=mem; hmem; ) {
93 char *htable::hash_malloc(int size)
97 int asize = BALIGN(size);
99 if (mem->rem < asize) {
101 if (total_size >= 1000000) {
115 return (char *)malloc(size);
123 * Create hash of key, stored in hash then
124 * create and return the pseudo random bucket index
126 void htable::hash_index(char *key)
129 for (char *p=key; *p; p++) {
130 // hash += (hash << 3) + (uint32_t)*p;
131 hash += ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
133 /* Multiply by large prime number, take top bits, mask for remainder */
134 index = ((hash * 1103515249) >> rshift) & mask;
135 Dmsg2(100, "Leave hash_index hash=0x%x index=%d\n", hash, index);
139 * tsize is the estimated number of entries in the hash table
141 htable::htable(void *item, void *link, int tsize)
143 init(item, link, tsize);
146 void htable::init(void *item, void *link, int tsize)
154 for (pwr=0; tsize; pwr++) {
157 loffset = (char *)link - (char *)item;
158 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
159 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
160 num_items = 0; /* number of entries in table */
161 buckets = 1<<pwr; /* hash table size -- power of two */
162 max_items = buckets * 4; /* allow average 4 entries per chain */
163 table = (hlink **)malloc(buckets * sizeof(hlink *));
164 memset(table, 0, buckets * sizeof(hlink *));
171 malloc_buf(1000000); /* ***FIXME*** need variable or some estimate */
175 uint32_t htable::size()
181 * Take each hash link and walk down the chain of items
182 * that hash there counting them (i.e. the hits),
183 * then report that number.
184 * Obiously, the more hits in a chain, the more time
185 * it takes to reference them. Empty chains are not so
186 * hot either -- as it means unused or wasted space.
195 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
196 printf("Hits/bucket: buckets\n");
197 for (i=0; i < MAX_COUNT; i++) {
200 for (i=0; i<(int)buckets; i++) {
204 p = (hlink *)(p->next);
214 for (i=0; i < MAX_COUNT; i++) {
215 printf("%2d: %d\n",i, hits[i]);
217 printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
218 printf("max hits in a bucket = %d\n", max);
220 printf("total bytes malloced = %d\n", total_size);
221 printf("total blocks malloced = %d\n", blocks);
225 void htable::grow_table()
227 Dmsg1(100, "Grow called old size = %d\n", buckets);
228 /* Setup a bigger table */
229 htable *big = (htable *)malloc(sizeof(htable));
230 big->loffset = loffset;
231 big->mask = mask<<1 | 1;
232 big->rshift = rshift - 1;
234 big->buckets = buckets * 2;
235 big->max_items = big->buckets * 4;
236 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
237 memset(big->table, 0, big->buckets * sizeof(hlink *));
240 /* Insert all the items in the new hash table */
241 Dmsg1(100, "Before copy num_items=%d\n", num_items);
243 * We walk through the old smaller tree getting items,
244 * but since we are overwriting the colision links, we must
245 * explicitly save the item->next pointer and walk each
246 * colision chain ourselves. We do use next() for getting
247 * to the next bucket.
249 for (void *item=first(); item; ) {
250 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
251 Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
252 big->insert(((hlink *)((char *)item+loffset))->key, item);
254 item = (void *)((char *)ni-loffset);
260 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
261 if (num_items != big->num_items) {
262 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
265 memcpy(this, big, sizeof(htable)); /* move everything across */
267 Dmsg0(100, "Exit grow.\n");
270 bool htable::insert(char *key, void *item)
274 return false; /* already exists */
276 ASSERT(index < buckets);
277 Dmsg2(100, "Insert: hash=%p index=%d\n", hash, index);
278 hp = (hlink *)(((char *)item)+loffset);
279 Dmsg4(100, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
280 index, item, loffset);
281 hp->next = table[index];
285 Dmsg3(100, "Insert hp->next=%p hp->hash=0x%x hp->key=%s\n",
286 hp->next, hp->hash, hp->key);
288 if (++num_items >= max_items) {
289 Dmsg2(100, "num_items=%d max_items=%d\n", num_items, max_items);
292 Dmsg3(100, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
296 void *htable::lookup(char *key)
299 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
300 // Dmsg2(100, "hp=%p key=%s\n", hp, hp->key);
301 if (hash == hp->hash && strcmp(key, hp->key) == 0) {
302 Dmsg1(100, "lookup return %p\n", ((char *)hp)-loffset);
303 return ((char *)hp)-loffset;
311 Dmsg1(100, "Enter next: walkptr=%p\n", walkptr);
313 walkptr = (hlink *)(walkptr->next);
315 while (!walkptr && walk_index < buckets) {
316 walkptr = table[walk_index++];
318 Dmsg3(100, "new walkptr=%p next=%p inx=%d\n", walkptr,
319 walkptr->next, walk_index-1);
323 Dmsg2(100, "next: rtn %p walk_index=%d\n",
324 ((char *)walkptr)-loffset, walk_index);
325 return ((char *)walkptr)-loffset;
327 Dmsg0(100, "next: return NULL\n");
331 void *htable::first()
333 Dmsg0(100, "Enter first\n");
334 walkptr = table[0]; /* get first bucket */
335 walk_index = 1; /* Point to next index */
336 while (!walkptr && walk_index < buckets) {
337 walkptr = table[walk_index++]; /* go to next bucket */
339 Dmsg3(100, "first new walkptr=%p next=%p inx=%d\n", walkptr,
340 walkptr->next, walk_index-1);
344 Dmsg1(100, "Leave first walkptr=%p\n", walkptr);
345 return ((char *)walkptr)-loffset;
347 Dmsg0(100, "Leave first walkptr=NULL\n");
351 /* Destroy the table and its contents */
352 void htable::destroy()
369 Dmsg0(100, "Done destroy.\n");
381 #define NITEMS 5000000
387 MYJCR *save_jcr = NULL, *item;
391 jcrtbl = (htable *)malloc(sizeof(htable));
392 jcrtbl->init(jcr, &jcr->link, NITEMS);
394 Dmsg1(000, "Inserting %d items\n", NITEMS);
395 for (int i=0; i<NITEMS; i++) {
397 len = sprintf(mkey, "This is htable item %d", i) + 1;
399 jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
400 jcr->key = (char *)jcrtbl->hash_malloc(len);
401 memcpy(jcr->key, mkey, len);
402 Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
404 jcrtbl->insert(jcr->key, jcr);
409 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
410 printf("Bad news: %s not found.\n", save_jcr->key);
412 printf("Item 10's key is: %s\n", item->key);
416 printf("Walk the hash table:\n");
417 foreach_htable (jcr, jcrtbl) {
418 // printf("htable item = %s\n", jcr->key);
424 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
425 printf("Calling destroy\n");
429 printf("Freed jcrtbl\n");