1 /*************************************************************************************************
2 * The B+ tree database API of Tokyo Cabinet
3 * Copyright (C) 2006-2008 Mikio Hirabayashi
4 * This file is part of Tokyo Cabinet.
5 * Tokyo Cabinet is free software; you can redistribute it and/or modify it under the terms of
6 * the GNU Lesser General Public License as published by the Free Software Foundation; either
7 * version 2.1 of the License or any later version. Tokyo Cabinet is distributed in the hope
8 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
10 * License for more details.
11 * You should have received a copy of the GNU Lesser General Public License along with Tokyo
12 * Cabinet; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13 * Boston, MA 02111-1307 USA.
14 *************************************************************************************************/
22 #define BDBFILEMODE 00644 // permission of created files
23 #define BDBOPAQUESIZ 64 // size of using opaque field
24 #define BDBPAGEBUFSIZ 32768 // size of a buffer to read each page
25 #define BDBNODEIDBASE ((1LL<<48)+1) // base number of node ID
26 #define BDBLEVELMAX 64 // max level of B+ tree
27 #define BDBCACHEOUT 8 // number of pages in a process of cacheout
29 #define BDBDEFLMEMB 128 // default number of members in each leaf
30 #define BDBMINLMEMB 4 // minimum number of members in each leaf
31 #define BDBDEFNMEMB 256 // default number of members in each node
32 #define BDBMINNMEMB 4 // minimum number of members in each node
33 #define BDBDEFBNUM 16381 // default bucket number
34 #define BDBDEFAPOW 8 // default alignment power
35 #define BDBDEFFPOW 10 // default free block pool power
36 #define BDBDEFLCNUM 1024 // default number of leaf cache
37 #define BDBDEFNCNUM 512 // default number of node cache
39 typedef struct { // type of structure for a record
40 char *kbuf; // pointer to the key region
41 int ksiz; // size of the key region
42 char *vbuf; // pointer to the value region
43 int vsiz; // size of the value region
44 TCLIST *rest; // list of value objects
47 typedef struct { // type of structure for a leaf page
48 uint64_t id; // ID number of the leaf
49 TCLIST *recs; // list of records
50 uint64_t prev; // ID number of the previous leaf
51 uint64_t next; // ID number of the next leaf
52 bool dirty; // whether to be written back
53 bool dead; // whether to be removed
56 typedef struct { // type of structure for a page index
57 uint64_t pid; // ID number of the referring page
58 char *kbuf; // pointer to the key region
59 int ksiz; // size of the key region
62 typedef struct { // type of structure for a node page
63 uint64_t id; // ID number of the node
64 uint64_t heir; // ID of the child before the first index
65 TCLIST *idxs; // list of indexes
66 bool dirty; // whether to be written back
69 enum { // enumeration for duplication behavior
70 BDBPDOVER, // overwrite an existing value
71 BDBPDKEEP, // keep the existing value
72 BDBPDCAT, // concatenate values
73 BDBPDDUP, // allow duplication of keys
74 BDBPDDUPB, // allow backward duplication
79 #define BDBLOCKMETHOD(TC_bdb, TC_wr) \
80 ((TC_bdb)->mmtx ? tcbdblockmethod((TC_bdb), (TC_wr)) : true)
81 #define BDBUNLOCKMETHOD(TC_bdb) \
82 ((TC_bdb)->mmtx ? tcbdbunlockmethod(TC_bdb) : true)
83 #define BDBLOCKCACHE(TC_bdb) \
84 ((TC_bdb)->mmtx ? tcbdblockcache(TC_bdb) : true)
85 #define BDBUNLOCKCACHE(TC_bdb) \
86 ((TC_bdb)->mmtx ? tcbdbunlockcache(TC_bdb) : true)
87 #define BDBLOCKTRAN(TC_bdb) \
88 ((TC_bdb)->mmtx ? tcbdblocktran(TC_bdb) : true)
89 #define BDBUNLOCKTRAN(TC_bdb) \
90 ((TC_bdb)->mmtx ? tcbdbunlocktran(TC_bdb) : true)
93 /* private function prototypes */
94 static void tcbdbclear(TCBDB *bdb);
95 static void tcdumpmeta(TCBDB *bdb);
96 static void tcloadmeta(TCBDB *bdb);
97 static BDBLEAF *tcbdbleafnew(TCBDB *bdb, uint64_t prev, uint64_t next);
98 static bool tcbdbleafcacheout(TCBDB *bdb, BDBLEAF *leaf);
99 static bool tcbdbleafsave(TCBDB *bdb, BDBLEAF *leaf);
100 static BDBLEAF *tcbdbleafload(TCBDB *bdb, uint64_t id);
101 static BDBLEAF *tcbdbgethistleaf(TCBDB *bdb, const char *kbuf, int ksiz);
102 static bool tcbdbleafaddrec(TCBDB *bdb, BDBLEAF *leaf, int dmode,
103 const char *kbuf, int ksiz, const char *vbuf, int vsiz);
104 static int tcbdbleafdatasize(BDBLEAF *leaf);
105 static BDBLEAF *tcbdbleafdivide(TCBDB *bdb, BDBLEAF *leaf);
106 static bool tcbdbleafkill(TCBDB *bdb, BDBLEAF *leaf);
107 static BDBNODE *tcbdbnodenew(TCBDB *bdb, uint64_t heir);
108 static bool tcbdbnodecacheout(TCBDB *bdb, BDBNODE *node);
109 static bool tcbdbnodesave(TCBDB *bdb, BDBNODE *node);
110 static BDBNODE *tcbdbnodeload(TCBDB *bdb, uint64_t id);
111 static void tcbdbnodeaddidx(TCBDB *bdb, BDBNODE *node, bool order, uint64_t pid,
112 const char *kbuf, int ksiz);
113 static bool tcbdbnodesubidx(TCBDB *bdb, BDBNODE *node, uint64_t pid);
114 static uint64_t tcbdbsearchleaf(TCBDB *bdb, const char *kbuf, int ksiz);
115 static BDBREC *tcbdbsearchrec(TCBDB *bdb, BDBLEAF *leaf, const char *kbuf, int ksiz, int *ip);
116 static bool tcbdbcacheadjust(TCBDB *bdb);
117 static void tcbdbcachepurge(TCBDB *bdb);
118 static bool tcbdbopenimpl(TCBDB *bdb, const char *path, int omode);
119 static bool tcbdbcloseimpl(TCBDB *bdb);
120 static bool tcbdbputimpl(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz,
122 static bool tcbdboutimpl(TCBDB *bdb, const char *kbuf, int ksiz);
123 static bool tcbdboutlist(TCBDB *bdb, const char *kbuf, int ksiz);
124 static const char *tcbdbgetimpl(TCBDB *bdb, const char *kbuf, int ksiz, int *sp);
125 static int tcbdbgetnum(TCBDB *bdb, const char *kbuf, int ksiz);
126 static TCLIST *tcbdbgetlist(TCBDB *bdb, const char *kbuf, int ksiz);
127 static bool tcbdbrangeimpl(TCBDB *bdb, const char *bkbuf, int bksiz, bool binc,
128 const char *ekbuf, int eksiz, bool einc, int max, TCLIST *keys);
129 static bool tcbdbrangefwm(TCBDB *bdb, const char *pbuf, int psiz, int max, TCLIST *keys);
130 static bool tcbdboptimizeimpl(TCBDB *bdb, int32_t lmemb, int32_t nmemb,
131 int64_t bnum, int8_t apow, int8_t fpow, uint8_t opts);
132 static bool tcbdbvanishimpl(TCBDB *bdb);
133 static bool tcbdblockmethod(TCBDB *bdb, bool wr);
134 static bool tcbdbunlockmethod(TCBDB *bdb);
135 static bool tcbdblockcache(TCBDB *bdb);
136 static bool tcbdbunlockcache(TCBDB *bdb);
137 static bool tcbdblocktran(TCBDB *bdb);
138 static bool tcbdbunlocktran(TCBDB *bdb);
139 static bool tcbdbcurfirstimpl(BDBCUR *cur);
140 static bool tcbdbcurlastimpl(BDBCUR *cur);
141 static bool tcbdbcurjumpimpl(BDBCUR *cur, const char *kbuf, int ksiz, bool forward);
142 static bool tcbdbcuradjust(BDBCUR *cur, bool forward);
143 static bool tcbdbcurprevimpl(BDBCUR *cur);
144 static bool tcbdbcurnextimpl(BDBCUR *cur);
145 static bool tcbdbcurputimpl(BDBCUR *cur, const char *vbuf, int vsiz, int mode);
146 static bool tcbdbcuroutimpl(BDBCUR *cur);
147 static bool tcbdbcurrecimpl(BDBCUR *cur, const char **kbp, int *ksp, const char **vbp, int *vsp);
150 /* debugging function prototypes */
151 void tcbdbprintmeta(TCBDB *bdb);
152 void tcbdbprintleaf(TCBDB *bdb, BDBLEAF *leaf);
153 void tcbdbprintnode(TCBDB *bdb, BDBNODE *node);
157 /*************************************************************************************************
159 *************************************************************************************************/
162 /* Get the message string corresponding to an error code. */
163 const char *tcbdberrmsg(int ecode){
164 return tchdberrmsg(ecode);
168 /* Create a B+ tree database object. */
169 TCBDB *tcbdbnew(void){
171 TCMALLOC(bdb, sizeof(*bdb));
173 bdb->hdb = tchdbnew();
174 TCMALLOC(bdb->hist, sizeof(*bdb->hist) * BDBLEVELMAX);
179 /* Delete a B+ tree database object. */
180 void tcbdbdel(TCBDB *bdb){
182 if(bdb->open) tcbdbclose(bdb);
186 pthread_mutex_destroy(bdb->tmtx);
187 pthread_mutex_destroy(bdb->cmtx);
188 pthread_rwlock_destroy(bdb->mmtx);
197 /* Get the last happened error code of a B+ tree database object. */
198 int tcbdbecode(TCBDB *bdb){
200 return tchdbecode(bdb->hdb);
204 /* Set mutual exclusion control of a B+ tree database object for threading. */
205 bool tcbdbsetmutex(TCBDB *bdb){
207 if(!TCUSEPTHREAD) return true;
208 if(!tcglobalmutexlock()) return false;
209 if(bdb->mmtx || bdb->open){
210 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
211 tcglobalmutexunlock();
214 TCMALLOC(bdb->mmtx, sizeof(pthread_rwlock_t));
215 TCMALLOC(bdb->cmtx, sizeof(pthread_mutex_t));
216 TCMALLOC(bdb->tmtx, sizeof(pthread_mutex_t));
217 if(pthread_rwlock_init(bdb->mmtx, NULL) != 0 || pthread_mutex_init(bdb->cmtx, NULL) != 0 ||
218 pthread_mutex_init(bdb->tmtx, NULL) != 0){
225 tcglobalmutexunlock();
228 tcglobalmutexunlock();
229 return tchdbsetmutex(bdb->hdb);
233 /* Set the custom comparison function of a B+ tree database object. */
234 bool tcbdbsetcmpfunc(TCBDB *bdb, BDBCMP cmp, void *cmpop){
237 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
246 /* Set the tuning parameters of a B+ tree database object. */
247 bool tcbdbtune(TCBDB *bdb, int32_t lmemb, int32_t nmemb,
248 int64_t bnum, int8_t apow, int8_t fpow, uint8_t opts){
251 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
254 bdb->lmemb = (lmemb > 0) ? tclmax(lmemb, BDBMINLMEMB) : BDBDEFLMEMB;
255 bdb->nmemb = (nmemb > 0) ? tclmax(nmemb, BDBMINNMEMB) : BDBDEFNMEMB;
258 if(opts & BDBTLARGE) hopts |= HDBTLARGE;
259 if(opts & BDBTDEFLATE) hopts |= HDBTDEFLATE;
260 if(opts & BDBTTCBS) hopts |= HDBTTCBS;
261 bnum = (bnum > 0) ? bnum : BDBDEFBNUM;
262 apow = (apow >= 0) ? apow : BDBDEFAPOW;
263 fpow = (fpow >= 0) ? fpow : BDBDEFFPOW;
264 return tchdbtune(bdb->hdb, bnum, apow, fpow, hopts);
268 /* Set the caching parameters of a B+ tree database object. */
269 bool tcbdbsetcache(TCBDB *bdb, int32_t lcnum, int32_t ncnum){
272 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
275 if(lcnum > 0) bdb->lcnum = tclmax(lcnum, BDBLEVELMAX);
276 if(ncnum > 0) bdb->ncnum = tclmax(ncnum, BDBLEVELMAX);
281 /* Open a database file and connect a B+ tree database object. */
282 bool tcbdbopen(TCBDB *bdb, const char *path, int omode){
284 if(!BDBLOCKMETHOD(bdb, true)) return false;
286 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
287 BDBUNLOCKMETHOD(bdb);
290 bool rv = tcbdbopenimpl(bdb, path, omode);
291 BDBUNLOCKMETHOD(bdb);
296 /* Close a B+ tree database object. */
297 bool tcbdbclose(TCBDB *bdb){
299 if(!BDBLOCKMETHOD(bdb, true)) return false;
301 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
302 BDBUNLOCKMETHOD(bdb);
305 bool rv = tcbdbcloseimpl(bdb);
306 BDBUNLOCKMETHOD(bdb);
311 /* Store a record into a B+ tree database object. */
312 bool tcbdbput(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){
313 assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
314 if(!BDBLOCKMETHOD(bdb, true)) return false;
315 if(!bdb->open || !bdb->wmode){
316 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
317 BDBUNLOCKMETHOD(bdb);
320 bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDOVER);
321 BDBUNLOCKMETHOD(bdb);
326 /* Store a string record into a B+ tree database object. */
327 bool tcbdbput2(TCBDB *bdb, const char *kstr, const char *vstr){
328 assert(bdb && kstr && vstr);
329 return tcbdbput(bdb, kstr, strlen(kstr), vstr, strlen(vstr));
333 /* Store a new record into a B+ tree database object. */
334 bool tcbdbputkeep(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){
335 assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
336 if(!BDBLOCKMETHOD(bdb, true)) return false;
337 if(!bdb->open || !bdb->wmode){
338 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
339 BDBUNLOCKMETHOD(bdb);
342 bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDKEEP);
343 BDBUNLOCKMETHOD(bdb);
348 /* Store a new string record into a B+ tree database object. */
349 bool tcbdbputkeep2(TCBDB *bdb, const char *kstr, const char *vstr){
350 assert(bdb && kstr && vstr);
351 return tcbdbputkeep(bdb, kstr, strlen(kstr), vstr, strlen(vstr));
355 /* Concatenate a value at the end of the existing record in a B+ tree database object. */
356 bool tcbdbputcat(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){
357 assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
358 if(!BDBLOCKMETHOD(bdb, true)) return false;
359 if(!bdb->open || !bdb->wmode){
360 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
361 BDBUNLOCKMETHOD(bdb);
364 bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDCAT);
365 BDBUNLOCKMETHOD(bdb);
370 /* Concatenate a string value at the end of the existing record in a B+ tree database object. */
371 bool tcbdbputcat2(TCBDB *bdb, const char *kstr, const char *vstr){
372 assert(bdb && kstr && vstr);
373 return tcbdbputcat(bdb, kstr, strlen(kstr), vstr, strlen(vstr));
377 /* Store a record into a B+ tree database object with allowing duplication of keys. */
378 bool tcbdbputdup(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){
379 assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
380 if(!BDBLOCKMETHOD(bdb, true)) return false;
381 if(!bdb->open || !bdb->wmode){
382 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
383 BDBUNLOCKMETHOD(bdb);
386 bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDDUP);
387 BDBUNLOCKMETHOD(bdb);
392 /* Store a string record into a B+ tree database object with allowing duplication of keys. */
393 bool tcbdbputdup2(TCBDB *bdb, const char *kstr, const char *vstr){
394 assert(bdb && kstr && vstr);
395 return tcbdbputdup(bdb, kstr, strlen(kstr), vstr, strlen(vstr));
399 /* Store records into a B+ tree database object with allowing duplication of keys. */
400 bool tcbdbputdup3(TCBDB *bdb, const void *kbuf, int ksiz, const TCLIST *vals){
401 assert(bdb && kbuf && ksiz >= 0 && vals);
402 if(!BDBLOCKMETHOD(bdb, true)) return false;
403 if(!bdb->open || !bdb->wmode){
404 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
405 BDBUNLOCKMETHOD(bdb);
409 int ln = TCLISTNUM(vals);
410 for(int i = 0; i < ln; i++){
412 const char *vbuf = tclistval(vals, i, &vsiz);
413 if(!tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDDUP)) err = true;
415 BDBUNLOCKMETHOD(bdb);
420 /* Remove a record of a B+ tree database object. */
421 bool tcbdbout(TCBDB *bdb, const void *kbuf, int ksiz){
422 assert(bdb && kbuf && ksiz >= 0);
423 if(!BDBLOCKMETHOD(bdb, true)) return false;
424 if(!bdb->open || !bdb->wmode){
425 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
426 BDBUNLOCKMETHOD(bdb);
429 bool rv = tcbdboutimpl(bdb, kbuf, ksiz);
430 BDBUNLOCKMETHOD(bdb);
435 /* Remove a string record of a B+ tree database object. */
436 bool tcbdbout2(TCBDB *bdb, const char *kstr){
438 return tcbdbout(bdb, kstr, strlen(kstr));
442 /* Remove records of a B+ tree database object. */
443 bool tcbdbout3(TCBDB *bdb, const void *kbuf, int ksiz){
444 assert(bdb && kbuf && ksiz >= 0);
445 if(!BDBLOCKMETHOD(bdb, true)) return false;
446 if(!bdb->open || !bdb->wmode){
447 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
448 BDBUNLOCKMETHOD(bdb);
451 bool rv = tcbdboutlist(bdb, kbuf, ksiz);
452 BDBUNLOCKMETHOD(bdb);
457 /* Retrieve a record in a B+ tree database object. */
458 void *tcbdbget(TCBDB *bdb, const void *kbuf, int ksiz, int *sp){
459 assert(bdb && kbuf && ksiz >= 0 && sp);
460 if(!BDBLOCKMETHOD(bdb, false)) return NULL;
462 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
463 BDBUNLOCKMETHOD(bdb);
466 const char *vbuf = tcbdbgetimpl(bdb, kbuf, ksiz, sp);
469 TCMEMDUP(rv, vbuf, *sp);
473 bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
474 BDBUNLOCKMETHOD(bdb);
475 if(adj && BDBLOCKMETHOD(bdb, true)){
476 if(!bdb->tran && !tcbdbcacheadjust(bdb)){
480 BDBUNLOCKMETHOD(bdb);
486 /* Retrieve a string record in a B+ tree database object. */
487 char *tcbdbget2(TCBDB *bdb, const char *kstr){
490 return tcbdbget(bdb, kstr, strlen(kstr), &vsiz);
494 /* Retrieve a record in a B+ tree database object and write the value into a buffer. */
495 const void *tcbdbget3(TCBDB *bdb, const void *kbuf, int ksiz, int *sp){
496 assert(bdb && kbuf && ksiz >= 0 && sp);
497 if(!BDBLOCKMETHOD(bdb, false)) return NULL;
499 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
500 BDBUNLOCKMETHOD(bdb);
503 const char *rv = tcbdbgetimpl(bdb, kbuf, ksiz, sp);
504 bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
505 BDBUNLOCKMETHOD(bdb);
506 if(adj && BDBLOCKMETHOD(bdb, true)){
507 if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = NULL;
508 BDBUNLOCKMETHOD(bdb);
514 /* Retrieve records in a B+ tree database object. */
515 TCLIST *tcbdbget4(TCBDB *bdb, const void *kbuf, int ksiz){
516 assert(bdb && kbuf && ksiz >= 0);
517 if(!BDBLOCKMETHOD(bdb, false)) return NULL;
519 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
520 BDBUNLOCKMETHOD(bdb);
523 TCLIST *rv = tcbdbgetlist(bdb, kbuf, ksiz);
524 bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
525 BDBUNLOCKMETHOD(bdb);
526 if(adj && BDBLOCKMETHOD(bdb, true)){
527 if(!bdb->tran && !tcbdbcacheadjust(bdb)){
528 if(rv) tclistdel(rv);
531 BDBUNLOCKMETHOD(bdb);
537 /* Get the number of records corresponding a key in a B+ tree database object. */
538 int tcbdbvnum(TCBDB *bdb, const void *kbuf, int ksiz){
539 assert(bdb && kbuf && ksiz >= 0);
540 if(!BDBLOCKMETHOD(bdb, false)) return 0;
542 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
543 BDBUNLOCKMETHOD(bdb);
546 int rv = tcbdbgetnum(bdb, kbuf, ksiz);
547 bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
548 BDBUNLOCKMETHOD(bdb);
549 if(adj && BDBLOCKMETHOD(bdb, true)){
550 if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = 0;
551 BDBUNLOCKMETHOD(bdb);
557 /* Get the number of records corresponding a string key in a B+ tree database object. */
558 int tcbdbvnum2(TCBDB *bdb, const char *kstr){
560 return tcbdbvnum(bdb, kstr, strlen(kstr));
564 /* Get the size of the value of a record in a B+ tree database object. */
565 int tcbdbvsiz(TCBDB *bdb, const void *kbuf, int ksiz){
566 assert(bdb && kbuf && ksiz >= 0);
568 if(!tcbdbget3(bdb, kbuf, ksiz, &vsiz)) return -1;
573 /* Get the size of the value of a string record in a B+ tree database object. */
574 int tcbdbvsiz2(TCBDB *bdb, const char *kstr){
576 return tcbdbvsiz(bdb, kstr, strlen(kstr));
580 /* Get keys of ranged records in a B+ tree database object. */
581 TCLIST *tcbdbrange(TCBDB *bdb, const void *bkbuf, int bksiz, bool binc,
582 const void *ekbuf, int eksiz, bool einc, int max){
584 TCLIST *keys = tclistnew();
585 if(!BDBLOCKMETHOD(bdb, false)) return keys;
587 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
588 BDBUNLOCKMETHOD(bdb);
591 tcbdbrangeimpl(bdb, bkbuf, bksiz, binc, ekbuf, eksiz, einc, max, keys);
592 bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
593 BDBUNLOCKMETHOD(bdb);
594 if(adj && BDBLOCKMETHOD(bdb, true)){
595 tcbdbcacheadjust(bdb);
596 BDBUNLOCKMETHOD(bdb);
602 /* Get string keys of ranged records in a B+ tree database object. */
603 TCLIST *tcbdbrange2(TCBDB *bdb, const char *bkstr, bool binc,
604 const char *ekstr, bool einc, int max){
605 return tcbdbrange(bdb, bkstr, bkstr ? strlen(bkstr) : 0, binc,
606 ekstr, ekstr ? strlen(ekstr) : 0, einc, max);
610 /* Get forward matching keys in a B+ tree database object. */
611 TCLIST *tcbdbfwmkeys(TCBDB *bdb, const void *pbuf, int psiz, int max){
612 assert(bdb && pbuf && psiz >= 0);
613 TCLIST *keys = tclistnew();
614 if(!BDBLOCKMETHOD(bdb, false)) return keys;
616 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
617 BDBUNLOCKMETHOD(bdb);
620 tcbdbrangefwm(bdb, pbuf, psiz, max, keys);
621 bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
622 BDBUNLOCKMETHOD(bdb);
623 if(adj && BDBLOCKMETHOD(bdb, true)){
624 tcbdbcacheadjust(bdb);
625 BDBUNLOCKMETHOD(bdb);
631 /* Get forward matching string keys in a B+ tree database object. */
632 TCLIST *tcbdbfwmkeys2(TCBDB *bdb, const char *pstr, int max){
634 return tcbdbfwmkeys(bdb, pstr, strlen(pstr), max);
639 /* Synchronize updated contents of a B+ tree database object with the file and the device. */
640 bool tcbdbsync(TCBDB *bdb){
642 if(!BDBLOCKMETHOD(bdb, true)) return false;
643 if(!bdb->open || !bdb->wmode || bdb->tran){
644 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
645 BDBUNLOCKMETHOD(bdb);
648 bool rv = tcbdbmemsync(bdb, true);
649 BDBUNLOCKMETHOD(bdb);
654 /* Optimize the file of a B+ tree database object. */
655 bool tcbdboptimize(TCBDB *bdb, int32_t lmemb, int32_t nmemb,
656 int64_t bnum, int8_t apow, int8_t fpow, uint8_t opts){
658 if(!BDBLOCKMETHOD(bdb, true)) return false;
659 if(!bdb->open || !bdb->wmode || bdb->tran){
660 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
661 BDBUNLOCKMETHOD(bdb);
664 bool rv = tcbdboptimizeimpl(bdb, lmemb, nmemb, bnum, apow, fpow, opts);
665 BDBUNLOCKMETHOD(bdb);
670 /* Remove all records of a B+ tree database object. */
671 bool tcbdbvanish(TCBDB *bdb){
673 if(!BDBLOCKMETHOD(bdb, true)) return false;
674 if(!bdb->open || !bdb->wmode || bdb->tran){
675 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
676 BDBUNLOCKMETHOD(bdb);
679 bool rv = tcbdbvanishimpl(bdb);
680 BDBUNLOCKMETHOD(bdb);
685 /* Copy the database file of a B+ tree database object. */
686 bool tcbdbcopy(TCBDB *bdb, const char *path){
688 if(!BDBLOCKMETHOD(bdb, true)) return false;
690 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
691 BDBUNLOCKMETHOD(bdb);
694 TCLIST *lids = tclistnew();
695 TCLIST *nids = tclistnew();
698 TCMAP *leafc = bdb->leafc;
699 tcmapiterinit(leafc);
700 while((vbuf = tcmapiternext(leafc, &vsiz)) != NULL){
701 tclistpush(lids, vbuf, vsiz);
703 TCMAP *nodec = bdb->nodec;
704 tcmapiterinit(nodec);
705 while((vbuf = tcmapiternext(nodec, &vsiz)) != NULL){
706 tclistpush(nids, vbuf, vsiz);
708 BDBUNLOCKMETHOD(bdb);
710 int ln = TCLISTNUM(lids);
711 for(int i = 0; i < ln; i++){
712 vbuf = TCLISTVALPTR(lids, i);
713 if(BDBLOCKMETHOD(bdb, true)){
716 BDBLEAF *leaf = (BDBLEAF *)tcmapget(bdb->leafc, vbuf, sizeof(leaf->id), &rsiz);
717 if(leaf && leaf->dirty && !tcbdbleafsave(bdb, leaf)) err = true;
721 BDBUNLOCKMETHOD(bdb);
726 ln = TCLISTNUM(nids);
727 for(int i = 0; i < ln; i++){
728 vbuf = TCLISTVALPTR(nids, i);
729 if(BDBLOCKMETHOD(bdb, true)){
732 BDBNODE *node = (BDBNODE *)tcmapget(bdb->nodec, vbuf, sizeof(node->id), &rsiz);
733 if(node && node->dirty && !tcbdbnodesave(bdb, node)) err = true;
737 BDBUNLOCKMETHOD(bdb);
744 if(!tcbdbtranbegin(bdb)) err = true;
745 if(BDBLOCKMETHOD(bdb, false)){
746 if(!tchdbcopy(bdb->hdb, path)) err = true;
747 BDBUNLOCKMETHOD(bdb);
751 if(!tcbdbtrancommit(bdb)) err = true;
756 /* Begin the transaction of a B+ tree database object. */
757 bool tcbdbtranbegin(TCBDB *bdb){
759 if(!BDBLOCKMETHOD(bdb, true)) return false;
760 if(!bdb->open || !bdb->wmode || bdb->tran){
761 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
762 BDBUNLOCKMETHOD(bdb);
765 if(!tcbdbmemsync(bdb, false)){
766 BDBUNLOCKMETHOD(bdb);
769 if(!BDBLOCKTRAN(bdb)){
770 BDBUNLOCKMETHOD(bdb);
774 TCMEMDUP(bdb->rbopaque, bdb->opaque, BDBOPAQUESIZ);
775 BDBUNLOCKMETHOD(bdb);
780 /* Commit the transaction of a B+ tree database object. */
781 bool tcbdbtrancommit(TCBDB *bdb){
783 if(!BDBLOCKMETHOD(bdb, true)) return false;
784 if(!bdb->open || !bdb->wmode || !bdb->tran){
785 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
786 BDBUNLOCKMETHOD(bdb);
791 bdb->rbopaque = NULL;
793 if(!tcbdbmemsync(bdb, false)) err = true;
795 BDBUNLOCKMETHOD(bdb);
800 /* Abort the transaction of a B+ tree database object. */
801 bool tcbdbtranabort(TCBDB *bdb){
803 if(!BDBLOCKMETHOD(bdb, true)) return false;
804 if(!bdb->open || !bdb->wmode || !bdb->tran){
805 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
806 BDBUNLOCKMETHOD(bdb);
809 tcbdbcachepurge(bdb);
810 memcpy(bdb->opaque, bdb->rbopaque, BDBOPAQUESIZ);
814 bdb->rbopaque = NULL;
816 BDBUNLOCKMETHOD(bdb);
821 /* Get the file path of a B+ tree database object. */
822 const char *tcbdbpath(TCBDB *bdb){
824 if(!BDBLOCKMETHOD(bdb, false)) return NULL;
826 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
827 BDBUNLOCKMETHOD(bdb);
830 const char *rv = tchdbpath(bdb->hdb);
831 BDBUNLOCKMETHOD(bdb);
836 /* Get the number of records of a B+ tree database object. */
837 uint64_t tcbdbrnum(TCBDB *bdb){
839 if(!BDBLOCKMETHOD(bdb, false)) return 0;
841 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
842 BDBUNLOCKMETHOD(bdb);
845 uint64_t rv = bdb->rnum;
846 BDBUNLOCKMETHOD(bdb);
851 /* Get the size of the database file of a B+ tree database object. */
852 uint64_t tcbdbfsiz(TCBDB *bdb){
854 if(!BDBLOCKMETHOD(bdb, false)) return 0;
856 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
857 BDBUNLOCKMETHOD(bdb);
860 uint64_t rv = tchdbfsiz(bdb->hdb);
861 BDBUNLOCKMETHOD(bdb);
866 /* Create a cursor object. */
867 BDBCUR *tcbdbcurnew(TCBDB *bdb){
870 TCMALLOC(cur, sizeof(*cur));
879 /* Delete a cursor object. */
880 void tcbdbcurdel(BDBCUR *cur){
886 /* Move a cursor object to the first record. */
887 bool tcbdbcurfirst(BDBCUR *cur){
889 TCBDB *bdb = cur->bdb;
890 if(!BDBLOCKMETHOD(bdb, false)) return false;
892 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
893 BDBUNLOCKMETHOD(bdb);
896 bool rv = tcbdbcurfirstimpl(cur);
897 bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
898 BDBUNLOCKMETHOD(bdb);
899 if(adj && BDBLOCKMETHOD(bdb, true)){
900 if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = false;
901 BDBUNLOCKMETHOD(bdb);
907 /* Move a cursor object to the last record. */
908 bool tcbdbcurlast(BDBCUR *cur){
910 TCBDB *bdb = cur->bdb;
911 if(!BDBLOCKMETHOD(bdb, false)) return false;
913 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
914 BDBUNLOCKMETHOD(bdb);
917 bool rv = tcbdbcurlastimpl(cur);
918 bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
919 BDBUNLOCKMETHOD(bdb);
920 if(adj && BDBLOCKMETHOD(bdb, true)){
921 if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = false;
922 BDBUNLOCKMETHOD(bdb);
928 /* Move a cursor object to the front of records corresponding a key. */
929 bool tcbdbcurjump(BDBCUR *cur, const void *kbuf, int ksiz){
930 assert(cur && kbuf && ksiz >= 0);
931 TCBDB *bdb = cur->bdb;
932 if(!BDBLOCKMETHOD(bdb, false)) return false;
934 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
935 BDBUNLOCKMETHOD(bdb);
938 bool rv = tcbdbcurjumpimpl(cur, kbuf, ksiz, true);
939 bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
940 BDBUNLOCKMETHOD(bdb);
941 if(adj && BDBLOCKMETHOD(bdb, true)){
942 if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = false;
943 BDBUNLOCKMETHOD(bdb);
949 /* Move a cursor object to the front of records corresponding a key string. */
950 bool tcbdbcurjump2(BDBCUR *cur, const char *kstr){
952 return tcbdbcurjump(cur, kstr, strlen(kstr));
956 /* Move a cursor object to the previous record. */
957 bool tcbdbcurprev(BDBCUR *cur){
959 TCBDB *bdb = cur->bdb;
960 if(!BDBLOCKMETHOD(bdb, false)) return false;
962 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
963 BDBUNLOCKMETHOD(bdb);
967 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
968 BDBUNLOCKMETHOD(bdb);
971 bool rv = tcbdbcurprevimpl(cur);
972 bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
973 BDBUNLOCKMETHOD(bdb);
974 if(adj && BDBLOCKMETHOD(bdb, true)){
975 if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = false;
976 BDBUNLOCKMETHOD(bdb);
982 /* Move a cursor object to the next record. */
983 bool tcbdbcurnext(BDBCUR *cur){
985 TCBDB *bdb = cur->bdb;
986 if(!BDBLOCKMETHOD(bdb, false)) return false;
988 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
989 BDBUNLOCKMETHOD(bdb);
993 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
994 BDBUNLOCKMETHOD(bdb);
997 bool rv = tcbdbcurnextimpl(cur);
998 bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
999 BDBUNLOCKMETHOD(bdb);
1000 if(adj && BDBLOCKMETHOD(bdb, true)){
1001 if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = false;
1002 BDBUNLOCKMETHOD(bdb);
1008 /* Insert a record around a cursor object. */
1009 bool tcbdbcurput(BDBCUR *cur, const void *vbuf, int vsiz, int cpmode){
1010 assert(cur && vbuf && vsiz >= 0);
1011 TCBDB *bdb = cur->bdb;
1012 if(!BDBLOCKMETHOD(bdb, true)) return false;
1013 if(!bdb->open || !bdb->wmode){
1014 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1015 BDBUNLOCKMETHOD(bdb);
1019 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1020 BDBUNLOCKMETHOD(bdb);
1023 bool rv = tcbdbcurputimpl(cur, vbuf, vsiz, cpmode);
1024 BDBUNLOCKMETHOD(bdb);
1029 /* Insert a string record around a cursor object. */
1030 bool tcbdbcurput2(BDBCUR *cur, const char *vstr, int cpmode){
1031 assert(cur && vstr);
1032 return tcbdbcurput(cur, vstr, strlen(vstr), cpmode);
1036 /* Delete the record where a cursor object is. */
1037 bool tcbdbcurout(BDBCUR *cur){
1039 TCBDB *bdb = cur->bdb;
1040 if(!BDBLOCKMETHOD(bdb, true)) return false;
1041 if(!bdb->open || !bdb->wmode){
1042 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1043 BDBUNLOCKMETHOD(bdb);
1047 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1048 BDBUNLOCKMETHOD(bdb);
1051 bool rv = tcbdbcuroutimpl(cur);
1052 BDBUNLOCKMETHOD(bdb);
1057 /* Get the key of the record where the cursor object is. */
1058 char *tcbdbcurkey(BDBCUR *cur, int *sp){
1060 TCBDB *bdb = cur->bdb;
1061 if(!BDBLOCKMETHOD(bdb, false)) return false;
1063 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1064 BDBUNLOCKMETHOD(bdb);
1068 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1069 BDBUNLOCKMETHOD(bdb);
1072 const char *kbuf, *vbuf;
1075 if(tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
1076 TCMEMDUP(rv, kbuf, ksiz);
1081 BDBUNLOCKMETHOD(bdb);
1086 /* Get the key string of the record where the cursor object is. */
1087 char *tcbdbcurkey2(BDBCUR *cur){
1090 return tcbdbcurkey(cur, &ksiz);
1094 /* Get the key of the record where the cursor object is, as a volatile buffer. */
1095 const char *tcbdbcurkey3(BDBCUR *cur, int *sp){
1097 TCBDB *bdb = cur->bdb;
1098 if(!BDBLOCKMETHOD(bdb, false)) return false;
1100 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1101 BDBUNLOCKMETHOD(bdb);
1105 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1106 BDBUNLOCKMETHOD(bdb);
1109 const char *kbuf, *vbuf;
1112 if(tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
1118 BDBUNLOCKMETHOD(bdb);
1123 /* Get the value of the record where the cursor object is. */
1124 char *tcbdbcurval(BDBCUR *cur, int *sp){
1126 TCBDB *bdb = cur->bdb;
1127 if(!BDBLOCKMETHOD(bdb, false)) return false;
1129 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1130 BDBUNLOCKMETHOD(bdb);
1134 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1135 BDBUNLOCKMETHOD(bdb);
1138 const char *kbuf, *vbuf;
1141 if(tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
1142 TCMEMDUP(rv, vbuf, vsiz);
1147 BDBUNLOCKMETHOD(bdb);
1152 /* Get the value string of the record where the cursor object is. */
1153 char *tcbdbcurval2(BDBCUR *cur){
1156 return tcbdbcurval(cur, &vsiz);
1160 /* Get the value of the record where the cursor object is, as a volatile buffer. */
1161 const char *tcbdbcurval3(BDBCUR *cur, int *sp){
1163 TCBDB *bdb = cur->bdb;
1164 if(!BDBLOCKMETHOD(bdb, false)) return false;
1166 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1167 BDBUNLOCKMETHOD(bdb);
1171 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1172 BDBUNLOCKMETHOD(bdb);
1175 const char *kbuf, *vbuf;
1178 if(tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
1184 BDBUNLOCKMETHOD(bdb);
1189 /* Get the key and the value of the record where the cursor object is. */
1190 bool tcbdbcurrec(BDBCUR *cur, TCXSTR *kxstr, TCXSTR *vxstr){
1191 assert(cur && kxstr && vxstr);
1192 TCBDB *bdb = cur->bdb;
1193 if(!BDBLOCKMETHOD(bdb, false)) return false;
1195 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1196 BDBUNLOCKMETHOD(bdb);
1200 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1201 BDBUNLOCKMETHOD(bdb);
1204 const char *kbuf, *vbuf;
1207 if(tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
1209 TCXSTRCAT(kxstr, kbuf, ksiz);
1211 TCXSTRCAT(vxstr, vbuf, vsiz);
1216 BDBUNLOCKMETHOD(bdb);
1222 /*************************************************************************************************
1223 * features for experts
1224 *************************************************************************************************/
1227 /* Set the error code of a B+ tree database object. */
1228 void tcbdbsetecode(TCBDB *bdb, int ecode, const char *filename, int line, const char *func){
1229 assert(bdb && filename && line >= 1 && func);
1230 tchdbsetecode(bdb->hdb, ecode, filename, line, func);
1234 /* Set the file descriptor for debugging output. */
1235 void tcbdbsetdbgfd(TCBDB *bdb, int fd){
1236 assert(bdb && fd >= 0);
1237 tchdbsetdbgfd(bdb->hdb, fd);
1241 /* Get the file descriptor for debugging output. */
1242 int tcbdbdbgfd(TCBDB *bdb){
1244 return tchdbdbgfd(bdb->hdb);
1248 /* Synchronize updating contents on memory. */
1249 bool tcbdbmemsync(TCBDB *bdb, bool phys){
1251 if(!bdb->open || !bdb->wmode){
1252 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1256 bool clk = BDBLOCKCACHE(bdb);
1259 TCMAP *leafc = bdb->leafc;
1260 tcmapiterinit(leafc);
1261 while((vbuf = tcmapiternext(leafc, &vsiz)) != NULL){
1263 BDBLEAF *leaf = (BDBLEAF *)tcmapiterval(vbuf, &rsiz);
1264 if(leaf->dirty && !tcbdbleafsave(bdb, leaf)) err = true;
1266 TCMAP *nodec = bdb->nodec;
1267 tcmapiterinit(nodec);
1268 while((vbuf = tcmapiternext(nodec, &vsiz)) != NULL){
1270 BDBNODE *node = (BDBNODE *)tcmapiterval(vbuf, &rsiz);
1271 if(node->dirty && !tcbdbnodesave(bdb, node)) err = true;
1273 if(clk) BDBUNLOCKCACHE(bdb);
1275 if(!tchdbmemsync(bdb->hdb, phys)) err = true;
1280 /* Get the comparison function of a B+ tree database object. */
1281 BDBCMP tcbdbcmpfunc(TCBDB *bdb){
1287 /* Get the opaque object for the comparison function of a B+ tree database object. */
1288 void *tcbdbcmpop(TCBDB *bdb){
1294 /* Get the maximum number of cached leaf nodes of a B+ tree database object. */
1295 uint32_t tcbdblmemb(TCBDB *bdb){
1301 /* Get the maximum number of cached non-leaf nodes of a B+ tree database object. */
1302 uint32_t tcbdbnmemb(TCBDB *bdb){
1308 /* Get the number of the leaf nodes of B+ tree database object. */
1309 uint64_t tcbdblnum(TCBDB *bdb){
1312 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1319 /* Get the number of the non-leaf nodes of B+ tree database object. */
1320 uint64_t tcbdbnnum(TCBDB *bdb){
1323 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1330 /* Get the number of elements of the bucket array of a B+ tree database object. */
1331 uint64_t tcbdbbnum(TCBDB *bdb){
1334 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1337 return tchdbbnum(bdb->hdb);
1341 /* Get the record alignment of a B+ tree database object. */
1342 uint32_t tcbdbalign(TCBDB *bdb){
1345 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1348 return tchdbalign(bdb->hdb);
1352 /* Get the maximum number of the free block pool of a B+ tree database object. */
1353 uint32_t tcbdbfbpmax(TCBDB *bdb){
1356 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1359 return tchdbfbpmax(bdb->hdb);
1363 /* Get the inode number of the database file of a B+ tree database object. */
1364 uint64_t tcbdbinode(TCBDB *bdb){
1367 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1370 return tchdbinode(bdb->hdb);
1374 /* Get the modification time of the database file of a B+ tree database object. */
1375 time_t tcbdbmtime(TCBDB *bdb){
1378 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1381 return tchdbmtime(bdb->hdb);
1385 /* Get the additional flags of a B+ tree database object. */
1386 uint8_t tcbdbflags(TCBDB *bdb){
1389 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1392 return tchdbflags(bdb->hdb);
1396 /* Get the options of a B+ tree database object. */
1397 uint8_t tcbdbopts(TCBDB *bdb){
1400 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1407 /* Get the pointer to the opaque field of a B+ tree database object. */
1408 char *tcbdbopaque(TCBDB *bdb){
1411 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1414 return tchdbopaque(bdb->hdb) + BDBOPAQUESIZ;
1418 /* Get the number of used elements of the bucket array of a B+ tree database object. */
1419 uint64_t tcbdbbnumused(TCBDB *bdb){
1422 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1425 return tchdbbnumused(bdb->hdb);
1429 /* Set the maximum size of each leaf node. */
1430 bool tcbdbsetlsmax(TCBDB *bdb, uint32_t lsmax){
1433 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1441 /* Set the capacity number of records. */
1442 bool tcbdbsetcapnum(TCBDB *bdb, uint64_t capnum){
1445 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1448 bdb->capnum = capnum;
1453 /* Store a new record into a B+ tree database object with backward duplication. */
1454 bool tcbdbputdupback(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){
1455 assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
1456 if(!BDBLOCKMETHOD(bdb, true)) return false;
1457 if(!bdb->open || !bdb->wmode){
1458 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1459 BDBUNLOCKMETHOD(bdb);
1462 bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDDUPB);
1463 BDBUNLOCKMETHOD(bdb);
1468 /* Store a new string record into a B+ tree database object with backward duplication. */
1469 bool tcbdbputdupback2(TCBDB *bdb, const char *kstr, const char *vstr){
1470 assert(bdb && kstr && vstr);
1471 return tcbdbputdupback(bdb, kstr, strlen(kstr), vstr, strlen(vstr));
1475 /* Move a cursor object to the rear of records corresponding a key. */
1476 bool tcbdbcurjumpback(BDBCUR *cur, const void *kbuf, int ksiz){
1477 assert(cur && kbuf && ksiz >= 0);
1478 TCBDB *bdb = cur->bdb;
1479 if(!BDBLOCKMETHOD(bdb, false)) return false;
1481 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1482 BDBUNLOCKMETHOD(bdb);
1485 bool rv = tcbdbcurjumpimpl(cur, kbuf, ksiz, false);
1486 BDBUNLOCKMETHOD(bdb);
1491 /* Move a cursor object to the rear of records corresponding a key string. */
1492 bool tcbdbcurjumpback2(BDBCUR *cur, const char *kstr){
1493 assert(cur && kstr);
1494 return tcbdbcurjumpback(cur, kstr, strlen(kstr));
1498 /* Compare keys of two records by lexical order. */
1499 int tcbdbcmplexical(const char *aptr, int asiz, const char *bptr, int bsiz, void *op){
1500 assert(aptr && asiz >= 0 && bptr && bsiz >= 0);
1502 TCCMPLEXICAL(rv, aptr, asiz, bptr, bsiz);
1507 /* Compare two keys as decimal strings of real numbers. */
1508 int tcbdbcmpdecimal(const char *aptr, int asiz, const char *bptr, int bsiz, void *op){
1509 assert(aptr && asiz >= 0 && bptr && bsiz >= 0);
1513 if(asiz > 0 && *aptr == '-'){
1518 for(int i = 0; i < asiz; i++){
1520 if(c < '0' || c > '9') continue;
1521 anum = anum * 10 + c - '0';
1526 if(bsiz > 0 && *bptr == '-'){
1531 for(int i = 0; i < bsiz; i++){
1533 if(c < '0' || c > '9') continue;
1534 bnum = bnum * 10 + c - '0';
1537 return (anum < bnum) ? -1 : anum > bnum;
1541 /* Compare two keys as 32-bit integers in the native byte order. */
1542 int tcbdbcmpint32(const char *aptr, int asiz, const char *bptr, int bsiz, void *op){
1543 assert(aptr && bptr);
1545 if(asiz == sizeof(int32_t)){
1546 memcpy(&anum, aptr, sizeof(int32_t));
1547 } else if(asiz < sizeof(int32_t)){
1548 memset(&anum, 0, sizeof(int32_t));
1549 memcpy(&anum, aptr, asiz);
1551 memcpy(&anum, aptr, sizeof(int32_t));
1553 if(bsiz == sizeof(int32_t)){
1554 memcpy(&bnum, bptr, sizeof(int32_t));
1555 } else if(bsiz < sizeof(int32_t)){
1556 memset(&bnum, 0, sizeof(int32_t));
1557 memcpy(&bnum, bptr, bsiz);
1559 memcpy(&bnum, bptr, sizeof(int32_t));
1561 return (anum < bnum) ? -1 : anum > bnum;
1565 /* Compare two keys as 64-bit integers in the native byte order. */
1566 int tcbdbcmpint64(const char *aptr, int asiz, const char *bptr, int bsiz, void *op){
1567 assert(aptr && bptr);
1569 if(asiz == sizeof(int64_t)){
1570 memcpy(&anum, aptr, sizeof(int64_t));
1571 } else if(asiz < sizeof(int64_t)){
1572 memset(&anum, 0, sizeof(int64_t));
1573 memcpy(&anum, aptr, asiz);
1575 memcpy(&anum, aptr, sizeof(int64_t));
1577 if(bsiz == sizeof(int64_t)){
1578 memcpy(&bnum, bptr, sizeof(int64_t));
1579 } else if(bsiz < sizeof(int64_t)){
1580 memset(&bnum, 0, sizeof(int64_t));
1581 memcpy(&bnum, bptr, bsiz);
1583 memcpy(&bnum, bptr, sizeof(int64_t));
1585 return (anum < bnum) ? -1 : anum > bnum;
1590 /*************************************************************************************************
1592 *************************************************************************************************/
1595 /* Clear all members.
1596 `bdb' specifies the B+ tree database object. */
1597 static void tcbdbclear(TCBDB *bdb){
1606 bdb->lmemb = BDBDEFLMEMB;
1607 bdb->nmemb = BDBDEFNMEMB;
1619 bdb->lcnum = BDBDEFLCNUM;
1620 bdb->ncnum = BDBDEFNCNUM;
1629 bdb->rbopaque = NULL;
1630 bdb->cnt_saveleaf = -1;
1631 bdb->cnt_loadleaf = -1;
1632 bdb->cnt_killleaf = -1;
1633 bdb->cnt_adjleafc = -1;
1634 bdb->cnt_savenode = -1;
1635 bdb->cnt_loadnode = -1;
1636 bdb->cnt_adjnodec = -1;
1637 TCDODEBUG(bdb->cnt_saveleaf = 0);
1638 TCDODEBUG(bdb->cnt_loadleaf = 0);
1639 TCDODEBUG(bdb->cnt_killleaf = 0);
1640 TCDODEBUG(bdb->cnt_adjleafc = 0);
1641 TCDODEBUG(bdb->cnt_savenode = 0);
1642 TCDODEBUG(bdb->cnt_loadnode = 0);
1643 TCDODEBUG(bdb->cnt_adjnodec = 0);
1647 /* Serialize meta data into the opaque field.
1648 `bdb' specifies the B+ tree database object. */
1649 static void tcdumpmeta(TCBDB *bdb){
1651 memset(bdb->opaque, 0, 64);
1652 char *wp = bdb->opaque;
1653 if(bdb->cmp == tcbdbcmplexical){
1654 *(uint8_t *)(wp++) = 0x0;
1655 } else if(bdb->cmp == tcbdbcmpdecimal){
1656 *(uint8_t *)(wp++) = 0x1;
1657 } else if(bdb->cmp == tcbdbcmpint32){
1658 *(uint8_t *)(wp++) = 0x2;
1659 } else if(bdb->cmp == tcbdbcmpint64){
1660 *(uint8_t *)(wp++) = 0x3;
1662 *(uint8_t *)(wp++) = 0xff;
1667 lnum = TCHTOIL(lnum);
1668 memcpy(wp, &lnum, sizeof(lnum));
1671 lnum = TCHTOIL(lnum);
1672 memcpy(wp, &lnum, sizeof(lnum));
1676 llnum = TCHTOILL(llnum);
1677 memcpy(wp, &llnum, sizeof(llnum));
1678 wp += sizeof(llnum);
1680 llnum = TCHTOILL(llnum);
1681 memcpy(wp, &llnum, sizeof(llnum));
1682 wp += sizeof(llnum);
1684 llnum = TCHTOILL(llnum);
1685 memcpy(wp, &llnum, sizeof(llnum));
1686 wp += sizeof(llnum);
1688 llnum = TCHTOILL(llnum);
1689 memcpy(wp, &llnum, sizeof(llnum));
1690 wp += sizeof(llnum);
1692 llnum = TCHTOILL(llnum);
1693 memcpy(wp, &llnum, sizeof(llnum));
1694 wp += sizeof(llnum);
1696 llnum = TCHTOILL(llnum);
1697 memcpy(wp, &llnum, sizeof(llnum));
1698 wp += sizeof(llnum);
1702 /* Deserialize meta data from the opaque field.
1703 `bdb' specifies the B+ tree database object. */
1704 static void tcloadmeta(TCBDB *bdb){
1705 const char *rp = bdb->opaque;
1706 uint8_t cnum = *(uint8_t *)(rp++);
1708 bdb->cmp = tcbdbcmplexical;
1709 } else if(cnum == 0x1){
1710 bdb->cmp = tcbdbcmpdecimal;
1711 } else if(cnum == 0x2){
1712 bdb->cmp = tcbdbcmpint32;
1713 } else if(cnum == 0x3){
1714 bdb->cmp = tcbdbcmpint64;
1718 memcpy(&lnum, rp, sizeof(lnum));
1720 bdb->lmemb = TCITOHL(lnum);
1721 memcpy(&lnum, rp, sizeof(lnum));
1723 bdb->nmemb = TCITOHL(lnum);
1725 memcpy(&llnum, rp, sizeof(llnum));
1726 bdb->root = TCITOHLL(llnum);
1727 rp += sizeof(llnum);
1728 memcpy(&llnum, rp, sizeof(llnum));
1729 bdb->first = TCITOHLL(llnum);
1730 rp += sizeof(llnum);
1731 memcpy(&llnum, rp, sizeof(llnum));
1732 bdb->last = TCITOHLL(llnum);
1733 rp += sizeof(llnum);
1734 memcpy(&llnum, rp, sizeof(llnum));
1735 bdb->lnum = TCITOHLL(llnum);
1736 rp += sizeof(llnum);
1737 memcpy(&llnum, rp, sizeof(llnum));
1738 bdb->nnum = TCITOHLL(llnum);
1739 rp += sizeof(llnum);
1740 memcpy(&llnum, rp, sizeof(llnum));
1741 bdb->rnum = TCITOHLL(llnum);
1742 rp += sizeof(llnum);
1746 /* Create a new leaf.
1747 `bdb' specifies the B+ tree database object.
1748 `prev' specifies the ID number of the previous leaf.
1749 `next' specifies the ID number of the next leaf.
1750 The return value is the new leaf object. */
1751 static BDBLEAF *tcbdbleafnew(TCBDB *bdb, uint64_t prev, uint64_t next){
1754 lent.id = ++bdb->lnum;
1755 lent.recs = tclistnew2(bdb->lmemb + 1);
1760 tcmapputkeep(bdb->leafc, &(lent.id), sizeof(lent.id), &lent, sizeof(lent));
1762 return (BDBLEAF *)tcmapget(bdb->leafc, &(lent.id), sizeof(lent.id), &rsiz);
1766 /* Remove a leaf from the cache.
1767 `bdb' specifies the B+ tree database object.
1768 `leaf' specifies the leaf object.
1769 If successful, the return value is true, else, it is false. */
1770 static bool tcbdbleafcacheout(TCBDB *bdb, BDBLEAF *leaf){
1771 assert(bdb && leaf);
1773 if(leaf->dirty && !tcbdbleafsave(bdb, leaf)) err = true;
1774 TCLIST *recs = leaf->recs;
1775 int ln = TCLISTNUM(recs);
1776 for(int i = 0; i < ln; i++){
1777 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, i);
1780 if(recp->rest) tclistdel(recp->rest);
1783 tcmapout(bdb->leafc, &(leaf->id), sizeof(leaf->id));
1788 /* Save a leaf into the internal database.
1789 `bdb' specifies the B+ tree database object.
1790 `leaf' specifies the leaf object.
1791 If successful, the return value is true, else, it is false. */
1792 static bool tcbdbleafsave(TCBDB *bdb, BDBLEAF *leaf){
1793 assert(bdb && leaf);
1794 TCDODEBUG(bdb->cnt_saveleaf++);
1795 TCXSTR *rbuf = tcxstrnew3(BDBPAGEBUFSIZ);
1796 char hbuf[(sizeof(uint64_t)+1)*3];
1801 TCSETVNUMBUF64(step, wp, llnum);
1804 TCSETVNUMBUF64(step, wp, llnum);
1806 TCXSTRCAT(rbuf, hbuf, wp - hbuf);
1807 TCLIST *recs = leaf->recs;
1808 int ln = TCLISTNUM(recs);
1809 for(int i = 0; i < ln; i++){
1810 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, i);
1814 TCSETVNUMBUF(step, wp, lnum);
1817 TCSETVNUMBUF(step, wp, lnum);
1819 TCLIST *rest = recp->rest;
1820 int rnum = rest ? TCLISTNUM(rest) : 0;
1821 TCSETVNUMBUF(step, wp, rnum);
1823 TCXSTRCAT(rbuf, hbuf, wp - hbuf);
1824 TCXSTRCAT(rbuf, recp->kbuf, recp->ksiz);
1825 TCXSTRCAT(rbuf, recp->vbuf, recp->vsiz);
1826 for(int j = 0; j < rnum; j++){
1828 const char *vbuf = tclistval(rest, j, &vsiz);
1829 TCSETVNUMBUF(step, hbuf, vsiz);
1830 TCXSTRCAT(rbuf, hbuf, step);
1831 TCXSTRCAT(rbuf, vbuf, vsiz);
1835 step = sprintf(hbuf, "%llx", (unsigned long long)leaf->id);
1836 if(ln < 1 && !tchdbout(bdb->hdb, hbuf, step) && tchdbecode(bdb->hdb) != TCENOREC)
1838 if(!leaf->dead && !tchdbput(bdb->hdb, hbuf, step, TCXSTRPTR(rbuf), TCXSTRSIZE(rbuf)))
1841 leaf->dirty = false;
1846 /* Load a leaf from the internal database.
1847 `bdb' specifies the B+ tree database object.
1848 `id' specifies the ID number of the leaf.
1849 The return value is the leaf object or `NULL' on failure. */
1850 static BDBLEAF *tcbdbleafload(TCBDB *bdb, uint64_t id){
1851 assert(bdb && id > 0);
1852 bool clk = BDBLOCKCACHE(bdb);
1854 BDBLEAF *leaf = (BDBLEAF *)tcmapget3(bdb->leafc, &id, sizeof(id), &rsiz);
1856 if(clk) BDBUNLOCKCACHE(bdb);
1859 if(clk) BDBUNLOCKCACHE(bdb);
1860 TCDODEBUG(bdb->cnt_loadleaf++);
1861 char hbuf[(sizeof(uint64_t)+1)*3];
1863 step = sprintf(hbuf, "%llx", (unsigned long long)id);
1865 char wbuf[BDBPAGEBUFSIZ];
1866 const char *rp = NULL;
1867 rsiz = tchdbget3(bdb->hdb, hbuf, step, wbuf, BDBPAGEBUFSIZ);
1869 tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
1871 } else if(rsiz < BDBPAGEBUFSIZ){
1874 if(!(rbuf = tchdbget(bdb->hdb, hbuf, step, &rsiz))){
1875 tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
1883 TCREADVNUMBUF64(rp, llnum, step);
1887 TCREADVNUMBUF64(rp, llnum, step);
1893 lent.recs = tclistnew2(bdb->lmemb + 1);
1897 TCREADVNUMBUF(rp, rec.ksiz, step);
1900 TCREADVNUMBUF(rp, rec.vsiz, step);
1904 TCREADVNUMBUF(rp, rnum, step);
1907 if(rsiz < rec.ksiz + rec.vsiz + rnum){
1911 TCMEMDUP(rec.kbuf, rp, rec.ksiz);
1914 TCMEMDUP(rec.vbuf, rp, rec.vsiz);
1918 rec.rest = tclistnew2(rnum);
1919 while(rnum-- > 0 && rsiz > 0){
1921 TCREADVNUMBUF(rp, vsiz, step);
1928 TCLISTPUSH(rec.rest, rp, vsiz);
1935 TCLISTPUSH(lent.recs, &rec, sizeof(rec));
1938 if(err || rsiz != 0){
1939 tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
1942 clk = BDBLOCKCACHE(bdb);
1943 tcmapputkeep(bdb->leafc, &(lent.id), sizeof(lent.id), &lent, sizeof(lent));
1944 leaf = (BDBLEAF *)tcmapget(bdb->leafc, &(lent.id), sizeof(lent.id), &rsiz);
1945 if(clk) BDBUNLOCKCACHE(bdb);
1950 /* Load the historical leaf from the internal database.
1951 `bdb' specifies the B+ tree database object.
1952 `kbuf' specifies the pointer to the region of the key.
1953 `ksiz' specifies the size of the region of the key.
1954 If successful, the return value is the pointer to the leaf, else, it is `NULL'. */
1955 static BDBLEAF *tcbdbgethistleaf(TCBDB *bdb, const char *kbuf, int ksiz){
1956 assert(bdb && kbuf && ksiz >= 0);
1957 BDBLEAF *leaf = tcbdbleafload(bdb, bdb->hleaf);
1958 if(!leaf) return NULL;
1959 int ln = TCLISTNUM(leaf->recs);
1960 if(ln < 2) return NULL;
1961 BDBREC *recp = (BDBREC *)TCLISTVALPTR(leaf->recs, 0);
1963 if(bdb->cmp == tcbdbcmplexical){
1964 TCCMPLEXICAL(rv, kbuf, ksiz, recp->kbuf, recp->ksiz);
1966 rv = bdb->cmp(kbuf, ksiz, recp->kbuf, recp->ksiz, bdb->cmpop);
1968 if(rv == 0) return leaf;
1969 if(rv < 0) return NULL;
1970 recp = (BDBREC *)TCLISTVALPTR(leaf->recs, ln - 1);
1971 if(bdb->cmp == tcbdbcmplexical){
1972 TCCMPLEXICAL(rv, kbuf, ksiz, recp->kbuf, recp->ksiz);
1974 rv = bdb->cmp(kbuf, ksiz, recp->kbuf, recp->ksiz, bdb->cmpop);
1976 if(rv <= 0 || leaf->next < 1) return leaf;
1981 /* Add a record to a leaf.
1982 `bdb' specifies the B+ tree database object.
1983 `leaf' specifies the leaf object.
1984 `dmode' specifies behavior when the key overlaps.
1985 `kbuf' specifies the pointer to the region of the key.
1986 `ksiz' specifies the size of the region of the key.
1987 `vbuf' specifies the pointer to the region of the value.
1988 `vsiz' specifies the size of the region of the value.
1989 If successful, the return value is true, else, it is false. */
1990 static bool tcbdbleafaddrec(TCBDB *bdb, BDBLEAF *leaf, int dmode,
1991 const char *kbuf, int ksiz, const char *vbuf, int vsiz){
1992 assert(bdb && leaf && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
1993 BDBCMP cmp = bdb->cmp;
1994 void *cmpop = bdb->cmpop;
1995 TCLIST *recs = leaf->recs;
1996 int ln = TCLISTNUM(recs);
1999 int i = (left + right) / 2;
2000 while(right >= left && i < ln){
2001 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, i);
2003 if(cmp == tcbdbcmplexical){
2004 TCCMPLEXICAL(rv, kbuf, ksiz, recp->kbuf, recp->ksiz);
2006 rv = cmp(kbuf, ksiz, recp->kbuf, recp->ksiz, cmpop);
2015 i = (left + right) / 2;
2018 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, i);
2020 if(cmp == tcbdbcmplexical){
2021 TCCMPLEXICAL(rv, kbuf, ksiz, recp->kbuf, recp->ksiz);
2023 rv = cmp(kbuf, ksiz, recp->kbuf, recp->ksiz, cmpop);
2030 TCREALLOC(recp->vbuf, recp->vbuf, recp->vsiz + vsiz + 1);
2031 memcpy(recp->vbuf + recp->vsiz, vbuf, vsiz);
2033 recp->vbuf[recp->vsiz] = '\0';
2036 if(!recp->rest) recp->rest = tclistnew();
2037 TCLISTPUSH(recp->rest, vbuf, vsiz);
2041 if(!recp->rest) recp->rest = tclistnew();
2042 tclistunshift(recp->rest, recp->vbuf, recp->vsiz);
2043 if(vsiz > recp->vsiz) TCREALLOC(recp->vbuf, recp->vbuf, vsiz + 1);
2044 memcpy(recp->vbuf, vbuf, vsiz);
2045 recp->vbuf[vsiz] = '\0';
2050 if(vsiz > recp->vsiz) TCREALLOC(recp->vbuf, recp->vbuf, vsiz + 1);
2051 memcpy(recp->vbuf, vbuf, vsiz);
2052 recp->vbuf[vsiz] = '\0';
2059 TCMEMDUP(rec.kbuf, kbuf, ksiz);
2061 TCMEMDUP(rec.vbuf, vbuf, vsiz);
2064 tclistinsert(recs, i, &rec, sizeof(rec));
2072 TCMEMDUP(rec.kbuf, kbuf, ksiz);
2074 TCMEMDUP(rec.vbuf, vbuf, vsiz);
2077 TCLISTPUSH(recs, &rec, sizeof(rec));
2085 /* Calculate the size of data of a leaf object.
2086 `bdb' specifies the B+ tree database object.
2087 `leaf' specifies the leaf object.
2088 The return value is size of data of the leaf. */
2089 static int tcbdbleafdatasize(BDBLEAF *leaf){
2092 TCLIST *recs = leaf->recs;
2093 int ln = TCLISTNUM(recs);
2094 for(int i = 0; i < ln; i++){
2095 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, i);
2096 sum += recp->ksiz + recp->vsiz;
2098 TCLIST *rest = recp->rest;
2099 int rnum = TCLISTNUM(rest);
2100 for(int j = 0; j < rnum; j++){
2102 tclistval(rest, j, &vsiz);
2111 /* Divide a leaf into two.
2112 `bdb' specifies the B+ tree database object.
2113 `leaf' specifies the leaf object.
2114 The return value is the new leaf object or `NULL' on failure. */
2115 static BDBLEAF *tcbdbleafdivide(TCBDB *bdb, BDBLEAF *leaf){
2116 assert(bdb && leaf);
2118 TCLIST *recs = leaf->recs;
2119 int mid = TCLISTNUM(recs) / 2;
2120 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, mid);
2121 BDBLEAF *newleaf = tcbdbleafnew(bdb, leaf->id, leaf->next);
2122 if(newleaf->next > 0){
2123 BDBLEAF *nextleaf = tcbdbleafload(bdb, newleaf->next);
2124 if(!nextleaf) return NULL;
2125 nextleaf->prev = newleaf->id;
2126 nextleaf->dirty = true;
2128 leaf->next = newleaf->id;
2130 int ln = TCLISTNUM(recs);
2131 TCLIST *newrecs = newleaf->recs;
2132 for(int i = mid; i < ln; i++){
2133 recp = (BDBREC *)TCLISTVALPTR(recs, i);
2134 TCLISTPUSH(newrecs, recp, sizeof(*recp));
2136 ln = TCLISTNUM(newrecs);
2137 for(int i = 0; i < ln; i++){
2139 free(tclistpop(recs, &rsiz));
2145 /* Cut off the path to a leaf and mark it dead.
2146 `bdb' specifies the B+ tree database object.
2147 `leaf' specifies the leaf object.
2148 If successful, the return value is true, else, it is false. */
2149 static bool tcbdbleafkill(TCBDB *bdb, BDBLEAF *leaf){
2150 assert(bdb && leaf);
2151 BDBNODE *node = tcbdbnodeload(bdb, bdb->hist[bdb->hnum-1]);
2152 if(!node) return false;
2153 if(tcbdbnodesubidx(bdb, node, leaf->id)){
2154 TCDODEBUG(bdb->cnt_killleaf++);
2155 if(bdb->hleaf == leaf->id) bdb->hleaf = 0;
2157 BDBLEAF *tleaf = tcbdbleafload(bdb, leaf->prev);
2158 if(!tleaf) return false;
2159 tleaf->next = leaf->next;
2160 tleaf->dirty = true;
2161 if(bdb->last == leaf->id) bdb->last = leaf->prev;
2164 BDBLEAF *tleaf = tcbdbleafload(bdb, leaf->next);
2165 if(!tleaf) return false;
2166 tleaf->prev = leaf->prev;
2167 tleaf->dirty = true;
2168 if(bdb->first == leaf->id) bdb->first = leaf->next;
2176 /* Create a new node.
2177 `bdb' specifies the B+ tree database object.
2178 `heir' specifies the ID of the child before the first index.
2179 The return value is the new node object. */
2180 static BDBNODE *tcbdbnodenew(TCBDB *bdb, uint64_t heir){
2181 assert(bdb && heir > 0);
2183 nent.id = ++bdb->nnum + BDBNODEIDBASE;
2184 nent.idxs = tclistnew2(bdb->nmemb + 1);
2187 tcmapputkeep(bdb->nodec, &(nent.id), sizeof(nent.id), &nent, sizeof(nent));
2189 return (BDBNODE *)tcmapget(bdb->nodec, &(nent.id), sizeof(nent.id), &rsiz);
2193 /* Remove a node from the cache.
2194 `bdb' specifies the B+ tree database object.
2195 `node' specifies the node object.
2196 If successful, the return value is true, else, it is false. */
2197 static bool tcbdbnodecacheout(TCBDB *bdb, BDBNODE *node){
2198 assert(bdb && node);
2200 if(node->dirty && !tcbdbnodesave(bdb, node)) err = true;
2201 TCLIST *idxs = node->idxs;
2202 int ln = TCLISTNUM(idxs);
2203 for(int i = 0; i < ln; i++){
2204 BDBIDX *idxp = (BDBIDX *)TCLISTVALPTR(idxs, i);
2208 tcmapout(bdb->nodec, &(node->id), sizeof(node->id));
2213 /* Save a node into the internal database.
2214 `bdb' specifies the B+ tree database object.
2215 `node' specifies the node object.
2216 If successful, the return value is true, else, it is false. */
2217 static bool tcbdbnodesave(TCBDB *bdb, BDBNODE *node){
2218 assert(bdb && node);
2219 TCDODEBUG(bdb->cnt_savenode++);
2220 TCXSTR *rbuf = tcxstrnew3(BDBPAGEBUFSIZ);
2221 char hbuf[(sizeof(uint64_t)+1)*2];
2225 TCSETVNUMBUF64(step, hbuf, llnum);
2226 TCXSTRCAT(rbuf, hbuf, step);
2227 TCLIST *idxs = node->idxs;
2228 int ln = TCLISTNUM(idxs);
2229 for(int i = 0; i < ln; i++){
2230 BDBIDX *idxp = (BDBIDX *)TCLISTVALPTR(idxs, i);
2233 TCSETVNUMBUF64(step, wp, llnum);
2235 uint32_t lnum = idxp->ksiz;
2236 TCSETVNUMBUF(step, wp, lnum);
2238 TCXSTRCAT(rbuf, hbuf, wp - hbuf);
2239 TCXSTRCAT(rbuf, idxp->kbuf, idxp->ksiz);
2242 step = sprintf(hbuf, "#%llx", (unsigned long long)(node->id - BDBNODEIDBASE));
2243 if(!tchdbput(bdb->hdb, hbuf, step, TCXSTRPTR(rbuf), TCXSTRSIZE(rbuf))) err = true;
2245 node->dirty = false;
2250 /* Load a node from the internal database.
2251 `bdb' specifies the B+ tree database object.
2252 `id' specifies the ID number of the node.
2253 The return value is the node object or `NULL' on failure. */
2254 static BDBNODE *tcbdbnodeload(TCBDB *bdb, uint64_t id){
2255 assert(bdb && id > BDBNODEIDBASE);
2256 bool clk = BDBLOCKCACHE(bdb);
2258 BDBNODE *node = (BDBNODE *)tcmapget3(bdb->nodec, &id, sizeof(id), &rsiz);
2260 if(clk) BDBUNLOCKCACHE(bdb);
2263 if(clk) BDBUNLOCKCACHE(bdb);
2264 TCDODEBUG(bdb->cnt_loadnode++);
2265 char hbuf[(sizeof(uint64_t)+1)*2];
2267 step = sprintf(hbuf, "#%llx", (unsigned long long)(id - BDBNODEIDBASE));
2269 char wbuf[BDBPAGEBUFSIZ];
2270 const char *rp = NULL;
2271 rsiz = tchdbget3(bdb->hdb, hbuf, step, wbuf, BDBPAGEBUFSIZ);
2273 tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2275 } else if(rsiz < BDBPAGEBUFSIZ){
2278 if(!(rbuf = tchdbget(bdb->hdb, hbuf, step, &rsiz))){
2279 tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2287 TCREADVNUMBUF64(rp, llnum, step);
2292 nent.idxs = tclistnew2(bdb->nmemb + 1);
2296 TCREADVNUMBUF64(rp, idx.pid, step);
2299 TCREADVNUMBUF(rp, idx.ksiz, step);
2302 if(rsiz < idx.ksiz){
2306 TCMEMDUP(idx.kbuf, rp, idx.ksiz);
2309 TCLISTPUSH(nent.idxs, &idx, sizeof(idx));
2312 if(err || rsiz != 0){
2313 tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2316 clk = BDBLOCKCACHE(bdb);
2317 tcmapputkeep(bdb->nodec, &(nent.id), sizeof(nent.id), &nent, sizeof(nent));
2318 node = (BDBNODE *)tcmapget(bdb->nodec, &(nent.id), sizeof(nent.id), &rsiz);
2319 if(clk) BDBUNLOCKCACHE(bdb);
2324 /* Add an index to a node.
2325 `bdb' specifies the B+ tree database object.
2326 `node' specifies the node object.
2327 `order' specifies whether the calling sequence is orderd or not.
2328 `pid' specifies the ID number of referred page.
2329 `kbuf' specifies the pointer to the region of the key.
2330 `ksiz' specifies the size of the region of the key. */
2331 static void tcbdbnodeaddidx(TCBDB *bdb, BDBNODE *node, bool order, uint64_t pid,
2332 const char *kbuf, int ksiz){
2333 assert(bdb && node && pid > 0 && kbuf && ksiz >= 0);
2336 TCMEMDUP(idx.kbuf, kbuf, ksiz);
2338 BDBCMP cmp = bdb->cmp;
2339 void *cmpop = bdb->cmpop;
2340 TCLIST *idxs = node->idxs;
2342 TCLISTPUSH(idxs, &idx, sizeof(idx));
2344 int ln = TCLISTNUM(idxs);
2347 int i = (left + right) / 2;
2348 while(right >= left && i < ln){
2349 BDBIDX *idxp = (BDBIDX *)TCLISTVALPTR(idxs, i);
2351 if(cmp == tcbdbcmplexical){
2352 TCCMPLEXICAL(rv, kbuf, ksiz, idxp->kbuf, idxp->ksiz);
2354 rv = cmp(kbuf, ksiz, idxp->kbuf, idxp->ksiz, cmpop);
2363 i = (left + right) / 2;
2366 BDBIDX *idxp = (BDBIDX *)TCLISTVALPTR(idxs, i);
2368 if(cmp == tcbdbcmplexical){
2369 TCCMPLEXICAL(rv, kbuf, ksiz, idxp->kbuf, idxp->ksiz);
2371 rv = cmp(kbuf, ksiz, idxp->kbuf, idxp->ksiz, cmpop);
2374 tclistinsert(idxs, i, &idx, sizeof(idx));
2379 if(i >= ln) TCLISTPUSH(idxs, &idx, sizeof(idx));
2385 /* Subtract an index from a node.
2386 `bdb' specifies the B+ tree database object.
2387 `node' specifies the node object.
2388 `pid' specifies the ID number of referred page.
2389 The return value is whether the subtraction is completed. */
2390 static bool tcbdbnodesubidx(TCBDB *bdb, BDBNODE *node, uint64_t pid){
2391 assert(bdb && node && pid > 0);
2392 TCLIST *idxs = node->idxs;
2393 int ln = TCLISTNUM(idxs);
2394 if(ln < 2) return false;
2395 if(node->heir == pid){
2397 BDBIDX *idxp = (BDBIDX *)tclistshift(idxs, &rsiz);
2398 node->heir = idxp->pid;
2404 int ln = TCLISTNUM(idxs);
2405 for(int i = 0; i < ln; i++){
2406 BDBIDX *idxp = (BDBIDX *)TCLISTVALPTR(idxs, i);
2407 if(idxp->pid == pid){
2410 free(tclistremove(idxs, i, &rsiz));
2420 /* Search the leaf object corresponding to a key.
2421 `bdb' specifies the B+ tree database object.
2422 `kbuf' specifies the pointer to the region of the key.
2423 `ksiz' specifies the size of the region of the key.
2424 The return value is the ID number of the leaf object or 0 on failure. */
2425 static uint64_t tcbdbsearchleaf(TCBDB *bdb, const char *kbuf, int ksiz){
2426 assert(bdb && kbuf && ksiz >= 0);
2427 BDBCMP cmp = bdb->cmp;
2428 void *cmpop = bdb->cmpop;
2429 uint64_t *hist = bdb->hist;
2430 uint64_t pid = bdb->root;
2433 while(pid > BDBNODEIDBASE){
2434 BDBNODE *node = tcbdbnodeload(bdb, pid);
2436 tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2439 TCLIST *idxs = node->idxs;
2440 int ln = TCLISTNUM(idxs);
2442 tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2445 hist[hnum++] = node->id;
2448 int i = (left + right) / 2;
2449 BDBIDX *idxp = NULL;
2450 while(right >= left && i < ln){
2451 idxp = (BDBIDX *)TCLISTVALPTR(idxs, i);
2453 if(cmp == tcbdbcmplexical){
2454 TCCMPLEXICAL(rv, kbuf, ksiz, idxp->kbuf, idxp->ksiz);
2456 rv = cmp(kbuf, ksiz, idxp->kbuf, idxp->ksiz, cmpop);
2465 i = (left + right) / 2;
2469 idxp = (BDBIDX *)TCLISTVALPTR(idxs, i);
2471 if(cmp == tcbdbcmplexical){
2472 TCCMPLEXICAL(rv, kbuf, ksiz, idxp->kbuf, idxp->ksiz);
2474 rv = cmp(kbuf, ksiz, idxp->kbuf, idxp->ksiz, cmpop);
2481 idxp = (BDBIDX *)TCLISTVALPTR(idxs, i - 1);
2487 if(i >= ln) pid = idxp->pid;
2490 if(bdb->lleaf == pid) bdb->hleaf = pid;
2498 /* Search a record of a leaf.
2499 `bdb' specifies the B+ tree database object.
2500 `leaf' specifies the leaf object.
2501 `kbuf' specifies the pointer to the region of the key.
2502 `ksiz' specifies the size of the region of the key.
2503 `ip' specifies the pointer to a variable to fetch the index of the correspnding record.
2504 The return value is the pointer to a corresponding record or `NULL' on failure. */
2505 static BDBREC *tcbdbsearchrec(TCBDB *bdb, BDBLEAF *leaf, const char *kbuf, int ksiz, int *ip){
2506 assert(bdb && leaf && kbuf && ksiz >= 0);
2507 BDBCMP cmp = bdb->cmp;
2508 void *cmpop = bdb->cmpop;
2509 TCLIST *recs = leaf->recs;
2510 int ln = TCLISTNUM(recs);
2513 int i = (left + right) / 2;
2514 while(right >= left && i < ln){
2515 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, i);
2517 if(cmp == tcbdbcmplexical){
2518 TCCMPLEXICAL(rv, kbuf, ksiz, recp->kbuf, recp->ksiz);
2520 rv = cmp(kbuf, ksiz, recp->kbuf, recp->ksiz, cmpop);
2530 i = (left + right) / 2;
2537 /* Adjust the caches for leaves and nodes.
2538 `bdb' specifies the B+ tree database object.
2539 The return value is true if successful, else, it is false. */
2540 static bool tcbdbcacheadjust(TCBDB *bdb){
2542 if(TCMAPRNUM(bdb->leafc) > bdb->lcnum){
2543 TCDODEBUG(bdb->cnt_adjleafc++);
2544 bool clk = BDBLOCKCACHE(bdb);
2545 TCMAP *leafc = bdb->leafc;
2546 tcmapiterinit(leafc);
2547 for(int i = 0; i < BDBCACHEOUT; i++){
2549 if(!tcbdbleafcacheout(bdb, (BDBLEAF *)tcmapiterval(tcmapiternext(leafc, &rsiz), &rsiz)))
2552 if(clk) BDBUNLOCKCACHE(bdb);
2554 if(TCMAPRNUM(bdb->nodec) > bdb->ncnum){
2555 TCDODEBUG(bdb->cnt_adjnodec++);
2556 bool clk = BDBLOCKCACHE(bdb);
2557 TCMAP *nodec = bdb->nodec;
2558 tcmapiterinit(nodec);
2559 for(int i = 0; i < BDBCACHEOUT; i++){
2561 if(!tcbdbnodecacheout(bdb, (BDBNODE *)tcmapiterval(tcmapiternext(nodec, &rsiz), &rsiz)))
2564 if(clk) BDBUNLOCKCACHE(bdb);
2570 /* Purge dirty pages of caches for leaves and nodes.
2571 `bdb' specifies the B+ tree database object. */
2572 static void tcbdbcachepurge(TCBDB *bdb){
2573 bool clk = BDBLOCKCACHE(bdb);
2576 tcmapiterinit(bdb->leafc);
2577 while((tmp = tcmapiternext(bdb->leafc, &tsiz)) != NULL){
2579 BDBLEAF *leaf = (BDBLEAF *)tcmapiterval(tmp, &lsiz);
2580 if(!leaf->dirty) continue;
2581 TCLIST *recs = leaf->recs;
2582 int ln = TCLISTNUM(recs);
2583 for(int i = 0; i < ln; i++){
2584 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, i);
2587 if(recp->rest) tclistdel(recp->rest);
2590 tcmapout(bdb->leafc, tmp, tsiz);
2592 tcmapiterinit(bdb->nodec);
2593 while((tmp = tcmapiternext(bdb->nodec, &tsiz)) != NULL){
2595 BDBNODE *node = (BDBNODE *)tcmapiterval(tmp, &nsiz);
2596 if(!node->dirty) continue;
2597 TCLIST *idxs = node->idxs;
2598 int ln = TCLISTNUM(idxs);
2599 for(int i = 0; i < ln; i++){
2600 BDBIDX *idxp = (BDBIDX *)TCLISTVALPTR(idxs, i);
2604 tcmapout(bdb->nodec, tmp, tsiz);
2606 if(clk) BDBUNLOCKCACHE(bdb);
2610 /* Open a database file and connect a B+ tree database object.
2611 `bdb' specifies the B+ tree database object.
2612 `path' specifies the path of the internal database file.
2613 `omode' specifies the connection mode.
2614 If successful, the return value is true, else, it is false. */
2615 static bool tcbdbopenimpl(TCBDB *bdb, const char *path, int omode){
2616 assert(bdb && path);
2617 int homode = HDBOREADER;
2618 if(omode & BDBOWRITER){
2619 homode = HDBOWRITER;
2620 if(omode & BDBOCREAT) homode |= HDBOCREAT;
2621 if(omode & BDBOTRUNC) homode |= HDBOTRUNC;
2626 if(omode & BDBONOLCK) homode |= HDBONOLCK;
2627 if(omode & BDBOLCKNB) homode |= HDBOLCKNB;
2628 tchdbsettype(bdb->hdb, HDBTBTREE);
2629 if(!tchdbopen(bdb->hdb, path, homode)) return false;
2636 bdb->opaque = tchdbopaque(bdb->hdb);
2637 bdb->leafc = tcmapnew2(bdb->lcnum * 2 + 1);
2638 bdb->nodec = tcmapnew2(bdb->ncnum * 2 + 1);
2639 if(bdb->wmode && tchdbrnum(bdb->hdb) < 1){
2640 BDBLEAF *leaf = tcbdbleafnew(bdb, 0, 0);
2641 bdb->root = leaf->id;
2642 bdb->first = leaf->id;
2643 bdb->last = leaf->id;
2648 bdb->cmp = tcbdbcmplexical;
2652 if(!tcbdbleafsave(bdb, leaf)){
2653 tcmapdel(bdb->nodec);
2654 tcmapdel(bdb->leafc);
2655 tchdbclose(bdb->hdb);
2661 tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
2662 tcmapdel(bdb->nodec);
2663 tcmapdel(bdb->leafc);
2664 tchdbclose(bdb->hdb);
2667 if(bdb->lmemb < BDBMINLMEMB || bdb->nmemb < BDBMINNMEMB ||
2668 bdb->root < 1 || bdb->first < 1 || bdb->last < 1 ||
2669 bdb->lnum < 0 || bdb->nnum < 0 || bdb->rnum < 0){
2670 tcbdbsetecode(bdb, TCEMETA, __FILE__, __LINE__, __func__);
2671 tcmapdel(bdb->nodec);
2672 tcmapdel(bdb->leafc);
2673 tchdbclose(bdb->hdb);
2677 uint8_t hopts = tchdbopts(bdb->hdb);
2679 if(hopts & HDBTLARGE) opts |= BDBTLARGE;
2680 if(hopts & HDBTDEFLATE) opts |= BDBTDEFLATE;
2681 if(hopts & HDBTTCBS) opts |= BDBTTCBS;
2686 bdb->rbopaque = NULL;
2691 /* Close a B+ tree database object.
2692 `bdb' specifies the B+ tree database object.
2693 If successful, the return value is true, else, it is false. */
2694 static bool tcbdbcloseimpl(TCBDB *bdb){
2697 tcbdbcachepurge(bdb);
2698 memcpy(bdb->opaque, bdb->rbopaque, BDBOPAQUESIZ);
2700 free(bdb->rbopaque);
2702 bdb->rbopaque = NULL;
2709 TCMAP *leafc = bdb->leafc;
2710 tcmapiterinit(leafc);
2711 while((vbuf = tcmapiternext(leafc, &vsiz)) != NULL){
2712 if(!tcbdbleafcacheout(bdb, (BDBLEAF *)tcmapiterval(vbuf, &vsiz))) err = true;
2714 TCMAP *nodec = bdb->nodec;
2715 tcmapiterinit(nodec);
2716 while((vbuf = tcmapiternext(nodec, &vsiz)) != NULL){
2717 if(!tcbdbnodecacheout(bdb, (BDBNODE *)tcmapiterval(vbuf, &vsiz))) err = true;
2719 if(bdb->wmode) tcdumpmeta(bdb);
2720 tcmapdel(bdb->nodec);
2721 tcmapdel(bdb->leafc);
2722 if(!tchdbclose(bdb->hdb)) err = true;
2727 /* Store a record into a B+ tree database object.
2728 `bdb' specifies the B+ tree database object.
2729 `kbuf' specifies the pointer to the region of the key.
2730 `ksiz' specifies the size of the region of the key.
2731 `vbuf' specifies the pointer to the region of the value.
2732 `vsiz' specifies the size of the region of the value.
2733 `dmode' specifies behavior when the key overlaps.
2734 If successful, the return value is true, else, it is false. */
2735 static bool tcbdbputimpl(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz,
2737 assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
2738 BDBLEAF *leaf = NULL;
2739 if(bdb->hleaf < 1 || !(leaf = tcbdbgethistleaf(bdb, kbuf, ksiz))){
2740 uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
2741 if(pid < 1) return false;
2742 if(!(leaf = tcbdbleafload(bdb, pid))) return false;
2744 if(!tcbdbleafaddrec(bdb, leaf, dmode, kbuf, ksiz, vbuf, vsiz)){
2745 tcbdbsetecode(bdb, TCEKEEP, __FILE__, __LINE__, __func__);
2748 int rnum = TCLISTNUM(leaf->recs);
2749 if(rnum > bdb->lmemb ||
2750 (bdb->lsmax > 0 && rnum > BDBMINLMEMB && (bdb->lschk++ & (0x8 - 1)) == 0 &&
2751 tcbdbleafdatasize(leaf) > bdb->lsmax)){
2753 BDBLEAF *newleaf = tcbdbleafdivide(bdb, leaf);
2754 if(!newleaf) return false;
2755 if(leaf->id == bdb->last) bdb->last = newleaf->id;
2756 uint64_t heir = leaf->id;
2757 uint64_t pid = newleaf->id;
2758 BDBREC *recp = (BDBREC *)TCLISTVALPTR(newleaf->recs, 0);
2759 int ksiz = recp->ksiz;
2761 TCMEMDUP(kbuf, recp->kbuf, ksiz);
2765 node = tcbdbnodenew(bdb, heir);
2766 tcbdbnodeaddidx(bdb, node, true, pid, kbuf, ksiz);
2767 bdb->root = node->id;
2771 uint64_t parent = bdb->hist[--bdb->hnum];
2772 if(!(node = tcbdbnodeload(bdb, parent))){
2776 tcbdbnodeaddidx(bdb, node, false, pid, kbuf, ksiz);
2778 TCLIST *idxs = node->idxs;
2779 int ln = TCLISTNUM(idxs);
2780 if(ln <= bdb->nmemb) break;
2782 BDBIDX *idxp = (BDBIDX *)TCLISTVALPTR(idxs, mid);
2783 BDBNODE *newnode = tcbdbnodenew(bdb, idxp->pid);
2786 TCMEMDUP(kbuf, idxp->kbuf, idxp->ksiz);
2788 for(int i = mid + 1; i < ln; i++){
2789 idxp = (BDBIDX *)TCLISTVALPTR(idxs, i);
2790 tcbdbnodeaddidx(bdb, newnode, true, idxp->pid, idxp->kbuf, idxp->ksiz);
2792 ln = TCLISTNUM(newnode->idxs);
2793 for(int i = 0; i < ln; i++){
2795 idxp = (BDBIDX *)tclistpop(idxs, &rsiz);
2801 if(bdb->capnum > 0 && bdb->rnum > bdb->capnum){
2802 uint64_t xnum = bdb->rnum - bdb->capnum;
2803 BDBCUR *cur = tcbdbcurnew(bdb);
2804 tcbdbcurfirstimpl(cur);
2805 while((xnum--) > 0){
2806 if(!tcbdbcuroutimpl(cur)){
2814 if(!bdb->tran && !tcbdbcacheadjust(bdb)) return false;
2819 /* Remove a record of a B+ tree database object.
2820 `hdb' specifies the B+ tree database object.
2821 `kbuf' specifies the pointer to the region of the key.
2822 `ksiz' specifies the size of the region of the key.
2823 If successful, the return value is true, else, it is false. */
2824 static bool tcbdboutimpl(TCBDB *bdb, const char *kbuf, int ksiz){
2825 assert(bdb && kbuf && ksiz >= 0);
2826 BDBLEAF *leaf = NULL;
2827 if(bdb->hleaf < 1 || !(leaf = tcbdbgethistleaf(bdb, kbuf, ksiz))){
2828 uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
2829 if(pid < 1) return false;
2830 if(!(leaf = tcbdbleafload(bdb, pid))) return false;
2833 BDBREC *recp = tcbdbsearchrec(bdb, leaf, kbuf, ksiz, &ri);
2835 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
2840 recp->vbuf = tclistshift(recp->rest, &(recp->vsiz));
2841 if(TCLISTNUM(recp->rest) < 1){
2842 tclistdel(recp->rest);
2849 free(tclistremove(leaf->recs, ri, &rsiz));
2853 if(TCLISTNUM(leaf->recs) < 1 && bdb->hnum > 0 && !tcbdbleafkill(bdb, leaf)) return false;
2854 if(!bdb->tran && !tcbdbcacheadjust(bdb)) return false;
2859 /* Remove records of a B+ tree database object.
2860 `bdb' specifies the B+ tree database object.
2861 `kbuf' specifies the pointer to the region of the key.
2862 `ksiz' specifies the size of the region of the key.
2863 If successful, the return value is true, else, it is false. */
2864 static bool tcbdboutlist(TCBDB *bdb, const char *kbuf, int ksiz){
2865 assert(bdb && kbuf && ksiz >= 0);
2866 BDBLEAF *leaf = NULL;
2867 if(bdb->hleaf < 1 || !(leaf = tcbdbgethistleaf(bdb, kbuf, ksiz))){
2868 uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
2869 if(pid < 1) return false;
2870 if(!(leaf = tcbdbleafload(bdb, pid))) return false;
2873 BDBREC *recp = tcbdbsearchrec(bdb, leaf, kbuf, ksiz, &ri);
2875 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
2880 rnum += TCLISTNUM(recp->rest);
2881 tclistdel(recp->rest);
2886 free(tclistremove(leaf->recs, ri, &rsiz));
2889 if(TCLISTNUM(leaf->recs) < 1 && bdb->hnum > 0 && !tcbdbleafkill(bdb, leaf)) return false;
2890 if(!bdb->tran && !tcbdbcacheadjust(bdb)) return false;
2895 /* Retrieve a record in a B+ tree database object.
2896 `bdb' specifies the B+ tree database object.
2897 `kbuf' specifies the pointer to the region of the key.
2898 `ksiz' specifies the size of the region of the key.
2899 `sp' specifies the pointer to the variable into which the size of the region of the return
2901 If successful, the return value is the pointer to the region of the value of the corresponding
2903 static const char *tcbdbgetimpl(TCBDB *bdb, const char *kbuf, int ksiz, int *sp){
2904 assert(bdb && kbuf && ksiz >= 0 && sp);
2905 BDBLEAF *leaf = NULL;
2906 if(bdb->hleaf < 1 || !(leaf = tcbdbgethistleaf(bdb, kbuf, ksiz))){
2907 uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
2908 if(pid < 1) return NULL;
2909 if(!(leaf = tcbdbleafload(bdb, pid))) return NULL;
2911 BDBREC *recp = tcbdbsearchrec(bdb, leaf, kbuf, ksiz, NULL);
2913 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
2921 /* Get the number of records corresponding a key in a B+ tree database object.
2922 `bdb' specifies the B+ tree database object.
2923 `kbuf' specifies the pointer to the region of the key.
2924 `ksiz' specifies the size of the region of the key.
2925 If successful, the return value is the number of the corresponding records, else, it is 0. */
2926 static int tcbdbgetnum(TCBDB *bdb, const char *kbuf, int ksiz){
2927 assert(bdb && kbuf && ksiz >= 0);
2928 BDBLEAF *leaf = NULL;
2929 if(bdb->hleaf < 1 || !(leaf = tcbdbgethistleaf(bdb, kbuf, ksiz))){
2930 uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
2931 if(pid < 1) return 0;
2932 if(!(leaf = tcbdbleafload(bdb, pid))) return 0;
2934 BDBREC *recp = tcbdbsearchrec(bdb, leaf, kbuf, ksiz, NULL);
2936 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
2939 return recp->rest ? TCLISTNUM(recp->rest) + 1 : 1;
2943 /* Retrieve records in a B+ tree database object.
2944 `bdb' specifies the B+ tree database object.
2945 `kbuf' specifies the pointer to the region of the key.
2946 `ksiz' specifies the size of the region of the key.
2947 If successful, the return value is a list object of the values of the corresponding records. */
2948 static TCLIST *tcbdbgetlist(TCBDB *bdb, const char *kbuf, int ksiz){
2949 assert(bdb && kbuf && ksiz >= 0);
2950 BDBLEAF *leaf = NULL;
2951 if(bdb->hleaf < 1 || !(leaf = tcbdbgethistleaf(bdb, kbuf, ksiz))){
2952 uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
2953 if(pid < 1) return NULL;
2954 if(!(leaf = tcbdbleafload(bdb, pid))) return NULL;
2956 BDBREC *recp = tcbdbsearchrec(bdb, leaf, kbuf, ksiz, NULL);
2958 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
2962 TCLIST *rest = recp->rest;
2964 int ln = TCLISTNUM(rest);
2965 vals = tclistnew2(ln + 1);
2966 TCLISTPUSH(vals, recp->vbuf, recp->vsiz);
2967 for(int i = 0; i < ln; i++){
2969 const char *vbuf = tclistval(rest, i, &vsiz);
2970 TCLISTPUSH(vals, vbuf, vsiz);
2973 vals = tclistnew2(1);
2974 TCLISTPUSH(vals, recp->vbuf, recp->vsiz);
2980 /* Get keys of ranged records in a B+ tree database object.
2981 `bdb' specifies the B+ tree database object.
2982 `bkbuf' specifies the pointer to the region of the key of the beginning border.
2983 `bksiz' specifies the size of the region of the beginning key.
2984 `binc' specifies whether the beginning border is inclusive or not.
2985 `ekbuf' specifies the pointer to the region of the key of the ending border.
2986 `eksiz' specifies the size of the region of the ending key.
2987 `einc' specifies whether the ending border is inclusive or not.
2988 `max' specifies the maximum number of keys to be fetched.
2989 `keys' specifies a list object to store the result.
2990 If successful, the return value is true, else, it is false. */
2991 static bool tcbdbrangeimpl(TCBDB *bdb, const char *bkbuf, int bksiz, bool binc,
2992 const char *ekbuf, int eksiz, bool einc, int max, TCLIST *keys){
2993 assert(bdb && keys);
2995 BDBCUR *cur = tcbdbcurnew(bdb);
2997 tcbdbcurjumpimpl(cur, bkbuf, bksiz, true);
2999 tcbdbcurfirstimpl(cur);
3001 BDBCMP cmp = bdb->cmp;
3002 void *cmpop = bdb->cmpop;
3003 const char *lbuf = NULL;
3006 const char *kbuf, *vbuf;
3008 if(!tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
3009 if(tchdbecode(bdb->hdb) != TCEINVALID && tchdbecode(bdb->hdb) != TCENOREC) err = true;
3013 if(cmp(kbuf, ksiz, bkbuf, bksiz, cmpop) == 0){
3021 if(cmp(kbuf, ksiz, ekbuf, eksiz, cmpop) > 0) break;
3023 if(cmp(kbuf, ksiz, ekbuf, eksiz, cmpop) >= 0) break;
3026 if(!lbuf || lsiz != ksiz || memcmp(kbuf, lbuf, ksiz)){
3027 TCLISTPUSH(keys, kbuf, ksiz);
3028 if(max >= 0 && TCLISTNUM(keys) >= max) break;
3032 tcbdbcurnextimpl(cur);
3039 /* Get forward matching keys in a B+ tree database object.
3040 `bdb' specifies the B+ tree database object.
3041 `pbuf' specifies the pointer to the region of the prefix.
3042 `psiz' specifies the size of the region of the prefix.
3043 `max' specifies the maximum number of keys to be fetched.
3044 `keys' specifies a list object to store the result.
3045 If successful, the return value is true, else, it is false. */
3046 static bool tcbdbrangefwm(TCBDB *bdb, const char *pbuf, int psiz, int max, TCLIST *keys){
3047 assert(bdb && pbuf && psiz >= 0 && keys);
3049 if(max < 0) max = INT_MAX;
3050 BDBCUR *cur = tcbdbcurnew(bdb);
3051 tcbdbcurjumpimpl(cur, pbuf, psiz, true);
3052 const char *lbuf = NULL;
3055 const char *kbuf, *vbuf;
3057 if(!tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
3058 if(tchdbecode(bdb->hdb) != TCEINVALID && tchdbecode(bdb->hdb) != TCENOREC) err = true;
3061 if(ksiz < psiz || memcmp(kbuf, pbuf, psiz)) break;
3062 if(!lbuf || lsiz != ksiz || memcmp(kbuf, lbuf, ksiz)){
3063 TCLISTPUSH(keys, kbuf, ksiz);
3064 if(TCLISTNUM(keys) >= max) break;
3068 tcbdbcurnextimpl(cur);
3075 /* Optimize the file of a B+ tree database object.
3076 `bdb' specifies the B+ tree database object.
3077 `lmemb' specifies the number of members in each leaf page.
3078 `nmemb' specifies the number of members in each non-leaf page.
3079 `bnum' specifies the number of elements of the bucket array.
3080 `apow' specifies the size of record alignment by power of 2.
3081 `fpow' specifies the maximum number of elements of the free block pool by power of 2.
3082 `opts' specifies options by bitwise or.
3083 If successful, the return value is true, else, it is false. */
3084 static bool tcbdboptimizeimpl(TCBDB *bdb, int32_t lmemb, int32_t nmemb,
3085 int64_t bnum, int8_t apow, int8_t fpow, uint8_t opts){
3087 if(lmemb < 1) lmemb = bdb->lmemb;
3088 if(nmemb < 1) nmemb = bdb->nmemb;
3089 if(bnum < 1) bnum = tchdbrnum(bdb->hdb) * 2 + 1;
3090 if(opts == UINT8_MAX) opts = bdb->opts;
3091 const char *path = tchdbpath(bdb->hdb);
3092 char *tpath = tcsprintf("%s%ctmp%c%llu", path, MYEXTCHR, MYEXTCHR, tchdbinode(bdb->hdb));
3093 TCBDB *tbdb = tcbdbnew();
3094 tcbdbsetcmpfunc(tbdb, bdb->cmp, bdb->cmpop);
3095 tcbdbtune(tbdb, lmemb, nmemb, bnum, apow, fpow, opts);
3096 tcbdbsetlsmax(tbdb, bdb->lsmax);
3097 if(!tcbdbopen(tbdb, tpath, BDBOWRITER | BDBOCREAT | BDBOTRUNC)){
3098 tcbdbsetecode(bdb, tcbdbecode(tbdb), __FILE__, __LINE__, __func__);
3104 BDBCUR *cur = tcbdbcurnew(bdb);
3105 tcbdbcurfirstimpl(cur);
3106 const char *kbuf, *vbuf;
3108 while(!err && cur->id > 0 && tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
3109 if(!tcbdbputdup(tbdb, kbuf, ksiz, vbuf, vsiz)){
3110 tcbdbsetecode(bdb, tcbdbecode(tbdb), __FILE__, __LINE__, __func__);
3113 tcbdbcurnextimpl(cur);
3116 if(!tcbdbclose(tbdb)){
3117 tcbdbsetecode(bdb, tcbdbecode(tbdb), __FILE__, __LINE__, __func__);
3121 if(unlink(path) == -1){
3122 tcbdbsetecode(bdb, TCEUNLINK, __FILE__, __LINE__, __func__);
3125 if(rename(tpath, path) == -1){
3126 tcbdbsetecode(bdb, TCERENAME, __FILE__, __LINE__, __func__);
3130 if(err) return false;
3131 tpath = tcstrdup(path);
3132 int omode = (tchdbomode(bdb->hdb) & ~BDBOCREAT) & ~BDBOTRUNC;
3133 if(!tcbdbcloseimpl(bdb)){
3137 bool rv = tcbdbopenimpl(bdb, tpath, omode);
3143 /* Remove all records of a B+ tree database object.
3144 `bdb' specifies the B+ tree database object.
3145 If successful, the return value is true, else, it is false. */
3146 static bool tcbdbvanishimpl(TCBDB *bdb){
3148 char *path = tcstrdup(tchdbpath(bdb->hdb));
3149 int omode = tchdbomode(bdb->hdb);
3151 if(!tcbdbcloseimpl(bdb)) err = true;
3152 if(!tcbdbopenimpl(bdb, path, BDBOTRUNC | omode)) err = true;
3158 /* Lock a method of the B+ tree database object.
3159 `bdb' specifies the B+ tree database object.
3160 `wr' specifies whether the lock is writer or not.
3161 If successful, the return value is true, else, it is false. */
3162 static bool tcbdblockmethod(TCBDB *bdb, bool wr){
3164 if(wr ? pthread_rwlock_wrlock(bdb->mmtx) != 0 : pthread_rwlock_rdlock(bdb->mmtx) != 0){
3165 tcbdbsetecode(bdb, TCETHREAD, __FILE__, __LINE__, __func__);
3173 /* Unlock a method of the B+ tree database object.
3174 `hdb' specifies the B+ tree database object.
3175 If successful, the return value is true, else, it is false. */
3176 static bool tcbdbunlockmethod(TCBDB *bdb){
3178 if(pthread_rwlock_unlock(bdb->mmtx) != 0){
3179 tcbdbsetecode(bdb, TCETHREAD, __FILE__, __LINE__, __func__);
3187 /* Lock the cache of the B+ tree database object.
3188 `hdb' specifies the B+ tree database object.
3189 If successful, the return value is true, else, it is false. */
3190 static bool tcbdblockcache(TCBDB *bdb){
3192 if(pthread_mutex_lock(bdb->cmtx) != 0){
3193 tcbdbsetecode(bdb, TCETHREAD, __FILE__, __LINE__, __func__);
3201 /* Unlock the cache of the B+ tree database object.
3202 `hdb' specifies the B+ tree database object.
3203 If successful, the return value is true, else, it is false. */
3204 static bool tcbdbunlockcache(TCBDB *bdb){
3206 if(pthread_mutex_unlock(bdb->cmtx) != 0){
3207 tcbdbsetecode(bdb, TCETHREAD, __FILE__, __LINE__, __func__);
3215 /* Lock the transaction of the B+ tree database object.
3216 `hdb' specifies the B+ tree database object.
3217 If successful, the return value is true, else, it is false. */
3218 static bool tcbdblocktran(TCBDB *bdb){
3220 if(pthread_mutex_lock(bdb->tmtx) != 0){
3221 tcbdbsetecode(bdb, TCETHREAD, __FILE__, __LINE__, __func__);
3229 /* Unlock the transaction of the B+ tree database object.
3230 `hdb' specifies the B+ tree database object.
3231 If successful, the return value is true, else, it is false. */
3232 static bool tcbdbunlocktran(TCBDB *bdb){
3234 if(pthread_mutex_unlock(bdb->tmtx) != 0){
3235 tcbdbsetecode(bdb, TCETHREAD, __FILE__, __LINE__, __func__);
3243 /* Move a cursor object to the first record.
3244 `cur' specifies the cursor object.
3245 If successful, the return value is true, else, it is false. */
3246 static bool tcbdbcurfirstimpl(BDBCUR *cur){
3248 cur->id = cur->bdb->first;
3251 return tcbdbcuradjust(cur, true);
3255 /* Move a cursor object to the last record.
3256 `cur' specifies the cursor object.
3257 If successful, the return value is true, else, it is false. */
3258 static bool tcbdbcurlastimpl(BDBCUR *cur){
3260 cur->id = cur->bdb->last;
3261 cur->kidx = INT_MAX;
3262 cur->vidx = INT_MAX;
3263 return tcbdbcuradjust(cur, false);
3267 /* Move a cursor object to around records corresponding a key.
3268 `cur' specifies the cursor object.
3269 `kbuf' specifies the pointer to the region of the key.
3270 `ksiz' specifies the size of the region of the key.
3271 `forward' specifies whether the cursor is to be the front of records.
3272 If successful, the return value is true, else, it is false. */
3273 static bool tcbdbcurjumpimpl(BDBCUR *cur, const char *kbuf, int ksiz, bool forward){
3274 assert(cur && kbuf && ksiz >= 0);
3275 TCBDB *bdb = cur->bdb;
3276 uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
3283 BDBLEAF *leaf = tcbdbleafload(bdb, pid);
3290 if(TCLISTNUM(leaf->recs) < 1){
3294 return forward ? tcbdbcurnextimpl(cur) : tcbdbcurprevimpl(cur);
3297 BDBREC *recp = tcbdbsearchrec(bdb, leaf, kbuf, ksiz, &ri);
3304 cur->vidx = recp->rest ? TCLISTNUM(recp->rest) : 0;
3309 if(ri > 0 && ri >= TCLISTNUM(leaf->recs)) ri = TCLISTNUM(leaf->recs) - 1;
3311 recp = (BDBREC *)TCLISTVALPTR(leaf->recs, ri);
3314 if(bdb->cmp == tcbdbcmplexical){
3315 TCCMPLEXICAL(rv, kbuf, ksiz, recp->kbuf, recp->ksiz);
3317 rv = bdb->cmp(kbuf, ksiz, recp->kbuf, recp->ksiz, bdb->cmpop);
3319 if(rv < 0) return true;
3320 cur->vidx = recp->rest ? TCLISTNUM(recp->rest) : 0;
3321 return tcbdbcurnextimpl(cur);
3324 if(bdb->cmp == tcbdbcmplexical){
3325 TCCMPLEXICAL(rv, kbuf, ksiz, recp->kbuf, recp->ksiz);
3327 rv = bdb->cmp(kbuf, ksiz, recp->kbuf, recp->ksiz, bdb->cmpop);
3329 if(rv > 0) return true;
3331 return tcbdbcurprevimpl(cur);
3335 /* Adjust a cursor object forward to the suitable record.
3336 `cur' specifies the cursor object.
3337 If successful, the return value is true, else, it is false. */
3338 static bool tcbdbcuradjust(BDBCUR *cur, bool forward){
3340 TCBDB *bdb = cur->bdb;
3343 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3349 BDBLEAF *leaf = tcbdbleafload(bdb, cur->id);
3350 if(!leaf) return false;
3351 TCLIST *recs = leaf->recs;
3352 int knum = TCLISTNUM(recs);
3358 cur->id = leaf->prev;
3359 cur->kidx = INT_MAX;
3360 cur->vidx = INT_MAX;
3362 } else if(cur->kidx >= knum){
3364 cur->id = leaf->next;
3368 cur->kidx = knum - 1;
3369 cur->vidx = INT_MAX;
3372 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, cur->kidx);
3373 int vnum = recp->rest ? TCLISTNUM(recp->rest) + 1 : 1;
3379 cur->vidx = INT_MAX;
3381 } else if(cur->vidx >= vnum){
3385 if(cur->kidx >= knum){
3386 cur->id = leaf->next;
3393 cur->vidx = vnum - 1;
3394 if(cur->vidx >= 0) break;
3405 /* Move a cursor object to the previous record.
3406 `cur' specifies the cursor object.
3407 If successful, the return value is true, else, it is false. */
3408 static bool tcbdbcurprevimpl(BDBCUR *cur){
3411 return tcbdbcuradjust(cur, false);
3415 /* Move a cursor object to the next record.
3416 `cur' specifies the cursor object.
3417 If successful, the return value is true, else, it is false. */
3418 static bool tcbdbcurnextimpl(BDBCUR *cur){
3421 return tcbdbcuradjust(cur, true);
3425 /* Insert a record around a cursor object.
3426 `cur' specifies the cursor object.
3427 `vbuf' specifies the pointer to the region of the value.
3428 `vsiz' specifies the size of the region of the value.
3429 `cpmode' specifies detail adjustment.
3430 If successful, the return value is true, else, it is false. */
3431 static bool tcbdbcurputimpl(BDBCUR *cur, const char *vbuf, int vsiz, int cpmode){
3432 assert(cur && vbuf && vsiz >= 0);
3433 TCBDB *bdb = cur->bdb;
3434 BDBLEAF *leaf = tcbdbleafload(bdb, cur->id);
3435 if(!leaf) return false;
3436 TCLIST *recs = leaf->recs;
3437 if(cur->kidx >= TCLISTNUM(recs)){
3438 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3441 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, cur->kidx);
3442 int vnum = recp->rest ? TCLISTNUM(recp->rest) + 1 : 1;
3443 if(cur->vidx >= vnum){
3444 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3450 if(vsiz > recp->vsiz) TCREALLOC(recp->vbuf, recp->vbuf, vsiz + 1);
3451 memcpy(recp->vbuf, vbuf, vsiz);
3452 recp->vbuf[vsiz] = '\0';
3455 tclistover(recp->rest, cur->vidx - 1, vbuf, vsiz);
3460 if(!recp->rest) recp->rest = tclistnew();
3461 tclistunshift(recp->rest, recp->vbuf, recp->vsiz);
3462 if(vsiz > recp->vsiz) TCREALLOC(recp->vbuf, recp->vbuf, vsiz + 1);
3463 memcpy(recp->vbuf, vbuf, vsiz);
3464 recp->vbuf[vsiz] = '\0';
3467 tclistinsert(recp->rest, cur->vidx - 1, vbuf, vsiz);
3472 if(!recp->rest) recp->rest = tclistnew();
3473 tclistinsert(recp->rest, cur->vidx, vbuf, vsiz);
3483 /* Delete the record where a cursor object is.
3484 `cur' specifies the cursor object.
3485 If successful, the return value is true, else, it is false. */
3486 static bool tcbdbcuroutimpl(BDBCUR *cur){
3488 TCBDB *bdb = cur->bdb;
3489 BDBLEAF *leaf = tcbdbleafload(bdb, cur->id);
3490 if(!leaf) return false;
3491 TCLIST *recs = leaf->recs;
3492 if(cur->kidx >= TCLISTNUM(recs)){
3493 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3496 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, cur->kidx);
3497 int vnum = recp->rest ? TCLISTNUM(recp->rest) + 1 : 1;
3498 if(cur->vidx >= vnum){
3499 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3505 recp->vbuf = tclistshift(recp->rest, &(recp->vsiz));
3508 free(tclistremove(recp->rest, cur->vidx - 1, &vsiz));
3510 if(TCLISTNUM(recp->rest) < 1){
3511 tclistdel(recp->rest);
3515 if(TCLISTNUM(recs) < 2 && bdb->hnum > 0){
3516 uint64_t pid = tcbdbsearchleaf(bdb, recp->kbuf, recp->ksiz);
3517 if(pid < 1) return false;
3518 if(!(leaf = tcbdbleafload(bdb, pid))) return false;
3519 if(!tcbdbleafkill(bdb, leaf)) return false;
3524 free(tclistremove(leaf->recs, cur->kidx, &rsiz));
3528 return tcbdbcuradjust(cur, true) || tchdbecode(bdb->hdb) == TCENOREC;
3532 /* Get the key and the value of the current record of the cursor object.
3533 `cur' specifies the cursor object.
3534 `kbp' specifies the pointer to the variable into which the pointer to the region of the key is
3536 `ksp' specifies the pointer to the variable into which the size of the key region is assigned.
3537 `vbp' specifies the pointer to the variable into which the pointer to the region of the value
3539 `vsp' specifies the pointer to the variable into which the size of the value region is
3541 static bool tcbdbcurrecimpl(BDBCUR *cur, const char **kbp, int *ksp, const char **vbp, int *vsp){
3542 assert(cur && kbp && ksp && vbp && vsp);
3543 TCBDB *bdb = cur->bdb;
3544 BDBLEAF *leaf = tcbdbleafload(bdb, cur->id);
3545 if(!leaf) return false;
3546 TCLIST *recs = leaf->recs;
3547 if(cur->kidx >= TCLISTNUM(recs)){
3548 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3551 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, cur->kidx);
3552 int vnum = recp->rest ? TCLISTNUM(recp->rest) + 1 : 1;
3553 if(cur->vidx >= vnum){
3554 tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3560 *vbp = tclistval(recp->rest, cur->vidx - 1, vsp);
3570 /*************************************************************************************************
3571 * debugging functions
3572 *************************************************************************************************/
3575 /* Print meta data of the header into the debugging output.
3576 `bdb' specifies the B+ tree database object. */
3577 void tcbdbprintmeta(TCBDB *bdb){
3579 int dbgfd = tchdbdbgfd(bdb->hdb);
3580 if(dbgfd < 0) return;
3581 char buf[BDBPAGEBUFSIZ];
3583 wp += sprintf(wp, "META:");
3584 wp += sprintf(wp, " mmtx=%p", (void *)bdb->mmtx);
3585 wp += sprintf(wp, " cmtx=%p", (void *)bdb->cmtx);
3586 wp += sprintf(wp, " tmtx=%p", (void *)bdb->tmtx);
3587 wp += sprintf(wp, " hdb=%p", (void *)bdb->hdb);
3588 wp += sprintf(wp, " opaque=%p", (void *)bdb->opaque);
3589 wp += sprintf(wp, " open=%d", bdb->open);
3590 wp += sprintf(wp, " wmode=%d", bdb->wmode);
3591 wp += sprintf(wp, " lmemb=%u", bdb->lmemb);
3592 wp += sprintf(wp, " nmemb=%u", bdb->nmemb);
3593 wp += sprintf(wp, " opts=%u", bdb->opts);
3594 wp += sprintf(wp, " root=%llu", (unsigned long long)bdb->root);
3595 wp += sprintf(wp, " first=%llu", (unsigned long long)bdb->first);
3596 wp += sprintf(wp, " last=%llu", (unsigned long long)bdb->last);
3597 wp += sprintf(wp, " lnum=%llu", (unsigned long long)bdb->lnum);
3598 wp += sprintf(wp, " nnum=%llu", (unsigned long long)bdb->nnum);
3599 wp += sprintf(wp, " rnum=%llu", (unsigned long long)bdb->rnum);
3600 wp += sprintf(wp, " leafc=%p", (void *)bdb->leafc);
3601 wp += sprintf(wp, " nodec=%p", (void *)bdb->nodec);
3602 wp += sprintf(wp, " cmp=%p", (void *)(intptr_t)bdb->cmp);
3603 wp += sprintf(wp, " cmpop=%p", (void *)bdb->cmpop);
3604 wp += sprintf(wp, " lcnum=%u", bdb->lcnum);
3605 wp += sprintf(wp, " ncnum=%u", bdb->ncnum);
3606 wp += sprintf(wp, " lsmax=%u", bdb->lsmax);
3607 wp += sprintf(wp, " lschk=%u", bdb->lschk);
3608 wp += sprintf(wp, " capnum=%llu", (unsigned long long)bdb->capnum);
3609 wp += sprintf(wp, " hist=%p", (void *)bdb->hist);
3610 wp += sprintf(wp, " hnum=%d", bdb->hnum);
3611 wp += sprintf(wp, " hleaf=%llu", (unsigned long long)bdb->hleaf);
3612 wp += sprintf(wp, " lleaf=%llu", (unsigned long long)bdb->lleaf);
3613 wp += sprintf(wp, " tran=%d", bdb->tran);
3614 wp += sprintf(wp, " rbopaque=%p", (void *)bdb->rbopaque);
3615 wp += sprintf(wp, " cnt_saveleaf=%lld", (long long)bdb->cnt_saveleaf);
3616 wp += sprintf(wp, " cnt_loadleaf=%lld", (long long)bdb->cnt_loadleaf);
3617 wp += sprintf(wp, " cnt_killleaf=%lld", (long long)bdb->cnt_killleaf);
3618 wp += sprintf(wp, " cnt_adjleafc=%lld", (long long)bdb->cnt_adjleafc);
3619 wp += sprintf(wp, " cnt_savenode=%lld", (long long)bdb->cnt_savenode);
3620 wp += sprintf(wp, " cnt_loadnode=%lld", (long long)bdb->cnt_loadnode);
3621 wp += sprintf(wp, " cnt_adjnodec=%lld", (long long)bdb->cnt_adjnodec);
3623 tcwrite(dbgfd, buf, wp - buf);
3627 /* Print records of a leaf object into the debugging output.
3628 `bdb' specifies the B+ tree database object.
3629 `leaf' specifies the leaf object. */
3630 void tcbdbprintleaf(TCBDB *bdb, BDBLEAF *leaf){
3631 assert(bdb && leaf);
3632 int dbgfd = tchdbdbgfd(bdb->hdb);
3633 TCLIST *recs = leaf->recs;
3634 if(dbgfd < 0) return;
3635 char buf[BDBPAGEBUFSIZ];
3637 wp += sprintf(wp, "LEAF:");
3638 wp += sprintf(wp, " id:%llx", (unsigned long long)leaf->id);
3639 wp += sprintf(wp, " prev:%llx", (unsigned long long)leaf->prev);
3640 wp += sprintf(wp, " next:%llx", (unsigned long long)leaf->next);
3641 wp += sprintf(wp, " dirty:%d", leaf->dirty);
3642 wp += sprintf(wp, " dead:%d", leaf->dead);
3643 wp += sprintf(wp, " rnum:%d", TCLISTNUM(recs));
3645 for(int i = 0; i < TCLISTNUM(recs); i++){
3646 tcwrite(dbgfd, buf, wp - buf);
3648 BDBREC *recp = (BDBREC *)TCLISTVALPTR(recs, i);
3649 wp += sprintf(wp, " [%s:%s]", recp->kbuf, recp->vbuf);
3650 TCLIST *rest = recp->rest;
3652 for(int j = 0; j < TCLISTNUM(rest); j++){
3653 wp += sprintf(wp, ":%s", (char *)TCLISTVALPTR(rest, j));
3658 tcwrite(dbgfd, buf, wp - buf);
3662 /* Print indexes of a node object into the debugging output.
3663 `bdb' specifies the B+ tree database object.
3664 `node' specifies the node object. */
3665 void tcbdbprintnode(TCBDB *bdb, BDBNODE *node){
3666 assert(bdb && node);
3667 int dbgfd = tchdbdbgfd(bdb->hdb);
3668 TCLIST *idxs = node->idxs;
3669 if(dbgfd < 0) return;
3670 char buf[BDBPAGEBUFSIZ];
3672 wp += sprintf(wp, "NODE:");
3673 wp += sprintf(wp, " id:%llx", (unsigned long long)node->id);
3674 wp += sprintf(wp, " heir:%llx", (unsigned long long)node->heir);
3675 wp += sprintf(wp, " dirty:%d", node->dirty);
3676 wp += sprintf(wp, " rnum:%d", TCLISTNUM(idxs));
3678 for(int i = 0; i < TCLISTNUM(idxs); i++){
3679 tcwrite(dbgfd, buf, wp - buf);
3681 BDBIDX *idxp = (BDBIDX *)TCLISTVALPTR(idxs, i);
3682 wp += sprintf(wp, " [%llx:%s]", (unsigned long long)idxp->pid, idxp->kbuf);
3685 tcwrite(dbgfd, buf, wp - buf);