--- /dev/null
+.TH MDB_LOAD 1 "2014/06/20" "LMDB 0.9.14"
+.\" Copyright 2014 Howard Chu, Symas Corp. All Rights Reserved.
+.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
+.SH NAME
+mdb_load \- LMDB environment import tool
+.SH SYNOPSIS
+.B mdb_load
+[\c
+.BR \-V ]
+[\c
+.BI \-f \ file\fR]
+[\c
+.BR \-n ]
+[\c
+.BI \-s \ subdb\fR]
+[\c
+.BR \-N ]
+[\c
+.BR \-T ]
+.BR \ envpath
+.SH DESCRIPTION
+The
+.B mdb_load
+utility reads data from the standard input and loads it into the
+LMDB environment
+.BR envpath .
+
+The input to
+.B mdb_load
+must be in the output format specified by the
+.BR mdb_dump (1)
+utility or as specified by the
+.B \-T
+option below.
+.SH OPTIONS
+.TP
+.BR \-V
+Write the library version number to the standard output, and exit.
+.TP
+.BI \-f \ file
+Read from the specified file instead of from the standard input.
+.TP
+.BR \-n
+Load an LMDB database which does not use subdirectories.
+.TP
+.BI \-s \ subdb
+Load a specific subdatabase. If no database is specified, data is loaded into the main database.
+.TP
+.BR \-N
+Don't overwrite existing records when loading into an already existing database; just skip them.
+.TP
+.BR \-T
+Load data from simple text files. The input must be paired lines of text, where the first
+line of the pair is the key item, and the second line of the pair is its corresponding
+data item.
+
+A simple escape mechanism, where newline and backslash (\\) characters are special, is
+applied to the text input. Newline characters are interpreted as record separators.
+Backslash characters in the text will be interpreted in one of two ways: If the backslash
+character precedes another backslash character, the pair will be interpreted as a literal
+backslash. If the backslash character precedes any other character, the two characters
+following the backslash will be interpreted as a hexadecimal specification of a single
+character; for example, \\0a is a newline character in the ASCII character set.
+
+For this reason, any backslash or newline characters that naturally occur in the text
+input must be escaped to avoid misinterpretation by
+.BR mdb_load .
+
+.SH DIAGNOSTICS
+Exit status is zero if no errors occur.
+Errors result in a non-zero exit status and
+a diagnostic message being written to standard error.
+
+.SH "SEE ALSO"
+.BR mdb_dump (1)
+.SH AUTHOR
+Howard Chu of Symas Corporation <http://www.symas.com>
--- /dev/null
+/* mdb_load.c - memory-mapped database load tool */
+/*
+ * Copyright 2011-2014 Howard Chu, Symas Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include "lmdb.h"
+
+/* Bits stored in `mode`, describing how the input is encoded. */
+enum {
+	PRINT = 1,	/* values use the printable backslash-escape encoding */
+	NOHDR = 2	/* input has no header section and no per-line prefix */
+};
+
+/* Length of a string literal, not counting its terminating NUL. */
+#define STRLENOF(s) (sizeof(s)-1)
+
+static char *prog;		/* argv[0], prefixed to diagnostics */
+static char *subname = NULL;	/* sub-database name handed to mdb_open(); NULL when none was given */
+
+static int mode;		/* OR of PRINT / NOHDR */
+static int flags;		/* database flags collected by readhdr() */
+static int version;		/* number parsed from the VERSION= header line */
+static int eof;			/* nonzero once the main read loop must stop */
+static size_t lineno;		/* input line counter shown in diagnostics */
+
+/* Line buffers: kbuf holds key lines, dbuf holds header and data lines. */
+static MDB_val kbuf, dbuf;
+
+/* One entry of the keyword table used when parsing dump headers. */
+typedef struct flagbit {
+	int bit;		/* MDB_* database flag; 0 marks the end of the table */
+	const char *name;	/* keyword expected in front of '=' */
+	int len;		/* length of name, so lookups need no strlen() */
+} flagbit;
+
+/* Expands to the two initializers "name, length" for a string literal. */
+#define S(s) s, STRLENOF(s)
+
+/*
+ * Header keywords that switch on a database flag. The table is read-only
+ * and private to this file, hence static const.
+ */
+static const flagbit dbflags[] = {
+	{ MDB_REVERSEKEY, S("reversekey") },
+	{ MDB_DUPSORT, S("dupsort") },
+	{ MDB_INTEGERKEY, S("integerkey") },
+	{ MDB_DUPFIXED, S("dupfixed") },
+	{ MDB_INTEGERDUP, S("integerdup") },
+	{ MDB_REVERSEDUP, S("reversedup") },
+	{ 0, NULL, 0 }
+};
+
+/*
+ * Lowercase hexadecimal digits, indexed by nibble value.
+ * NOTE(review): not referenced by any function visible in this file.
+ */
+static const char hexc[] = "0123456789abcdef";
+
+/*
+ * Consume the header section of a dump stream read from stdin.
+ *
+ * Lines are read into dbuf until a "HEADER=END" line or end of input, and
+ * what they say is recorded in file-scope state:
+ *   VERSION=n      -> version (values above 3 are rejected)
+ *   format=...     -> "print" sets PRINT in mode; "bytevalue" is accepted
+ *   database=name  -> subname is replaced by a heap copy of name
+ *   type=btree     -> accepted; any other type is rejected
+ *   <dbflag>=...   -> ORs the matching dbflags[] bit into flags; the text
+ *                     after '=' is not examined
+ * Any other "keyword=value" line is reported and skipped. A rejected
+ * value, a line without '=', or an allocation failure prints a diagnostic
+ * to stderr and exits with EXIT_FAILURE.
+ */
+static void readhdr(void)
+{
+	char *line, *ptr;
+	int i;
+
+	while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) {
+		/* mv_data is a void *; take a char * once rather than doing
+		 * arithmetic on void *, which standard C does not define.
+		 */
+		line = dbuf.mv_data;
+		lineno++;
+
+		if (!strncmp(line, "VERSION=", STRLENOF("VERSION="))) {
+			version = atoi(line + STRLENOF("VERSION="));
+			if (version > 3) {
+				fprintf(stderr, "%s: line %zu: unsupported VERSION %d\n",
+					prog, lineno, version);
+				exit(EXIT_FAILURE);
+			}
+		} else if (!strncmp(line, "HEADER=END", STRLENOF("HEADER=END"))) {
+			break;
+		} else if (!strncmp(line, "format=", STRLENOF("format="))) {
+			const char *fmt = line + STRLENOF("format=");
+
+			if (!strncmp(fmt, "print", STRLENOF("print"))) {
+				mode |= PRINT;
+			} else if (strncmp(fmt, "bytevalue", STRLENOF("bytevalue"))) {
+				fprintf(stderr, "%s: line %zu: unsupported FORMAT %s\n",
+					prog, lineno, fmt);
+				exit(EXIT_FAILURE);
+			}
+		} else if (!strncmp(line, "database=", STRLENOF("database="))) {
+			/* Drop the trailing newline so it does not end up in the name. */
+			ptr = strchr(line, '\n');
+			if (ptr)
+				*ptr = '\0';
+			free(subname);
+			subname = strdup(line + STRLENOF("database="));
+			if (!subname) {
+				fprintf(stderr, "%s: line %zu: out of memory\n",
+					prog, lineno);
+				exit(EXIT_FAILURE);
+			}
+		} else if (!strncmp(line, "type=", STRLENOF("type="))) {
+			const char *type = line + STRLENOF("type=");
+
+			if (strncmp(type, "btree", STRLENOF("btree"))) {
+				fprintf(stderr, "%s: line %zu: unsupported type %s\n",
+					prog, lineno, type);
+				exit(EXIT_FAILURE);
+			}
+		} else {
+			for (i = 0; dbflags[i].bit; i++) {
+				if (!strncmp(line, dbflags[i].name, dbflags[i].len) &&
+					line[dbflags[i].len] == '=') {
+					flags |= dbflags[i].bit;
+					break;
+				}
+			}
+			if (!dbflags[i].bit) {
+				/* strchr() stops at the terminator, so bytes left in the
+				 * buffer by an earlier, longer line cannot be mistaken
+				 * for an '=' on this line.
+				 */
+				ptr = strchr(line, '=');
+				if (!ptr) {
+					fprintf(stderr, "%s: line %zu: unexpected format\n",
+						prog, lineno);
+					exit(EXIT_FAILURE);
+				}
+				*ptr = '\0';
+				fprintf(stderr, "%s: line %zu: unrecognized keyword ignored: %s\n",
+					prog, lineno, line);
+			}
+		}
+	}
+}
+
+/*
+ * Report on stderr that the input stopped or became unusable at the
+ * current line. Only prints; callers decide how to unwind.
+ */
+static void badend(void)
+{
+	/* lineno is a size_t, so the matching conversion is %zu. */
+	fprintf(stderr, "%s: line %zu: unexpected end of input\n",
+		prog, lineno);
+}
+
+/*
+ * Convert two hexadecimal digits to the byte value they encode.
+ *
+ * c2 points at the high digit, with the low digit right after it. Both
+ * must already be known to be hex digits; no validation is done here.
+ * Returns a value in 0..255.
+ *
+ * Masking with 0x4f maps '0'..'9' to 0..9 and both 'A'..'F' and 'a'..'f'
+ * to 0x41..0x46. Bit 0x40 then identifies a letter, and subtracting 55
+ * (0x41 - 10) turns it into 10..15.
+ */
+static int unhex(unsigned char *c2)
+{
+	int hi, lo;
+
+	hi = c2[0] & 0x4f;
+	if (hi & 0x40)
+		hi -= 55;
+
+	lo = c2[1] & 0x4f;
+	if (lo & 0x40)
+		lo -= 55;
+
+	return (hi << 4) | lo;
+}
+
+/*
+ * Read one key or data line from stdin and decode it in place.
+ *
+ * buf is the caller's line buffer; it is grown with realloc() when a line
+ * does not fit, and buf->mv_data / buf->mv_size are updated to match.
+ * On success out->mv_data points at the decoded bytes (the start of
+ * buf->mv_data) and out->mv_size is their count.
+ *
+ * Unless NOHDR is set, every value line must begin with a space; a line
+ * starting "DATA=END" ends the current section.
+ *
+ * Decoding depends on mode:
+ *   PRINT set   - "\\" is one backslash, "\xx" (two hex digits) is one
+ *                 byte, anything else is copied as is;
+ *   PRINT clear - the line is an even-length string of hex digit pairs.
+ *
+ * Returns 0 on success and EOF otherwise. The global eof is set on every
+ * EOF return except the "DATA=END" case, where more sections may follow.
+ */
+static int readline(MDB_val *out, MDB_val *buf)
+{
+	unsigned char *c1, *c2, *end;
+	size_t len, total;
+	int c;
+
+	if (!(mode & NOHDR)) {
+		c = fgetc(stdin);
+		if (c == EOF) {
+			eof = 1;
+			return EOF;
+		}
+		if (c != ' ') {
+			/* Not a value line: count it, then see whether it is the
+			 * section terminator or garbage.
+			 */
+			lineno++;
+			if (fgets(buf->mv_data, buf->mv_size, stdin) != NULL &&
+				c == 'D' &&
+				!strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END")))
+				return EOF;
+			eof = 1;
+			badend();
+			return EOF;
+		}
+	}
+	if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
+		eof = 1;
+		return EOF;
+	}
+	lineno++;
+
+	c1 = buf->mv_data;
+	total = strlen((char *)c1);
+	if (total == 0) {
+		/* Only possible when the line starts with a NUL byte; there is
+		 * no last character to inspect.
+		 */
+		eof = 1;
+		badend();
+		return EOF;
+	}
+
+	/* Is buffer too short? Keep doubling it and appending until the
+	 * newline has been read.
+	 */
+	while (c1[total-1] != '\n') {
+		void *grown = realloc(buf->mv_data, buf->mv_size*2);
+
+		if (!grown) {
+			/* buf->mv_data is left untouched and still valid */
+			eof = 1;
+			fprintf(stderr, "%s: line %zu: out of memory, line too long\n",
+				prog, lineno);
+			return EOF;
+		}
+		buf->mv_data = grown;
+		buf->mv_size *= 2;
+
+		/* Continue right on top of the previous terminator so the
+		 * pieces form one contiguous string.
+		 */
+		c1 = (unsigned char *)buf->mv_data + total;
+		if (fgets((char *)c1, buf->mv_size - total, stdin) == NULL) {
+			eof = 1;
+			badend();
+			return EOF;
+		}
+		len = strlen((char *)c1);
+		if (len == 0) {
+			eof = 1;
+			badend();
+			return EOF;
+		}
+		total += len;
+		c1 = buf->mv_data;
+	}
+
+	/* Strip the newline. c2 reads the encoded text, c1 writes the decoded
+	 * bytes; c1 never overtakes c2 because decoding never expands.
+	 */
+	c1 = c2 = buf->mv_data;
+	len = total - 1;
+	c1[len] = '\0';
+	end = c1 + len;
+
+	if (mode & PRINT) {
+		while (c2 < end) {
+			if (*c2 != '\\') {
+				*c1++ = *c2++;
+			} else if (c2[1] == '\\') {
+				*c1++ = '\\';
+				c2 += 2;
+			} else {
+				/* Need the backslash plus two hex digits inside the line. */
+				if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) {
+					eof = 1;
+					badend();
+					return EOF;
+				}
+				*c1++ = unhex(c2+1);
+				c2 += 3;
+			}
+		}
+	} else {
+		/* odd length not allowed */
+		if (len & 1) {
+			eof = 1;
+			badend();
+			return EOF;
+		}
+		while (c2 < end) {
+			if (!isxdigit(*c2) || !isxdigit(c2[1])) {
+				eof = 1;
+				badend();
+				return EOF;
+			}
+			*c1++ = unhex(c2);
+			c2 += 2;
+		}
+	}
+	c2 = out->mv_data = buf->mv_data;
+	out->mv_size = c1 - c2;
+
+	return 0;
+}
+
+/*
+ * Print the command synopsis to stderr and exit with EXIT_FAILURE.
+ *
+ * name is the program name to show; every caller passes prog. Options are
+ * listed ahead of the environment path because that is the order getopt()
+ * is guaranteed to parse: a POSIX getopt() stops at the first operand.
+ */
+static void usage(const char *name)
+{
+	fprintf(stderr, "usage: %s [-V] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", name);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Entry point: parse options, open the environment named by the single
+ * operand, then load sections from stdin until readline() sets eof.
+ *
+ * For each section the header is parsed (unless -T was given), the target
+ * database is opened, and key/data line pairs are stored through a cursor.
+ * The write transaction is committed and reopened every 100 stored
+ * records, and committed once more when the section ends.
+ *
+ * Returns EXIT_SUCCESS when the last operation succeeded, EXIT_FAILURE
+ * otherwise. Diagnostics go to stderr.
+ */
+int main(int argc, char *argv[])
+{
+	int i, rc;
+	MDB_env *env;
+	MDB_txn *txn = NULL;
+	MDB_cursor *mc;
+	MDB_dbi dbi;
+	char *envname;
+	int envflags = 0, putflags = 0;
+
+	prog = argv[0];
+
+	if (argc < 2) {
+		usage(prog);
+	}
+
+	/* -f: load file instead of stdin
+	 * -n: use NOSUBDIR flag on env_open
+	 * -s: load into named subDB
+	 * -N: use NOOVERWRITE on puts
+	 * -T: read plaintext
+	 * -V: print version and exit
+	 */
+	while ((i = getopt(argc, argv, "f:ns:NTV")) != -1) {
+		switch(i) {
+		case 'V':
+			printf("%s\n", MDB_VERSION_STRING);
+			exit(0);
+		case 'f':
+			if (freopen(optarg, "r", stdin) == NULL) {
+				fprintf(stderr, "%s: %s: reopen: %s\n",
+					prog, optarg, strerror(errno));
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case 'n':
+			envflags |= MDB_NOSUBDIR;
+			break;
+		case 's':
+			free(subname);	/* a repeated -s replaces the earlier name */
+			subname = strdup(optarg);
+			if (!subname) {
+				fprintf(stderr, "%s: out of memory\n", prog);
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case 'N':
+			putflags = MDB_NOOVERWRITE|MDB_NODUPDATA;
+			break;
+		case 'T':
+			/* Plain text has no header that could announce its format,
+			 * so the escape decoder must be selected here as well.
+			 */
+			mode |= NOHDR | PRINT;
+			break;
+		default:
+			usage(prog);
+		}
+	}
+
+	if (optind != argc - 1)
+		usage(prog);
+
+	envname = argv[optind];
+	rc = mdb_env_create(&env);
+	if (rc) {
+		fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc));
+		return EXIT_FAILURE;
+	}
+
+	/* A sub-database may be named by -s or by a "database=" header line
+	 * that is only seen later, so always make room for named databases.
+	 */
+	mdb_env_set_maxdbs(env, 2);
+
+	rc = mdb_env_open(env, envname, envflags, 0664);
+	if (rc) {
+		fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc));
+		goto env_close;
+	}
+
+	/* Room for a maximum-size key written as hex pairs, plus newline and NUL. */
+	kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2;
+	kbuf.mv_data = malloc(kbuf.mv_size);
+	dbuf.mv_size = 4096;
+	dbuf.mv_data = malloc(dbuf.mv_size);
+	if (!kbuf.mv_data || !dbuf.mv_data) {
+		fprintf(stderr, "%s: out of memory\n", prog);
+		rc = ENOMEM;
+		goto env_close;
+	}
+
+	while(!eof) {
+		MDB_val key, data;
+		int batch = 0;
+		flags = 0;
+
+		if (!(mode & NOHDR))
+			readhdr();
+
+		rc = mdb_txn_begin(env, NULL, 0, &txn);
+		if (rc) {
+			fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
+			goto env_close;
+		}
+
+		/* MDB_CREATE: the target database may not exist yet. */
+		rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi);
+		if (rc) {
+			fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc));
+			goto txn_abort;
+		}
+
+		rc = mdb_cursor_open(txn, dbi, &mc);
+		if (rc) {
+			fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
+			goto txn_abort;
+		}
+
+		while(1) {
+			/* readline() returns only 0 or EOF; EOF on a key line ends
+			 * the section and what was stored so far is committed.
+			 */
+			rc = readline(&key, &kbuf);
+			if (rc == EOF)
+				break;
+
+			/* A key without its data line is an error. */
+			rc = readline(&data, &dbuf);
+			if (rc)
+				goto txn_abort;
+
+			rc = mdb_cursor_put(mc, &key, &data, putflags);
+			if (rc == MDB_KEYEXIST && putflags)
+				continue;	/* -N: skip records that already exist */
+			if (rc) {
+				fprintf(stderr, "%s: line %zu: cursor_put: %s\n",
+					prog, lineno, mdb_strerror(rc));
+				goto txn_abort;
+			}
+			batch++;
+			if (batch == 100) {
+				rc = mdb_txn_commit(txn);
+				if (rc) {
+					fprintf(stderr, "%s: line %zu: txn_commit: %s\n",
+						prog, lineno, mdb_strerror(rc));
+					goto env_close;
+				}
+				rc = mdb_txn_begin(env, NULL, 0, &txn);
+				if (rc) {
+					fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
+					goto env_close;
+				}
+				rc = mdb_cursor_open(txn, dbi, &mc);
+				if (rc) {
+					fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
+					goto txn_abort;
+				}
+				batch = 0;
+			}
+		}
+		rc = mdb_txn_commit(txn);
+		txn = NULL;
+		if (rc) {
+			fprintf(stderr, "%s: line %zu: txn_commit: %s\n",
+				prog, lineno, mdb_strerror(rc));
+			goto env_close;
+		}
+		mdb_dbi_close(env, dbi);
+	}
+
+txn_abort:
+	/* txn is NULL when the loop ended normally after its final commit. */
+	if (txn)
+		mdb_txn_abort(txn);
+env_close:
+	mdb_env_close(env);
+	free(kbuf.mv_data);
+	free(dbuf.mv_data);
+
+	return rc ? EXIT_FAILURE : EXIT_SUCCESS;
+}