[pkg-db-devel] Bug#783147: db5.3-dev: The library implementation improperly contrains the BTREE comparison function.

Lee Ward lee at sandia.gov
Wed Apr 22 20:50:36 UTC 2015


Package: libdb5.3-dev
Version: 5.3.28-9
Severity: normal
File: db5.3-dev

Dear Maintainer,

The BTREE comparison function is used by the library both to determine record
order within the database and to determine lexicographic identity of the keys
themselves. The second usage contradicts the documentation claim that it
is "reasonable for a comparison function to not examine an entire key in
some applications".

In my application, the database is keyed by interval, i.e. an <offset, extent>
pair. In such a database two keys will compare "equal" if two intervals
intersect, lesser if they do not intersect and the first interval
lies below the second, or greater if they do not intersect and the first lies
above. Thus, the constraint that the BTREE comparison function "must cause the
keys in the database to be well-ordered" is met.

However, the database implementation is also, improperly, utilizing the
comparison function to determine lexicographical order of the keys themselves.
This results in mishandling of the key content in some cases.

For example, in my application:

DB->put <0,10>
DBC->open(DB)
DBC->get(DBC, key:<0,0>, DB_SET_RANGE) returns the record with key <0,10>
DBC->del(DBC)
DB->put(DB, key:<0,5>, DB_NOOVERWITE)
DBC->get(DBC, ..., DB_NEXT) erroneously returns a record with key <0,10>

Oops! The returned record key reflects the deleted record instead of the
record just inserted. As a side note, this record with the
resurrected key does reflect a change in the data value.

What happened? Internally, the second DB->put has discovered a deleted
record and then improperly applied the BTREE comparison function to test
for lexicographic order of the key content itself. Since my comparison function
has returned "equal" it has decided to reuse the old content instead of copying
what the caller supplied.

This usage is both a tighter contraint than the documentation requirement
for a comparison function that reflects "well-ordered" and erroneous since
the documentation also says:

It is reasonable for a comparison function to not examine an entire key in
some applications, which implies partial keys may be specified to the Berkeley
DB interfaces. When partial keys are specified to Berkeley DB, interfaces
which retrieve data items based on a user-specified key (for example,
DB->get() and DBC->get() with the DB_SET flag), will modify the
user-specified key by returning the actual key stored in the database.

Please find a reproducer attached below. I compile with -std=c99 and, of
course, link it using -ldbd. Just create a directory named "DB" in the
current working directory and run the binary with no arguments. It will
print a few lines documenting what it is attempting and eventually (very
quickly) fail with an error reflecting attempted insertion of a record with
a key that conflicts with one already present. That "already present" record is
one that suffered from the resurrected key-content mishandling described above.

-- System Information:
Debian Release: 8.0
  APT prefers testing-updates
  APT policy: (500, 'testing-updates'), (500, 'testing')
Architecture: amd64 (x86_64)

Kernel: Linux 3.16.0-4-amd64 (SMP w/4 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)

Versions of packages libdb5.3-dev depends on:
ii  libdb5.3  5.3.28-9

libdb5.3-dev recommends no packages.

Versions of packages libdb5.3-dev suggests:
ii  db5.3-doc  5.3.28-9

-- no debconf information

++++++ BEGIN REPRODUCER +++++
#define _POSIX_C_SOURCE 199309L
#define _BSD_SOURCE

#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
#include <db.h>

struct segment_key {
	int64_t	anchor;
	u_int64_t nrec;
};

struct segment_info {
	struct segment_key skey;
	u_int64_t rlen;
};

static void
dbt_usetup(DBT *dbt, void *data, size_t ulen)
{

	(void )memset(dbt, 0, sizeof(DBT));
	dbt->data = data;
	dbt->ulen = dbt->size = ulen;
	dbt->flags = DB_DBT_USERMEM;
}

struct enumerator_info {
	DB	*db;
	DB_TXN	*txn;
	int64_t	from;
	DBC	*cursor;
	struct segment_info seginfo;
	void	*data;
	void	*private;
};

int debug = 1;

#define DIAG(_pre, _seg) \
	do { \
		if (debug) \
			sayseg((_pre), (_seg)); \
	} while (0)

static void
sayseg(const char *pre, const struct segment_info *seg)
{

	(void )fprintf(stderr,
		       "%s: <%zu, %zu>@%lld\n",
		       pre,
		       seg->skey.nrec,
		       seg->rlen,
		       (long long )seg->skey.anchor);
}

static int
skey_cmp(const struct segment_key *a, const struct segment_key *b)
{

	if (a->anchor > b->anchor) {
		if ((size_t )(a->anchor - b->anchor) >= b->nrec)
			return 1;
	} else if (b->anchor > a->anchor) {
		if ((size_t )(b->anchor - a->anchor) >= a->nrec)
			return -1;
	}
	return 0;
}

static int
bt_cmp(DB *db __attribute__ ((unused)),
       const DBT *dbt1,
       const DBT *dbt2)
{
	struct segment_key a, b;
	int	ret;

	(void )memcpy(&a, dbt1->data, sizeof(struct segment_key));
	(void )memcpy(&b, dbt2->data, sizeof(struct segment_key));
	ret = skey_cmp(&a, &b);
(void )fprintf(stderr, "CMP: %zu@%lld %zu@%lld rtn %d\n", a.nrec, (long long)a.anchor, b.nrec, (long long )b.anchor, ret);
	return ret;
}

static int
enumerate(struct enumerator_info *ei,
	  int (*f)(const struct enumerator_info *, int *),
	  int32_t flags)
{
	int	err, more, i;
	DBT	key, data;

	err = (*ei->db->cursor)(ei->db, ei->txn, &ei->cursor, 0);
	if (err)
		return err;
	ei->seginfo.skey.anchor = ei->from;
	ei->seginfo.skey.nrec = 0;
	dbt_usetup(&key, &ei->seginfo.skey, sizeof(ei->seginfo.skey));
	(void )memset(&data, 0, sizeof(data));
	data.data = NULL;
	data.size = 0;
	data.flags |= DB_DBT_REALLOC;
	more = 1;
	if (debug)
		(void )fprintf(stderr, "ENUM start\n");
	assert(!(flags & ~DB_RMW));
	err =
	    (*ei->cursor->get)(ei->cursor,
			       &key,
			       &data,
			       DB_SET_RANGE | flags);
	for (;;) {
		if (err)
			break;
		ei->data = data.data;
		assert(ei->seginfo.skey.nrec);
		ei->seginfo.rlen = data.size / ei->seginfo.skey.nrec;
		DIAG("ENUM", &ei->seginfo);
		err = (*f)(ei, &more);
		if (err || !more)
			break;
		err =
		    (*ei->cursor->get)(ei->cursor,
				       &key,
				       &data,
				       DB_NEXT | flags);
	}
	if (debug)
		(void )fprintf(stderr, "ENUM end (err is %d)\n", err);
	i = (*ei->cursor->close)(ei->cursor);
	if (i && !err)
		err = i;
	do {
		if (data.data == NULL)
			break;
		free(data.data);
	} while (0);
	return err;
}

struct probe_info {
	struct segment_info *buf;
	size_t remain;
};

static int
probe_segment(const struct enumerator_info *ei, int *moreptr)
{
	struct probe_info *pi;
	struct segment_info *seg;

	pi = ei->private;
	seg = pi->buf++;
	*seg = ei->seginfo;
	if (!--pi->remain)
		*moreptr = 0;
	return 0;
}

int
probe(DB *db,
      DB_TXN *txn,
      int64_t from,
      struct segment_info *buf,
      size_t *nelptr)
{
	struct probe_info pinfo;
	struct enumerator_info eninfo;
	int	err;

	pinfo.buf = buf;
	pinfo.remain = *nelptr;
	if (!pinfo.remain)
		return 0;
	eninfo.db = db;
	eninfo.txn = txn;
	eninfo.from = from;
	eninfo.private = &pinfo;
	err = enumerate(&eninfo, probe_segment, 0);
	do {
		if (err)
			break;
		*nelptr -= pinfo.remain;
	} while (0);
	return err;
}

static int
txnl_engine(DB_ENV *dbenv,
	    DB_TXN *txn,
	    int (*f)(DB_TXN *, void *),
	    void *args)
{
	DB_TXN	*subtxn;
	int	err;

	for (;;) {
		err = (*dbenv->txn_begin)(dbenv, txn, &subtxn, 0);
		if (err)
			break;
		err = (*f)(subtxn, args);
		if (err) {
			if ((*subtxn->abort)(subtxn) != 0 ||
			    err != DB_LOCK_DEADLOCK)
				break;
			continue;
		}
		err = (*subtxn->commit)(subtxn, 0);
		break;
	}
	return err;
}

struct xfr_control {
	void	*buf;
	size_t	buflen;
	struct segment_info *segs;
	size_t	nsegs;
};

static int
prepare_sto(const struct segment_info *seg,
	    const void *buf,
	    DBT *key,
	    DBT *data)
{

	if (INT64_MAX - (u_int64_t )seg->skey.anchor < seg->skey.nrec)
		return EINVAL;
	DIAG("DB-INS", seg);
	dbt_usetup(key, (void *)&seg->skey, sizeof(seg->skey));
	dbt_usetup(data, (void *)buf, seg->skey.nrec * seg->rlen);
	return 0;
}

static int
sto_seg(DB *db,
	DB_TXN *txn,
	const struct segment_info *seg,
	void *buf)
{
	DBT	key, data;
	int	err;

	if ((err = prepare_sto(seg, buf, &key, &data)))
		return err;
	return (*db->put)(db, txn, &key, &data, DB_NOOVERWRITE);
}

static int
trim_seg(const struct enumerator_info *ei, int *moreptr)
{
	struct segment_info *seg, segment;
	size_t	nrec, n;
	int	err;

	seg = ei->private;
	if (seg->skey.anchor < ei->seginfo.skey.anchor) {
		nrec = (size_t )(ei->seginfo.skey.anchor - seg->skey.anchor);
		if (seg->skey.nrec < nrec) {
			*moreptr = 0;
			return 0;
		}
	} else {
		nrec = (size_t )(seg->skey.anchor - ei->seginfo.skey.anchor);
		assert(ei->seginfo.skey.nrec >= nrec);
	}
	DIAG("DB-DEL", &ei->seginfo);
	err = (*ei->cursor->del)(ei->cursor, 0);
	if (err)
		return err;
	segment.skey.anchor = ei->seginfo.skey.anchor;
	segment.skey.nrec = nrec;
	segment.rlen = ei->seginfo.rlen;
	if (ei->seginfo.skey.anchor < seg->skey.anchor) {
		if (nrec) {
			err =
			    sto_seg(ei->db,
			    	    ei->txn,
				    &segment,
				    ei->data);
			if (err)
				return err;
		}
		n = seg->skey.nrec + nrec;
	} else
		n = seg->skey.nrec - nrec;
	if (ei->seginfo.skey.nrec <= n)
		return 0;
	segment.skey.anchor += n;
	segment.skey.nrec = ei->seginfo.skey.nrec - n;
	*moreptr = 0;
	err =
	    sto_seg(ei->db,
	    	    ei->txn,
		    &segment,
		    (char *)ei->data + n * segment.rlen);
	if (err)
		return err;
	return 0;
}

struct store_arguments {
	DB	*db;
	struct xfr_control *ctl;
};

static int
sto_txnl(DB_TXN *txn, struct store_arguments *args)
{
	struct enumerator_info eninfo;
	struct segment_info *seg;
	int	err;
	size_t	nbytes;

	eninfo.db = args->db;
	eninfo.txn = txn;
	for (;
	     args->ctl->nsegs;
	     args->ctl->segs++, --args->ctl->nsegs) {
		seg = args->ctl->segs;
		DIAG("STORE", seg);
		if (!seg->skey.nrec ||
		    (size_t )(INT64_MAX - seg->skey.anchor) < seg->skey.nrec)
			return EINVAL;
		if (args->ctl->buflen / seg->skey.nrec < seg->rlen)
			return EINVAL;
		eninfo.from = seg->skey.anchor;
		eninfo.private = seg;
		err = enumerate(&eninfo, trim_seg, DB_RMW);
		if (err && err != DB_NOTFOUND)
			return err;
		err =
		    sto_seg(eninfo.db,
			    eninfo.txn,
			    seg,
			    args->ctl->buf);
		if (err)
			return err;
		nbytes = seg->skey.nrec * seg->rlen;
		args->ctl->buf = (char *)args->ctl->buf + nbytes;
		args->ctl->buflen -= nbytes;
	}
	return 0;
}

int
store(DB *db, DB_TXN *txn, struct xfr_control *ctl)
{
	struct store_arguments args;
	int	err;

	args.db = db;
	args.ctl = ctl;
	err =
	    txnl_engine((*args.db->get_env)(args.db),
			txn,
			(int (*)(DB_TXN *, void *))sto_txnl,
			&args);
	return err;
}

#include <time.h>

unsigned verbose = 1;
unsigned int seed;

static int parseu(const char *str, unsigned *uptr);
static void usage(void);

int
main(int argc, char *const argv[])
{
	size_t	nr;
	size_t	bufsiz;
	int	seedset, i, err;
	struct segment_info segs[1];
	struct xfr_control control;
	char	*buf;
	DB_ENV	*dbenv;
	DB	*db;
	struct timespec t;
	unsigned int seed;
	float	f;
	size_t	n;

	nr = 1024;
	bufsiz = 1024 * 1024;
	seedset = 0;
	while ((i = getopt(argc, argv, "s:")) != -1)
		switch (i) {
		case 's':
			seedset = 1;
			if ((err = parseu(optarg, &seed) != 0)) {
				(void )fprintf(stderr,
					       "Bad seed: %s\n",
					       strerror(err));
				return EXIT_FAILURE;
			}
			break;
		default:
			usage();
		}

	buf = NULL;
	dbenv = NULL;
	do {
		err = db_env_create(&dbenv, 0);
		if (err) {
			dbenv = NULL;
			(void )fprintf(stderr,
				       "db_env_create: %s\n",
				       db_strerror(err));
			break;
		}
		err = (*dbenv->set_lk_detect)(dbenv, DB_LOCK_DEFAULT);
		if (err) {
			(*dbenv->err)(dbenv, err, "DB_ENV->set_lk_detect");
			break;
		}
		err =
		    (*dbenv->open)(dbenv,
				   "DB",
				   (DB_INIT_LOCK|DB_INIT_MPOOL|DB_INIT_TXN|
				    DB_CREATE|DB_USE_ENVIRON),
				   0);
		if (err) {
			(*dbenv->err)(dbenv, err, "DB_ENV->open");
			break;
		}
		err = db_create(&db, dbenv, 0);
		if (err) {
			db = NULL;
			(*dbenv->err)(dbenv, err, "db_create");
			break;
		}
		err = (*db->set_bt_compare)(db, bt_cmp);
		if (err) {
			(*db->err)(db, err, "DB->set_bt_compare");
			break;
		}
		err =
		    (*db->open)(db,
				NULL,
				"play",
				"database",
				DB_BTREE,
				DB_AUTO_COMMIT|DB_CREATE,
				0);
		if (err) {
			(*db->err)(db, err, "play/database");
			break;
		}
		buf = malloc(bufsiz);
		if (buf == NULL) {
			err = errno;
			perror("malloc");
			break;
		}
	} while (0);
	if (err) {
		if (db != NULL)
			(void )(*db->close)(db, 0);
		if (dbenv != NULL)
			(void )(*dbenv->close)(dbenv, 0);
		return EXIT_FAILURE;
	}
	if (!seedset) {
		if (clock_gettime(CLOCK_REALTIME, &t) != 0) {
			assert(0);
			abort();
		}
		seed = t.tv_sec;
	}
	(void )printf("Seed is 0x%x\n", seed);
	(void )fflush(stdout);
	do {
		control.buf = buf;
		control.buflen = bufsiz;
		control.segs = segs;
		control.nsegs = 1;
		f = rand_r(&seed);
		f = f / RAND_MAX;
		n = f * control.buflen;
		segs[0].skey.nrec =
		    rand_r(&seed) %
		    (nr > n ? n : nr) +
		    1;
		segs[0].rlen = n / segs[0].skey.nrec;
		segs[0].skey.anchor =
		    rand_r(&seed) % (nr - segs[0].skey.nrec + 1);
		err = store(db, NULL, &control);
	} while (!err);
	if (err)
		(*db->err)(db, err, "store");
	if ((i = (*db->close)(db, 0))) {
		(*dbenv->err)(dbenv, i, "DB close failed");
		if (!err)
			err = i;
	}
	if ((err = (*dbenv->close)(dbenv, 0))) {
		(void )fprintf(stderr, "DB_ENV->close: %s\n", db_strerror(err));
		if (!err)
			err = i;
	}
	free(buf);
	return err ? EXIT_FAILURE : EXIT_SUCCESS;
}

static int
parseul(const char *str, unsigned long *ulptr)
{
	char	*end;
	unsigned long ul;

	ul = strtoul(str, &end, 0);
	if (*end != '\0')
		return EINVAL;
	if (ul == ULONG_MAX && errno == ERANGE)
		return ERANGE;
	*ulptr = ul;
	return 0;
}

static int
parseu(const char *str, unsigned *uptr)
{
	unsigned long ul;
	int	err;

	if ((err = parseul(str, &ul)))
		return err;
	if (ul > UINT_MAX)
		return ERANGE;
	*uptr = ul;
	return 0;
}

static void
usage()
{

	(void )fprintf(stderr,
		       ("Usage: %s"
			" [-s <seed>]"
		        "\n"),
		       "a.out");
	exit(EXIT_FAILURE);
}
+++++++ END REPRODUCER ++++++



More information about the pkg-db-devel mailing list