[dpkg] 28/192: libdpkg: Improve tar metadata parsing

Ximin Luo infinity0 at debian.org
Tue Oct 17 11:03:53 UTC 2017


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch pu/reproducible_builds
in repository dpkg.

commit 19cf51bc93a023fed4aa843dfd4090e537b20833
Author: Guillem Jover <guillem at debian.org>
Date:   Wed Jan 11 23:19:50 2017 +0100

    libdpkg: Improve tar metadata parsing
    
    Make the existing octal parser more robust, by checking for the expected
    format of leading zeros or spaces, followed by any ASCII octal characters
    (0-7), followed by zero or more spaces or NULs. Even though POSIX specifies
    that numerical fields should end with at least one space or NUL, we still
    support this extension which has been present in other implementations
    for a long time.
    
    Add support for base-256 encoded numeric fields, to support large
    values, for UID/GID, device number, size and even negative timestamps.
    This is necessary not only to be able to store larger values, but to
    cover packages that can already be generated by dpkg-deb, given that it
    uses the system GNU tar when building.
    
    Closes: #850834
---
 debian/changelog       |   8 +++
 lib/dpkg/libdpkg.map   |   2 +
 lib/dpkg/t/.gitignore  |   1 +
 lib/dpkg/t/Makefile.am |   1 +
 lib/dpkg/t/t-tar.c     | 148 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/dpkg/tarfn.c       | 155 ++++++++++++++++++++++++++++++++++++++++++++-----
 lib/dpkg/tarfn.h       |   9 ++-
 man/deb.man            |   9 ++-
 8 files changed, 314 insertions(+), 19 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index e92b510..f87a379 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -20,6 +20,14 @@ dpkg (1.18.24) UNRELEASED; urgency=medium
     unpack errors. Regression introduced in dpkg 1.16.9. Closes: #861217
   * Improve tar entry metadata parsing in dpkg:
     - Do not parse device numbers for non block nor char tar entry objects.
+    - Make the existing octal parser more robust, by checking for the
+      expected format of leading zeros or spaces, followed by any ASCII
+      octal characters (0-7), followed by zero or more spaces or NULs.
+    - Add support for base-256 encoded numeric fields, to support large
+      values, for UID/GID, device number, size and even signed timestamps.
+      This is necessary not only to be able to store larger values, but to
+      cover packages that can already be generated by dpkg-deb, given that
+      it uses the system GNU tar when building. Closes: #850834
   * Architecture support:
     - Add support for ARM64 ILP32. Closes: #824742
       Thanks to Wookey <wookey at wookware.org>.
diff --git a/lib/dpkg/libdpkg.map b/lib/dpkg/libdpkg.map
index 690a1e4..5e685c9 100644
--- a/lib/dpkg/libdpkg.map
+++ b/lib/dpkg/libdpkg.map
@@ -226,6 +226,8 @@ LIBDPKG_PRIVATE {
 	progress_done;
 
 	# Tar support
+	tar_atoul;
+	tar_atosl;
 	tar_extractor;
 	tar_entry_update_from_system;
 
diff --git a/lib/dpkg/t/.gitignore b/lib/dpkg/t/.gitignore
index d2b628b..e22bcc9 100644
--- a/lib/dpkg/t/.gitignore
+++ b/lib/dpkg/t/.gitignore
@@ -18,6 +18,7 @@ t-pkg-queue
 t-progname
 t-string
 t-subproc
+t-tar
 t-test
 t-test-skip
 t-trigger
diff --git a/lib/dpkg/t/Makefile.am b/lib/dpkg/t/Makefile.am
index db449d6..12dc4d8 100644
--- a/lib/dpkg/t/Makefile.am
+++ b/lib/dpkg/t/Makefile.am
@@ -33,6 +33,7 @@ test_programs = \
 	t-command \
 	t-varbuf \
 	t-ar \
+	t-tar \
 	t-deb-version \
 	t-arch \
 	t-version \
diff --git a/lib/dpkg/t/t-tar.c b/lib/dpkg/t/t-tar.c
new file mode 100644
index 0000000..8417ed3
--- /dev/null
+++ b/lib/dpkg/t/t-tar.c
@@ -0,0 +1,148 @@
+/*
+ * libdpkg - Debian packaging suite library routines
+ * t-tar.c - test tar implementation
+ *
+ * Copyright © 2017 Guillem Jover <guillem at debian.org>
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <compat.h>
+
+#include <errno.h>
+
+#include <dpkg/test.h>
+#include <dpkg/tarfn.h>
+
+static void
+test_tar_atol8(void)
+{
+	uintmax_t u;
+
+	/* Test valid octal numbers. */
+	u = tar_atoul("000000\0\0\0\0\0\0", 12, UINTMAX_MAX);
+	test_pass(u == 0);
+	u = tar_atoul("00000000000\0", 12, UINTMAX_MAX);
+	test_pass(u == 0);
+	u = tar_atoul("00000000001\0", 12, UINTMAX_MAX);
+	test_pass(u == 1);
+	u = tar_atoul("00000000777\0", 12, UINTMAX_MAX);
+	test_pass(u == 511);
+	u = tar_atoul("77777777777\0", 12, UINTMAX_MAX);
+	test_pass(u == 8589934591);
+
+	/* Test legacy formatted octal numbers. */
+	u = tar_atoul("          0\0", 12, UINTMAX_MAX);
+	test_pass(u == 0);
+	u = tar_atoul("          1\0", 12, UINTMAX_MAX);
+	test_pass(u == 1);
+	u = tar_atoul("        777\0", 12, UINTMAX_MAX);
+	test_pass(u == 511);
+
+	/* Test extended octal numbers not terminated by a space or NUL (as
+	 * POSIX requires), which several implementations nevertheless accept
+	 * so that the field can hold one more octal digit. */
+	u = tar_atoul("000000000000", 12, UINTMAX_MAX);
+	test_pass(u == 0);
+	u = tar_atoul("000000000001", 12, UINTMAX_MAX);
+	test_pass(u == 1);
+	u = tar_atoul("000000000777", 12, UINTMAX_MAX);
+	test_pass(u == 511);
+	u = tar_atoul("777777777777", 12, UINTMAX_MAX);
+	test_pass(u == 68719476735);
+
+	/* Test invalid octal numbers. */
+	errno = 0;
+	u = tar_atoul("            ", 12, UINTMAX_MAX);
+	test_pass(u == 0);
+	test_pass(errno == EINVAL);
+
+	errno = 0;
+	u = tar_atoul("   11111aaa ", 12, UINTMAX_MAX);
+	test_pass(u == 0);
+	test_pass(errno == EINVAL);
+
+	errno = 0;
+	u = tar_atoul("          8 ", 12, UINTMAX_MAX);
+	test_pass(u == 0);
+	test_pass(errno == EINVAL);
+
+	errno = 0;
+	u = tar_atoul("         18 ", 12, UINTMAX_MAX);
+	test_pass(u == 0);
+	test_pass(errno == EINVAL);
+
+	errno = 0;
+	u = tar_atoul("    aa      ", 12, UINTMAX_MAX);
+	test_pass(u == 0);
+	test_pass(errno == EINVAL);
+}
+
+static void
+test_tar_atol256(void)
+{
+	uintmax_t u;
+	intmax_t i;
+
+	/* Test positive numbers. */
+	u = tar_atoul("\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 12, UINTMAX_MAX);
+	test_pass(u == 0);
+	u = tar_atoul("\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", 12, UINTMAX_MAX);
+	test_pass(u == 1);
+	u = tar_atoul("\x80\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00", 12, UINTMAX_MAX);
+	test_pass(u == 8589934592);
+	u = tar_atoul("\x80\x00\x00\x00\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 12, UINTMAX_MAX);
+	test_pass(u == INTMAX_MAX);
+
+	/* Test overflow. */
+	errno = 0;
+	u = tar_atoul("\x80\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00", 12, UINTMAX_MAX);
+	test_pass(u == UINTMAX_MAX);
+	test_pass(errno == ERANGE);
+
+	errno = 0;
+	u = tar_atoul("\x80\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00", 12, UINTMAX_MAX);
+	test_pass(u == UINTMAX_MAX);
+	test_pass(errno == ERANGE);
+
+	/* Test negative numbers. */
+	i = tar_atosl("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 12, INTMAX_MIN, INTMAX_MAX);
+	test_pass(i == -1);
+	i = tar_atosl("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE", 12, INTMAX_MIN, INTMAX_MAX);
+	test_pass(i == -2);
+	i = tar_atosl("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE\x00\x00\x00\x00", 12, INTMAX_MIN, INTMAX_MAX);
+	test_pass(i == -8589934592);
+	i = tar_atosl("\xFF\xFF\xFF\xFF\x80\x00\x00\x00\x00\x00\x00\x00", 12, INTMAX_MIN, INTMAX_MAX);
+	test_pass(i == INTMAX_MIN);
+
+	/* Test underflow. */
+	errno = 0;
+	i = tar_atosl("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\x00\x00\x00\x00", 12, INTMAX_MIN, INTMAX_MAX);
+	test_pass(i == INTMAX_MIN);
+	test_pass(errno == ERANGE);
+
+	errno = 0;
+	i = tar_atosl("\xFF\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 12, INTMAX_MIN, INTMAX_MAX);
+	test_pass(i == INTMAX_MIN);
+	test_pass(errno == ERANGE);
+}
+
+TEST_ENTRY(test)
+{
+	test_plan(38);
+
+	test_tar_atol8();
+	test_tar_atol256();
+}
diff --git a/lib/dpkg/tarfn.c b/lib/dpkg/tarfn.c
index ca921f0..27952f9 100644
--- a/lib/dpkg/tarfn.c
+++ b/lib/dpkg/tarfn.c
@@ -3,7 +3,7 @@
  * tarfn.c - tar archive extraction functions
  *
  * Copyright © 1995 Bruce Perens
- * Copyright © 2007-2011, 2013-2015 Guillem Jover <guillem at debian.org>
+ * Copyright © 2007-2011, 2013-2017 Guillem Jover <guillem at debian.org>
  *
  * This is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -40,6 +40,22 @@
 #define TAR_MAGIC_USTAR "ustar\0" "00"
 #define TAR_MAGIC_GNU   "ustar "  " \0"
 
+#define TAR_TYPE_SIGNED(t)	(!((t)0 < (t)-1))
+
+#define TAR_TYPE_MIN(t) \
+	(TAR_TYPE_SIGNED(t) ? \
+	 ~(t)TAR_TYPE_MAX(t) : \
+	 (t)0)
+#define TAR_TYPE_MAX(t) \
+	(TAR_TYPE_SIGNED(t) ? \
+	 ((((t)1 << (sizeof(t) * 8 - 2)) - 1) * 2 + 1) : \
+	 ~(t)0)
+
+#define TAR_ATOUL(str, type) \
+	(type)tar_atoul(str, sizeof(str), TAR_TYPE_MAX(type))
+#define TAR_ATOSL(str, type) \
+	(type)tar_atosl(str, sizeof(str), TAR_TYPE_MIN(type), TAR_TYPE_MAX(type))
+
 struct tar_header {
 	char name[100];
 	char mode[8];
@@ -62,21 +78,123 @@ struct tar_header {
 	char prefix[155];
 };
 
+static inline uintmax_t
+tar_ret_errno(int err, uintmax_t ret)
+{
+	errno = err;
+	return ret;
+}
+
 /**
  * Convert an ASCII octal string to an intmax_t.
  */
-static intmax_t
-tar_oct2int(const char *s, int size)
+static uintmax_t
+tar_atol8(const char *s, size_t size)
 {
-	intmax_t n = 0;
+	const char *end = s + size;
+	uintmax_t n = 0;
 
-	while (*s == ' ') {
+	/* Old implementations might precede the value with spaces. */
+	while (s < end && *s == ' ')
 		s++;
+
+	if (s == end)
+		return tar_ret_errno(EINVAL, 0);
+
+	while (s < end) {
+		if (*s == '\0' || *s == ' ')
+			break;
+		if (*s < '0' || *s > '7')
+			return tar_ret_errno(EINVAL, 0);
+		n = (n * 010) + (*s++ - '0');
+	}
+
+	while (s < end) {
+		if (*s != '\0' && *s != ' ')
+			return tar_ret_errno(EINVAL, 0);
+		s++;
+	}
+
+	if (s < end)
+		return tar_ret_errno(EINVAL, 0);
+
+	return n;
+}
+
+/**
+ * Convert a base-256 two's-complement number to a uintmax_t.
+ */
+static uintmax_t
+tar_atol256(const char *s, size_t size, intmax_t min, uintmax_t max)
+{
+	uintmax_t n = 0;
+	unsigned char c;
+	int sign;
+
+	/* The encoding always sets the first bit to one, so that it can be
+	 * distinguished from the ASCII encoding. For positive numbers we
+	 * need to reset it. For negative numbers we initialize n to -1. */
+	c = *s++;
+	if (c == 0x80)
+		c = 0;
+	else
+		n = ~(uintmax_t)0;
+	sign = c;
+
+	/* Check for overflows. */
+	while (size > sizeof(uintmax_t)) {
+		if (c != sign)
+			return tar_ret_errno(ERANGE, sign ? (uintmax_t)min : max);
+		c = *s++;
 		size--;
 	}
 
-	while (--size >= 0 && *s >= '0' && *s <= '7')
-		n = (n * 010) + (*s++ - '0');
+	if ((c & 0x80) != (sign & 0x80))
+		return tar_ret_errno(ERANGE, sign ? (uintmax_t)min : max);
+
+	for (;;) {
+		n = (n << 8) | c;
+		if (--size <= 0)
+			break;
+		c = *s++;
+	}
+
+	return n;
+}
+
+static uintmax_t
+tar_atol(const char *s, size_t size, intmax_t min, uintmax_t max)
+{
+	const unsigned char *a = (const unsigned char *)s;
+
+	/* Check if it is a long two's-complement base-256 number, positive or
+	 * negative. */
+	if (*a == 0xff || *a == 0x80)
+		return tar_atol256(s, size, min, max);
+	else
+		return tar_atol8(s, size);
+}
+
+uintmax_t
+tar_atoul(const char *s, size_t size, uintmax_t max)
+{
+	uintmax_t n = tar_atol(s, size, 0, UINTMAX_MAX);
+
+	if (n > max)
+		return tar_ret_errno(ERANGE, UINTMAX_MAX);
+
+	return n;
+}
+
+intmax_t
+tar_atosl(const char *s, size_t size, intmax_t min, intmax_t max)
+{
+	intmax_t n = tar_atol(s, size, INTMAX_MIN, INTMAX_MAX);
+
+	if (n < min)
+		return tar_ret_errno(ERANGE, INTMAX_MIN);
+	if (n > max)
+		return tar_ret_errno(ERANGE, INTMAX_MAX);
 
 	return n;
 }
@@ -122,7 +240,7 @@ tar_header_get_unix_mode(struct tar_header *h)
 		break;
 	}
 
-	mode |= tar_oct2int(h->mode, sizeof(h->mode));
+	mode |= TAR_ATOUL(h->mode, mode_t);
 
 	return mode;
 }
@@ -155,6 +273,8 @@ tar_header_decode(struct tar_header *h, struct tar_entry *d)
 {
 	long checksum;
 
+	errno = 0;
+
 	if (memcmp(h->magic, TAR_MAGIC_GNU, 6) == 0)
 		d->format = TAR_FORMAT_GNU;
 	else if (memcmp(h->magic, TAR_MAGIC_USTAR, 6) == 0)
@@ -173,12 +293,14 @@ tar_header_decode(struct tar_header *h, struct tar_entry *d)
 		d->name = m_strndup(h->name, sizeof(h->name));
 	d->linkname = m_strndup(h->linkname, sizeof(h->linkname));
 	d->stat.mode = tar_header_get_unix_mode(h);
-	d->size = (off_t)tar_oct2int(h->size, sizeof(h->size));
-	d->mtime = (time_t)tar_oct2int(h->mtime, sizeof(h->mtime));
+	/* Even though off_t is signed, we use an unsigned parser here because
+	 * negative offsets are not allowed. */
+	d->size = TAR_ATOUL(h->size, off_t);
+	d->mtime = TAR_ATOSL(h->mtime, time_t);
 
 	if (d->type == TAR_FILETYPE_CHARDEV || d->type == TAR_FILETYPE_BLOCKDEV)
-		d->dev = makedev(tar_oct2int(h->devmajor, sizeof(h->devmajor)),
-				 tar_oct2int(h->devminor, sizeof(h->devminor)));
+		d->dev = makedev(TAR_ATOUL(h->devmajor, dev_t),
+		                 TAR_ATOUL(h->devminor, dev_t));
 	else
 		d->dev = makedev(0, 0);
 
@@ -186,16 +308,19 @@ tar_header_decode(struct tar_header *h, struct tar_entry *d)
 		d->stat.uname = m_strndup(h->user, sizeof(h->user));
 	else
 		d->stat.uname = NULL;
-	d->stat.uid = (uid_t)tar_oct2int(h->uid, sizeof(h->uid));
+	d->stat.uid = TAR_ATOUL(h->uid, uid_t);
 
 	if (*h->group)
 		d->stat.gname = m_strndup(h->group, sizeof(h->group));
 	else
 		d->stat.gname = NULL;
-	d->stat.gid = (gid_t)tar_oct2int(h->gid, sizeof(h->gid));
+	d->stat.gid = TAR_ATOUL(h->gid, gid_t);
 
-	checksum = tar_oct2int(h->checksum, sizeof(h->checksum));
+	checksum = tar_atol8(h->checksum, sizeof(h->checksum));
 
+	/* Check for parse errors. */
+	if (errno)
+		return 0;
 	return tar_header_checksum(h) == checksum;
 }
 
diff --git a/lib/dpkg/tarfn.h b/lib/dpkg/tarfn.h
index ce69424..37269de 100644
--- a/lib/dpkg/tarfn.h
+++ b/lib/dpkg/tarfn.h
@@ -3,7 +3,7 @@
  * tarfn.h - tar archive extraction functions
  *
  * Copyright © 1995 Bruce Perens
- * Copyright © 2009-2014 Guillem Jover <guillem at debian.org>
+ * Copyright © 2009-2014, 2017 Guillem Jover <guillem at debian.org>
  *
  * This is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -24,6 +24,8 @@
 
 #include <sys/types.h>
 
+#include <stdint.h>
+
 #include <dpkg/file.h>
 
 /**
@@ -87,6 +89,11 @@ struct tar_operations {
 	tar_make_func *mknod;
 };
 
+uintmax_t
+tar_atoul(const char *s, size_t size, uintmax_t max);
+intmax_t
+tar_atosl(const char *s, size_t size, intmax_t min, intmax_t max);
+
 void
 tar_entry_update_from_system(struct tar_entry *te);
 
diff --git a/man/deb.man b/man/deb.man
index 3d1754a..2b8da5f 100644
--- a/man/deb.man
+++ b/man/deb.man
@@ -3,7 +3,7 @@
 .\" Copyright © 1995 Raul Miller
 .\" Copyright © 1996 Ian Jackson <ijackson at chiark.greenend.org.uk>
 .\" Copyright © 2000 Wichert Akkerman <wakkerma at debian.org>
-.\" Copyright © 2006-2014 Guillem Jover <guillem at debian.org>
+.\" Copyright © 2006-2017 Guillem Jover <guillem at debian.org>
 .\"
 .\" This is free software; you can redistribute it and/or modify
 .\" it under the terms of the GNU General Public License as published by
@@ -46,12 +46,15 @@ File sizes are limited to 10 ASCII decimal digits, allowing for up to
 approximately 9536.74 MiB member files.
 .PP
 The \fBtar\fP archives currently allowed are, the old-style (v7) format,
-the pre-POSIX ustar format, a subset of the GNU format (only the new
-style long pathnames and long linknames, supported since dpkg 1.4.1.17),
+the pre-POSIX ustar format, a subset of the GNU format (new style long
+pathnames and long linknames, supported since dpkg 1.4.1.17; large file
+metadata since dpkg 1.18.24),
 and the POSIX ustar format (long names supported since dpkg 1.15.0).
 Unrecognized tar typeflags are considered an error.
 Each tar entry size inside a tar archive is limited to 11 ASCII octal
 digits, allowing for up to 8 GiB tar entries.
+The GNU large file metadata support permits 95-bit tar entry sizes and
+negative timestamps, and 63-bit UID, GID and device numbers.
 .PP
 The first member is named
 .B debian\-binary

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/dpkg.git



More information about the Reproducible-commits mailing list