[dpkg] 166/192: Dpkg::Control::HashCore: Optimize trailing space matching on parse()

Ximin Luo infinity0 at debian.org
Tue Oct 17 11:04:14 UTC 2017


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch pu/reproducible_builds
in repository dpkg.

commit 1b2bb72f7b5da8419be5521b0c1a25eab6ce6f06
Author: Guillem Jover <guillem at debian.org>
Date:   Wed Sep 27 02:10:07 2017 +0200

    Dpkg::Control::HashCore: Optimize trailing space matching on parse()
    
    We remove the trailing space after the chomp, so that we cover the
    common case of a single \n with chomp, and do not need to check for
    trailing spaces more than once while parsing.
    
    We preserve the chomp'ed string to be used for the Armor Header checks,
    which have a different set of allowed whitespace than what \s covers.
---
 debian/changelog                 |  2 ++
 scripts/Dpkg/Control/HashCore.pm | 16 ++++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index 38a2a0b..83b379e 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -94,6 +94,8 @@ dpkg (1.19.0) UNRELEASED; urgency=medium
       STORE method.
     - Expect deb822 stanza delimiters more often than OpenPGP Armor Headers
       in Dpkg::Control::HashCore parse method.
+    - Optimize trailing space matching on Dpkg::Control::HashCore parse
+      method, by trimming it just once at the beginning of the iteration.
   * Documentation:
     - Document currently accepted syntax for changelogs in deb-changelog(5).
       Closes: #858579
diff --git a/scripts/Dpkg/Control/HashCore.pm b/scripts/Dpkg/Control/HashCore.pm
index a3ea4bc..3504e99 100644
--- a/scripts/Dpkg/Control/HashCore.pm
+++ b/scripts/Dpkg/Control/HashCore.pm
@@ -199,8 +199,14 @@ sub parse {
     local $_;
 
     while (<$fh>) {
+        # In the common case there will be just a trailing \n character,
+        # so using chomp here, which is very fast, will avoid the later
+        # s/// doing anything, which gives us a significant speed up.
 	chomp;
-	next if m/^\s*$/ and $paraborder;
+        my $armor = $_;
+        s/\s*$//;
+
+        next if length == 0 and $paraborder;
 	next if substr($_, 0, 1) eq '#';
 	$paraborder = 0;
 	if (m/^(\S+?)\s*:\s*(.*)$/) {
@@ -214,7 +220,6 @@ sub parse {
 		    $self->parse_error($desc, g_('duplicate field %s found'), $name);
 		}
 	    }
-	    $value =~ s/\s*$//;
 	    $self->{$name} = $value;
 	    $cf = $name;
 	} elsif (m/^\s(\s*\S.*)$/) {
@@ -222,13 +227,12 @@ sub parse {
 	    unless (defined($cf)) {
 		$self->parse_error($desc, g_('continued value line not in field'));
             }
-	    $line =~ s/\s*$//;
 	    if ($line =~ /^\.+$/) {
 		$line = substr $line, 1;
 	    }
 	    $self->{$cf} .= "\n$line";
-	} elsif (m/^\s*$/ ||
-	         ($expect_pgp_sig && m/^-----BEGIN PGP SIGNATURE-----[\r\t ]*$/)) {
+        } elsif (length == 0 ||
+                 ($expect_pgp_sig && $armor =~ m/^-----BEGIN PGP SIGNATURE-----[\r\t ]*$/)) {
 	    if ($expect_pgp_sig) {
 		# Skip empty lines
 		$_ = <$fh> while defined && m/^\s*$/;
@@ -254,7 +258,7 @@ sub parse {
 		$$self->{is_pgp_signed} = 1;
 	    }
 	    last; # Finished parsing one block
-        } elsif (m/^-----BEGIN PGP SIGNED MESSAGE-----[\r\t ]*$/) {
+        } elsif ($armor =~ m/^-----BEGIN PGP SIGNED MESSAGE-----[\r\t ]*$/) {
             $expect_pgp_sig = 1;
             if ($$self->{allow_pgp} and not $parabody) {
                 # Skip OpenPGP headers

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/dpkg.git



More information about the Reproducible-commits mailing list