[Pkg-owncloud-commits] [owncloud-client] 04/218: csync_excluded: Add fast _traversal() function #3638

Sandro Knauß hefee-guest at moszumanska.debian.org
Sat Oct 17 14:30:30 UTC 2015


This is an automated email from the git hooks/post-receive script.

hefee-guest pushed a commit to branch master
in repository owncloud-client.

commit fe49255bff4ac7d9589ca384c648f66cb36c66f3
Author: Christian Kamm <mail at ckamm.de>
Date:   Mon Aug 24 09:44:54 2015 +0200

    csync_excluded: Add fast _traversal() function #3638
    
    This function only checks the full path and the basename and is thus
    around 7x faster. It is very useful in a csync_update context where
    we know that the leading dirs have already been checked for exclusion.
---
 csync/src/csync_exclude.c                     | 334 ++++++++++++--------------
 csync/src/csync_exclude.h                     |  22 ++
 csync/tests/csync_tests/check_csync_exclude.c |  95 +++++++-
 3 files changed, 265 insertions(+), 186 deletions(-)

diff --git a/csync/src/csync_exclude.c b/csync/src/csync_exclude.c
index 42adfe8..baf14b7 100644
--- a/csync/src/csync_exclude.c
+++ b/csync/src/csync_exclude.c
@@ -186,199 +186,185 @@ bool csync_is_windows_reserved_word(const char* filename) {
   return false;
 }
 
-CSYNC_EXCLUDE_TYPE csync_excluded_no_ctx(c_strlist_t *excludes, const char *path, int filetype) {
-  size_t i = 0;
-  const char *p = NULL;
-  char *bname = NULL;
-  char *dname = NULL;
-  char *conflict = NULL;
-  int rc = -1;
-  CSYNC_EXCLUDE_TYPE match = CSYNC_NOT_EXCLUDED;
-  CSYNC_EXCLUDE_TYPE type  = CSYNC_NOT_EXCLUDED;
-
-    for (p = path; *p; p++) {
-      switch (*p) {
-        case '\\':
-        case ':':
-        case '?':
-        case '*':
-        case '"':
-        case '>':
-        case '<':
-        case '|':
-          return CSYNC_FILE_EXCLUDE_INVALID_CHAR;
-        default:
-          break;
+static CSYNC_EXCLUDE_TYPE _csync_excluded_common(c_strlist_t *excludes, const char *path, int filetype, bool check_leading_dirs) {
+    size_t i = 0;
+    const char *p = NULL;
+    const char *bname = NULL;
+    size_t blen = 0;
+    char *conflict = NULL;
+    int rc = -1;
+    CSYNC_EXCLUDE_TYPE match = CSYNC_NOT_EXCLUDED;
+    CSYNC_EXCLUDE_TYPE type  = CSYNC_NOT_EXCLUDED;
+
+      for (p = path; *p; p++) {
+        switch (*p) {
+          case '\\':
+          case ':':
+          case '?':
+          case '*':
+          case '"':
+          case '>':
+          case '<':
+          case '|':
+            return CSYNC_FILE_EXCLUDE_INVALID_CHAR;
+          default:
+            break;
+        }
       }
-    }
 
-  /* split up the path */
-  dname = c_dirname(path);
-  bname = c_basename(path);
+    /* split up the path */
+    bname = strrchr(path, '/');
+    if (bname) {
+        bname += 1; // don't include the /
+    } else {
+        bname = path;
+    }
+    blen = strlen(bname);
 
-  if (bname == NULL || dname == NULL) {
-      match = CSYNC_NOT_EXCLUDED;
-      SAFE_FREE(bname);
-      SAFE_FREE(dname);
-      goto out;
-  }
+    rc = csync_fnmatch(".csync_journal.db*", bname, 0);
+    if (rc == 0) {
+        match = CSYNC_FILE_SILENTLY_EXCLUDED;
+        goto out;
+    }
 
-  rc = csync_fnmatch(".csync_journal.db*", bname, 0);
-  if (rc == 0) {
-      match = CSYNC_FILE_SILENTLY_EXCLUDED;
-      SAFE_FREE(bname);
-      SAFE_FREE(dname);
-      goto out;
-  }
+    // check the strlen and ignore the file if its name is longer than 254 chars.
+    // whenever changing this also check createDownloadTmpFileName
+    if (blen > 254) {
+        match = CSYNC_FILE_EXCLUDE_LONG_FILENAME;
+        goto out;
+    }
 
-  // check the strlen and ignore the file if its name is longer than 254 chars.
-  // whenever changing this also check createDownloadTmpFileName
-  if (strlen(bname) > 254) {
-      match = CSYNC_FILE_EXCLUDE_LONG_FILENAME;
-      SAFE_FREE(bname);
-      SAFE_FREE(dname);
-      goto out;
-  }
+  #ifdef _WIN32
+    // Windows cannot sync files ending in spaces (#2176). It also cannot
+    // distinguish files ending in '.' from files without an ending,
+    // as '.' is a separator that is not stored internally, so let's
+    // not allow to sync those to avoid file loss/ambiguities (#416)
+    if (blen > 1 && (bname[blen-1]== ' ' || bname[blen-1]== '.' )) {
+        match = CSYNC_FILE_EXCLUDE_INVALID_CHAR;
+        goto out;
+    }
 
-#ifdef _WIN32
-  // Windows cannot sync files ending in spaces (#2176). It also cannot
-  // distinguish files ending in '.' from files without an ending,
-  // as '.' is a separator that is not stored internally, so let's
-  // not allow to sync those to avoid file loss/ambiguities (#416)
-  size_t blen = strlen(bname);
-  if (blen > 1 && (bname[blen-1]== ' ' || bname[blen-1]== '.' )) {
+    if (csync_is_windows_reserved_word(bname)) {
       match = CSYNC_FILE_EXCLUDE_INVALID_CHAR;
-      SAFE_FREE(bname);
-      SAFE_FREE(dname);
       goto out;
-  }
+    }
+  #endif
 
-  if (csync_is_windows_reserved_word(bname)) {
-    match = CSYNC_FILE_EXCLUDE_INVALID_CHAR;
-    SAFE_FREE(bname);
-    SAFE_FREE(dname);
-    goto out;
-  }
-#endif
+    rc = csync_fnmatch(".owncloudsync.log*", bname, 0);
+    if (rc == 0) {
+        match = CSYNC_FILE_SILENTLY_EXCLUDED;
+        goto out;
+    }
 
-  rc = csync_fnmatch(".owncloudsync.log*", bname, 0);
-  if (rc == 0) {
-      match = CSYNC_FILE_SILENTLY_EXCLUDED;
-      SAFE_FREE(bname);
-      SAFE_FREE(dname);
-      goto out;
-  }
+    /* Always ignore conflict files, not only via the exclude list */
+    rc = csync_fnmatch("*_conflict-*", bname, 0);
+    if (rc == 0) {
+        match = CSYNC_FILE_SILENTLY_EXCLUDED;
+        goto out;
+    }
 
-  /* Always ignore conflict files, not only via the exclude list */
-  rc = csync_fnmatch("*_conflict-*", bname, 0);
-  if (rc == 0) {
-      match = CSYNC_FILE_SILENTLY_EXCLUDED;
-      SAFE_FREE(bname);
-      SAFE_FREE(dname);
-      goto out;
-  }
+    if (getenv("CSYNC_CONFLICT_FILE_USERNAME")) {
+        rc = asprintf(&conflict, "*_conflict_%s-*", getenv("CSYNC_CONFLICT_FILE_USERNAME"));
+        if (rc < 0) {
+            goto out;
+        }
+        rc = csync_fnmatch(conflict, path, 0);
+        if (rc == 0) {
+            match = CSYNC_FILE_SILENTLY_EXCLUDED;
+            SAFE_FREE(conflict);
+            goto out;
+        }
+        SAFE_FREE(conflict);
+    }
 
-  if (getenv("CSYNC_CONFLICT_FILE_USERNAME")) {
-      rc = asprintf(&conflict, "*_conflict_%s-*", getenv("CSYNC_CONFLICT_FILE_USERNAME"));
-      if (rc < 0) {
-          SAFE_FREE(bname);
-          SAFE_FREE(dname);
-          goto out;
-      }
-      rc = csync_fnmatch(conflict, path, 0);
-      if (rc == 0) {
-          match = CSYNC_FILE_SILENTLY_EXCLUDED;
-          SAFE_FREE(conflict);
-          SAFE_FREE(bname);
-          SAFE_FREE(dname);
-          goto out;
-      }
-      SAFE_FREE(conflict);
-  }
+    if( ! excludes ) {
+        goto out;
+    }
 
-  SAFE_FREE(bname);
-  SAFE_FREE(dname);
+    /* Loop over all exclude patterns and evaluate the given path */
+    for (i = 0; match == CSYNC_NOT_EXCLUDED && i < excludes->count; i++) {
+        bool match_dirs_only = false;
+        char *pattern_stored = c_strdup(excludes->vector[i]);
+        char* pattern = pattern_stored;
 
-  if( ! excludes ) {
-      goto out;
-  }
+        type = CSYNC_FILE_EXCLUDE_LIST;
+        if (strlen(pattern) < 1) {
+            SAFE_FREE(pattern_stored);
+            continue;
+        }
+        /* A pattern starting with ']' marks matching files as removable (cleanup) */
+        if (pattern[0] == ']') {
+            ++pattern;
+            if (filetype == CSYNC_FTW_TYPE_FILE) {
+                type = CSYNC_FILE_EXCLUDE_AND_REMOVE;
+            }
+        }
+        /* Check if the pattern applies to directories only (trailing '/'). */
+        if (pattern[strlen(pattern)-1] == '/') {
+            match_dirs_only = true;
+            pattern[strlen(pattern)-1] = '\0'; /* Cut off the slash */
+        }
 
-  /* Loop over all exclude patterns and evaluate the given path */
-  for (i = 0; match == CSYNC_NOT_EXCLUDED && i < excludes->count; i++) {
-      bool match_dirs_only = false;
-      char *pattern_stored = c_strdup(excludes->vector[i]);
-      char* pattern = pattern_stored;
+        /* check if the pattern contains a / and if, compare to the whole path */
+        if (strchr(pattern, '/')) {
+            rc = csync_fnmatch(pattern, path, FNM_PATHNAME);
+            if( rc == 0 ) {
+                match = type;
+            }
+            /* if the pattern requires a dir but the path is not one, it's still not excluded. */
+            if (match_dirs_only && filetype != CSYNC_FTW_TYPE_DIR) {
+                match = CSYNC_NOT_EXCLUDED;
+            }
+        }
 
-      type = CSYNC_FILE_EXCLUDE_LIST;
-      if (strlen(pattern) < 1) {
-	  SAFE_FREE(pattern_stored);
-          continue;
-      }
-      /* Ecludes starting with ']' means it can be cleanup */
-      if (pattern[0] == ']') {
-          ++pattern;
-          if (filetype == CSYNC_FTW_TYPE_FILE) {
-              type = CSYNC_FILE_EXCLUDE_AND_REMOVE;
-          }
-      }
-      /* Check if the pattern applies to pathes only. */
-      if (pattern[strlen(pattern)-1] == '/') {
-          match_dirs_only = true;
-          pattern[strlen(pattern)-1] = '\0'; /* Cut off the slash */
-      }
+        /* if still not excluded, check each component and leading directory of the path */
+        if (match == CSYNC_NOT_EXCLUDED) {
+            char *segmented_path = strdup(path);
+            size_t len = strlen(segmented_path);
+            bool check_segname = !match_dirs_only || filetype != CSYNC_FTW_TYPE_FILE;
+            for (int j = len; ; --j) {
+                // read backwards until a path separator
+                if (j != 0 && segmented_path[j-1] != '/') {
+                    continue;
+                }
+
+                // check 'basename', i.e. for "/foo/bar/fi" we'd check 'fi', 'bar', 'foo'
+                if (check_segname && segmented_path[j] != 0) {
+                    rc = csync_fnmatch(pattern, segmented_path + j, 0);
+                    if (rc == 0) {
+                        match = type;
+                        break;
+                    }
+                }
+                check_segname = true;
+
+                if (j == 0 || !check_leading_dirs) {
+                    break;
+                }
+
+                // check 'dirname', i.e. for "/foo/bar/fi" we'd check '/foo/bar', '/foo'
+                segmented_path[j-1] = '\0';
+                rc = csync_fnmatch(pattern, segmented_path, 0);
+                if (rc == 0) {
+                    match = type;
+                    break;
+                }
+            }
+            SAFE_FREE(segmented_path);
+        }
+        SAFE_FREE(pattern_stored);
+    }
 
-      /* check if the pattern contains a / and if, compare to the whole path */
-      if (strchr(pattern, '/')) {
-          rc = csync_fnmatch(pattern, path, FNM_PATHNAME);
-          if( rc == 0 ) {
-              match = type;
-          }
-          /* if the pattern requires a dir, but path is not, its still not excluded. */
-          if (match_dirs_only && filetype != CSYNC_FTW_TYPE_DIR) {
-              match = CSYNC_NOT_EXCLUDED;
-          }
-      }
+  out:
 
-      /* if still not excluded, check each component and leading directory of the path */
-      if (match == CSYNC_NOT_EXCLUDED) {
-          char *segmented_path = strdup(path);
-          size_t len = strlen(segmented_path);
-          bool check_segname = !match_dirs_only || filetype != CSYNC_FTW_TYPE_FILE;
-          for (int j = len; ; --j) {
-              // read backwards until a path separator
-              if (j != 0 && segmented_path[j-1] != '/') {
-                  continue;
-              }
-
-              // check 'basename', i.e. for "/foo/bar/fi" we'd check 'fi', 'bar', 'foo'
-              if (check_segname && segmented_path[j] != 0) {
-                  rc = csync_fnmatch(pattern, segmented_path + j, 0);
-                  if (rc == 0) {
-                      match = type;
-                      break;
-                  }
-              }
-              check_segname = true;
-
-              if (j == 0) {
-                  break;
-              }
-
-              // check 'dirname', i.e. for "/foo/bar/fi" we'd check '/foo/bar', '/foo'
-              segmented_path[j-1] = '\0';
-              rc = csync_fnmatch(pattern, segmented_path, 0);
-              if (rc == 0) {
-                  match = type;
-                  break;
-              }
-          }
-          SAFE_FREE(segmented_path);
-      }
-      SAFE_FREE(pattern_stored);
-  }
+    return match;
+}
 
-out:
+CSYNC_EXCLUDE_TYPE csync_excluded_traversal(c_strlist_t *excludes, const char *path, int filetype) {
+  return _csync_excluded_common(excludes, path, filetype, false);
+}
 
-  return match;
+CSYNC_EXCLUDE_TYPE csync_excluded_no_ctx(c_strlist_t *excludes, const char *path, int filetype) {
+  return _csync_excluded_common(excludes, path, filetype, true);
 }
 
diff --git a/csync/src/csync_exclude.h b/csync/src/csync_exclude.h
index bfb8719..c0dab57 100644
--- a/csync/src/csync_exclude.h
+++ b/csync/src/csync_exclude.h
@@ -66,6 +66,11 @@ void csync_exclude_destroy(CSYNC *ctx);
  *
  * This excludes also paths which can't be used without unix extensions.
  *
+ * The exclude list is checked against the full path, each component of
+ * the path and all leading directory strings, e.g.
+ * '/foo/bar/file' checks ('/foo/bar/file', 'foo', 'bar', 'file',
+ * '/foo/bar', '/foo').
+ *
  * @param ctx   The synchronizer context.
  * @param path  The patch to check.
  *
@@ -74,6 +79,23 @@ void csync_exclude_destroy(CSYNC *ctx);
 CSYNC_EXCLUDE_TYPE csync_excluded(CSYNC *ctx, const char *path, int filetype);
 
 /**
+ * @brief Check if the given path should be excluded in a traversal situation.
+ *
+ * It does only part of the work that csync_excluded does because it's assumed
+ * that all leading directories have been run through csync_excluded_traversal()
+ * before. This can be significantly faster.
+ *
+ * That means for '/foo/bar/file' only ('/foo/bar/file', 'file') is checked
+ * against the exclude patterns.
+ *
+ * @param excludes  The list of exclude patterns to match against.
+ * @param path  The path to check.
+ *
+ * @return  2 if excluded and needs cleanup, 1 if excluded, 0 if not.
+ */
+CSYNC_EXCLUDE_TYPE csync_excluded_traversal(c_strlist_t *excludes, const char *path, int filetype);
+
+/**
  * @brief csync_excluded_no_ctx
  * @param excludes
  * @param path
diff --git a/csync/tests/csync_tests/check_csync_exclude.c b/csync/tests/csync_tests/check_csync_exclude.c
index b0b57ff..0bcbda9 100644
--- a/csync/tests/csync_tests/check_csync_exclude.c
+++ b/csync/tests/csync_tests/check_csync_exclude.c
@@ -142,6 +142,56 @@ static void check_csync_excluded(void **state)
 
 }
 
+static void check_csync_excluded_traversal(void **state)
+{
+    CSYNC *csync = *state;
+    int rc;
+
+    _csync_exclude_add( &(csync->excludes), "/exclude" );
+
+    /* Check toplevel dir, the pattern only works for toplevel dir. */
+    rc = csync_excluded_traversal(csync->excludes, "/exclude", CSYNC_FTW_TYPE_DIR);
+    assert_int_equal(rc, CSYNC_FILE_EXCLUDE_LIST);
+
+    rc = csync_excluded_traversal(csync->excludes, "/foo/exclude", CSYNC_FTW_TYPE_DIR);
+    assert_int_equal(rc, CSYNC_NOT_EXCLUDED);
+
+    /* check for a file called exclude. Must still work */
+    rc = csync_excluded_traversal(csync->excludes, "/exclude", CSYNC_FTW_TYPE_FILE);
+    assert_int_equal(rc, CSYNC_FILE_EXCLUDE_LIST);
+
+    rc = csync_excluded_traversal(csync->excludes, "/foo/exclude", CSYNC_FTW_TYPE_FILE);
+    assert_int_equal(rc, CSYNC_NOT_EXCLUDED);
+
+    /* Add an exclude for directories only: excl/ */
+    _csync_exclude_add( &(csync->excludes), "excl/" );
+    rc = csync_excluded_traversal(csync->excludes, "/excl", CSYNC_FTW_TYPE_DIR);
+    assert_int_equal(rc, CSYNC_FILE_EXCLUDE_LIST);
+
+    rc = csync_excluded_traversal(csync->excludes, "meep/excl", CSYNC_FTW_TYPE_DIR);
+    assert_int_equal(rc, CSYNC_FILE_EXCLUDE_LIST);
+
+    rc = csync_excluded_traversal(csync->excludes, "meep/excl/file", CSYNC_FTW_TYPE_FILE);
+    assert_int_equal(rc, CSYNC_NOT_EXCLUDED); // because leading dirs aren't checked!
+
+    rc = csync_excluded_traversal(csync->excludes, "/excl", CSYNC_FTW_TYPE_FILE);
+    assert_int_equal(rc, CSYNC_NOT_EXCLUDED);
+
+    _csync_exclude_add(&csync->excludes, "/excludepath/withsubdir");
+
+    rc = csync_excluded_traversal(csync->excludes, "/excludepath/withsubdir", CSYNC_FTW_TYPE_DIR);
+    assert_int_equal(rc, CSYNC_FILE_EXCLUDE_LIST);
+
+    rc = csync_excluded_traversal(csync->excludes, "/excludepath/withsubdir", CSYNC_FTW_TYPE_FILE);
+    assert_int_equal(rc, CSYNC_FILE_EXCLUDE_LIST);
+
+    rc = csync_excluded_traversal(csync->excludes, "/excludepath/withsubdir2", CSYNC_FTW_TYPE_DIR);
+    assert_int_equal(rc, CSYNC_NOT_EXCLUDED);
+
+    rc = csync_excluded_traversal(csync->excludes, "/excludepath/withsubdir/foo", CSYNC_FTW_TYPE_DIR);
+    assert_int_equal(rc, CSYNC_NOT_EXCLUDED); // because leading dirs aren't checked!
+}
+
 static void check_csync_pathes(void **state)
 {
     CSYNC *csync = *state;
@@ -218,21 +268,41 @@ static void check_csync_excluded_performance(void **state)
     int totalRc = 0;
 
     // Being able to use QElapsedTimer for measurement would be nice...
-    struct timeval before, after;
-    gettimeofday(&before, 0);
-
-    for (int i = 0; i < N; ++i) {
-        totalRc += csync_excluded(csync, "/this/is/quite/a/long/path/with/many/components", CSYNC_FTW_TYPE_DIR);
-        totalRc += csync_excluded(csync, "/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/29", CSYNC_FTW_TYPE_FILE);
+    {
+        struct timeval before, after;
+        gettimeofday(&before, 0);
+
+        for (int i = 0; i < N; ++i) {
+            totalRc += csync_excluded(csync, "/this/is/quite/a/long/path/with/many/components", CSYNC_FTW_TYPE_DIR);
+            totalRc += csync_excluded(csync, "/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/29", CSYNC_FTW_TYPE_FILE);
+        }
+        assert_int_equal(totalRc, CSYNC_NOT_EXCLUDED); // mainly to avoid optimization
+
+        gettimeofday(&after, 0);
+
+        const double total = (after.tv_sec - before.tv_sec)
+                + (after.tv_usec - before.tv_usec) / 1.0e6;
+        const double perCallMs = total / 2 / N * 1000;
+        printf("csync_excluded: %f ms per call\n", perCallMs);
     }
-    assert_int_equal(totalRc, CSYNC_NOT_EXCLUDED); // mainly to avoid optimization
 
-    gettimeofday(&after, 0);
+    {
+        struct timeval before, after;
+        gettimeofday(&before, 0);
+
+        for (int i = 0; i < N; ++i) {
+            totalRc += csync_excluded_traversal(csync->excludes, "/this/is/quite/a/long/path/with/many/components", CSYNC_FTW_TYPE_DIR);
+            totalRc += csync_excluded_traversal(csync->excludes, "/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/29", CSYNC_FTW_TYPE_FILE);
+        }
+        assert_int_equal(totalRc, CSYNC_NOT_EXCLUDED); // mainly to avoid optimization
 
-    const double total = (after.tv_sec - before.tv_sec)
-            + (after.tv_usec - before.tv_usec) / 1.0e6;
-    const double perCallMs = total / 2 / N * 1000;
-    printf("csync_excluded: %f ms per call\n", perCallMs);
+        gettimeofday(&after, 0);
+
+        const double total = (after.tv_sec - before.tv_sec)
+                + (after.tv_usec - before.tv_usec) / 1.0e6;
+        const double perCallMs = total / 2 / N * 1000;
+        printf("csync_excluded_traversal: %f ms per call\n", perCallMs);
+    }
 }
 
 int torture_run_tests(void)
@@ -241,6 +311,7 @@ int torture_run_tests(void)
         unit_test_setup_teardown(check_csync_exclude_add, setup, teardown),
         unit_test_setup_teardown(check_csync_exclude_load, setup, teardown),
         unit_test_setup_teardown(check_csync_excluded, setup_init, teardown),
+        unit_test_setup_teardown(check_csync_excluded_traversal, setup_init, teardown),
         unit_test_setup_teardown(check_csync_pathes, setup_init, teardown),
         unit_test_setup_teardown(check_csync_is_windows_reserved_word, setup_init, teardown),
         unit_test_setup_teardown(check_csync_excluded_performance, setup_init, teardown),

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-owncloud/owncloud-client.git



More information about the Pkg-owncloud-commits mailing list