[Pkg-owncloud-commits] [owncloud-client] 04/218: csync_excluded: Add fast _traversal() function #3638
Sandro Knauß
hefee-guest at moszumanska.debian.org
Sat Oct 17 14:30:30 UTC 2015
This is an automated email from the git hooks/post-receive script.
hefee-guest pushed a commit to branch master
in repository owncloud-client.
commit fe49255bff4ac7d9589ca384c648f66cb36c66f3
Author: Christian Kamm <mail at ckamm.de>
Date: Mon Aug 24 09:44:54 2015 +0200
csync_excluded: Add fast _traversal() function #3638
This function only checks the full path and the basename and is thus
around 7x faster. It is very useful in a csync_update context where
we know that the leading dirs have already been checked for exclusion.
---
csync/src/csync_exclude.c | 334 ++++++++++++--------------
csync/src/csync_exclude.h | 22 ++
csync/tests/csync_tests/check_csync_exclude.c | 95 +++++++-
3 files changed, 265 insertions(+), 186 deletions(-)
diff --git a/csync/src/csync_exclude.c b/csync/src/csync_exclude.c
index 42adfe8..baf14b7 100644
--- a/csync/src/csync_exclude.c
+++ b/csync/src/csync_exclude.c
@@ -186,199 +186,185 @@ bool csync_is_windows_reserved_word(const char* filename) {
return false;
}
-CSYNC_EXCLUDE_TYPE csync_excluded_no_ctx(c_strlist_t *excludes, const char *path, int filetype) {
- size_t i = 0;
- const char *p = NULL;
- char *bname = NULL;
- char *dname = NULL;
- char *conflict = NULL;
- int rc = -1;
- CSYNC_EXCLUDE_TYPE match = CSYNC_NOT_EXCLUDED;
- CSYNC_EXCLUDE_TYPE type = CSYNC_NOT_EXCLUDED;
-
- for (p = path; *p; p++) {
- switch (*p) {
- case '\\':
- case ':':
- case '?':
- case '*':
- case '"':
- case '>':
- case '<':
- case '|':
- return CSYNC_FILE_EXCLUDE_INVALID_CHAR;
- default:
- break;
+static CSYNC_EXCLUDE_TYPE _csync_excluded_common(c_strlist_t *excludes, const char *path, int filetype, bool check_leading_dirs) {
+ size_t i = 0;
+ const char *p = NULL;
+ const char *bname = NULL;
+ size_t blen = 0;
+ char *conflict = NULL;
+ int rc = -1;
+ CSYNC_EXCLUDE_TYPE match = CSYNC_NOT_EXCLUDED;
+ CSYNC_EXCLUDE_TYPE type = CSYNC_NOT_EXCLUDED;
+
+ for (p = path; *p; p++) {
+ switch (*p) {
+ case '\\':
+ case ':':
+ case '?':
+ case '*':
+ case '"':
+ case '>':
+ case '<':
+ case '|':
+ return CSYNC_FILE_EXCLUDE_INVALID_CHAR;
+ default:
+ break;
+ }
}
- }
- /* split up the path */
- dname = c_dirname(path);
- bname = c_basename(path);
+ /* split up the path */
+ bname = strrchr(path, '/');
+ if (bname) {
+ bname += 1; // don't include the /
+ } else {
+ bname = path;
+ }
+ blen = strlen(bname);
- if (bname == NULL || dname == NULL) {
- match = CSYNC_NOT_EXCLUDED;
- SAFE_FREE(bname);
- SAFE_FREE(dname);
- goto out;
- }
+ rc = csync_fnmatch(".csync_journal.db*", bname, 0);
+ if (rc == 0) {
+ match = CSYNC_FILE_SILENTLY_EXCLUDED;
+ goto out;
+ }
- rc = csync_fnmatch(".csync_journal.db*", bname, 0);
- if (rc == 0) {
- match = CSYNC_FILE_SILENTLY_EXCLUDED;
- SAFE_FREE(bname);
- SAFE_FREE(dname);
- goto out;
- }
+ // check the strlen and ignore the file if its name is longer than 254 chars.
+ // whenever changing this also check createDownloadTmpFileName
+ if (blen > 254) {
+ match = CSYNC_FILE_EXCLUDE_LONG_FILENAME;
+ goto out;
+ }
- // check the strlen and ignore the file if its name is longer than 254 chars.
- // whenever changing this also check createDownloadTmpFileName
- if (strlen(bname) > 254) {
- match = CSYNC_FILE_EXCLUDE_LONG_FILENAME;
- SAFE_FREE(bname);
- SAFE_FREE(dname);
- goto out;
- }
+ #ifdef _WIN32
+ // Windows cannot sync files ending in spaces (#2176). It also cannot
+ // distinguish files ending in '.' from files without an ending,
+ // as '.' is a separator that is not stored internally, so let's
+ // not allow to sync those to avoid file loss/ambiguities (#416)
+ if (blen > 1 && (bname[blen-1]== ' ' || bname[blen-1]== '.' )) {
+ match = CSYNC_FILE_EXCLUDE_INVALID_CHAR;
+ goto out;
+ }
-#ifdef _WIN32
- // Windows cannot sync files ending in spaces (#2176). It also cannot
- // distinguish files ending in '.' from files without an ending,
- // as '.' is a separator that is not stored internally, so let's
- // not allow to sync those to avoid file loss/ambiguities (#416)
- size_t blen = strlen(bname);
- if (blen > 1 && (bname[blen-1]== ' ' || bname[blen-1]== '.' )) {
+ if (csync_is_windows_reserved_word(bname)) {
match = CSYNC_FILE_EXCLUDE_INVALID_CHAR;
- SAFE_FREE(bname);
- SAFE_FREE(dname);
goto out;
- }
+ }
+ #endif
- if (csync_is_windows_reserved_word(bname)) {
- match = CSYNC_FILE_EXCLUDE_INVALID_CHAR;
- SAFE_FREE(bname);
- SAFE_FREE(dname);
- goto out;
- }
-#endif
+ rc = csync_fnmatch(".owncloudsync.log*", bname, 0);
+ if (rc == 0) {
+ match = CSYNC_FILE_SILENTLY_EXCLUDED;
+ goto out;
+ }
- rc = csync_fnmatch(".owncloudsync.log*", bname, 0);
- if (rc == 0) {
- match = CSYNC_FILE_SILENTLY_EXCLUDED;
- SAFE_FREE(bname);
- SAFE_FREE(dname);
- goto out;
- }
+ /* Always ignore conflict files, not only via the exclude list */
+ rc = csync_fnmatch("*_conflict-*", bname, 0);
+ if (rc == 0) {
+ match = CSYNC_FILE_SILENTLY_EXCLUDED;
+ goto out;
+ }
- /* Always ignore conflict files, not only via the exclude list */
- rc = csync_fnmatch("*_conflict-*", bname, 0);
- if (rc == 0) {
- match = CSYNC_FILE_SILENTLY_EXCLUDED;
- SAFE_FREE(bname);
- SAFE_FREE(dname);
- goto out;
- }
+ if (getenv("CSYNC_CONFLICT_FILE_USERNAME")) {
+ rc = asprintf(&conflict, "*_conflict_%s-*", getenv("CSYNC_CONFLICT_FILE_USERNAME"));
+ if (rc < 0) {
+ goto out;
+ }
+ rc = csync_fnmatch(conflict, path, 0);
+ if (rc == 0) {
+ match = CSYNC_FILE_SILENTLY_EXCLUDED;
+ SAFE_FREE(conflict);
+ goto out;
+ }
+ SAFE_FREE(conflict);
+ }
- if (getenv("CSYNC_CONFLICT_FILE_USERNAME")) {
- rc = asprintf(&conflict, "*_conflict_%s-*", getenv("CSYNC_CONFLICT_FILE_USERNAME"));
- if (rc < 0) {
- SAFE_FREE(bname);
- SAFE_FREE(dname);
- goto out;
- }
- rc = csync_fnmatch(conflict, path, 0);
- if (rc == 0) {
- match = CSYNC_FILE_SILENTLY_EXCLUDED;
- SAFE_FREE(conflict);
- SAFE_FREE(bname);
- SAFE_FREE(dname);
- goto out;
- }
- SAFE_FREE(conflict);
- }
+ if( ! excludes ) {
+ goto out;
+ }
- SAFE_FREE(bname);
- SAFE_FREE(dname);
+ /* Loop over all exclude patterns and evaluate the given path */
+ for (i = 0; match == CSYNC_NOT_EXCLUDED && i < excludes->count; i++) {
+ bool match_dirs_only = false;
+ char *pattern_stored = c_strdup(excludes->vector[i]);
+ char* pattern = pattern_stored;
- if( ! excludes ) {
- goto out;
- }
+ type = CSYNC_FILE_EXCLUDE_LIST;
+ if (strlen(pattern) < 1) {
+ SAFE_FREE(pattern_stored);
+ continue;
+ }
+ /* An exclude pattern starting with ']' means the matched file can be cleaned up */
+ if (pattern[0] == ']') {
+ ++pattern;
+ if (filetype == CSYNC_FTW_TYPE_FILE) {
+ type = CSYNC_FILE_EXCLUDE_AND_REMOVE;
+ }
+ }
+ /* Check if the pattern applies to directories only. */
+ if (pattern[strlen(pattern)-1] == '/') {
+ match_dirs_only = true;
+ pattern[strlen(pattern)-1] = '\0'; /* Cut off the slash */
+ }
- /* Loop over all exclude patterns and evaluate the given path */
- for (i = 0; match == CSYNC_NOT_EXCLUDED && i < excludes->count; i++) {
- bool match_dirs_only = false;
- char *pattern_stored = c_strdup(excludes->vector[i]);
- char* pattern = pattern_stored;
+ /* check if the pattern contains a / and if, compare to the whole path */
+ if (strchr(pattern, '/')) {
+ rc = csync_fnmatch(pattern, path, FNM_PATHNAME);
+ if( rc == 0 ) {
+ match = type;
+ }
+ /* if the pattern requires a dir, but the path is not one, it's still not excluded. */
+ if (match_dirs_only && filetype != CSYNC_FTW_TYPE_DIR) {
+ match = CSYNC_NOT_EXCLUDED;
+ }
+ }
- type = CSYNC_FILE_EXCLUDE_LIST;
- if (strlen(pattern) < 1) {
- SAFE_FREE(pattern_stored);
- continue;
- }
- /* Ecludes starting with ']' means it can be cleanup */
- if (pattern[0] == ']') {
- ++pattern;
- if (filetype == CSYNC_FTW_TYPE_FILE) {
- type = CSYNC_FILE_EXCLUDE_AND_REMOVE;
- }
- }
- /* Check if the pattern applies to pathes only. */
- if (pattern[strlen(pattern)-1] == '/') {
- match_dirs_only = true;
- pattern[strlen(pattern)-1] = '\0'; /* Cut off the slash */
- }
+ /* if still not excluded, check each component and leading directory of the path */
+ if (match == CSYNC_NOT_EXCLUDED) {
+ char *segmented_path = strdup(path);
+ size_t len = strlen(segmented_path);
+ bool check_segname = !match_dirs_only || filetype != CSYNC_FTW_TYPE_FILE;
+ for (int j = len; ; --j) {
+ // read backwards until a path separator
+ if (j != 0 && segmented_path[j-1] != '/') {
+ continue;
+ }
+
+ // check 'basename', i.e. for "/foo/bar/fi" we'd check 'fi', 'bar', 'foo'
+ if (check_segname && segmented_path[j] != 0) {
+ rc = csync_fnmatch(pattern, segmented_path + j, 0);
+ if (rc == 0) {
+ match = type;
+ break;
+ }
+ }
+ check_segname = true;
+
+ if (j == 0 || !check_leading_dirs) {
+ break;
+ }
+
+ // check 'dirname', i.e. for "/foo/bar/fi" we'd check '/foo/bar', '/foo'
+ segmented_path[j-1] = '\0';
+ rc = csync_fnmatch(pattern, segmented_path, 0);
+ if (rc == 0) {
+ match = type;
+ break;
+ }
+ }
+ SAFE_FREE(segmented_path);
+ }
+ SAFE_FREE(pattern_stored);
+ }
- /* check if the pattern contains a / and if, compare to the whole path */
- if (strchr(pattern, '/')) {
- rc = csync_fnmatch(pattern, path, FNM_PATHNAME);
- if( rc == 0 ) {
- match = type;
- }
- /* if the pattern requires a dir, but path is not, its still not excluded. */
- if (match_dirs_only && filetype != CSYNC_FTW_TYPE_DIR) {
- match = CSYNC_NOT_EXCLUDED;
- }
- }
+ out:
- /* if still not excluded, check each component and leading directory of the path */
- if (match == CSYNC_NOT_EXCLUDED) {
- char *segmented_path = strdup(path);
- size_t len = strlen(segmented_path);
- bool check_segname = !match_dirs_only || filetype != CSYNC_FTW_TYPE_FILE;
- for (int j = len; ; --j) {
- // read backwards until a path separator
- if (j != 0 && segmented_path[j-1] != '/') {
- continue;
- }
-
- // check 'basename', i.e. for "/foo/bar/fi" we'd check 'fi', 'bar', 'foo'
- if (check_segname && segmented_path[j] != 0) {
- rc = csync_fnmatch(pattern, segmented_path + j, 0);
- if (rc == 0) {
- match = type;
- break;
- }
- }
- check_segname = true;
-
- if (j == 0) {
- break;
- }
-
- // check 'dirname', i.e. for "/foo/bar/fi" we'd check '/foo/bar', '/foo'
- segmented_path[j-1] = '\0';
- rc = csync_fnmatch(pattern, segmented_path, 0);
- if (rc == 0) {
- match = type;
- break;
- }
- }
- SAFE_FREE(segmented_path);
- }
- SAFE_FREE(pattern_stored);
- }
+ return match;
+}
-out:
+CSYNC_EXCLUDE_TYPE csync_excluded_traversal(c_strlist_t *excludes, const char *path, int filetype) {
+ return _csync_excluded_common(excludes, path, filetype, false);
+}
- return match;
+CSYNC_EXCLUDE_TYPE csync_excluded_no_ctx(c_strlist_t *excludes, const char *path, int filetype) {
+ return _csync_excluded_common(excludes, path, filetype, true);
}
diff --git a/csync/src/csync_exclude.h b/csync/src/csync_exclude.h
index bfb8719..c0dab57 100644
--- a/csync/src/csync_exclude.h
+++ b/csync/src/csync_exclude.h
@@ -66,6 +66,11 @@ void csync_exclude_destroy(CSYNC *ctx);
*
* This excludes also paths which can't be used without unix extensions.
*
+ * The exclude list is checked against the full path, each component of
+ * the path and all leading directory strings, e.g.
+ * '/foo/bar/file' checks ('/foo/bar/file', 'foo', 'bar', 'file',
+ * '/foo/bar', '/foo').
+ *
* @param ctx The synchronizer context.
* @param path The patch to check.
*
@@ -74,6 +79,23 @@ void csync_exclude_destroy(CSYNC *ctx);
CSYNC_EXCLUDE_TYPE csync_excluded(CSYNC *ctx, const char *path, int filetype);
/**
+ * @brief Check if the given path should be excluded in a traversal situation.
+ *
+ * It does only part of the work that csync_excluded does because it's assumed
+ * that all leading directories have been run through csync_excluded_traversal()
+ * before. This can be significantly faster.
+ *
+ * That means for '/foo/bar/file' only ('/foo/bar/file', 'file') is checked
+ * against the exclude patterns.
+ *
+ * @param excludes The list of exclude patterns to check against.
+ * @param path The path to check.
+ *
+ * @return 2 if excluded and needs cleanup, 1 if excluded, 0 if not.
+ */
+CSYNC_EXCLUDE_TYPE csync_excluded_traversal(c_strlist_t *excludes, const char *path, int filetype);
+
+/**
* @brief csync_excluded_no_ctx
* @param excludes
* @param path
diff --git a/csync/tests/csync_tests/check_csync_exclude.c b/csync/tests/csync_tests/check_csync_exclude.c
index b0b57ff..0bcbda9 100644
--- a/csync/tests/csync_tests/check_csync_exclude.c
+++ b/csync/tests/csync_tests/check_csync_exclude.c
@@ -142,6 +142,56 @@ static void check_csync_excluded(void **state)
}
+static void check_csync_excluded_traversal(void **state)
+{
+ CSYNC *csync = *state;
+ int rc;
+
+ _csync_exclude_add( &(csync->excludes), "/exclude" );
+
+ /* Check toplevel dir, the pattern only works for toplevel dir. */
+ rc = csync_excluded_traversal(csync->excludes, "/exclude", CSYNC_FTW_TYPE_DIR);
+ assert_int_equal(rc, CSYNC_FILE_EXCLUDE_LIST);
+
+ rc = csync_excluded_traversal(csync->excludes, "/foo/exclude", CSYNC_FTW_TYPE_DIR);
+ assert_int_equal(rc, CSYNC_NOT_EXCLUDED);
+
+ /* check for a file called exclude. Must still work */
+ rc = csync_excluded_traversal(csync->excludes, "/exclude", CSYNC_FTW_TYPE_FILE);
+ assert_int_equal(rc, CSYNC_FILE_EXCLUDE_LIST);
+
+ rc = csync_excluded_traversal(csync->excludes, "/foo/exclude", CSYNC_FTW_TYPE_FILE);
+ assert_int_equal(rc, CSYNC_NOT_EXCLUDED);
+
+ /* Add an exclude for directories only: excl/ */
+ _csync_exclude_add( &(csync->excludes), "excl/" );
+ rc = csync_excluded_traversal(csync->excludes, "/excl", CSYNC_FTW_TYPE_DIR);
+ assert_int_equal(rc, CSYNC_FILE_EXCLUDE_LIST);
+
+ rc = csync_excluded_traversal(csync->excludes, "meep/excl", CSYNC_FTW_TYPE_DIR);
+ assert_int_equal(rc, CSYNC_FILE_EXCLUDE_LIST);
+
+ rc = csync_excluded_traversal(csync->excludes, "meep/excl/file", CSYNC_FTW_TYPE_FILE);
+ assert_int_equal(rc, CSYNC_NOT_EXCLUDED); // because leading dirs aren't checked!
+
+ rc = csync_excluded_traversal(csync->excludes, "/excl", CSYNC_FTW_TYPE_FILE);
+ assert_int_equal(rc, CSYNC_NOT_EXCLUDED);
+
+ _csync_exclude_add(&csync->excludes, "/excludepath/withsubdir");
+
+ rc = csync_excluded_traversal(csync->excludes, "/excludepath/withsubdir", CSYNC_FTW_TYPE_DIR);
+ assert_int_equal(rc, CSYNC_FILE_EXCLUDE_LIST);
+
+ rc = csync_excluded_traversal(csync->excludes, "/excludepath/withsubdir", CSYNC_FTW_TYPE_FILE);
+ assert_int_equal(rc, CSYNC_FILE_EXCLUDE_LIST);
+
+ rc = csync_excluded_traversal(csync->excludes, "/excludepath/withsubdir2", CSYNC_FTW_TYPE_DIR);
+ assert_int_equal(rc, CSYNC_NOT_EXCLUDED);
+
+ rc = csync_excluded_traversal(csync->excludes, "/excludepath/withsubdir/foo", CSYNC_FTW_TYPE_DIR);
+ assert_int_equal(rc, CSYNC_NOT_EXCLUDED); // because leading dirs aren't checked!
+}
+
static void check_csync_pathes(void **state)
{
CSYNC *csync = *state;
@@ -218,21 +268,41 @@ static void check_csync_excluded_performance(void **state)
int totalRc = 0;
// Being able to use QElapsedTimer for measurement would be nice...
- struct timeval before, after;
- gettimeofday(&before, 0);
-
- for (int i = 0; i < N; ++i) {
- totalRc += csync_excluded(csync, "/this/is/quite/a/long/path/with/many/components", CSYNC_FTW_TYPE_DIR);
- totalRc += csync_excluded(csync, "/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/29", CSYNC_FTW_TYPE_FILE);
+ {
+ struct timeval before, after;
+ gettimeofday(&before, 0);
+
+ for (int i = 0; i < N; ++i) {
+ totalRc += csync_excluded(csync, "/this/is/quite/a/long/path/with/many/components", CSYNC_FTW_TYPE_DIR);
+ totalRc += csync_excluded(csync, "/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/29", CSYNC_FTW_TYPE_FILE);
+ }
+ assert_int_equal(totalRc, CSYNC_NOT_EXCLUDED); // mainly to avoid optimization
+
+ gettimeofday(&after, 0);
+
+ const double total = (after.tv_sec - before.tv_sec)
+ + (after.tv_usec - before.tv_usec) / 1.0e6;
+ const double perCallMs = total / 2 / N * 1000;
+ printf("csync_excluded: %f ms per call\n", perCallMs);
}
- assert_int_equal(totalRc, CSYNC_NOT_EXCLUDED); // mainly to avoid optimization
- gettimeofday(&after, 0);
+ {
+ struct timeval before, after;
+ gettimeofday(&before, 0);
+
+ for (int i = 0; i < N; ++i) {
+ totalRc += csync_excluded_traversal(csync->excludes, "/this/is/quite/a/long/path/with/many/components", CSYNC_FTW_TYPE_DIR);
+ totalRc += csync_excluded_traversal(csync->excludes, "/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/29", CSYNC_FTW_TYPE_FILE);
+ }
+ assert_int_equal(totalRc, CSYNC_NOT_EXCLUDED); // mainly to avoid optimization
- const double total = (after.tv_sec - before.tv_sec)
- + (after.tv_usec - before.tv_usec) / 1.0e6;
- const double perCallMs = total / 2 / N * 1000;
- printf("csync_excluded: %f ms per call\n", perCallMs);
+ gettimeofday(&after, 0);
+
+ const double total = (after.tv_sec - before.tv_sec)
+ + (after.tv_usec - before.tv_usec) / 1.0e6;
+ const double perCallMs = total / 2 / N * 1000;
+ printf("csync_excluded_traversal: %f ms per call\n", perCallMs);
+ }
}
int torture_run_tests(void)
@@ -241,6 +311,7 @@ int torture_run_tests(void)
unit_test_setup_teardown(check_csync_exclude_add, setup, teardown),
unit_test_setup_teardown(check_csync_exclude_load, setup, teardown),
unit_test_setup_teardown(check_csync_excluded, setup_init, teardown),
+ unit_test_setup_teardown(check_csync_excluded_traversal, setup_init, teardown),
unit_test_setup_teardown(check_csync_pathes, setup_init, teardown),
unit_test_setup_teardown(check_csync_is_windows_reserved_word, setup_init, teardown),
unit_test_setup_teardown(check_csync_excluded_performance, setup_init, teardown),
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-owncloud/owncloud-client.git
More information about the Pkg-owncloud-commits
mailing list