From 23e3efd1f17d61dfc437f53589c43ae9c2107230 Mon Sep 17 00:00:00 2001 From: Chris P Date: Thu, 17 Nov 2016 19:46:25 +0100 Subject: [PATCH] preprocess: change --{consider,ignore}-mtime into a more flexible --mtime-window=T option and fix tests/docs --- docs/rmlint.1.rst | 7 +++- lib/cfg.c | 2 +- lib/cfg.h | 2 +- lib/cmdline.c | 41 +++++++++---------- lib/preprocess.c | 9 +++- ...consider_mtime.py => test_mtime_window.py} | 13 ++++-- 6 files changed, 43 insertions(+), 31 deletions(-) rename tests/test_options/{test_consider_mtime.py => test_mtime_window.py} (69%) diff --git a/docs/rmlint.1.rst b/docs/rmlint.1.rst index 7dc1163c..8ea57936 100644 --- a/docs/rmlint.1.rst +++ b/docs/rmlint.1.rst @@ -468,10 +468,13 @@ Rarely used, miscellaneous options Also it might be useful for approximate comparison where it suffices when the file is the same in the middle part. -:``-z --consider-mtime`` / ``-Z --ignore-mtime`` (**default**): +:``-z --mtime-window=T`` (**default\:** *-1*): Only consider those files as duplicates that have the same content and - the same modification time (mtime). + the same modification time (mtime) within a certain window of *T* seconds. + If *T* is 0, both files need to have the same mtime. For *T=1* they may + differ by one second, and so on. If the window is negative, the mtime of + duplicates will not be considered. 
:``--with-fiemap`` (**default**) / ``--without-fiemap``: diff --git a/lib/cfg.c b/lib/cfg.c index 2c9ada68..e9749a26 100644 --- a/lib/cfg.c +++ b/lib/cfg.c @@ -76,7 +76,7 @@ void rm_cfg_set_default(RmCfg *cfg) { cfg->use_absolute_end_offset = false; cfg->skip_start_offset = 0; cfg->skip_end_offset = 0; - cfg->consider_mtime = false; + cfg->mtime_window = -1; rm_trie_init(&cfg->file_trie); } diff --git a/lib/cfg.h b/lib/cfg.h index 72b31695..6e18a13e 100644 --- a/lib/cfg.h +++ b/lib/cfg.h @@ -55,7 +55,6 @@ typedef struct RmCfg { gboolean find_hardlinked_dupes; gboolean limits_specified; gboolean filter_mtime; - gboolean consider_mtime; gboolean match_basename; gboolean unmatched_basenames; gboolean match_with_extension; @@ -74,6 +73,7 @@ typedef struct RmCfg { int permissions; time_t min_mtime; + gint64 mtime_window; gint depth; gint verbosity; diff --git a/lib/cmdline.c b/lib/cmdline.c index 65e35f11..59549900 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -1326,8 +1326,8 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) { HIDDEN = G_OPTION_FLAG_HIDDEN, OPTIONAL = G_OPTION_FLAG_OPTIONAL_ARG; /* Free/Used Options: - Used: abBcCdDeEfFgGHhiI kKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ| - Free: jJ | + Used: abBcCdDeEfFgGHhiI kKlLmMnNoOpPqQrRsStTuUvVwWxXyYz | + Free: jJ Z| */ /* clang-format off */ @@ -1352,25 +1352,24 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) { {"quiet" , 'V' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(quiet) , _("Be less verbose (-VVV for much less)") , NULL} , /* Trivial boolean options */ - {"no-with-color" , 'W' , DISABLE , G_OPTION_ARG_NONE , &cfg->with_color , _("Be not that colorful") , NULL} , - {"hidden" , 'r' , DISABLE , G_OPTION_ARG_NONE , &cfg->ignore_hidden , _("Find hidden files") , NULL} , - {"followlinks" , 'f' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(follow_symlinks) , _("Follow symlinks") , NULL} , - {"no-followlinks" , 'F' , DISABLE , G_OPTION_ARG_NONE , &cfg->follow_symlinks , _("Ignore symlinks") , 
NULL} , - {"paranoid" , 'p' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(paranoid) , _("Use more paranoid hashing") , NULL} , - {"no-crossdev" , 'x' , DISABLE , G_OPTION_ARG_NONE , &cfg->crossdev , _("Do not cross mounpoints") , NULL} , - {"keep-all-tagged" , 'k' , 0 , G_OPTION_ARG_NONE , &cfg->keep_all_tagged , _("Keep all tagged files") , NULL} , - {"keep-all-untagged" , 'K' , 0 , G_OPTION_ARG_NONE , &cfg->keep_all_untagged , _("Keep all untagged files") , NULL} , - {"must-match-tagged" , 'm' , 0 , G_OPTION_ARG_NONE , &cfg->must_match_tagged , _("Must have twin in tagged dir") , NULL} , - {"must-match-untagged" , 'M' , 0 , G_OPTION_ARG_NONE , &cfg->must_match_untagged , _("Must have twin in untagged dir") , NULL} , - {"match-basename" , 'b' , 0 , G_OPTION_ARG_NONE , &cfg->match_basename , _("Only find twins with same basename") , NULL} , - {"match-extension" , 'e' , 0 , G_OPTION_ARG_NONE , &cfg->match_with_extension , _("Only find twins with same extension") , NULL} , - {"match-without-extension" , 'i' , 0 , G_OPTION_ARG_NONE , &cfg->match_without_extension , _("Only find twins with same basename minus extension") , NULL} , - {"merge-directories" , 'D' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(merge_directories) , _("Find duplicate directories") , NULL} , - {"perms" , 'z' , OPTIONAL , G_OPTION_ARG_CALLBACK , FUNC(permissions) , _("Only use files with certain permissions") , "[RWX]+"} , - {"no-hardlinked" , 'L' , DISABLE , G_OPTION_ARG_NONE , &cfg->find_hardlinked_dupes , _("Ignore hardlink twins") , NULL} , - {"partial-hidden" , 0 , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(partial_hidden) , _("Find hidden files in duplicate folders only") , NULL} , - {"consider-mtime" , 'z' , 0 , G_OPTION_ARG_NONE , &cfg->consider_mtime , _("Consider duplicates only equal when mtime is equal") , NULL} , - {"ignore-mtime" , 'Z' , DISABLE , G_OPTION_ARG_NONE , &cfg->consider_mtime , _("Do not consider mtime for duplicate equality") , NULL} , + {"no-with-color" , 'W' , DISABLE , 
G_OPTION_ARG_NONE , &cfg->with_color , _("Be not that colorful") , NULL} , + {"hidden" , 'r' , DISABLE , G_OPTION_ARG_NONE , &cfg->ignore_hidden , _("Find hidden files") , NULL} , + {"followlinks" , 'f' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(follow_symlinks) , _("Follow symlinks") , NULL} , + {"no-followlinks" , 'F' , DISABLE , G_OPTION_ARG_NONE , &cfg->follow_symlinks , _("Ignore symlinks") , NULL} , + {"paranoid" , 'p' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(paranoid) , _("Use more paranoid hashing") , NULL} , + {"no-crossdev" , 'x' , DISABLE , G_OPTION_ARG_NONE , &cfg->crossdev , _("Do not cross mounpoints") , NULL} , + {"keep-all-tagged" , 'k' , 0 , G_OPTION_ARG_NONE , &cfg->keep_all_tagged , _("Keep all tagged files") , NULL} , + {"keep-all-untagged" , 'K' , 0 , G_OPTION_ARG_NONE , &cfg->keep_all_untagged , _("Keep all untagged files") , NULL} , + {"must-match-tagged" , 'm' , 0 , G_OPTION_ARG_NONE , &cfg->must_match_tagged , _("Must have twin in tagged dir") , NULL} , + {"must-match-untagged" , 'M' , 0 , G_OPTION_ARG_NONE , &cfg->must_match_untagged , _("Must have twin in untagged dir") , NULL} , + {"match-basename" , 'b' , 0 , G_OPTION_ARG_NONE , &cfg->match_basename , _("Only find twins with same basename") , NULL} , + {"match-extension" , 'e' , 0 , G_OPTION_ARG_NONE , &cfg->match_with_extension , _("Only find twins with same extension") , NULL} , + {"match-without-extension" , 'i' , 0 , G_OPTION_ARG_NONE , &cfg->match_without_extension , _("Only find twins with same basename minus extension") , NULL} , + {"merge-directories" , 'D' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(merge_directories) , _("Find duplicate directories") , NULL} , + {"perms" , 'z' , OPTIONAL , G_OPTION_ARG_CALLBACK , FUNC(permissions) , _("Only use files with certain permissions") , "[RWX]+"} , + {"no-hardlinked" , 'L' , DISABLE , G_OPTION_ARG_NONE , &cfg->find_hardlinked_dupes , _("Ignore hardlink twins") , NULL} , + {"partial-hidden" , 0 , EMPTY , G_OPTION_ARG_CALLBACK , 
FUNC(partial_hidden) , _("Find hidden files in duplicate folders only") , NULL} , + {"mtime-window" , 'z' , 0 , G_OPTION_ARG_INT64 , &cfg->mtime_window , _("Consider duplicates only equal when mtime differs at max. T seconds") , "T"} , /* Callback */ {"show-man" , 'H' , EMPTY , G_OPTION_ARG_CALLBACK , rm_cmd_show_manpage , _("Show the manpage") , NULL} , diff --git a/lib/preprocess.c b/lib/preprocess.c index 8c63a0e6..e7c0e3a0 100644 --- a/lib/preprocess.c +++ b/lib/preprocess.c @@ -84,8 +84,13 @@ gint rm_file_cmp(const RmFile *file_a, const RmFile *file_b) { : 0; } - if(result == 0 && cfg->consider_mtime) { - result = (gint64)file_a->mtime - (gint64)file_b->mtime; + if(result == 0 && cfg->mtime_window >= 0) { + gint64 diff = (gint64)file_a->mtime - (gint64)file_b->mtime; + if(ABS(diff) <= cfg->mtime_window) { + result = 0; + } else { + result = diff; + } } return result; diff --git a/tests/test_options/test_consider_mtime.py b/tests/test_options/test_mtime_window.py similarity index 69% rename from tests/test_options/test_consider_mtime.py rename to tests/test_options/test_mtime_window.py index 1b966a81..91e28162 100644 --- a/tests/test_options/test_consider_mtime.py +++ b/tests/test_options/test_mtime_window.py @@ -19,17 +19,22 @@ def set_mtime(path, mtime): set_mtime('a', '2004-02-29 16:21:42') set_mtime('b', '2004-02-29 16:21:42') - set_mtime('c', '2004-02-29 16:21:43') + set_mtime('c', '2004-02-29 16:21:44') - head, *data, footer = run_rmlint('--ignore-mtime') + head, *data, footer = run_rmlint('--mtime-window=-1') assert len(data) == 3 assert footer['total_files'] == 3 assert footer['total_lint_size'] == 6 assert footer['duplicates'] == 2 - head, *data, footer = run_rmlint('--consider-mtime') - + head, *data, footer = run_rmlint('--mtime-window=+1') assert len(data) == 2 assert footer['total_files'] == 3 assert footer['total_lint_size'] == 3 assert footer['duplicates'] == 1 + + head, *data, footer = run_rmlint('--mtime-window=+2') + assert len(data) == 3 + 
assert footer['total_files'] == 3 + assert footer['total_lint_size'] == 6 + assert footer['duplicates'] == 2