Skip to content

Commit

Permalink
preprocess: change --{consider,ignore}-mtime into a more flexible --m…
Browse files Browse the repository at this point in the history
…time-window=T option and fix tests/docs
  • Loading branch information
sahib committed Nov 17, 2016
1 parent fdd120d commit 23e3efd
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 31 deletions.
7 changes: 5 additions & 2 deletions docs/rmlint.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -468,10 +468,13 @@ Rarely used, miscellaneous options
It can also be useful for approximate comparison, where it suffices that
the files are identical in their middle part.

:``-z --consider-mtime`` / ``-Z --ignore-mtime`` (**default**):
:``-z --mtime-window=T`` (**default\:** *-1*)

Only consider those files as duplicates that have the same content and
the same modification time (mtime).
the same modification time (mtime) within a certain window of *T* seconds.
If *T* is 0, both files need to have the same mtime. For *T=1* they may
differ by up to one second, and so on. If the window is negative, the mtime of
duplicates will not be considered.

:``--with-fiemap`` (**default**) / ``--without-fiemap``:

Expand Down
2 changes: 1 addition & 1 deletion lib/cfg.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ void rm_cfg_set_default(RmCfg *cfg) {
cfg->use_absolute_end_offset = false;
cfg->skip_start_offset = 0;
cfg->skip_end_offset = 0;
cfg->consider_mtime = false;
cfg->mtime_window = -1;

rm_trie_init(&cfg->file_trie);
}
2 changes: 1 addition & 1 deletion lib/cfg.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ typedef struct RmCfg {
gboolean find_hardlinked_dupes;
gboolean limits_specified;
gboolean filter_mtime;
gboolean consider_mtime;
gboolean match_basename;
gboolean unmatched_basenames;
gboolean match_with_extension;
Expand All @@ -74,6 +73,7 @@ typedef struct RmCfg {
int permissions;

time_t min_mtime;
gint64 mtime_window;
gint depth;
gint verbosity;

Expand Down
41 changes: 20 additions & 21 deletions lib/cmdline.c
Original file line number Diff line number Diff line change
Expand Up @@ -1326,8 +1326,8 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) {
HIDDEN = G_OPTION_FLAG_HIDDEN, OPTIONAL = G_OPTION_FLAG_OPTIONAL_ARG;

/* Free/Used Options:
Used: abBcCdDeEfFgGHhiI kKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ|
Free: jJ |
Used: abBcCdDeEfFgGHhiI kKlLmMnNoOpPqQrRsStTuUvVwWxXyYz |
Free: jJ Z|
*/

/* clang-format off */
Expand All @@ -1352,25 +1352,24 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) {
{"quiet" , 'V' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(quiet) , _("Be less verbose (-VVV for much less)") , NULL} ,

/* Trivial boolean options */
{"no-with-color" , 'W' , DISABLE , G_OPTION_ARG_NONE , &cfg->with_color , _("Be not that colorful") , NULL} ,
{"hidden" , 'r' , DISABLE , G_OPTION_ARG_NONE , &cfg->ignore_hidden , _("Find hidden files") , NULL} ,
{"followlinks" , 'f' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(follow_symlinks) , _("Follow symlinks") , NULL} ,
{"no-followlinks" , 'F' , DISABLE , G_OPTION_ARG_NONE , &cfg->follow_symlinks , _("Ignore symlinks") , NULL} ,
{"paranoid" , 'p' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(paranoid) , _("Use more paranoid hashing") , NULL} ,
{"no-crossdev" , 'x' , DISABLE , G_OPTION_ARG_NONE , &cfg->crossdev , _("Do not cross mounpoints") , NULL} ,
{"keep-all-tagged" , 'k' , 0 , G_OPTION_ARG_NONE , &cfg->keep_all_tagged , _("Keep all tagged files") , NULL} ,
{"keep-all-untagged" , 'K' , 0 , G_OPTION_ARG_NONE , &cfg->keep_all_untagged , _("Keep all untagged files") , NULL} ,
{"must-match-tagged" , 'm' , 0 , G_OPTION_ARG_NONE , &cfg->must_match_tagged , _("Must have twin in tagged dir") , NULL} ,
{"must-match-untagged" , 'M' , 0 , G_OPTION_ARG_NONE , &cfg->must_match_untagged , _("Must have twin in untagged dir") , NULL} ,
{"match-basename" , 'b' , 0 , G_OPTION_ARG_NONE , &cfg->match_basename , _("Only find twins with same basename") , NULL} ,
{"match-extension" , 'e' , 0 , G_OPTION_ARG_NONE , &cfg->match_with_extension , _("Only find twins with same extension") , NULL} ,
{"match-without-extension" , 'i' , 0 , G_OPTION_ARG_NONE , &cfg->match_without_extension , _("Only find twins with same basename minus extension") , NULL} ,
{"merge-directories" , 'D' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(merge_directories) , _("Find duplicate directories") , NULL} ,
{"perms" , 'z' , OPTIONAL , G_OPTION_ARG_CALLBACK , FUNC(permissions) , _("Only use files with certain permissions") , "[RWX]+"} ,
{"no-hardlinked" , 'L' , DISABLE , G_OPTION_ARG_NONE , &cfg->find_hardlinked_dupes , _("Ignore hardlink twins") , NULL} ,
{"partial-hidden" , 0 , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(partial_hidden) , _("Find hidden files in duplicate folders only") , NULL} ,
{"consider-mtime" , 'z' , 0 , G_OPTION_ARG_NONE , &cfg->consider_mtime , _("Consider duplicates only equal when mtime is equal") , NULL} ,
{"ignore-mtime" , 'Z' , DISABLE , G_OPTION_ARG_NONE , &cfg->consider_mtime , _("Do not consider mtime for duplicate equality") , NULL} ,
{"no-with-color" , 'W' , DISABLE , G_OPTION_ARG_NONE , &cfg->with_color , _("Be not that colorful") , NULL} ,
{"hidden" , 'r' , DISABLE , G_OPTION_ARG_NONE , &cfg->ignore_hidden , _("Find hidden files") , NULL} ,
{"followlinks" , 'f' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(follow_symlinks) , _("Follow symlinks") , NULL} ,
{"no-followlinks" , 'F' , DISABLE , G_OPTION_ARG_NONE , &cfg->follow_symlinks , _("Ignore symlinks") , NULL} ,
{"paranoid" , 'p' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(paranoid) , _("Use more paranoid hashing") , NULL} ,
{"no-crossdev" , 'x' , DISABLE , G_OPTION_ARG_NONE , &cfg->crossdev , _("Do not cross mounpoints") , NULL} ,
{"keep-all-tagged" , 'k' , 0 , G_OPTION_ARG_NONE , &cfg->keep_all_tagged , _("Keep all tagged files") , NULL} ,
{"keep-all-untagged" , 'K' , 0 , G_OPTION_ARG_NONE , &cfg->keep_all_untagged , _("Keep all untagged files") , NULL} ,
{"must-match-tagged" , 'm' , 0 , G_OPTION_ARG_NONE , &cfg->must_match_tagged , _("Must have twin in tagged dir") , NULL} ,
{"must-match-untagged" , 'M' , 0 , G_OPTION_ARG_NONE , &cfg->must_match_untagged , _("Must have twin in untagged dir") , NULL} ,
{"match-basename" , 'b' , 0 , G_OPTION_ARG_NONE , &cfg->match_basename , _("Only find twins with same basename") , NULL} ,
{"match-extension" , 'e' , 0 , G_OPTION_ARG_NONE , &cfg->match_with_extension , _("Only find twins with same extension") , NULL} ,
{"match-without-extension" , 'i' , 0 , G_OPTION_ARG_NONE , &cfg->match_without_extension , _("Only find twins with same basename minus extension") , NULL} ,
{"merge-directories" , 'D' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(merge_directories) , _("Find duplicate directories") , NULL} ,
{"perms" , 'z' , OPTIONAL , G_OPTION_ARG_CALLBACK , FUNC(permissions) , _("Only use files with certain permissions") , "[RWX]+"} ,
{"no-hardlinked" , 'L' , DISABLE , G_OPTION_ARG_NONE , &cfg->find_hardlinked_dupes , _("Ignore hardlink twins") , NULL} ,
{"partial-hidden" , 0 , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(partial_hidden) , _("Find hidden files in duplicate folders only") , NULL} ,
{"mtime-window" , 'z' , 0 , G_OPTION_ARG_INT64 , &cfg->mtime_window , _("Consider duplicates only equal when mtime differs at max. T seconds") , "T"} ,

/* Callback */
{"show-man" , 'H' , EMPTY , G_OPTION_ARG_CALLBACK , rm_cmd_show_manpage , _("Show the manpage") , NULL} ,
Expand Down
9 changes: 7 additions & 2 deletions lib/preprocess.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,13 @@ gint rm_file_cmp(const RmFile *file_a, const RmFile *file_b) {
: 0;
}

if(result == 0 && cfg->consider_mtime) {
result = (gint64)file_a->mtime - (gint64)file_b->mtime;
if(result == 0 && cfg->mtime_window >= 0) {
gint64 diff = (gint64)file_a->mtime - (gint64)file_b->mtime;
if(ABS(diff) <= cfg->mtime_window) {
result = 0;
} else {
result = diff;
}
}

return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,22 @@ def set_mtime(path, mtime):

set_mtime('a', '2004-02-29 16:21:42')
set_mtime('b', '2004-02-29 16:21:42')
set_mtime('c', '2004-02-29 16:21:43')
set_mtime('c', '2004-02-29 16:21:44')

head, *data, footer = run_rmlint('--ignore-mtime')
head, *data, footer = run_rmlint('--mtime-window=-1')
assert len(data) == 3
assert footer['total_files'] == 3
assert footer['total_lint_size'] == 6
assert footer['duplicates'] == 2

head, *data, footer = run_rmlint('--consider-mtime')

head, *data, footer = run_rmlint('--mtime-window=+1')
assert len(data) == 2
assert footer['total_files'] == 3
assert footer['total_lint_size'] == 3
assert footer['duplicates'] == 1

head, *data, footer = run_rmlint('--mtime-window=+2')
assert len(data) == 3
assert footer['total_files'] == 3
assert footer['total_lint_size'] == 6
assert footer['duplicates'] == 2

0 comments on commit 23e3efd

Please sign in to comment.