diff --git a/CHANGELOG.md b/CHANGELOG.md index 179c9f834..8417b06c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,9 +5,10 @@ All notable changes to this project will be documented in this file. The format follows [keepachangelog.com]. Please stick to it. ## [2.11.0 Evolving Echidna -- unreleased] - ### Added +* Implement --hash-uniques option to generate full checksums of unique files too. +* Implement --hash-unmatched option, similar to --hash-uniques but only for size twins. * Implement --rank-by f option to rank originals by directory full path ## [2.10.1 Ludicrous Lemur] -- 2020-06-13 diff --git a/SConstruct b/SConstruct index 53455ab63..84777e00c 100755 --- a/SConstruct +++ b/SConstruct @@ -515,7 +515,7 @@ AddOption( action='store', metavar='DIR', help='libdir name (lib or lib64)' ) -for suffix in ['libelf', 'gettext', 'fiemap', 'blkid', 'json-glib', 'gui']: +for suffix in ['libelf', 'gettext', 'fiemap', 'blkid', 'gui']: AddOption( '--without-' + suffix, action='store_const', default=False, const=False, dest='with_' + suffix @@ -599,18 +599,12 @@ conf.env['HAVE_BLKID'] = 0 conf.check_pkg('blkid', 'HAVE_BLKID', required=False) conf.env['HAVE_JSON_GLIB'] = 0 -conf.check_pkg('json-glib-1.0', 'HAVE_JSON_GLIB', required=False) +conf.check_pkg('json-glib-1.0', 'HAVE_JSON_GLIB', required=True) -if GetOption('with_json-glib') is False: - conf.env['HAVE_JSON_GLIB'] = 0 - -packages = ['glib-2.0'] +packages = ['glib-2.0', 'json-glib-1.0'] if conf.env['HAVE_BLKID']: packages.append('blkid') -if conf.env['HAVE_JSON_GLIB']: - packages.append('json-glib-1.0') - if conf.env['HAVE_GIO_UNIX']: packages.append('gio-unix-2.0') @@ -885,7 +879,6 @@ if 'config' in COMMAND_LINE_TARGETS: Support for SHA512 (needs glib >= 2.31) : {sha512} Build manpage from docs/rmlint.1.rst : {sphinx} Support for caching checksums in file's xattr : {xattr} - Support for reading json caches (needs json-glib) : {json_glib} Checking for proper support of big files >= 4GB : {bigfiles} (needs either 
sizeof(off_t) >= 8 ...) : {bigofft} (... or presence of stat64) : {bigstat} @@ -916,7 +909,6 @@ Type 'scons' to actually compile rmlint now. Good luck. locale=yesno(env['HAVE_LIBINTL']), msgfmt=yesno(env['HAVE_MSGFMT']), xattr=yesno(env['HAVE_XATTR']), - json_glib=yesno(env['HAVE_JSON_GLIB']), nonrotational=yesno(env['HAVE_GIO_UNIX'] & env['HAVE_BLKID']), gio_unix=yesno(env['HAVE_GIO_UNIX']), blkid=yesno(env['HAVE_BLKID']), diff --git a/docs/developers.rst b/docs/developers.rst index 02c5c8146..4459e9b45 100644 --- a/docs/developers.rst +++ b/docs/developers.rst @@ -198,11 +198,6 @@ Arguments Do not link with ``libblkid``, which is needed to differentiate between normal rotational harddisks and non-rotational disks. -:--without-json-glib: - - Do not link with ``libjson-glib``, which is needed to load json-cache files. - Without this library a warning is printed when using ``--replay``. - :--without-fiemap: Do not attempt to use the ``FIEMAP ioctl(2)``. diff --git a/docs/install.rst b/docs/install.rst index b22e22a9f..1d80c80dc 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -16,13 +16,13 @@ Hard dependencies: ~~~~~~~~~~~~~~~~~~ * **glib** :math:`\geq 2.32` (general C Utility Library) +* **libjson-glib** (parsing rmlint's own json as caching layer) Soft dependencies: ~~~~~~~~~~~~~~~~~~ * **libblkid** (detecting mountpoints) * **libelf** (nonstripped binary detection) -* **libjson-glib** (parsing rmlint's own json as caching layer) Build dependencies: ~~~~~~~~~~~~~~~~~~~ diff --git a/docs/rmlint.1.rst b/docs/rmlint.1.rst index f3da0ba2c..11aae5830 100644 --- a/docs/rmlint.1.rst +++ b/docs/rmlint.1.rst @@ -799,7 +799,7 @@ OTHER STAND-ALONE COMMANDS By default this will use hashing to compare the files and/or directories. 
-:``rmlint --dedupe [-r] [-v|-V] ``: +:``rmlint --dedupe [OPTION…] ``: If the filesystem supports files sharing physical storage between multiple files, and if ``src`` and ``dest`` have same content, this command makes the @@ -808,26 +808,33 @@ OTHER STAND-ALONE COMMANDS This command is similar to ``cp --reflink=always `` except that it (a) checks that ``src`` and ``dest`` have identical data, and - it makes no changes to ``dest``'s metadata. + (b) it makes no changes to ``dest``'s metadata. + + Options: + * -h, --help Show help options + * -x, --xattr Check extended attributes to see if the file is already deduplicated + * -r, --readonly Even dedupe read-only [btrfs] snapshots (needs root) + * -f, --followlinks Follow symlinks + * -i, --inline-extents Try to dedupe files with inline extents + * -v, --loud Be more verbose (-vvv for much more) + * -V, --quiet Be less verbose (-VVV for much less) - Running with ``-r`` option will enable deduplication of read-only [btrfs] - snapshots (requires root). :``rmlint --is-reflink [-v|-V] ``: Tests whether ``file1`` and ``file2`` are reflinks (reference same data). 
This command makes ``rmlint`` exit with one of the following exit codes: - * 0: files are reflinks - * 1: files are not reflinks - * 3: not a regular file - * 4: file sizes differ - * 5: fiemaps can't be read - * 6: file1 and file2 are the same path - * 7: file1 and file2 are the same file under different mountpoints - * 8: files are hardlinks - * 9: files are symlinks - * 10: files are not on same device - * 11: other error encountered + * 0: Files are reflinks + * 1: An error occurred during checking + * 3: Not a regular file + * 4: File sizes differ + * 5: Files have inline extents + * 6: Same file and path + * 7: Same file but with different path + * 8: Hardlink + * 9: Symlink + * 10: Files are on different devices + * 11: Not linked EXAMPLES diff --git a/docs/tutorial.rst b/docs/tutorial.rst index da68cb2de..7bc24ec55 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -797,6 +797,21 @@ Here's just a list of options that are nice to know, but are not essential: processed file. But be sure to read the caveats stated in the `manpage`_! Especially keep in mind that you need to have write access to the files for this to work. + Note that the above example will only save checksums for duplicate files. + Alternatively, with the ``--hash-unmatched`` option, checksums are calculated + and saved in xattributes for all files that have "size twins", i.e. files of + the same length. This may make the first run very slow but will greatly speed up + future runs. + + .. code-block:: bash + + $ rmlint large_dataset/ --xattr --hash-unmatched + $ rmlint large_dataset/ --xattr + + There is also the ``--hash-uniques`` option, which is similar to + ``--hash-unmatched`` but also hashes files with no size twins. This will be + even slower than ``--hash-unmatched`` on the first run. + - ``-r`` (``--hidden``): Include hidden files and directories. 
The default is to ignore these, to save you from destroying git repositories (or similar programs) that save their information in a ``.git`` directory where ``rmlint`` @@ -877,6 +892,8 @@ little `tool`_ which makes it really easy to extract data from a ``json`` file: $ rmlint t -o json -o uniques:unique_files | jq -r '.[1:-1][] | select(.is_original) | .path' | sort > original_files # Now we only need to combine both files: $ cat unique_files original_files + # Alternatively in one step: + $ rmlint t -o json -c json:unique | jq -r '.[1:-1][] | select(.is_original) | .path' | sort > original_files Filter by regular expressions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/gui/shredder/application.py b/gui/shredder/application.py index a9d24c2c2..7e737c1e8 100644 --- a/gui/shredder/application.py +++ b/gui/shredder/application.py @@ -79,13 +79,6 @@ def __init__(self, options): self.cmd_opts = options self.settings = self.win = None - # Check compile time features of rmlint that we need later. 
- if not have_feature('replay'): - LOGGER.error('No support for +replay in rmlint binary.') - LOGGER.error('Please recompile with --with-json-glib…') - LOGGER.error('…and `json-glib-1.0` installed on your system.') - sys.exit(-1) - def do_activate(self, **kw): Gtk.Application.do_activate(self, **kw) self.win.present() diff --git a/lib/SConscript b/lib/SConscript index e00231dd1..c6064eff8 100644 --- a/lib/SConscript +++ b/lib/SConscript @@ -20,7 +20,6 @@ def build_config_template(target, source, env): INSTALL_PREFIX=GetOption('actual_prefix') or GetOption('prefix'), HAVE_LIBINTL=env['HAVE_GETTEXT'], HAVE_LIBELF=env['HAVE_LIBELF'], - HAVE_JSON_GLIB=env['HAVE_JSON_GLIB'], HAVE_GIO_UNIX=env['HAVE_GIO_UNIX'], HAVE_FIEMAP=env['HAVE_FIEMAP'], HAVE_XATTR=env['HAVE_XATTR'], diff --git a/lib/cfg.c b/lib/cfg.c index 97f059620..fee1876ee 100644 --- a/lib/cfg.c +++ b/lib/cfg.c @@ -27,6 +27,7 @@ #include #include "cfg.h" +#include "logger.h" #include "utilities.h" static void rm_path_free(RmPath *rmpath) { diff --git a/lib/cfg.h b/lib/cfg.h index 54fc22c55..976cb785b 100644 --- a/lib/cfg.h +++ b/lib/cfg.h @@ -83,7 +83,6 @@ typedef struct RmCfg { gboolean write_cksum_to_xattr; gboolean read_cksum_from_xattr; gboolean clear_xattr_fields; - gboolean write_unfinished; gboolean build_fiemap; gboolean use_buffered_read; gboolean fake_fiemap; @@ -93,6 +92,8 @@ typedef struct RmCfg { gboolean read_stdin; gboolean read_stdin0; gboolean backup; + gboolean hash_uniques; + gboolean hash_unmatched; int permissions; @@ -178,13 +179,6 @@ typedef struct RmCfg { * (or directories) */ gboolean run_equal_mode; - /* --dedupe options */ - bool dedupe; - bool dedupe_check_xattr; - bool dedupe_readonly; - - /* for --is-reflink option */ - bool is_reflink; /* don't use sse accelerations */ bool no_sse; diff --git a/lib/checksum.c b/lib/checksum.c index 6ba1c3400..afd29e287 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -47,6 +47,7 @@ #include "checksums/sha3/sha3.h" #include 
"checksums/xxhash/xxhash.h" +#include "logger.h" #include "utilities.h" #define _RM_CHECKSUM_DEBUG 0 diff --git a/lib/cmdline.c b/lib/cmdline.c index 0954c3f6d..bd9db8f75 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -42,6 +42,7 @@ #include "cmdline.h" #include "formats.h" #include "hash-utility.h" +#include "logger.h" #include "md-scheduler.h" #include "preprocess.h" #include "replay.h" @@ -50,6 +51,8 @@ #include "treemerge.h" #include "utilities.h" +#define EXIT_EQUAL_UNKNOWN 2 + /* define paranoia levels */ static const RmDigestType RM_PARANOIA_LEVELS[] = {RM_DIGEST_METRO, RM_DIGEST_METRO256, @@ -75,7 +78,6 @@ static void rm_cmd_show_version(void) { {.name = "sha512", .enabled = HAVE_SHA512}, {.name = "bigfiles", .enabled = HAVE_BIGFILES}, {.name = "intl", .enabled = HAVE_LIBINTL}, - {.name = "replay", .enabled = HAVE_JSON_GLIB}, {.name = "xattr", .enabled = HAVE_XATTR}, {.name = "btrfs-support", .enabled = HAVE_BTRFS_H}, {.name = NULL, .enabled = 0}}; @@ -124,119 +126,6 @@ static void rm_cmd_show_manpage(void) { exit(0); } -/* -* Debian and Ubuntu based distributions fuck up setuptools -* by expecting packages to be installed to dist-packages and not site-packages -* like expected by setuptools. This breaks a lot of packages with the reasoning -* to reduce conflicts between system and user packages: -* -* https://stackoverflow.com/questions/9387928/whats-the-difference-between-dist-packages-and-site-packages -* -* We try to work around this by manually installing dist-packages to the -* sys.path by first calling a small bootstrap script. -*/ -static const char RM_PY_BOOTSTRAP[] = - "" - "# This is a bootstrap script for the rmlint-gui. \n" - "# See the src/rmlint.c in rmlint's source for more info. 
\n" - "import sys, os, site \n" - " \n" - "# Also default to dist-packages on debian(-based): \n" - "sites = site.getsitepackages() \n" - "sys.path.extend([d.replace('dist-packages', 'site-packages') for d in sites]) \n" - "sys.path.extend(sites) \n" - " \n" - "# Cleanup self: \n" - "try: \n" - " os.remove(sys.argv[0]) \n" - "except: \n" - " print('Note: Could not remove bootstrap script at ', sys.argv[0]) \n" - " \n" - "# Run shredder by importing the main: \n" - "try: \n" - " import shredder \n" - " shredder.run_gui() \n" - "except ImportError as err: \n" - " print('Failed to load shredder:', err) \n" - " print('This might be due to a corrupted install; try reinstalling.') \n"; - -static void rm_cmd_start_gui(int argc, const char **argv) { - const char *commands[] = {"python3", "python", NULL}; - const char **command = &commands[0]; - - GError *error = NULL; - gchar *bootstrap_path = NULL; - int bootstrap_fd = - g_file_open_tmp(".shredder-bootstrap.py.XXXXXX", &bootstrap_path, &error); - - if(bootstrap_fd < 0) { - rm_log_warning("Could not bootstrap gui: Unable to create tempfile: %s", - error->message); - g_error_free(error); - return; - } - - if(write(bootstrap_fd, RM_PY_BOOTSTRAP, sizeof(RM_PY_BOOTSTRAP)) < 0) { - rm_log_warning_line("Could not bootstrap gui: Unable to write to tempfile: %s", - g_strerror(errno)); - return; - } - - close(bootstrap_fd); - - while(*command) { - const char *all_argv[512]; - const char **argp = &all_argv[0]; - memset(all_argv, 0, sizeof(all_argv)); - - *argp++ = *command; - *argp++ = bootstrap_path; - - for(size_t i = 0; i < (size_t)argc && i < sizeof(all_argv) / 2; i++) { - *argp++ = argv[i]; - } - - if(execvp(*command, (char *const *)all_argv) == -1) { - rm_log_warning("Executed: %s ", *command); - for(int j = 0; j < (argp - all_argv); j++) { - rm_log_warning("%s ", all_argv[j]); - } - rm_log_warning("\n"); - rm_log_error_line("%s %d", g_strerror(errno), errno == ENOENT); - } else { - /* This is not reached anymore when execve 
suceeded */ - break; - } - - /* Try next command... */ - command++; - } -} - -static int rm_cmd_maybe_switch_to_gui(int argc, const char **argv) { - for(int i = 0; i < argc; i++) { - if(g_strcmp0("--gui", argv[i]) == 0) { - argv[i] = "shredder"; - rm_cmd_start_gui(argc - i - 1, &argv[i + 1]); - - /* We returned? Something's wrong */ - return EXIT_FAILURE; - } - } - - return EXIT_SUCCESS; -} - -static int rm_cmd_maybe_switch_to_hasher(int argc, const char **argv) { - for(int i = 0; i < argc; i++) { - if(g_strcmp0("--hash", argv[i]) == 0) { - argv[i] = argv[0]; - exit(rm_hasher_main(argc - i, &argv[i])); - } - } - - return EXIT_SUCCESS; -} /* clang-format off */ static const struct FormatSpec { @@ -447,16 +336,9 @@ static gboolean rm_cmd_parse_xattr(_UNUSED const char *option_name, session->cfg->write_cksum_to_xattr = true; session->cfg->read_cksum_from_xattr= true; session->cfg->clear_xattr_fields = false; - session->cfg->write_unfinished = true; return true; } -static GLogLevelFlags VERBOSITY_TO_LOG_LEVEL[] = {[0] = G_LOG_LEVEL_CRITICAL, - [1] = G_LOG_LEVEL_ERROR, - [2] = G_LOG_LEVEL_WARNING, - [3] = G_LOG_LEVEL_MESSAGE | - G_LOG_LEVEL_INFO, - [4] = G_LOG_LEVEL_DEBUG}; static bool rm_cmd_read_paths_from_stdin(RmSession *session, bool is_prefd, bool null_separated) { char delim = null_separated ? 
0 : '\n'; @@ -904,13 +786,6 @@ static gboolean rm_cmd_parse_timestamp_file(const char *option_name, return success; } -static void rm_cmd_set_verbosity_from_cnt(RmCfg *cfg, int verbosity_counter) { - cfg->verbosity = VERBOSITY_TO_LOG_LEVEL[CLAMP( - verbosity_counter, - 1, - (int)(sizeof(VERBOSITY_TO_LOG_LEVEL) / sizeof(GLogLevelFlags)) - 1)]; -} - static void rm_cmd_set_paranoia_from_cnt(RmCfg *cfg, int paranoia_counter, GError **error) { /* Handle the paranoia option */ @@ -1046,21 +921,6 @@ static gboolean rm_cmd_parse_no_progress(_UNUSED const char *option_name, _UNUSED GError **error) { rm_fmt_clear(session->formats); rm_cmd_set_default_outputs(session); - rm_cmd_set_verbosity_from_cnt(session->cfg, session->verbosity_count); - return true; -} - -static gboolean rm_cmd_parse_loud(_UNUSED const char *option_name, - _UNUSED const gchar *count, RmSession *session, - _UNUSED GError **error) { - rm_cmd_set_verbosity_from_cnt(session->cfg, ++session->verbosity_count); - return true; -} - -static gboolean rm_cmd_parse_quiet(_UNUSED const char *option_name, - _UNUSED const gchar *count, RmSession *session, - _UNUSED GError **error) { - rm_cmd_set_verbosity_from_cnt(session->cfg, --session->verbosity_count); return true; } @@ -1255,18 +1115,38 @@ static gboolean rm_cmd_parse_equal(_UNUSED const char *option_name, } static gboolean rm_cmd_parse_btrfs_clone(_UNUSED const char *option_name, - _UNUSED const gchar *x, RmSession *session, + _UNUSED const gchar *x, _UNUSED RmSession *session, _UNUSED GError **error) { - rm_log_warning_line("option --btrfs-clone is deprecated, use --dedupe"); - session->cfg->dedupe = true; - return true; + rm_log_error_line("option --btrfs-clone is deprecated, use --dedupe"); + return false; } static gboolean rm_cmd_parse_btrfs_readonly(_UNUSED const char *option_name, - _UNUSED const gchar *x, RmSession *session, + _UNUSED const gchar *x, _UNUSED RmSession *session, _UNUSED GError **error) { - session->cfg->dedupe_readonly = true; - return 
true; + rm_log_error_line("option --btrfs-readonly is deprecated, use --dedupe --readonly"); + return false; +} + +static gboolean rm_cmd_parse_dedupe_xattr(_UNUSED const char *option_name, + _UNUSED const gchar *x, _UNUSED RmSession *session, + _UNUSED GError **error) { + rm_log_error_line("option --dedupe-xattr is deprecated, use --dedupe --xattr"); + return false; +} + +static gboolean rm_cmd_parse_dedupe_readonly(_UNUSED const char *option_name, + _UNUSED const gchar *x, _UNUSED RmSession *session, + _UNUSED GError **error) { + rm_log_error_line("option --dedupe-readonly is deprecated, use --dedupe --readonly"); + return false; +} + +static gboolean rm_cmd_parse_write_unfinished(_UNUSED const char *option_name, + _UNUSED const gchar *x, _UNUSED RmSession *session, + _UNUSED GError **error) { + rm_log_error_line("option --write-unfinished is deprecated, use --hash-unmatched"); + return false; +} static bool rm_cmd_set_cwd(RmCfg *cfg) { @@ -1367,19 +1247,6 @@ static char * rm_cmd_find_own_executable_path(RmSession *session, char **argv) { bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) { RmCfg *cfg = session->cfg; - /* Handle --gui before all other processing, - * since we need to pass other args to the python interpreter. - * This is not possible with GOption alone. 
- */ - if(rm_cmd_maybe_switch_to_gui(argc, (const char **)argv) == EXIT_FAILURE) { - rm_log_error_line(_("Could not start graphical user interface.")); - return false; - } - - if(rm_cmd_maybe_switch_to_hasher(argc, (const char **)argv) == EXIT_FAILURE) { - return false; - } - /* List of paths we got passed (or NULL) */ char **paths = NULL; @@ -1412,11 +1279,11 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) { {"xattr" , 'C' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(xattr) , _("Enable xattr based caching") , ""} , /* Non-trivial switches */ - {"progress" , 'g' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(progress) , _("Enable progressbar") , NULL} , - {"loud" , 'v' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(loud) , _("Be more verbose (-vvv for much more)") , NULL} , - {"quiet" , 'V' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(quiet) , _("Be less verbose (-VVV for much less)") , NULL} , - {"replay" , 'Y' , 0 , G_OPTION_ARG_CALLBACK , FUNC(replay) , _("Re-output a json file") , "path/to/rmlint.json"} , - {"equal" , 0 , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(equal) , _("Test for equality of PATHS") , "PATHS"} , + {"progress" , 'g' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(progress) , _("Enable progressbar") , NULL} , + {"loud" , 'v' , EMPTY , G_OPTION_ARG_CALLBACK , rm_logger_louder , _("Be more verbose (-vvv for much more)") , NULL} , + {"quiet" , 'V' , EMPTY , G_OPTION_ARG_CALLBACK , rm_logger_quieter , _("Be less verbose (-VVV for much less)") , NULL} , + {"replay" , 'Y' , 0 , G_OPTION_ARG_CALLBACK , FUNC(replay) , _("Re-output a json file") , "path/to/rmlint.json"} , + {"equal" , 0 , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(equal) , _("Test for equality of PATHS") , "PATHS"} , /* Trivial boolean options */ {"no-with-color" , 'W' , DISABLE , G_OPTION_ARG_NONE , &cfg->with_color , _("Be not that colorful") , NULL} , @@ -1443,10 +1310,10 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) { {"backup" , 0 , 0 , G_OPTION_ARG_NONE , &cfg->backup , _("Do create 
backups of previous result files") , NULL} , /* COW filesystem deduplication support */ - {"dedupe" , 0 , 0 , G_OPTION_ARG_NONE , &cfg->dedupe , _("Dedupe matching extents from source to dest (if filesystem supports)") , NULL} , - {"dedupe-xattr" , 0 , 0 , G_OPTION_ARG_NONE , &cfg->dedupe_check_xattr , _("Check extended attributes to see if the file is already deduplicated") , NULL} , - {"dedupe-readonly" , 0 , 0 , G_OPTION_ARG_NONE , &cfg->dedupe_readonly , _("(--dedupe option) even dedupe read-only snapshots (needs root)") , NULL} , - {"is-reflink" , 0 , 0 , G_OPTION_ARG_NONE , &cfg->is_reflink , _("Test if two files are reflinks (share same data extents)") , NULL} , + {"dedupe" , 0 , 0 , 0 , NULL , _("Dedupe matching extents from source to dest (if filesystem supports)") , NULL} , + {"dedupe-xattr" , 0 , 0 , G_OPTION_ARG_CALLBACK , FUNC(dedupe_xattr) , "Deprecated, use --dedupe --xattr" , NULL} , + {"dedupe-readonly" , 0 , 0 , G_OPTION_ARG_CALLBACK , FUNC(dedupe_readonly) , "Deprecated, use --dedupe --readonly" , NULL} , + {"is-reflink" , 0 , 0 , 0 , NULL , _("Test if two files are reflinks (share same data extents)") , NULL} , /* Callback */ {"show-man" , 'H' , EMPTY , G_OPTION_ARG_CALLBACK , rm_cmd_show_manpage , _("Show the manpage") , NULL} , @@ -1487,7 +1354,9 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) { {"sweep-files" , 0 , HIDDEN , G_OPTION_ARG_CALLBACK , FUNC(sweep_count) , "Specify max. file count per pass when scanning disks" , "S"} , {"threads" , 't' , HIDDEN , G_OPTION_ARG_INT64 , &cfg->threads , "Specify max. 
number of hasher threads" , "N"} , {"threads-per-disk" , 0 , HIDDEN , G_OPTION_ARG_INT , &cfg->threads_per_disk , "Specify number of reader threads per physical disk" , NULL} , - {"write-unfinished" , 'U' , HIDDEN , G_OPTION_ARG_NONE , &cfg->write_unfinished , "Output unfinished checksums" , NULL} , + {"write-unfinished" , 0 , EMPTY | HIDDEN , G_OPTION_ARG_CALLBACK , FUNC(write_unfinished) , "Output unfinished checksums (deprecated)" , NULL} , + {"hash-uniques" , 0 , HIDDEN , G_OPTION_ARG_NONE , &cfg->hash_uniques , "Hash (whole of) unique files too (for json or xattr output)" , NULL} , + {"hash-unmatched" , 'U' , HIDDEN , G_OPTION_ARG_NONE , &cfg->hash_unmatched , "Same as --hash-uniques but only for files with size twin" , NULL} , {"xattr-write" , 0 , HIDDEN , G_OPTION_ARG_NONE , &cfg->write_cksum_to_xattr , "Cache checksum in file attributes" , NULL} , {"xattr-read" , 0 , HIDDEN , G_OPTION_ARG_NONE , &cfg->read_cksum_from_xattr , "Read cached checksums from file attributes" , NULL} , {"xattr-clear" , 0 , HIDDEN , G_OPTION_ARG_NONE , &cfg->clear_xattr_fields , "Clear xattrs from all seen files" , NULL} , @@ -1507,15 +1376,12 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) { const GOptionEntry deprecated_option_entries[] = { {"btrfs-clone" , 0 , EMPTY | HIDDEN , G_OPTION_ARG_CALLBACK , FUNC(btrfs_clone) , "Deprecated, use --dedupe instead" , NULL}, - {"btrfs-readonly" , 0 , EMPTY | HIDDEN , G_OPTION_ARG_CALLBACK , FUNC(btrfs_readonly) , "Deprecated, use --dedupe-readonly instead" , NULL}, + {"btrfs-readonly" , 0 , EMPTY | HIDDEN , G_OPTION_ARG_CALLBACK , FUNC(btrfs_readonly) , "Deprecated, use --dedupe --readonly instead" , NULL}, {NULL , 0 , HIDDEN , 0 , NULL , NULL , NULL} }; /* clang-format on */ - /* Initialize default verbosity */ - rm_cmd_set_verbosity_from_cnt(cfg, session->verbosity_count); - if(!rm_cmd_set_cwd(cfg)) { g_set_error(&error, RM_ERROR_QUARK, 0, _("Cannot set current working directory")); goto cleanup; @@ -1596,18 +1462,6 
@@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) { goto cleanup; } - if(cfg->replay && (cfg->dedupe || cfg->is_reflink)) { - error = g_error_new( - RM_ERROR_QUARK, 0, - _("--replay (-Y) is incompatible with --dedupe or --is-reflink") - ); goto cleanup; - } - - if(cfg->dedupe) { - /* dedupe session; regular rmlint configs are ignored */ - goto cleanup; - } - /* Silent fixes of invalid numeric input */ cfg->threads = CLAMP(cfg->threads, 1, 128); cfg->depth = CLAMP(cfg->depth, 1, PATH_MAX / 2 + 1); @@ -1687,7 +1541,7 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) { if(cfg->progress_enabled) { /* Set verbosity to minimal */ - rm_cmd_set_verbosity_from_cnt(session->cfg, 1); + rm_logger_set_verbosity(1); } g_option_context_free(option_parser); @@ -1728,6 +1582,8 @@ static int rm_cmd_replay_main(RmSession *session) { int rm_cmd_main(RmSession *session) { int exit_state = EXIT_SUCCESS; + session->equal_exit_code = EXIT_EQUAL_UNKNOWN; + RmCfg *cfg = session->cfg; rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_INIT); @@ -1834,6 +1690,7 @@ int rm_cmd_main(RmSession *session) { rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PRE_SHUTDOWN); rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_SUMMARY); + if(session->shred_bytes_remaining != 0) { rm_log_error_line("BUG: Number of remaining bytes is %" LLU " (not 0). Please report this.", @@ -1849,7 +1706,7 @@ int rm_cmd_main(RmSession *session) { } if(exit_state == EXIT_SUCCESS && cfg->run_equal_mode) { - return session->equal_exit_code; + return session->equal_exit_code == EXIT_SUCCESS ? 
EXIT_SUCCESS : EXIT_FAILURE; } if(exit_state == EXIT_SUCCESS && rm_session_was_aborted()) { diff --git a/lib/config.h.in b/lib/config.h.in index 300251713..b05185729 100644 --- a/lib/config.h.in +++ b/lib/config.h.in @@ -6,7 +6,6 @@ #define HAVE_BLKID ({HAVE_BLKID}) #define HAVE_LIBINTL ({HAVE_LIBINTL}) #define HAVE_LIBELF ({HAVE_LIBELF}) -#define HAVE_JSON_GLIB ({HAVE_JSON_GLIB}) #define HAVE_GIO_UNIX ({HAVE_GIO_UNIX}) #define HAVE_FIEMAP ({HAVE_FIEMAP}) #define HAVE_XATTR ({HAVE_XATTR}) @@ -44,13 +43,6 @@ || Z <= RM_VERSION_MICRO \ ) -/* These colors should only be used with the rm_log_* macros below */ -#define RED "\x1b[31;01m" -#define YELLOW "\x1b[33;01m" -#define RESET "\x1b[0m" -#define GREEN "\x1b[32;01m" -#define BLUE "\x1b[34;01m" - #include #if HAVE_LINUX_LIMITS @@ -61,29 +53,6 @@ # endif #endif -#define rm_log_debug(...) \ - g_log("rmlint", G_LOG_LEVEL_DEBUG, __VA_ARGS__) -#define rm_log_info(...) \ - g_log("rmlint", G_LOG_LEVEL_INFO, __VA_ARGS__) -#define rm_log_warning(...) \ - g_log("rmlint", G_LOG_LEVEL_WARNING, __VA_ARGS__) -#define rm_log_error(...) \ - g_log("rmlint", G_LOG_LEVEL_CRITICAL, __VA_ARGS__) - -#define rm_log_perror(message) \ - if(errno) {{ \ - rm_log_error_line("%s:%d: %s: %s", __FILE__, __LINE__, message, g_strerror(errno)); \ - }} \ - -#define rm_log_perrorf(message, ...) 
\ - if(errno) {{ \ - int _errsv = errno; \ - char *msg = g_strdup_printf(message, __VA_ARGS__); \ - rm_log_error_line("%s:%d: %s: %s", __FILE__, __LINE__, msg, \ - g_strerror(_errsv)); \ - g_free(msg); \ - }} - #define _UNUSED G_GNUC_UNUSED #define LLU G_GUINT64_FORMAT #define LLI G_GINT64_FORMAT @@ -120,70 +89,8 @@ # define N_(String) gettext_noop (String) #endif -static inline GMutex* rm_log_get_mutex(void) {{ - static GMutex RM_LOG_MTX; - return &RM_LOG_MTX; -}} - -#define RM_LOG_INIT \ - g_mutex_init(rm_log_get_mutex()); - typedef guint64 RmOff; -/* Stupid macros to make printing error lines easier */ -#define rm_log_error_prefix() \ - rm_log_error(RED); \ - rm_log_error(_("ERROR")); \ - rm_log_error(": "RESET); \ - -#define rm_log_warning_prefix() \ - rm_log_warning(YELLOW); \ - rm_log_warning(_("WARNING")); \ - rm_log_warning(": "RESET); \ - -#define rm_log_info_prefix() \ - rm_log_warning(GREEN); \ - rm_log_warning(_("INFO")); \ - rm_log_warning(": "RESET); \ - -#define rm_log_debug_prefix() \ - rm_log_debug(BLUE); \ - rm_log_debug(_("DEBUG")); \ - rm_log_debug(": "RESET); \ - -/////////////// - -#define rm_log_error_line(...) \ - g_mutex_lock(rm_log_get_mutex()); \ - rm_log_error_prefix() \ - rm_log_error(__VA_ARGS__); \ - rm_log_error("\n"); \ - g_mutex_unlock(rm_log_get_mutex()); \ - -#define rm_log_warning_line(...) \ - g_mutex_lock(rm_log_get_mutex()); \ - rm_log_warning_prefix() \ - rm_log_warning(__VA_ARGS__); \ - rm_log_warning("\n"); \ - g_mutex_unlock(rm_log_get_mutex()); \ - -#define rm_log_info_line(...) \ - g_mutex_lock(rm_log_get_mutex()); \ - rm_log_info_prefix() \ - rm_log_warning(__VA_ARGS__); \ - rm_log_warning("\n"); \ - g_mutex_unlock(rm_log_get_mutex()); \ - -#define rm_log_debug_line(...) \ - g_mutex_lock(rm_log_get_mutex()); \ - rm_log_debug_prefix() \ - rm_log_debug(__VA_ARGS__); \ - rm_log_debug("\n"); \ - g_mutex_unlock(rm_log_get_mutex()); \ - -/* Domain for reporting errors. 
Needed by GOptions */ -#define RM_ERROR_QUARK (g_quark_from_static_string("rmlint")) - #ifdef RM_DEBUG #undef NDEBUG #else diff --git a/lib/file.h b/lib/file.h index 5b36667a9..f91c950c5 100644 --- a/lib/file.h +++ b/lib/file.h @@ -70,7 +70,7 @@ typedef enum RmLintType { RM_LINT_TYPE_DUPE_DIR_CANDIDATE, /* Special type for files that got sieved out during shreddering. - * if cfg->write_unfinished is true, those may be included in the + * Depending on output settings, these may be included in the * json/xattr/csv output. * * This is mainly useful for caching. diff --git a/lib/formats.c b/lib/formats.c index e935f1977..cb9756f50 100644 --- a/lib/formats.c +++ b/lib/formats.c @@ -30,6 +30,7 @@ #include "file.h" #include "formats.h" +#include "logger.h" /* A group of output files. * These are only created when caching to the end of the run is requested. diff --git a/lib/formats/_equal.c b/lib/formats/_equal.c index 4a9edb15c..02d1ec5fa 100644 --- a/lib/formats/_equal.c +++ b/lib/formats/_equal.c @@ -24,6 +24,7 @@ */ #include "../formats.h" +#include "../logger.h" #include "../utilities.h" #include "../preprocess.h" @@ -38,7 +39,7 @@ typedef struct RmFmtHandlerEqual { /* Checksum of the last checked file (or NULL) */ char *last_checksum; - /* Set to true once a mismatch (colliding cksum) was found */ + /* Set to true once a mismatch (differing cksum) was found */ bool mismatch_found; /* session->cfg->paths turned to a set for efficient member test */ @@ -49,10 +50,8 @@ typedef struct RmFmtHandlerEqual { // ACTUAL CALLBACKS // ///////////////////////// -static void rm_fmt_report_failure(RmFmtHandlerEqual *self, RmSession *session) { +static void rm_fmt_report_failure(_UNUSED RmFmtHandlerEqual *self, RmSession *session) { session->equal_exit_code = EXIT_FAILURE; - self->mismatch_found = true; - rm_session_abort(); } static void rm_fmt_head(RmSession *session, RmFmtHandler *parent, _UNUSED FILE *out) { @@ -78,7 +77,6 @@ static void rm_fmt_elem( /* No need to check anymore, 
it's not equal. */ if(self->mismatch_found) { - rm_fmt_report_failure(self, session); return; } @@ -86,38 +84,42 @@ static void rm_fmt_elem( /* We do not want to handle unique files here. * If it is unique, it will be not equal... * */ - rm_fmt_report_failure(self, session); - return; + self->mismatch_found = TRUE; } + else { - RM_DEFINE_PATH(file); + RM_DEFINE_PATH(file); - if(g_hash_table_contains(self->input_paths, file_path) == false) { - /* Ignore; this path was not given explicitly on the cmdline */ - return; - } + if(g_hash_table_contains(self->input_paths, file_path) == false) { + /* Ignore; this path was not given explicitly on the cmdline */ + return; + } - size_t cksum_bytes = rm_digest_get_bytes(file->digest) * 2 + 1; - char *checksum = g_malloc0(cksum_bytes); - - memset(checksum, '0', cksum_bytes); - checksum[cksum_bytes - 1] = 0; - rm_digest_hexstring(file->digest, checksum); - - if(self->last_checksum != NULL) { - if(!strncmp(checksum, self->last_checksum, cksum_bytes)) { - session->equal_exit_code = EXIT_SUCCESS; - } else { - rm_fmt_report_failure(self, session); - rm_log_debug_line( - "First differing items:\n\t%s (%s)\n\tlast checksum: (%s)", - file_path, checksum, self->last_checksum - ); + size_t cksum_bytes = rm_digest_get_bytes(file->digest) * 2 + 1; + char *checksum = g_malloc0(cksum_bytes); + + memset(checksum, '0', cksum_bytes); + checksum[cksum_bytes - 1] = 0; + rm_digest_hexstring(file->digest, checksum); + + if(self->last_checksum != NULL) { + if(!strncmp(checksum, self->last_checksum, cksum_bytes)) { + session->equal_exit_code = EXIT_SUCCESS; + } else { + self->mismatch_found = TRUE; + rm_log_debug_line( + "First differing items:\n\t%s (%s)\n\tlast checksum: (%s)", + file_path, checksum, self->last_checksum + ); + } + g_free(self->last_checksum); } - g_free(self->last_checksum); + self->last_checksum = checksum; + } + if(self->mismatch_found) { + rm_fmt_report_failure(self, session); } - self->last_checksum = checksum; } static void 
rm_fmt_foot( @@ -141,7 +143,7 @@ static RmFmtHandlerEqual EQUAL_HANDLE_IMPL = { .valid_keys = {NULL}, }, .last_checksum = NULL, - .mismatch_found = false + .mismatch_found = false, }; RmFmtHandler *EQUAL_HANDLER = (RmFmtHandler *) &EQUAL_HANDLE_IMPL; diff --git a/lib/formats/csv.c b/lib/formats/csv.c index 157d11807..c944a3168 100644 --- a/lib/formats/csv.c +++ b/lib/formats/csv.c @@ -60,7 +60,7 @@ static void rm_fmt_elem(_UNUSED RmSession *session, _UNUSED RmFmtHandler *parent FILE *out, RmFile *file) { if(file->lint_type == RM_LINT_TYPE_UNIQUE_FILE) { if(!rm_fmt_get_config_value(session->formats, "csv", "unique")) { - if(!file->digest || !session->cfg->write_unfinished) { + if(!file->digest || !(session->cfg->hash_uniques || session->cfg->hash_unmatched)) { return; } } diff --git a/lib/formats/fdupes.c b/lib/formats/fdupes.c index 64cea2307..c2a622f78 100644 --- a/lib/formats/fdupes.c +++ b/lib/formats/fdupes.c @@ -24,6 +24,7 @@ */ #include "../formats.h" +#include "../logger.h" #include #include diff --git a/lib/formats/json.c b/lib/formats/json.c index 954207832..c10f44ec1 100644 --- a/lib/formats/json.c +++ b/lib/formats/json.c @@ -23,25 +23,34 @@ * */ +#include +#include +#include +#include +#include +#include + #include "../checksums/murmur3.h" #include "../formats.h" +#include "../logger.h" #include "../preprocess.h" -#include "../utilities.h" #include "../treemerge.h" - -#include -#include -#include +#include "../utilities.h" typedef struct RmFmtHandlerJSON { /* must be first */ RmFmtHandler parent; - /* More human readable output? 
*/ - bool pretty; - /* set of already existing ids */ GHashTable *id_set; + + GOutputStream *stream; + JsonGenerator *generator; + JsonNode *root; + + bool need_comma_before_next_elem; + bool pretty; + } RmFmtHandlerJSON; ////////////////////////////////////////// @@ -69,201 +78,125 @@ static guint32 rm_fmt_json_generate_id(RmFmtHandlerJSON *self, RmFile *file, return hash; } -////////////////////////////////////////// -// POOR MAN'S JSON FORMATTING TOOLBOX // -////////////////////////////////////////// - -static void rm_fmt_json_key(FILE *out, const char *key, const char *value) { - fprintf(out, "\"%s\": \"%s\"", key, value); -} - -static void rm_fmt_json_key_bool(FILE *out, const char *key, bool value) { - fprintf(out, "\"%s\": %s", key, value ? "true" : "false"); -} - -static void rm_fmt_json_key_int(FILE *out, const char *key, RmOff value) { - fprintf(out, "\"%s\": %" LLU "", key, value); +static void rm_fmt_json_sep(RmFmtHandlerJSON *self) { + g_output_stream_printf( + self->stream, NULL, NULL, NULL, ",%s", self->pretty ? " " : "\n"); } -static void rm_fmt_json_key_float(FILE *out, const char *key, gdouble value) { - // Make sure that the floating point number gets printed with a '.', - // not with a comma as usual in e.g. the german language. 
- gchar buf[G_ASCII_DTOSTR_BUF_SIZE]; - fprintf(out, "\"%s\": %s", key, g_ascii_dtostr(buf, sizeof(buf) - 1, value)); -} +static void rm_fmt_json_open(RmSession *session, RmFmtHandlerJSON *self, FILE *out) { + self->need_comma_before_next_elem = false; + self->stream = g_unix_output_stream_new(fileno(out), false); + self->generator = json_generator_new(); + self->pretty = !rm_fmt_get_config_value(session->formats, "json", "oneline"); + json_generator_set_pretty(self->generator, self->pretty); -static bool rm_fmt_json_fix(const char *string, char *fixed, size_t fixed_len) { - /* More information here: - * - * http://stackoverflow.com/questions/4901133/json-and-escaping-characters/4908960#4908960 - */ - - int n = strlen(string); - char *safe_iter = fixed; - - for(int i = 0; i < n && (size_t)(safe_iter - fixed) < fixed_len; ++i) { - unsigned char *curr = (unsigned char *)&string[i]; - - char text[20]; - memset(text, 0, sizeof(text)); - - if(*curr == '"' || *curr == '\\') { - /* Printable, but needs to be escaped */ - text[0] = '\\'; - text[1] = *curr; - } else if((*curr > 0 && *curr < 0x1f) || *curr == 0x7f) { - /* Something unprintable */ - switch(*curr) { - case '\b': - g_snprintf(text, sizeof(text), "\\b"); - break; - case '\f': - g_snprintf(text, sizeof(text), "\\f"); - break; - case '\n': - g_snprintf(text, sizeof(text), "\\n"); - break; - case '\r': - g_snprintf(text, sizeof(text), "\\r"); - break; - case '\t': - g_snprintf(text, sizeof(text), "\\t"); - break; - default: - g_snprintf(text, sizeof(text), "\\u00%02x", (guint)*curr); - break; - } - } else { - /* Take it unmodified */ - text[0] = *curr; - } + self->root = json_node_alloc(); + json_generator_set_root(self->generator, self->root); - safe_iter = g_stpcpy(safe_iter, text); - } + self->id_set = g_hash_table_new(NULL, NULL); - return (size_t)(safe_iter - fixed) < fixed_len; + // write the start of the json array + g_output_stream_printf(self->stream, NULL, NULL, NULL, "[\n"); } -static void 
rm_fmt_json_key_unsafe(FILE *out, const char *key, const char *value) { - char safe_value[PATH_MAX + 4 + 1]; - memset(safe_value, 0, sizeof(safe_value)); +static void rm_fmt_json_close(RmFmtHandlerJSON *self) { + // free up memory + json_node_unref(self->root); + g_object_unref(self->generator); - if(rm_fmt_json_fix(value, safe_value, sizeof(safe_value))) { - fprintf(out, "\"%s\": \"%s\"", key, safe_value); - } else { - /* This should never happen but give at least means of debugging */ - fprintf(out, "\"%s\": \"\"", key); - } -} + // write the end of the json array + g_output_stream_printf(self->stream, NULL, NULL, NULL, "]\n"); + g_object_unref(self->stream); -static void rm_fmt_json_open(RmFmtHandlerJSON *self, FILE *out) { - fprintf(out, "{%s", self->pretty ? "\n " : ""); + g_hash_table_unref(self->id_set); } -static void rm_fmt_json_close(RmFmtHandlerJSON *self, FILE *out) { - if(self->pretty) { - fprintf(out, "\n}, "); +static void rm_fmt_object_write_and_free(JsonObject *obj, RmFmtHandlerJSON *self) { + if(self->need_comma_before_next_elem) { + rm_fmt_json_sep(self); } else { - fprintf(out, "},\n"); + self->need_comma_before_next_elem = true; } -} + GError *error = NULL; + json_node_set_object(self->root, obj); + json_generator_set_root(self->generator, self->root); -static void rm_fmt_json_sep(RmFmtHandlerJSON *self, FILE *out) { - fprintf(out, ",%s", self->pretty ? 
"\n " : ""); + if(!json_generator_to_stream(self->generator, self->stream, false, &error)) { + rm_log_error_line("Error writing to json stream"); + } + json_object_unref(obj); } ///////////////////////// // ACTUAL CALLBACKS // ///////////////////////// -static void rm_fmt_head(RmSession *session, _UNUSED RmFmtHandler *parent, FILE *out) { - fprintf(out, "[\n"); - +static void rm_fmt_head(RmSession *session, RmFmtHandler *parent, FILE *out) { RmFmtHandlerJSON *self = (RmFmtHandlerJSON *)parent; - self->id_set = g_hash_table_new(NULL, NULL); - if(rm_fmt_get_config_value(session->formats, "json", "oneline")) { - self->pretty = false; - } + rm_fmt_json_open(session, self, out); if(!rm_fmt_get_config_value(session->formats, "json", "no_header")) { - rm_fmt_json_open(self, out); - { - rm_fmt_json_key(out, "description", "rmlint json-dump of lint files"); - rm_fmt_json_sep(self, out); - rm_fmt_json_key(out, "cwd", session->cfg->iwd); - rm_fmt_json_sep(self, out); - rm_fmt_json_key(out, "args", session->cfg->joined_argv); - rm_fmt_json_sep(self, out); - rm_fmt_json_key(out, "version", RM_VERSION); - rm_fmt_json_sep(self, out); - rm_fmt_json_key(out, "rev", RM_VERSION_GIT_REVISION); - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "progress", 0); /* Header is always first. 
*/ - rm_fmt_json_sep(self, out); - rm_fmt_json_key(out, "checksum_type", - rm_digest_type_to_string(session->cfg->checksum_type)); - if(session->hash_seed) { - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "hash_seed", session->hash_seed); - } - - rm_fmt_json_sep(self, out); - rm_fmt_json_key_bool(out, "merge_directories", session->cfg->merge_directories); + JsonObject *header = json_object_new(); + json_object_set_string_member(header, "description", + "rmlint json-dump of lint files"); + json_object_set_string_member(header, "cwd", session->cfg->iwd); + json_object_set_string_member(header, "args", session->cfg->joined_argv); + json_object_set_string_member(header, "version", RM_VERSION); + json_object_set_string_member(header, "rev", RM_VERSION_GIT_REVISION); + json_object_set_int_member(header, "progress", 0); /* Header is always first. */ + json_object_set_string_member( + header, "checksum_type", + rm_digest_type_to_string(session->cfg->checksum_type)); + if(session->hash_seed) { + json_object_set_int_member(header, "hash_seed", session->hash_seed); } - rm_fmt_json_close(self, out); + json_object_set_boolean_member(header, "merge_directories", + session->cfg->merge_directories); + rm_fmt_object_write_and_free(header, self); } } -static void rm_fmt_foot(_UNUSED RmSession *session, RmFmtHandler *parent, FILE *out) { +static void rm_fmt_foot(_UNUSED RmSession *session, RmFmtHandler *parent, + _UNUSED FILE *out) { RmFmtHandlerJSON *self = (RmFmtHandlerJSON *)parent; - if(rm_fmt_get_config_value(session->formats, "json", "no_footer")) { - fprintf(out, "{}"); - } else { - rm_fmt_json_open(self, out); - { - rm_fmt_json_key_bool(out, "aborted", rm_session_was_aborted()); - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "progress", 100); /* Footer is always last. 
*/ - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "total_files", session->total_files); - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "ignored_files", session->ignored_files); - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "ignored_folders", session->ignored_folders); - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "duplicates", session->dup_counter); - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "duplicate_sets", session->dup_group_counter); - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "total_lint_size", session->total_lint_size); - } - if(self->pretty) { - fprintf(out, "\n}"); - } else { - fprintf(out, "}\n"); - } + if(!rm_fmt_get_config_value(session->formats, "json", "no_footer")) { + JsonObject *footer = json_object_new(); + json_object_set_boolean_member(footer, "aborted", rm_session_was_aborted()); + json_object_set_int_member(footer, "progress", 100); /* Footer is always last. */ + json_object_set_int_member(footer, "total_files", session->total_files); + json_object_set_int_member(footer, "ignored_files", session->ignored_files); + json_object_set_int_member(footer, "ignored_folders", session->ignored_folders); + json_object_set_int_member(footer, "duplicates", session->dup_counter); + json_object_set_int_member(footer, "duplicate_sets", session->dup_group_counter); + json_object_set_int_member(footer, "total_lint_size", session->total_lint_size); + rm_fmt_object_write_and_free(footer, self); } - - fprintf(out, "]\n"); - g_hash_table_unref(self->id_set); + rm_fmt_json_close(self); } -static void rm_fmt_json_cksum(RmFile *file, char *checksum_str, size_t size) { - memset(checksum_str, '0', size); - checksum_str[size - 1] = 0; +static char *rm_fmt_json_cksum(RmFile *file) { + if(file->digest == NULL) { + return NULL; + } + size_t checksum_size = rm_digest_get_bytes(file->digest) * 2 + 1; + char *checksum_str = g_malloc0(checksum_size); rm_digest_hexstring(file->digest, checksum_str); + 
checksum_str[checksum_size - 1] = 0; + return checksum_str; } -static void rm_fmt_elem(RmSession *session, _UNUSED RmFmtHandler *parent, FILE *out, RmFile *file) { +static void rm_fmt_elem(RmSession *session, _UNUSED RmFmtHandler *parent, + _UNUSED FILE *out, RmFile *file) { if(rm_fmt_get_config_value(session->formats, "json", "no_body")) { return; } if(file->lint_type == RM_LINT_TYPE_UNIQUE_FILE) { if(!rm_fmt_get_config_value(session->formats, "json", "unique")) { - if(!file->digest || !session->cfg->write_unfinished) { + if(!file->digest || + !(session->cfg->hash_uniques || session->cfg->hash_unmatched)) { return; } } @@ -278,120 +211,81 @@ static void rm_fmt_elem(RmSession *session, _UNUSED RmFmtHandler *parent, FILE * file->is_original = true; } } + char *checksum_str = rm_fmt_json_cksum(file); + + RmFmtHandlerJSON *self = (RmFmtHandlerJSON *)parent; - char *checksum_str = NULL; - size_t checksum_size = 0; + RM_DEFINE_PATH(file); + + JsonObject *elem = json_object_new(); + json_object_set_int_member( + elem, "id", rm_fmt_json_generate_id(self, file, file_path, checksum_str)); + json_object_set_string_member(elem, "type", + rm_file_lint_type_to_string(file->lint_type)); + gdouble progress = 0; + if(session->shred_bytes_after_preprocess) { + progress = CLAMP(100 - 100 * ((gdouble)session->shred_bytes_remaining / + (gdouble)session->shred_bytes_after_preprocess), + 0, + 100); + json_object_set_int_member(elem, "progress", progress); + } - if(file->digest != NULL) { - checksum_size = rm_digest_get_bytes(file->digest) * 2 + 1; - checksum_str = g_slice_alloc0(checksum_size); - rm_fmt_json_cksum(file, checksum_str, checksum_size); - checksum_str[checksum_size - 1] = 0; + if(file->digest) { + json_object_set_string_member(elem, "checksum", checksum_str); } - RmFmtHandlerJSON *self = (RmFmtHandlerJSON *)parent; + json_object_set_string_member(elem, "path", file_path); + json_object_set_int_member(elem, "size", file->actual_file_size); + json_object_set_int_member(elem, 
"depth", file->depth); + json_object_set_int_member(elem, "inode", file->inode); + json_object_set_int_member(elem, "disk_id", file->dev); + json_object_set_boolean_member(elem, "is_original", file->is_original); - /* Make it look like a json element */ - rm_fmt_json_open(self, out); - { - RM_DEFINE_PATH(file); - - rm_fmt_json_key_int(out, "id", - rm_fmt_json_generate_id(self, file, file_path, checksum_str)); - rm_fmt_json_sep(self, out); - rm_fmt_json_key(out, "type", rm_file_lint_type_to_string(file->lint_type)); - rm_fmt_json_sep(self, out); - - gdouble progress = 0; - if(session->shred_bytes_after_preprocess) { - progress = CLAMP( - 100 - 100 * ( - (gdouble)session->shred_bytes_remaining / - (gdouble)session->shred_bytes_after_preprocess - ), - 0, - 100 - ); - } - rm_fmt_json_key_int(out, "progress", progress); - rm_fmt_json_sep(self, out); + if(file->lint_type == RM_LINT_TYPE_DUPE_DIR_CANDIDATE) { + json_object_set_int_member(elem, "n_children", file->n_children); + } - if(file->digest) { - rm_fmt_json_key(out, "checksum", checksum_str); - rm_fmt_json_sep(self, out); + if(file->lint_type != RM_LINT_TYPE_UNIQUE_FILE) { + if(file->twin_count >= 0) { + json_object_set_int_member(elem, "twins", file->twin_count); } - rm_fmt_json_key_unsafe(out, "path", file_path); - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "size", file->actual_file_size); - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "depth", file->depth); - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "inode", file->inode); - rm_fmt_json_sep(self, out); - rm_fmt_json_key_int(out, "disk_id", file->dev); - rm_fmt_json_sep(self, out); - rm_fmt_json_key_bool(out, "is_original", file->is_original); - rm_fmt_json_sep(self, out); - - if(file->lint_type == RM_LINT_TYPE_DUPE_DIR_CANDIDATE) { - rm_fmt_json_key_int(out, "n_children", file->n_children); - rm_fmt_json_sep(self, out); + if(file->lint_type == RM_LINT_TYPE_PART_OF_DIRECTORY && file->parent_dir) { + 
json_object_set_string_member(elem, "parent_path", + rm_directory_get_dirname(file->parent_dir)); } - if(file->lint_type != RM_LINT_TYPE_UNIQUE_FILE) { - if(file->twin_count >= 0) { - rm_fmt_json_key_int(out, "twins", file->twin_count); - rm_fmt_json_sep(self, out); - } - - - if(file->lint_type == RM_LINT_TYPE_PART_OF_DIRECTORY && file->parent_dir) { - rm_fmt_json_key_unsafe(out, "parent_path", rm_directory_get_dirname(file->parent_dir)); - rm_fmt_json_sep(self, out); + if(session->cfg->find_hardlinked_dupes) { + RmFile *hardlink_head = RM_FILE_HARDLINK_HEAD(file); - } - - if(session->cfg->find_hardlinked_dupes) { - RmFile *hardlink_head = RM_FILE_HARDLINK_HEAD(file); - - if(hardlink_head && hardlink_head != file && file->digest) { - char orig_checksum_str[rm_digest_get_bytes(file->digest) * 2 + 1]; - rm_fmt_json_cksum(hardlink_head, orig_checksum_str, - sizeof(orig_checksum_str)); - - RM_DEFINE_PATH(hardlink_head); - - guint32 orig_id = rm_fmt_json_generate_id( - self, hardlink_head, hardlink_head_path, orig_checksum_str); - - rm_fmt_json_key_int(out, "hardlink_of", orig_id); - rm_fmt_json_sep(self, out); - } + if(hardlink_head && hardlink_head != file && file->digest) { + char *orig_checksum_str = rm_fmt_json_cksum(hardlink_head); + RM_DEFINE_PATH(hardlink_head); + guint32 orig_id = rm_fmt_json_generate_id( + self, hardlink_head, hardlink_head_path, orig_checksum_str); + g_free(orig_checksum_str); + json_object_set_int_member(elem, "hardlink_of", orig_id); } } - - rm_fmt_json_key_float(out, "mtime", file->mtime); } - rm_fmt_json_close(self, out); - if(checksum_str != NULL) { - g_slice_free1(checksum_size, checksum_str); - } + json_object_set_double_member(elem, "mtime", file->mtime); + + rm_fmt_object_write_and_free(elem, self); + g_free(checksum_str); } static RmFmtHandlerJSON JSON_HANDLER_IMPL = { /* Initialize parent */ - .parent = - { - .size = sizeof(JSON_HANDLER_IMPL), - .name = "json", - .head = rm_fmt_head, - .elem = rm_fmt_elem, - .prog = NULL, - .foot = 
rm_fmt_foot, - .valid_keys = {"no_header", "no_footer", "no_body", "oneline", "unique", NULL}, - }, - .pretty = true}; + .parent = { + .size = sizeof(JSON_HANDLER_IMPL), + .name = "json", + .head = rm_fmt_head, + .elem = rm_fmt_elem, + .prog = NULL, + .foot = rm_fmt_foot, + .valid_keys = {"no_header", "no_footer", "no_body", "oneline", "unique", NULL}, + }}; RmFmtHandler *JSON_HANDLER = (RmFmtHandler *)&JSON_HANDLER_IMPL; diff --git a/lib/formats/pretty.c b/lib/formats/pretty.c index e962b556f..b6fcb8a46 100644 --- a/lib/formats/pretty.c +++ b/lib/formats/pretty.c @@ -24,6 +24,7 @@ */ #include "../formats.h" +#include "../logger.h" #include "../preprocess.h" #include diff --git a/lib/formats/progressbar.c b/lib/formats/progressbar.c index f899f39bf..fb27cf389 100644 --- a/lib/formats/progressbar.c +++ b/lib/formats/progressbar.c @@ -24,6 +24,7 @@ */ #include "../formats.h" +#include "../logger.h" #include "../utilities.h" #include diff --git a/lib/formats/py.c.in b/lib/formats/py.c.in index 5f30a7411..3833f95df 100644 --- a/lib/formats/py.c.in +++ b/lib/formats/py.c.in @@ -24,6 +24,7 @@ */ #include "../formats.h" +#include "../logger.h" #include "../utilities.h" #include "../preprocess.h" diff --git a/lib/formats/sh.c.in b/lib/formats/sh.c.in index 2d85070d2..a7bc00e5a 100644 --- a/lib/formats/sh.c.in +++ b/lib/formats/sh.c.in @@ -25,6 +25,7 @@ #include "../config.h" #include "../formats.h" +#include "../logger.h" #include "../preprocess.h" #include @@ -98,6 +99,7 @@ static bool rm_sh_emit_handler_clone(RmFmtHandlerShScript *self, char **out, RmF int link_type = rm_util_link_type(dupe_path, orig_path); switch(link_type) { case RM_LINK_REFLINK: + case RM_LINK_INLINE_EXTENTS: *out = g_strdup_printf("skip_reflink '%s' '%s'", dupe_escaped, orig_escaped); return TRUE; case RM_LINK_SAME_FILE: @@ -110,7 +112,6 @@ static bool rm_sh_emit_handler_clone(RmFmtHandlerShScript *self, char **out, RmF rm_log_warning_line("Unexpected return code %d from rm_util_link_type()", 
link_type); return FALSE; case RM_LINK_HARDLINK: - case RM_LINK_MAYBE_REFLINK: case RM_LINK_NONE: *out = g_strdup_printf("clone '%s' '%s'", dupe_escaped, orig_escaped); return TRUE; @@ -144,7 +145,7 @@ static bool rm_sh_emit_handler_reflink(RmFmtHandlerShScript *self, char **out, R return FALSE; case RM_LINK_HARDLINK: case RM_LINK_SYMLINK: - case RM_LINK_MAYBE_REFLINK: + case RM_LINK_INLINE_EXTENTS: case RM_LINK_NONE: *out = g_strdup_printf("cp_reflink '%s' '%s'", dupe_escaped, orig_escaped); return TRUE; @@ -303,7 +304,7 @@ static char *rm_fmt_sh_get_extra_dedupe_args(RmSession *session) { RmCfg *cfg = session->cfg; if(cfg->write_cksum_to_xattr) { - return " --dedupe-xattr"; + return " --xattr"; } return ""; diff --git a/lib/formats/sh.sh b/lib/formats/sh.sh index b5e495d55..ee956eeb8 100644 --- a/lib/formats/sh.sh +++ b/lib/formats/sh.sh @@ -235,7 +235,7 @@ clone() { printf "${COL_YELLOW}Cloning to: ${COL_RESET}%%s\n" "$1" if [ -z "$DO_DRY_RUN" ]; then if [ -n "$DO_CLONE_READONLY" ]; then - $SUDO_COMMAND $RMLINT_BINARY --dedupe %s --dedupe-readonly -- "$2" "$1" + $SUDO_COMMAND $RMLINT_BINARY --dedupe %s --readonly -- "$2" "$1" else $RMLINT_BINARY --dedupe %s -- "$2" "$1" fi diff --git a/lib/formats/stats.c b/lib/formats/stats.c index 246ab2131..1d81b7dd5 100644 --- a/lib/formats/stats.c +++ b/lib/formats/stats.c @@ -24,6 +24,7 @@ */ #include "../formats.h" +#include "../logger.h" #include #include diff --git a/lib/formats/summary.c b/lib/formats/summary.c index 5424e6ca3..6cc42411e 100644 --- a/lib/formats/summary.c +++ b/lib/formats/summary.c @@ -24,6 +24,7 @@ */ #include "../formats.h" +#include "../logger.h" #include #include @@ -55,7 +56,7 @@ static void rm_fmt_prog(RmSession *session, return; } - if(rm_session_was_aborted()) { + if(rm_session_was_aborted() || session->equal_exit_code==EXIT_FAILURE) { /* Clear the whole terminal line. * Progressbar might leave some junk. 
*/ diff --git a/lib/gui.c b/lib/gui.c new file mode 100644 index 000000000..982ad8ec4 --- /dev/null +++ b/lib/gui.c @@ -0,0 +1,124 @@ +/* + * This file is part of rmlint. + * + * rmlint is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * rmlint is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with rmlint. If not, see . + * + * Authors: + * + * - Christopher Pahl 2010-2020 (https://github.com/sahib) + * - Daniel T. 2014-2020 (https://github.com/SeeSpotRun) + * + * Hosted on http://github.com/sahib/rmlint + * + */ + +#include +#include +#include +#include + +#include "config.h" +#include "logger.h" + +/* + * Debian and Ubuntu based distributions fuck up setuptools + * by expecting packages to be installed to dist-packages and not site-packages + * like expected by setuptools. This breaks a lot of packages with the reasoning + * to reduce conflicts between system and user packages: + * + * https://stackoverflow.com/questions/9387928/whats-the-difference-between-dist-packages-and-site-packages + * + * We try to work around this by manually installing dist-packages to the + * sys.path by first calling a small bootstrap script. + */ +static const char RM_PY_BOOTSTRAP[] = + "" + "# This is a bootstrap script for the rmlint-gui. \n" + "# See the src/rmlint.c in rmlint's source for more info. 
\n" + "import sys, os, site \n" + " \n" + "# Also default to dist-packages on debian(-based): \n" + "sites = site.getsitepackages() \n" + "sys.path.extend([d.replace('dist-packages', 'site-packages') for d in sites]) \n" + "sys.path.extend(sites) \n" + " \n" + "# Cleanup self: \n" + "try: \n" + " os.remove(sys.argv[0]) \n" + "except: \n" + " print('Note: Could not remove bootstrap script at ', sys.argv[0]) \n" + " \n" + "# Run shredder by importing the main: \n" + "try: \n" + " import shredder \n" + " shredder.run_gui() \n" + "except ImportError as err: \n" + " print('Failed to load shredder:', err) \n" + " print('This might be due to a corrupted install; try reinstalling.') \n"; + +int rm_gui_launch(int argc, const char **argv) { + const char *commands[] = {"python3", "python", NULL}; + const char **command = &commands[0]; + + GError *error = NULL; + gchar *bootstrap_path = NULL; + int bootstrap_fd = + g_file_open_tmp(".shredder-bootstrap.py.XXXXXX", &bootstrap_path, &error); + + if(bootstrap_fd < 0) { + rm_log_error_line("Could not bootstrap gui: Unable to create tempfile: %s", + error->message); + g_error_free(error); + return EXIT_FAILURE; + } + + if(write(bootstrap_fd, RM_PY_BOOTSTRAP, sizeof(RM_PY_BOOTSTRAP)) < 0) { + rm_log_warning_line("Could not bootstrap gui: Unable to write to tempfile: %s", + g_strerror(errno)); + return EXIT_FAILURE; + } + + close(bootstrap_fd); + + while(*command) { + const char *all_argv[512]; + const char **argp = &all_argv[0]; + memset(all_argv, 0, sizeof(all_argv)); + + *argp++ = *command; + *argp++ = bootstrap_path; + + for(size_t i = 1; i < (size_t)argc && i < sizeof(all_argv) / 2; i++) { + *argp++ = argv[i]; + } + + if(execvp(*command, (char *const *)all_argv) == -1) { + rm_log_warning("Executed: %s ", *command); + for(int j = 0; j < (argp - all_argv); j++) { + rm_log_warning("%s ", all_argv[j]); + } + rm_log_warning("\n"); + rm_log_error_line("%s %d", g_strerror(errno), errno == ENOENT); + } else { + /* This is not reached 
anymore when execvp succeeded */ + return EXIT_SUCCESS; + } + + /* Try next command... */ + command++; + } + + rm_log_error_line("Could not launch gui"); + return EXIT_FAILURE; +} diff --git a/lib/gui.h b/lib/gui.h new file mode 100644 index 000000000..cfd3fe717 --- /dev/null +++ b/lib/gui.h @@ -0,0 +1,44 @@ +/* + * This file is part of rmlint. + * + * rmlint is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * rmlint is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with rmlint. If not, see . + * + * Authors: + * + * - Christopher Pahl 2010-2020 (https://github.com/sahib) + * - Daniel T. 
2014-2020 (https://github.com/SeeSpotRun) + * + * Hosted on http://github.com/sahib/rmlint + * + */ + +#ifndef RM_GUI_H +#define RM_GUI_H + +/** + * @file gui.h + * @brief Launch rmlint gui session (python) + **/ + +/** + * @brief Launch rmlint gui session (python) + * + * @param argc arg count passed from main + * @param argv command line args; argv[0] is "shredder" + * @retval EXIT_SUCCESS or EXIT_FAILURE + * + **/ +int rm_gui_launch(int argc, const char **argv); + +#endif /* end of include guard */ diff --git a/lib/hash-utility.c b/lib/hash-utility.c index 7a8f2e11c..506a594c2 100644 --- a/lib/hash-utility.c +++ b/lib/hash-utility.c @@ -30,6 +30,7 @@ #include "../lib/config.h" #include "../lib/hasher.h" +#include "../lib/logger.h" #include "../lib/utilities.h" typedef struct RmHasherSession { diff --git a/lib/hasher.c b/lib/hasher.c index 187dfd9c6..f941cdd87 100644 --- a/lib/hasher.c +++ b/lib/hasher.c @@ -30,6 +30,7 @@ #include #include "hasher.h" +#include "logger.h" #include "utilities.h" /* Flags for the fadvise() call that tells the kernel diff --git a/lib/logger.c b/lib/logger.c new file mode 100644 index 000000000..becc2fa69 --- /dev/null +++ b/lib/logger.c @@ -0,0 +1,87 @@ +/* + * This file is part of rmlint. + * + * rmlint is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * rmlint is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with rmlint. If not, see . + * + * Authors: + * + * - Christopher Pahl 2010-2020 (https://github.com/sahib) + * - Daniel T. 
2014-2020 (https://github.com/SeeSpotRun) + * + * Hosted on http://github.com/sahib/rmlint + * + */ + +#include "logger.h" + +#include +#include +#include + +static gboolean with_stderr_color = TRUE; + +static GLogLevelFlags VERBOSITY_TO_LOG_LEVEL[] = { + [0] = G_LOG_LEVEL_CRITICAL, + [1] = G_LOG_LEVEL_ERROR, + [2] = G_LOG_LEVEL_WARNING, + [3] = G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO, + [4] = G_LOG_LEVEL_DEBUG}; + +static gint verbosity = 2; +static GLogLevelFlags min_log_level = G_LOG_LEVEL_WARNING; + +static char *remove_color_escapes(char *message) { + char *dst = message; + for(char *src = message; src && *src; src++) { + if(*src == '\x1b') { + src = strchr(src, 'm'); + } else { + *dst++ = *src; + } + } + + if(dst) { + *dst = 0; + } + return message; +} + +void rm_logger_callback(_UNUSED const gchar *log_domain, + GLogLevelFlags log_level, + const gchar *message, + _UNUSED gpointer user_data) { + if(min_log_level >= log_level) { + if(!with_stderr_color) { + message = remove_color_escapes((char *)message); + } + fputs(message, stderr); + } +} + +void rm_logger_set_verbosity(const gint new_verbosity) { + verbosity = new_verbosity; + min_log_level = VERBOSITY_TO_LOG_LEVEL[CLAMP( + verbosity, + 1, + (int)(sizeof(VERBOSITY_TO_LOG_LEVEL) / sizeof(GLogLevelFlags)) - 1)]; +} + +void rm_logger_set_pretty(const gboolean is_pretty) { + with_stderr_color = is_pretty; +} + +void rm_logger_incr_verbosity_by(const gint incr) { + verbosity += incr; + rm_logger_set_verbosity(verbosity); +} diff --git a/lib/logger.h b/lib/logger.h new file mode 100644 index 000000000..d6dc9afcf --- /dev/null +++ b/lib/logger.h @@ -0,0 +1,156 @@ +/* + * This file is part of rmlint. + * + * rmlint is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * rmlint is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with rmlint. If not, see . + * + * Authors: + * + * - Christopher Pahl 2010-2020 (https://github.com/sahib) + * - Daniel T. 2014-2020 (https://github.com/SeeSpotRun) + * + * Hosted on http://github.com/sahib/rmlint + * + */ + +#ifndef RM_LOGGER_H +#define RM_LOGGER_H + +#include + +#include "config.h" + +/** + * @file logger.h + * @brief High level API for debug / error logging to STDERR + * + **/ + +#define rm_log_debug(...) g_log("rmlint", G_LOG_LEVEL_DEBUG, __VA_ARGS__) +#define rm_log_info(...) g_log("rmlint", G_LOG_LEVEL_INFO, __VA_ARGS__) +#define rm_log_warning(...) g_log("rmlint", G_LOG_LEVEL_WARNING, __VA_ARGS__) +#define rm_log_error(...) g_log("rmlint", G_LOG_LEVEL_CRITICAL, __VA_ARGS__) + +#define rm_log_perror(message) \ + if(errno) { \ + rm_log_error_line( \ + "%s:%d: %s: %s", __FILE__, __LINE__, message, g_strerror(errno)); \ + } + +#define rm_log_perrorf(message, ...) 
\ + if(errno) { \ + int _errsv = errno; \ + char *msg = g_strdup_printf(message, __VA_ARGS__); \ + rm_log_error_line("%s:%d: %s: %s", __FILE__, __LINE__, msg, g_strerror(_errsv)); \ + g_free(msg); \ + } + +static inline GMutex *rm_log_get_mutex(void) { + static GMutex RM_LOG_MTX; + return &RM_LOG_MTX; +} + +#define RM_LOG_INIT g_mutex_init(rm_log_get_mutex()); + +/* These colors should only be used with the rm_log_* macros below */ +#define RED "\x1b[31;01m" +#define YELLOW "\x1b[33;01m" +#define RESET "\x1b[0m" +#define GREEN "\x1b[32;01m" +#define BLUE "\x1b[34;01m" + +/* Stupid macros to make printing error lines easier */ +#define rm_log_error_prefix() \ + rm_log_error(RED); \ + rm_log_error(_("ERROR")); \ + rm_log_error(": " RESET); + +#define rm_log_warning_prefix() \ + rm_log_warning(YELLOW); \ + rm_log_warning(_("WARNING")); \ + rm_log_warning(": " RESET); + +#define rm_log_info_prefix() \ + rm_log_warning(GREEN); \ + rm_log_warning(_("INFO")); \ + rm_log_warning(": " RESET); + +#define rm_log_debug_prefix() \ + rm_log_debug(BLUE); \ + rm_log_debug(_("DEBUG")); \ + rm_log_debug(": " RESET); + +/////////////// + +#define rm_log_error_line(...) \ + g_mutex_lock(rm_log_get_mutex()); \ + rm_log_error_prefix() rm_log_error(__VA_ARGS__); \ + rm_log_error("\n"); \ + g_mutex_unlock(rm_log_get_mutex()); + +#define rm_log_warning_line(...) \ + g_mutex_lock(rm_log_get_mutex()); \ + rm_log_warning_prefix() rm_log_warning(__VA_ARGS__); \ + rm_log_warning("\n"); \ + g_mutex_unlock(rm_log_get_mutex()); + +#define rm_log_info_line(...) \ + g_mutex_lock(rm_log_get_mutex()); \ + rm_log_info_prefix() rm_log_warning(__VA_ARGS__); \ + rm_log_warning("\n"); \ + g_mutex_unlock(rm_log_get_mutex()); + +#define rm_log_debug_line(...) \ + g_mutex_lock(rm_log_get_mutex()); \ + rm_log_debug_prefix() rm_log_debug(__VA_ARGS__); \ + rm_log_debug("\n"); \ + g_mutex_unlock(rm_log_get_mutex()); + +/* Domain for reporting errors. 
Needed by GOptions */ +#define RM_ERROR_QUARK (g_quark_from_static_string("rmlint")) + +/** + * @brief + * + * @param + * @retval + **/ + +void rm_logger_callback(_UNUSED const gchar *log_domain, + GLogLevelFlags log_level, + const gchar *message, + _UNUSED gpointer user_data); + +void rm_logger_set_pretty(const gboolean is_pretty); + +void rm_logger_set_verbosity(const gint new_verbosity); + +void rm_logger_incr_verbosity_by(const gint incr); + +static inline gboolean rm_logger_louder(_UNUSED const char *option_name, + _UNUSED const gchar *count, + _UNUSED gpointer user_data, + _UNUSED GError **error) { + rm_logger_incr_verbosity_by(1); + return TRUE; +} + +static inline gboolean rm_logger_quieter(_UNUSED const char *option_name, + _UNUSED const gchar *count, + _UNUSED gpointer user_data, + _UNUSED GError **error) { + rm_logger_incr_verbosity_by(-1); + return TRUE; +} + +#endif /* end of include guard */ diff --git a/lib/md-scheduler.c b/lib/md-scheduler.c index 8a1baa38a..92c52dfe7 100644 --- a/lib/md-scheduler.c +++ b/lib/md-scheduler.c @@ -24,6 +24,7 @@ */ #include "md-scheduler.h" +#include "logger.h" /* How many milliseconds to sleep if we encounter an empty file queue. * This prevents a "starving" RmShredDevice from hogging cpu and cluttering up diff --git a/lib/preprocess.c b/lib/preprocess.c index e798cf1b9..ab1d6bb34 100644 --- a/lib/preprocess.c +++ b/lib/preprocess.c @@ -30,6 +30,7 @@ #include "cmdline.h" #include "formats.h" +#include "logger.h" #include "preprocess.h" #include "shredder.h" #include "utilities.h" diff --git a/lib/reflink.c b/lib/reflink.c new file mode 100644 index 000000000..dd263205b --- /dev/null +++ b/lib/reflink.c @@ -0,0 +1,513 @@ +/* + * This file is part of rmlint. + * + * rmlint is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * rmlint is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with rmlint. If not, see <http://www.gnu.org/licenses/>. + * + * Authors: + * + * - Christopher Pahl 2010-2020 (https://github.com/sahib) + * - Daniel T. 2014-2020 (https://github.com/SeeSpotRun) + * + * Hosted on http://github.com/sahib/rmlint + * + */ + +#include +#include +#include +#include +#include + +#include "config.h" + +#if HAVE_BTRFS_H +#include +#endif + +#if HAVE_LINUX_FS_H +#include +#endif + +#ifdef FIDEDUPERANGE +#define HAVE_FIDEDUPERANGE 1 +#else +#define HAVE_FIDEDUPERANGE 0 +#endif + +#include "logger.h" +#include "utilities.h" +#include "xattr.h" + +/* FIDEDUPERANGE supersedes the btrfs-only BTRFS_IOC_FILE_EXTENT_SAME as of Linux 4.5 and + * should work for ocfs2 and xfs as well as btrfs. We should still support the older + * btrfs ioctl so that this still works on Linux 4.2 to 4.4. The two ioctl's are + * identical apart from field names so we can use #define's to accommodate both. */ + +/* TODO: test this on system running kernel 4.[2|3|4] ; if the c headers + * support FIDEDUPERANGE but kernel doesn't, then this will fail at runtime + * because the choice between FIDEDUPERANGE and BTRFS_IOC_FILE_EXTENT_SAME is decided at compile time...
+ */ + +#if HAVE_FIDEDUPERANGE +# define _DEDUPE_IOCTL_NAME "FIDEDUPERANGE" +# define _DEDUPE_IOCTL FIDEDUPERANGE +# define _DEST_FD dest_fd +# define _SRC_OFFSET src_offset +# define _DEST_OFFSET dest_offset +# define _SRC_LENGTH src_length +# define _DATA_DIFFERS FILE_DEDUPE_RANGE_DIFFERS +# define _FILE_DEDUPE_RANGE file_dedupe_range +# define _FILE_DEDUPE_RANGE_INFO file_dedupe_range_info +# define _MIN_LINUX_SUBVERSION 5 +#else +# define _DEDUPE_IOCTL_NAME "BTRFS_IOC_FILE_EXTENT_SAME" +# define _DEDUPE_IOCTL BTRFS_IOC_FILE_EXTENT_SAME +# define _DEST_FD fd +# define _SRC_OFFSET logical_offset +# define _DEST_OFFSET logical_offset +# define _SRC_LENGTH length +# define _DATA_DIFFERS BTRFS_SAME_DATA_DIFFERS +# define _FILE_DEDUPE_RANGE btrfs_ioctl_same_args +# define _FILE_DEDUPE_RANGE_INFO btrfs_ioctl_same_extent_info +# define _MIN_LINUX_SUBVERSION 2 +#endif + + +RmLinkType rm_reflink_type_from_fd(int fd1, int fd2, guint64 file_size) { +#if HAVE_FIEMAP + + RmOff logical_current = 0; + + bool is_last_1 = false; + bool is_last_2 = false; + bool is_inline_1 = false; + bool is_inline_2 = false; + bool at_least_one_checked = false; + + while(!rm_session_was_aborted()) { + RmOff logical_next_1 = 0; + RmOff logical_next_2 = 0; + + RmOff physical_1 = rm_offset_get_from_fd(fd1, logical_current, &logical_next_1, &is_last_1, &is_inline_1); + RmOff physical_2 = rm_offset_get_from_fd(fd2, logical_current, &logical_next_2, &is_last_2, &is_inline_2); + + if(is_last_1 != is_last_2) { + return RM_LINK_NONE; + } + + if(is_last_1 && is_last_2 && at_least_one_checked) { + return RM_LINK_REFLINK; + } + + if(physical_1 != physical_2) { +#if _RM_OFFSET_DEBUG + rm_log_debug_line("Physical offsets differ at byte %" G_GUINT64_FORMAT + ": %"G_GUINT64_FORMAT "<> %" G_GUINT64_FORMAT, + logical_current, physical_1, physical_2); +#endif + return RM_LINK_NONE; + } + if(logical_next_1 != logical_next_2) { +#if _RM_OFFSET_DEBUG + rm_log_debug_line("File offsets differ after %" G_GUINT64_FORMAT 
+ " bytes: %" G_GUINT64_FORMAT "<> %" G_GUINT64_FORMAT, + logical_current, logical_next_1, logical_next_2); +#endif + return RM_LINK_NONE; + } + + if(is_inline_1 || is_inline_2) { + return RM_LINK_INLINE_EXTENTS; + } + + if(physical_1 == 0) { +#if _RM_OFFSET_DEBUG + rm_log_debug_line( + "Can't determine whether files are clones"); +#endif + return RM_LINK_ERROR; + } + +#if _RM_OFFSET_DEBUG + rm_log_debug_line("Offsets match at fd1=%d, fd2=%d, logical=%" G_GUINT64_FORMAT ", physical=%" G_GUINT64_FORMAT, + fd1, fd2, logical_current, physical_1); +#endif + if(logical_next_1 <= logical_current) { + /* oops we seem to be getting nowhere (this shouldn't really happen) */ + rm_log_info_line( + "rm_util_link_type() giving up: file1_offset_next<=file_offset_current"); + return RM_LINK_ERROR; + } + + if(logical_next_1 >= (guint64)file_size) { + /* phew, we got to the end */ +#if _RM_OFFSET_DEBUG + rm_log_debug_line("Files are clones (share same data)") +#endif + return RM_LINK_REFLINK; + } + + logical_current = logical_next_1; + at_least_one_checked = true; + } + + return RM_LINK_ERROR; +#else + return RM_LINK_NONE; +#endif +} + +static void print_usage(GOptionContext *context) { + char* usage = g_option_context_get_help(context, TRUE, NULL); + printf("%s", usage); + g_free(usage); +} + + +/** + * *********** dedupe session main ************ + **/ +int rm_dedupe_main(int argc, const char **argv) { + + + gboolean check_xattr = FALSE; + gboolean dedupe_readonly = FALSE; + gboolean follow_symlinks = FALSE; + gboolean skip_inline_extents = TRUE; + + const GOptionEntry options[] = { + {"xattr" , 'x' , 0 , G_OPTION_ARG_NONE , &check_xattr , _("Check extended attributes to see if the file is already deduplicated") , NULL}, + {"readonly" , 'r' , 0 , G_OPTION_ARG_NONE , &dedupe_readonly , _("Even dedupe read-only snapshots (needs root)") , NULL}, + {"followlinks" , 'f' , 0 , G_OPTION_ARG_NONE , &follow_symlinks , _("Follow symlinks") , NULL}, + {"inline-extents", 'i' , 
G_OPTION_FLAG_REVERSE , G_OPTION_ARG_NONE , &follow_symlinks , _("Try to dedupe files with inline extents") , NULL}, + {"loud" , 'v' , G_OPTION_FLAG_NO_ARG , G_OPTION_ARG_CALLBACK , rm_logger_louder , _("Be more verbose (-vvv for much more)") , NULL}, + {"quiet" , 'V' , G_OPTION_FLAG_NO_ARG , G_OPTION_ARG_CALLBACK , rm_logger_quieter , _("Be less verbose (-VVV for much less)") , NULL}, + {NULL , 0 , 0 , 0 , NULL , NULL , NULL}}; + + + GError *error = NULL; + GOptionContext *context = g_option_context_new ("file1 file2"); + g_option_context_add_main_entries (context, options, NULL); + g_option_context_set_help_enabled(context, TRUE); + g_option_context_set_summary(context, _("Dedupe matching extents from source to dest (if filesystem supports)")); + + if (!g_option_context_parse (context, &argc, (char ***)&argv, &error)) + { + rm_log_error_line(_("Error parsing command line:\n%s"), error->message); + return(EXIT_FAILURE); + } + +#if HAVE_FIDEDUPERANGE || HAVE_BTRFS_H + if(argc != 3) { + rm_log_error("Error: rmlint --dedupe %s\n\n", _("must have exactly two files\n\n")); + print_usage(context); + return EXIT_FAILURE; + } + + g_option_context_free(context); + + const char* source_path = argv[1]; + const char* dest_path = argv[2]; + + + rm_log_debug_line("Cloning %s -> %s", source_path, dest_path); + + if(check_xattr) { + // Check if we actually need to deduplicate. + // This utility will write a value to the extended attributes + // of the file so we know that we do not need to do it again + // next time. This is supposed to avoid disk thrashing. 
+ // (See also: https://github.com/sahib/rmlint/issues/349) + if(rm_xattr_is_deduplicated(dest_path, follow_symlinks)) { + rm_log_debug_line("Already deduplicated according to xattr!"); + return EXIT_SUCCESS; + } + } + + RmLinkType link_type = rm_util_link_type(source_path, dest_path); + if(link_type == RM_LINK_REFLINK) { + rm_log_debug_line("Already an exact reflink!"); + return EXIT_SUCCESS; + } + else if (link_type == RM_LINK_INLINE_EXTENTS && skip_inline_extents) { + rm_log_debug_line("Skipping files with inline extents"); + return EXIT_SUCCESS; + } + + int source_fd = rm_sys_open(source_path, O_RDONLY); + if(source_fd < 0) { + rm_log_error_line(_("dedupe: failed to open source file")); + return EXIT_FAILURE; + } + + int open_mode = dedupe_readonly ? O_RDONLY : O_RDWR; + + char *cloneto_path = NULL; + int cloneto_fd; + + if(link_type == RM_LINK_HARDLINK) { + rm_log_debug_line("dedupe: renaming hardlink so we can clone to it"); + cloneto_path = g_strconcat(dest_path, ".XXXXXX", NULL); + cloneto_fd = g_mkstemp(cloneto_path); + if(cloneto_fd == -1) { + rm_log_error_line(_("dedupe: failed to create temp file")); + rm_sys_close(source_fd); + return EXIT_FAILURE; + } + open_mode = O_CREAT | O_WRONLY; + } else { + cloneto_fd = rm_sys_open(dest_path, open_mode); + cloneto_path = g_strdup(dest_path); + } + + int result = EXIT_SUCCESS; + RmStat source_stat; + + if(cloneto_fd < 0) { + rm_log_error_line(_("dedupe: error %i: failed to open dest file.%s"), + errno, + dedupe_readonly + ? 
"" + : _("\n\t(if target is a read-only snapshot " + "then -r option is required)")); + result = EXIT_FAILURE; + } else if(rm_sys_stat(source_path, &source_stat) < 0) { + rm_log_error_line("failed to stat %s: %s", source_path, g_strerror(errno)); + result = EXIT_FAILURE; + } else if(link_type == RM_LINK_HARDLINK) { +#ifdef FICLONE + rm_log_debug_line("dedupe: creating clone"); + if(ioctl(cloneto_fd, FICLONE, source_fd) == -1) { + // create hardlink instead + rm_log_warning_line(_("dedupe: error %s create clone via FICLONE; original " + "hardlink left unchanged"), + g_strerror(errno)); + unlink(cloneto_path); + result = EXIT_FAILURE; + } else { + // Copy metadata from original to clone + struct utimbuf puttime; + puttime.modtime = source_stat.st_mtime; + puttime.actime = source_stat.st_atime; + + if(utime(cloneto_path, &puttime)) { + rm_log_warning_line("dedupe: failed to preserve times for %s", + source_path); + } + + if(lchown(cloneto_path, source_stat.st_uid, source_stat.st_gid) != 0) { + rm_log_warning_line("dedupe: failed to preserve ownership for %s", + source_path); + // try to preserve group ID + (void)lchown(cloneto_path, -1, source_stat.st_gid); + } + + if(lchmod(cloneto_path, source_stat.st_mode) != 0) { + rm_log_warning_line("dedupe: failed to preserve permissions for %s", + source_path); + } + } + rm_sys_close(cloneto_fd); + cloneto_fd = -1; + /* atomically rename temp file to over-write dest */ + if(rename(cloneto_path, dest_path) != 0) { + rm_log_error_line("Clone rename from '%s' to '%s' failed", cloneto_path, + dest_path); + // probably safer to leave a mess than to: + // unlink(cloneto_path); + result = EXIT_FAILURE; + } +#else + rm_log_error_line(_("dedupe: Can't create clone of hardlink because FICLONE not " + "defined on your system"), + g_strerror(errno)); + result = EXIT_FAILURE; +#endif + } else { + gint64 bytes_deduped = 0; + /* a poorly-documented limit for dedupe ioctl's */ + static const gint64 max_dedupe_chunk = 16 * 1024 * 1024; + + /* 
how fine a resolution to use once difference detected; + * use btrfs default node size (16k): */ + static const gint64 min_dedupe_chunk = 16 * 1024; + + rm_log_debug_line("Cloning using %s", _DEDUPE_IOCTL_NAME); + + struct { + struct _FILE_DEDUPE_RANGE args; + struct _FILE_DEDUPE_RANGE_INFO info; + } dedupe; + memset(&dedupe, 0, sizeof(dedupe)); + dedupe.info._DEST_FD = cloneto_fd; + + /* fsync's needed to flush extent mapping */ + if(fsync(source_fd) != 0) { + rm_log_warning_line("Error syncing source file %s: %s", source_path, + strerror(errno)); + } + + if(fsync(dedupe.info._DEST_FD) != 0) { + rm_log_warning_line("Error syncing dest file %s: %s", dest_path, + strerror(errno)); + } + + int ret = 0; + gint64 dedupe_chunk = max_dedupe_chunk; + while(bytes_deduped < source_stat.st_size && !rm_session_was_aborted()) { + dedupe.args.dest_count = 1; + /* TODO: multiple destinations at same time? */ + dedupe.args._SRC_OFFSET = bytes_deduped; + dedupe.info._DEST_OFFSET = bytes_deduped; + + /* try to dedupe the rest of the file */ + dedupe.args._SRC_LENGTH = + MIN(dedupe_chunk, source_stat.st_size - bytes_deduped); + + ret = ioctl(source_fd, _DEDUPE_IOCTL, &dedupe); + + if(ret != 0) { + break; + } else if(dedupe.info.status == _DATA_DIFFERS) { + if(dedupe_chunk != min_dedupe_chunk) { + dedupe_chunk = min_dedupe_chunk; + rm_log_debug_line("Dropping to %" G_GINT64_FORMAT + "-byte chunks " + "after %" G_GINT64_FORMAT " bytes", + dedupe_chunk, bytes_deduped); + continue; + } else { + break; + } + } else if(dedupe.info.status != 0) { + ret = -dedupe.info.status; + errno = ret; + break; + } else if(dedupe.info.bytes_deduped == 0) { + break; + } + + bytes_deduped += dedupe.info.bytes_deduped; + } + rm_log_debug_line("Bytes deduped: %" G_GINT64_FORMAT, bytes_deduped); + + if(ret != 0) { + rm_log_perrorf(_("%s returned error: (%d)"), _DEDUPE_IOCTL_NAME, ret); + } else if(bytes_deduped == 0) { + rm_log_info_line(_("Files don't match - not deduped")); + } else if(bytes_deduped < 
source_stat.st_size) { + rm_log_info_line(_("Only first %" G_GINT64_FORMAT " bytes deduped " + "- files not fully identical"), + bytes_deduped); + } + + if(bytes_deduped == source_stat.st_size) { + if(check_xattr && !dedupe_readonly) { + rm_xattr_mark_deduplicated(dest_path, follow_symlinks); + } + result = EXIT_SUCCESS; + } else { + result = EXIT_FAILURE; + } + } + rm_sys_close(source_fd); + if(cloneto_fd > 0) { + rm_sys_close(cloneto_fd); + } + g_free(cloneto_path); + + return result; + +#else + (void)cfg; + rm_log_error_line(_("rmlint was not compiled with file cloning support.")) +#endif + + return EXIT_FAILURE; +} + + + +/** + * *********** `rmlint --is-reflink` session main ************ + **/ +int rm_is_reflink_main(int argc, const char **argv) { + + const GOptionEntry options[] = { + {"loud" , 'v' , G_OPTION_FLAG_NO_ARG , G_OPTION_ARG_CALLBACK , rm_logger_louder , _("Be more verbose (-vvv for much more)") , NULL}, + {"quiet" , 'V' , G_OPTION_FLAG_NO_ARG , G_OPTION_ARG_CALLBACK , rm_logger_quieter , _("Be less verbose (-VVV for much less)") , NULL}, + {NULL , 0 , 0 , 0 , NULL , NULL , NULL}}; + + + GError *error = NULL; + GOptionContext *context = g_option_context_new ("file1 file2"); + g_option_context_add_main_entries (context, options, NULL); + g_option_context_set_help_enabled(context, TRUE); + + const char** desc = rm_link_type_to_desc(); + + char *summary = g_strdup_printf( + "%s\n" + "%s\n\n" + "%s\n" + " %i: %s\n" + " %i: %s\n" + " %i: %s\n" + " %i: %s\n" + " %i: %s\n" + " %i: %s\n" + " %i: %s\n" + " %i: %s\n" + " %i: %s\n" + " %i: %s\n", + _("Test if two files are reflinks (share same data extents)"), + _("Returns 0 if the files are reflinks."), + _("Other return codes:"), + RM_LINK_ERROR, desc[RM_LINK_ERROR], + RM_LINK_NOT_FILE, desc[RM_LINK_NOT_FILE], + RM_LINK_WRONG_SIZE, desc[RM_LINK_WRONG_SIZE], + RM_LINK_INLINE_EXTENTS, desc[RM_LINK_INLINE_EXTENTS], + RM_LINK_SAME_FILE, desc[RM_LINK_SAME_FILE], + RM_LINK_PATH_DOUBLE, desc[RM_LINK_PATH_DOUBLE], 
+ RM_LINK_HARDLINK, desc[RM_LINK_HARDLINK], + RM_LINK_SYMLINK, desc[RM_LINK_SYMLINK], + RM_LINK_XDEV, desc[RM_LINK_XDEV], + RM_LINK_NONE, desc[RM_LINK_NONE]); + + + g_option_context_set_summary(context, summary); + + if (!g_option_context_parse (context, &argc, (char ***)&argv, &error)) + { + rm_log_error_line(_("Error parsing command line:\n%s"), error->message); + return(EXIT_FAILURE); + } + + if (argc != 3) { + rm_log_error("Error: rmlint --is-reflink %s\n\n", _("must have exactly two files")); + print_usage(context); + return EXIT_FAILURE; + } + g_option_context_free(context); + g_free(summary); + + const char *a = argv[1]; + const char *b = argv[2]; + rm_log_debug_line("Testing if %s is clone of %s", a, b); + + int result = rm_util_link_type(a, b); + rm_log_info("Link type for '%s' and '%s', result:\n", a, b); + rm_log_warning("%s\n", desc[result]); + return result; +} diff --git a/lib/reflink.h b/lib/reflink.h new file mode 100644 index 000000000..8823e6bfd --- /dev/null +++ b/lib/reflink.h @@ -0,0 +1,67 @@ +/* + * This file is part of rmlint. + * + * rmlint is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * rmlint is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with rmlint. If not, see . + * + * Authors: + * + * - Christopher Pahl 2010-2020 (https://github.com/sahib) + * - Daniel T. 
2014-2020 (https://github.com/SeeSpotRun) + * + * Hosted on http://github.com/sahib/rmlint + * + */ + +#ifndef RM_REFLINK_H +#define RM_REFLINK_H + +#include "utilities.h" + +/** + * @file reflink.h + * @brief Launchers for reflink-related utilities + **/ + +/** + * @brief + * + * @param argc arg count passed from main + * @param argv command line args; argv[0] is normally "rmlint-dedupe" + * @retval EXIT_SUCCESS or EXIT_FAILURE + * + **/ +int rm_dedupe_main(int argc, const char **argv); + +/** + * @brief + * the linux OS doesn't provide any easy way to check if two files are + * reflinks / clones (eg: + * https://unix.stackexchange.com/questions/263309/how-to-verify-a-file-copy-is-reflink-cow + * + * `rmlint --is-reflink file_a file_b` provides this functionality rmlint. + * return values: + * + * + * @param argc arg count passed from main + * @param argv command line args; argv[0] is normally "rmlint-is-reflink" + * @retval EXIT_SUCCESS if clone confirmed, EXIT_FAILURE if definitely not clones, + * Other return values defined in utilities.h 'RmOffsetsMatchCode' enum + * + **/ +int rm_is_reflink_main(int argc, const char **argv); + + +RmLinkType rm_reflink_type_from_fd(int fd1, int fd2, guint64 file_size); + +#endif /* end of include guard */ diff --git a/lib/replay.c b/lib/replay.c index cb99cae5c..85d4bfab6 100644 --- a/lib/replay.c +++ b/lib/replay.c @@ -28,6 +28,7 @@ #include "config.h" #include "file.h" #include "formats.h" +#include "logger.h" #include "preprocess.h" #include "session.h" #include "shredder.h" @@ -38,7 +39,6 @@ #include #include -#if HAVE_JSON_GLIB #include typedef struct RmUnpackedDirectory { @@ -560,7 +560,7 @@ static bool rm_parrot_check_types(RmCfg *cfg, RmFile *file) { case RM_LINT_TYPE_BADUGID: return cfg->find_badids; case RM_LINT_TYPE_UNIQUE_FILE: - return cfg->write_unfinished; + return cfg->hash_uniques || cfg->hash_unmatched; case RM_LINT_TYPE_PART_OF_DIRECTORY: return true; case RM_LINT_TYPE_UNKNOWN: @@ -585,6 +585,11 @@ static 
int rm_parrot_fix_match_opts(RmFile *file, GQueue *group) { return 1; } +/* RmHRFunc to remove file if it matches headfile's basename */ +static int rm_parrot_remove_matched_basenames(RmFile *file, RmFile *head) { + return file != head && rm_file_basenames_cmp(file, head)==0; +} + static int rm_parrot_sort_by_path(gconstpointer a, gconstpointer b, _UNUSED gpointer data) { const RmFile *file_a = a; const RmFile *file_b = b; @@ -715,8 +720,7 @@ static void rm_parrot_cage_write_group(RmParrotCage *cage, GQueue *group, bool p if(cfg->match_with_extension || cfg->match_without_extension - || cfg->match_basename - || cfg->unmatched_basenames) { + || cfg->match_basename) { /* This is probably a sucky way to do it, due to n^2, * but I doubt that will make a large performance difference. */ @@ -726,6 +730,13 @@ static void rm_parrot_cage_write_group(RmParrotCage *cage, GQueue *group, bool p group ); } + if(cfg->unmatched_basenames) { + rm_util_queue_foreach_remove( + group, + (RmRFunc)rm_parrot_remove_matched_basenames, + group->head->data + ); + }; rm_parrot_fix_must_match_tagged(cage, group); rm_parrot_fix_duplicate_entries(cage, group); @@ -961,22 +972,3 @@ void rm_parrot_cage_close(RmParrotCage *cage) { } } -#else - -bool rm_parrot_cage_load(_UNUSED RmParrotCage *cage, _UNUSED const char *json_path, - _UNUSED bool is_prefd) { - return false; -} - -void rm_parrot_cage_open(_UNUSED RmParrotCage *cage, _UNUSED RmSession *session) { - rm_log_error_line(_("json-glib is needed for using --replay.")); - rm_log_error_line(_("Please recompile `rmlint` with it installed.")); -} - -void rm_parrot_cage_flush(_UNUSED RmParrotCage *cage) { -} - -void rm_parrot_cage_close(_UNUSED RmParrotCage *cage) { -} - -#endif diff --git a/lib/session.c b/lib/session.c index d97ed0b4a..fbcd07a2e 100644 --- a/lib/session.c +++ b/lib/session.c @@ -29,28 +29,12 @@ #include "config.h" #include "formats.h" +#include "logger.h" #include "preprocess.h" #include "session.h" #include "traverse.h" #include 
"xattr.h" -#if HAVE_BTRFS_H -#include -#endif - -#if HAVE_LINUX_FS_H -#include -#endif - -#ifdef FIDEDUPERANGE -#define HAVE_FIDEDUPERANGE 1 -#else -#define HAVE_FIDEDUPERANGE 0 -#endif - -#if HAVE_BTRFS_H || HAVE_FIDEDUPERANGE -#include -#endif #if HAVE_UNAME #include "sys/utsname.h" @@ -166,239 +150,3 @@ void rm_session_acknowledge_abort(const gint abort_count) { } g_mutex_unlock(&m); } - -/* FIDEDUPERANGE supercedes the btrfs-only BTRFS_IOC_FILE_EXTENT_SAME as of Linux 4.5 and - * should work for ocfs2 and xfs as well as btrfs. We should still support the older - * btrfs ioctl so that this still works on Linux 4.2 to 4.4. The two ioctl's are - * identical apart from field names so we can use #define's to accommodate both. */ - -/* TODO: test this on system running kernel 4.[2|3|4] ; if the c headers - * support FIDEDUPERANGE but kernel doesn't, then this will fail at runtime - * because the BTRFS_IOC_FILE_EXTENT_SAME is decided at compile time... - */ - -#if HAVE_FIDEDUPERANGE -# define _DEDUPE_IOCTL_NAME "FIDEDUPERANGE" -# define _DEDUPE_IOCTL FIDEDUPERANGE -# define _DEST_FD dest_fd -# define _SRC_OFFSET src_offset -# define _DEST_OFFSET dest_offset -# define _SRC_LENGTH src_length -# define _DATA_DIFFERS FILE_DEDUPE_RANGE_DIFFERS -# define _FILE_DEDUPE_RANGE file_dedupe_range -# define _FILE_DEDUPE_RANGE_INFO file_dedupe_range_info -# define _MIN_LINUX_SUBVERSION 5 -#else -# define _DEDUPE_IOCTL_NAME "BTRFS_IOC_FILE_EXTENT_SAME" -# define _DEDUPE_IOCTL BTRFS_IOC_FILE_EXTENT_SAME -# define _DEST_FD fd -# define _SRC_OFFSET logical_offset -# define _DEST_OFFSET logical_offset -# define _SRC_LENGTH length -# define _DATA_DIFFERS BTRFS_SAME_DATA_DIFFERS -# define _FILE_DEDUPE_RANGE btrfs_ioctl_same_args -# define _FILE_DEDUPE_RANGE_INFO btrfs_ioctl_same_extent_info -# define _MIN_LINUX_SUBVERSION 2 -#endif - -/** - * *********** dedupe session main ************ - **/ -int rm_session_dedupe_main(RmCfg *cfg) { -#if HAVE_FIDEDUPERANGE || HAVE_BTRFS_H - 
g_assert(cfg->path_count == g_slist_length(cfg->paths)); - if(cfg->path_count != 2) { - rm_log_error(_("Usage: rmlint --dedupe [-r] [-v|V] source dest\n")); - return EXIT_FAILURE; - } - - g_assert(cfg->paths); - RmPath *dest = cfg->paths->data; - g_assert(cfg->paths->next); - RmPath *source = cfg->paths->next->data; - rm_log_debug_line("Cloning %s -> %s", source->path, dest->path); - - if(cfg->dedupe_check_xattr) { - // Check if we actually need to deduplicate. - // This utility will write a value to the extended attributes - // of the file so we know that we do not need to do it again - // next time. This is supposed to avoid disk thrashing. - // (See also: https://github.com/sahib/rmlint/issues/349) - if(rm_xattr_is_deduplicated(dest->path, cfg->follow_symlinks)) { - rm_log_debug_line("Already deduplicated according to xattr!"); - return EXIT_SUCCESS; - } - } - - // Also use --is-reflink on both files before doing extra work: - if(rm_util_link_type(source->path, dest->path) == RM_LINK_REFLINK) { - rm_log_debug_line("Already an exact reflink!"); - return EXIT_SUCCESS; - } - - int source_fd = rm_sys_open(source->path, O_RDONLY); - if(source_fd < 0) { - rm_log_error_line(_("dedupe: failed to open source file")); - return EXIT_FAILURE; - } - - struct stat source_stat; - fstat(source_fd, &source_stat); - gint64 bytes_deduped = 0; - - /* a poorly-documented limit for dedupe ioctl's */ - static const gint64 max_dedupe_chunk = 16 * 1024 * 1024; - - /* how fine a resolution to use once difference detected; - * use btrfs default node size (16k): */ - static const gint64 min_dedupe_chunk = 16 * 1024; - - rm_log_debug_line("Cloning using %s", _DEDUPE_IOCTL_NAME); - - if(!rm_session_check_kernel_version(4, _MIN_LINUX_SUBVERSION)) { - rm_log_warning_line("This needs at least linux >= 4.%d.", _MIN_LINUX_SUBVERSION); - return EXIT_FAILURE; - } - - struct { - struct _FILE_DEDUPE_RANGE args; - struct _FILE_DEDUPE_RANGE_INFO info; - } dedupe; - memset(&dedupe, 0, sizeof(dedupe)); - 
- dedupe.info._DEST_FD = - rm_sys_open(dest->path, cfg->dedupe_readonly ? O_RDONLY : O_RDWR); - - if(dedupe.info._DEST_FD < 0) { - rm_log_error_line( - _("dedupe: error %i: failed to open dest file.%s"), - errno, - cfg->dedupe_readonly ? "" : _("\n\t(if target is a read-only snapshot " - "then -r option is required)")); - rm_sys_close(source_fd); - return EXIT_FAILURE; - } - - /* fsync's needed to flush extent mapping */ - if(fsync(source_fd) != 0) { - rm_log_warning_line("Error syncing source file %s: %s", source->path, - strerror(errno)); - } - - if(fsync(dedupe.info._DEST_FD) != 0) { - rm_log_warning_line("Error syncing dest file %s: %s", dest->path, - strerror(errno)); - } - - int ret = 0; - gint64 dedupe_chunk = max_dedupe_chunk; - while(bytes_deduped < source_stat.st_size && !rm_session_was_aborted()) { - dedupe.args.dest_count = 1; - /* TODO: multiple destinations at same time? */ - dedupe.args._SRC_OFFSET = bytes_deduped; - dedupe.info._DEST_OFFSET = bytes_deduped; - - /* try to dedupe the rest of the file */ - dedupe.args._SRC_LENGTH = MIN(dedupe_chunk, source_stat.st_size - bytes_deduped); - - ret = ioctl(source_fd, _DEDUPE_IOCTL, &dedupe); - - if(ret != 0) { - break; - } else if(dedupe.info.status == _DATA_DIFFERS) { - if(dedupe_chunk != min_dedupe_chunk) { - dedupe_chunk = min_dedupe_chunk; - rm_log_debug_line("Dropping to %"G_GINT64_FORMAT"-byte chunks " - "after %"G_GINT64_FORMAT" bytes", - dedupe_chunk, bytes_deduped); - continue; - } else { - break; - } - } else if(dedupe.info.status != 0) { - ret = -dedupe.info.status; - errno = ret; - break; - } else if(dedupe.info.bytes_deduped == 0) { - break; - } - - bytes_deduped += dedupe.info.bytes_deduped; - } - rm_log_debug_line("Bytes deduped: %"G_GINT64_FORMAT, bytes_deduped); - - if (ret!=0) { - rm_log_perrorf(_("%s returned error: (%d)"), _DEDUPE_IOCTL_NAME, ret); - } else if(bytes_deduped == 0) { - rm_log_info_line(_("Files don't match - not deduped")); - } else if(bytes_deduped < source_stat.st_size) 
{ - rm_log_info_line(_("Only first %"G_GINT64_FORMAT" bytes deduped " - "- files not fully identical"), - bytes_deduped); - } - - rm_sys_close(source_fd); - rm_sys_close(dedupe.info._DEST_FD); - - if(bytes_deduped == source_stat.st_size) { - if(cfg->dedupe_check_xattr && !cfg->dedupe_readonly) { - rm_xattr_mark_deduplicated(dest->path, cfg->follow_symlinks); - } - - return EXIT_SUCCESS; - } - -#else - (void)cfg; - rm_log_error_line(_("rmlint was not compiled with file cloning support.")) -#endif - - return EXIT_FAILURE; -} - -/** - * *********** `rmlint --is-reflink` session main ************ - **/ -int rm_session_is_reflink_main(RmCfg *cfg) { - /* the linux OS doesn't provide any easy way to check if two files are - * reflinks / clones (eg: - * https://unix.stackexchange.com/questions/263309/how-to-verify-a-file-copy-is-reflink-cow - * - * `rmlint --is-clone file_a file_b` provides this functionality rmlint. - * return values: - * EXIT_SUCCESS if clone confirmed - * EXIT_FAILURE if definitely not clones - * Other return values defined in utilities.h 'RmOffsetsMatchCode' enum - */ - - g_assert(cfg->path_count == g_slist_length(cfg->paths)); - if(cfg->path_count != 2) { - rm_log_error(_("Usage: rmlint --is-reflink [-v|V] file1 file2\n")); - return EXIT_FAILURE; - } - - g_assert(cfg->paths); - - RmPath *a = cfg->paths->data; - g_assert(cfg->paths->next); - - RmPath *b = cfg->paths->next->data; - rm_log_debug_line("Testing if %s is clone of %s", a->path, b->path); - - int result = rm_util_link_type(a->path, b->path); - switch(result) { - case RM_LINK_REFLINK: - rm_log_debug_line("Offsets match"); - break; - case RM_LINK_NONE: - rm_log_debug_line("Offsets differ"); - break; - case RM_LINK_MAYBE_REFLINK: - rm_log_debug_line("Can't read file offsets (maybe inline extents?)"); - break; - default: - break; - } - - return result; -} diff --git a/lib/shredder.c b/lib/shredder.c index 87b19d9df..f80ec62e0 100644 --- a/lib/shredder.c +++ b/lib/shredder.c @@ -34,6 +34,7 @@ 
#include "hasher.h" #include "formats.h" +#include "logger.h" #include "preprocess.h" #include "utilities.h" @@ -357,6 +358,9 @@ typedef struct RmShredGroup { /* number of file clusters */ gsize n_clusters; + /* number of file clusters that don't have external checksums */ + gsize n_unhashed_clusters; + /* number of distinct inodes */ gsize n_inodes; @@ -690,8 +694,9 @@ static void rm_shred_write_group_to_xattr(const RmSession *session, GQueue *grou return; } - if(g_queue_get_length(group) <= 1) { - /* Do not write unique file checksums */ + if(g_queue_get_length(group) <= 1 && + !(session->cfg->hash_uniques || session->cfg->hash_unmatched)) { + /* Do not write incomplete unique file checksums */ return; } @@ -760,13 +765,6 @@ static void rm_shred_group_free(RmShredGroup *self, bool force_free) { bool needs_free = !(cfg->cache_file_structs) || force_free; - /* May not free though when unfinished checksums are written. - * Those are freed by the output module. - */ - if(cfg->write_unfinished) { - needs_free = false; - } - if(self->held_files) { g_queue_foreach(self->held_files, (GFunc)rm_shred_discard_file, GUINT_TO_POINTER(needs_free)); @@ -793,7 +791,8 @@ static void rm_shred_group_free(RmShredGroup *self, bool force_free) { } static gboolean rm_shred_group_qualifies(RmShredGroup *group) { - return 1 && (group->num_files >= 2) + return 1 + && (group->num_files >= 2) /* it takes 2 to tango */ && (group->n_pref > 0 || !NEEDS_PREF(group)) /* we have at least one file from preferred path, or we don't care */ @@ -802,7 +801,7 @@ static gboolean rm_shred_group_qualifies(RmShredGroup *group) { && (group->n_new > 0 || !NEEDS_NEW(group)) /* we have at least one file newer than cfg->min_mtime, or we don't care */ && (!group->unique_basename || !group->session->cfg->unmatched_basenames); - /* we have more than one unique basename, or we don't care */ + /* we have more than one unique basename, or we don't care */ } /* call unlocked; should be no contention issues since group 
is finished */ @@ -846,13 +845,38 @@ static void rm_shred_group_finalise(RmShredGroup *self) { * Assume group already protected by group_lock. * */ static void rm_shred_group_update_status(RmShredGroup *group) { - if(group->status == RM_SHRED_GROUP_DORMANT && rm_shred_group_qualifies(group) && - group->hash_offset < group->file_size && - (group->n_clusters > 1 || - (group->n_inodes == 1 && group->session->cfg->merge_directories))) { - /* group can go active */ + if(group->status != RM_SHRED_GROUP_DORMANT) { + // group already hashing + return; + } + if(group->hash_offset == group->file_size) { + //hashes complete + return; + } + if(!rm_shred_group_qualifies(group) && !group->session->cfg->hash_uniques && !group->session->cfg->hash_unmatched) { + // no hashing required (yet) + return; + } + + if(group->session->cfg->hash_uniques && group->n_unhashed_clusters > 0) { + // hash any files with cfg->hash_uniques group->status = RM_SHRED_GROUP_START_HASHING; } + else if (group->n_clusters > 1) { + // we have potential match candidates; start hashing + group->status = RM_SHRED_GROUP_START_HASHING; + } + else if(group->n_inodes == 1 && group->n_unhashed_clusters > 0 && group->session->cfg->merge_directories) { + /* special case of hardlinked files that still need hashing to help identify matching directories */ + group->status = RM_SHRED_GROUP_START_HASHING; + } + else if (group->session->cfg->hash_unmatched && group->held_files->length > 0) { + RmFile* head = group->held_files->head->data; + if(head->digest) { + // with hash_unmatched, keep going once we start + group->status = RM_SHRED_GROUP_START_HASHING; + } + } } /* Only called by rm_shred_group_free (via GDestroyNotify of group->children). 
@@ -912,6 +936,9 @@ static RmFile *rm_shred_group_push_file(RmShredGroup *shred_group, RmFile *file, shred_group->n_npref += rm_file_n_nprefd(file); shred_group->n_new += rm_file_n_new(file); shred_group->n_clusters++; + if(file->ext_cksum == NULL) { + shred_group->n_unhashed_clusters++; + } shred_group->n_inodes += RM_FILE_INODE_COUNT(file); g_assert(file->hash_offset == shred_group->hash_offset); @@ -1634,7 +1661,7 @@ static bool rm_shred_can_process(RmFile *file, RmShredTag *main) { static gint rm_shred_process_file(RmFile *file, RmSession *session) { RmShredTag *tag = session->shredder; - if(rm_session_was_aborted()) { + if(rm_session_was_aborted() || session->equal_exit_code==EXIT_FAILURE) { file->status = RM_FILE_STATE_IGNORE; rm_shred_sift(file); return 1; diff --git a/lib/traverse.c b/lib/traverse.c index 5f4bf8163..698a58f2c 100644 --- a/lib/traverse.c +++ b/lib/traverse.c @@ -34,6 +34,7 @@ #include "file.h" #include "formats.h" +#include "logger.h" #include "md-scheduler.h" #include "preprocess.h" #include "utilities.h" diff --git a/lib/treemerge.c b/lib/treemerge.c index 36ec8a272..668225792 100644 --- a/lib/treemerge.c +++ b/lib/treemerge.c @@ -78,6 +78,7 @@ #include #include "formats.h" +#include "logger.h" #include "pathtricia.h" #include "preprocess.h" #include "shredder.h" @@ -662,21 +663,6 @@ static void rm_tm_output_file(RmTreeMerger *self, RmFile *file) { g_hash_table_remove(self->free_map, file); } -static void rm_tm_write_unfinished_cksums(RmTreeMerger *self, RmDirectory *directory) { - for(GList *iter = directory->known_files.head; iter; iter = iter->next) { - RmFile *file = iter->data; - file->lint_type = RM_LINT_TYPE_UNIQUE_FILE; - file->twin_count = -1; - rm_tm_output_file(self, file); - } - - /* Recursively propagate to children */ - for(GList *iter = directory->children.head; iter; iter = iter->next) { - RmDirectory *child = iter->data; - rm_tm_write_unfinished_cksums(self, child); - } -} - static int rm_tm_sort_paths(const RmDirectory 
*da, const RmDirectory *db, _UNUSED RmTreeMerger *self) { return da->depth - db->depth; @@ -912,11 +898,6 @@ static void rm_tm_extract(RmTreeMerger *self) { mask->is_original = true; } } - - if(self->session->cfg->write_unfinished) { - rm_tm_write_unfinished_cksums(self, directory); - } - } rm_tm_output_group(self, &file_adaptor_group); diff --git a/lib/utilities.c b/lib/utilities.c index 0cd6e6bed..290e46c4a 100644 --- a/lib/utilities.c +++ b/lib/utilities.c @@ -31,6 +31,8 @@ #include #include "config.h" +#include "logger.h" +#include "reflink.h" #include "session.h" /* Be safe: This header is not essential and might be missing on some systems. @@ -79,16 +81,71 @@ #include #endif -#if HAVE_JSON_GLIB -#include +#define RM_MOUNTTABLE_IS_USABLE (HAVE_BLKID && HAVE_GIO_UNIX) + + +//////////////////////////////////// +// SYSCALL WRAPPERS // +//////////////////////////////////// + +int rm_sys_stat(const char *path, RmStat *buf) { +#if HAVE_STAT64 && !RM_IS_APPLE + return stat64(path, buf); +#else + return stat(path, buf); #endif +} + +int rm_sys_lstat(const char *path, RmStat *buf) { +#if HAVE_STAT64 && !RM_IS_APPLE + return lstat64(path, buf); +#else + return lstat(path, buf); +#endif +} + + +int rm_sys_open(const char *path, int mode) { +#if HAVE_STAT64 +#ifdef O_LARGEFILE + mode |= O_LARGEFILE; +#endif +#endif + + return open(path, mode, (S_IRUSR | S_IWUSR)); +} -#define RM_MOUNTTABLE_IS_USABLE (HAVE_BLKID && HAVE_GIO_UNIX) + +void rm_sys_close(int fd) { + if(close(fd) == -1) { + rm_log_perror("close(2) failed"); + } +} + +gint64 rm_sys_preadv(int fd, const struct iovec *iov, int iovcnt, + RmOff offset) { +#if RM_IS_APPLE || RM_IS_CYGWIN + if(lseek(fd, offset, SEEK_SET) == -1) { + rm_log_perror("seek in emulated preadv failed"); + return 0; + } + return readv(fd, iov, iovcnt); +#elif RM_PLATFORM_32 + if(lseek64(fd, offset, SEEK_SET) == -1) { + rm_log_perror("seek in emulated preadv failed"); + return 0; + } + return readv(fd, iov, iovcnt); +#else + return preadv(fd, 
iov, iovcnt, offset); +#endif +} //////////////////////////////////// // GENERAL UTILITES // //////////////////////////////////// + char *rm_util_strsub(const char *string, const char *subs, const char *with) { gchar *result = NULL; if(string != NULL && string[0] != '\0') { @@ -1013,16 +1070,24 @@ static struct fiemap *rm_offset_get_fiemap(int fd, const int n_extents, return fm; } +static void rm_util_set_nullable_bool(bool *ptr, bool value) { + if(ptr != NULL) { + *ptr = value; + } +} + /* Return physical (disk) offset of the beginning of the file extent containing the * specified logical file_offset. * If a pointer to file_offset_next is provided then read fiemap extents until * the next non-contiguous extent (fragment) is encountered and writes the corresponding * file offset to &file_offset_next. * */ -RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next, bool *is_last) { +RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next, bool *is_last, bool *is_inline) { RmOff result = 0; bool done = FALSE; bool first = TRUE; + rm_util_set_nullable_bool(is_last, FALSE); + rm_util_set_nullable_bool(is_inline, FALSE); /* used for detecting contiguous extents */ unsigned long expected = 0; @@ -1068,12 +1133,14 @@ RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next, *file_offset_next = fm_ext.fe_logical + fm_ext.fe_length; } + if(fm_ext.fe_flags & FIEMAP_EXTENT_DATA_INLINE) { + rm_util_set_nullable_bool(is_inline, TRUE); + } + if(fm_ext.fe_flags & FIEMAP_EXTENT_LAST) { done = TRUE; - if(is_last != NULL) { - *is_last = TRUE; - } + rm_util_set_nullable_bool(is_last, TRUE); } if(fm_ext.fe_length <= 0) { @@ -1104,7 +1171,7 @@ RmOff rm_offset_get_from_path(const char *path, RmOff file_offset, rm_log_info("Error opening %s in rm_offset_get_from_path\n", path); return 0; } - RmOff result = rm_offset_get_from_fd(fd, file_offset, file_offset_next, NULL); + RmOff result = rm_offset_get_from_fd(fd, 
file_offset, file_offset_next, NULL, NULL); rm_sys_close(fd); return result; } @@ -1112,7 +1179,7 @@ RmOff rm_offset_get_from_path(const char *path, RmOff file_offset, #else /* Probably FreeBSD */ RmOff rm_offset_get_from_fd(_UNUSED int fd, _UNUSED RmOff file_offset, - _UNUSED RmOff *file_offset_next, _UNUSED bool *is_last) { + _UNUSED RmOff *file_offset_next, _UNUSED bool *is_last, _UNUSED bool *is_inline) { return 0; } @@ -1123,7 +1190,7 @@ RmOff rm_offset_get_from_path(_UNUSED const char *path, _UNUSED RmOff file_offse #endif -static gboolean rm_util_is_path_double(char *path1, char *path2) { +static gboolean rm_util_is_path_double(const char *path1, const char *path2) { char *basename1 = rm_util_basename(path1); char *basename2 = rm_util_basename(path2); return (strcmp(basename1, basename2) == 0 && @@ -1158,7 +1225,7 @@ static gboolean rm_util_same_device(const char *path1, const char *path2) { return result; } -RmLinkType rm_util_link_type(char *path1, char *path2) { +RmLinkType rm_util_link_type(const char *path1, const char *path2) { #if _RM_OFFSET_DEBUG rm_log_debug_line("Checking link type for %s vs %s", path1, path2); #endif @@ -1243,85 +1310,31 @@ RmLinkType rm_util_link_type(char *path1, char *path2) { RM_RETURN(RM_LINK_SYMLINK); } -#if HAVE_FIEMAP - - RmOff logical_current = 0; - - bool is_last_1 = false; - bool is_last_2 = false; - bool at_least_one_checked = false; - - while(!rm_session_was_aborted()) { - RmOff logical_next_1 = 0; - RmOff logical_next_2 = 0; - - RmOff physical_1 = rm_offset_get_from_fd(fd1, logical_current, &logical_next_1, &is_last_1); - RmOff physical_2 = rm_offset_get_from_fd(fd2, logical_current, &logical_next_2, &is_last_2); - - if(is_last_1 != is_last_2) { - RM_RETURN(RM_LINK_NONE); - } - - if(is_last_1 && is_last_2 && at_least_one_checked) { - RM_RETURN(RM_LINK_REFLINK); - } - - if(physical_1 != physical_2) { -#if _RM_OFFSET_DEBUG - rm_log_debug_line("Physical offsets differ at byte %" G_GUINT64_FORMAT - ": %"G_GUINT64_FORMAT 
"<> %" G_GUINT64_FORMAT, - logical_current, physical_1, physical_2); -#endif - RM_RETURN(RM_LINK_NONE); - } - if(logical_next_1 != logical_next_2) { -#if _RM_OFFSET_DEBUG - rm_log_debug_line("File offsets differ after %" G_GUINT64_FORMAT - " bytes: %" G_GUINT64_FORMAT "<> %" G_GUINT64_FORMAT, - logical_current, logical_next_1, logical_next_2); -#endif - RM_RETURN(RM_LINK_NONE); - } - - if(physical_1 == 0) { -#if _RM_OFFSET_DEBUG - rm_log_debug_line( - "Can't determine whether files are clones (maybe inline extents?)"); -#endif - RM_RETURN(RM_LINK_MAYBE_REFLINK); - } - -#if _RM_OFFSET_DEBUG - rm_log_debug_line("Offsets match at fd1=%d, fd2=%d, logical=%" G_GUINT64_FORMAT ", physical=%" G_GUINT64_FORMAT, - fd1, fd2, logical_current, physical_1); -#endif - if(logical_next_1 <= logical_current) { - /* oops we seem to be getting nowhere (this shouldn't really happen) */ - rm_log_info_line( - "rm_util_link_type() giving up: file1_offset_next<=file_offset_current for %s vs %s", path1, path2); - RM_RETURN(RM_LINK_ERROR) - } - - if(logical_next_1 >= (RmOff)stat1.st_size) { - /* phew, we got to the end */ -#if _RM_OFFSET_DEBUG - rm_log_debug_line("Files are clones (share same data)") -#endif - RM_RETURN(RM_LINK_REFLINK) - } - - logical_current = logical_next_1; - at_least_one_checked = true; - } - - RM_RETURN(RM_LINK_ERROR); -#else - RM_RETURN(RM_LINK_NONE); -#endif + RmLinkType reflink_type = rm_reflink_type_from_fd(fd1, fd2, stat1.st_size); + RM_RETURN(reflink_type); #undef RM_RETURN } +const char** rm_link_type_to_desc() { + static const char* RM_LINK_TYPE_TO_DESC[] = { + N_("Reflink"), + N_("An error occurred during checking"), + "Undefined", + N_("Not a file"), + N_("File sizes differ"), + N_("Files have inline extents"), + N_("Same file and path"), + N_("Same file but with different path"), + N_("Hardlink"), + N_("Symlink"), + N_("Files are on different devices"), + N_("Not linked") + }; + return RM_LINK_TYPE_TO_DESC; +} + + ///////////////////////////////// // 
GTHREADPOOL WRAPPERS // diff --git a/lib/utilities.h b/lib/utilities.h index dcad11a0e..aeda1b70d 100644 --- a/lib/utilities.h +++ b/lib/utilities.h @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -38,21 +39,22 @@ /* Pat(h)tricia Trie implementation */ #include "pathtricia.h" -/* return values for rm_offsets_match */ +/* return values for rm_util_link_type */ typedef enum RmLinkType { RM_LINK_REFLINK = EXIT_SUCCESS, - RM_LINK_NONE = EXIT_FAILURE, + RM_LINK_ERROR = EXIT_FAILURE, RM_LINK_NOT_FILE = 3, RM_LINK_WRONG_SIZE = 4, - RM_LINK_MAYBE_REFLINK = 5, + RM_LINK_INLINE_EXTENTS = 5, RM_LINK_SAME_FILE = 6, RM_LINK_PATH_DOUBLE = 7, RM_LINK_HARDLINK = 8, RM_LINK_SYMLINK = 9, RM_LINK_XDEV = 10, - RM_LINK_ERROR = 11, + RM_LINK_NONE = 11, } RmLinkType; + #if HAVE_STAT64 && !RM_IS_APPLE typedef struct stat64 RmStat; @@ -87,21 +89,9 @@ typedef struct stat RmStat; // SYSCALL WRAPPERS // //////////////////////////////////// -WARN_UNUSED_RESULT static inline int rm_sys_stat(const char *path, RmStat *buf) { -#if HAVE_STAT64 && !RM_IS_APPLE - return stat64(path, buf); -#else - return stat(path, buf); -#endif -} +WARN_UNUSED_RESULT int rm_sys_stat(const char *path, RmStat *buf); -WARN_UNUSED_RESULT static inline int rm_sys_lstat(const char *path, RmStat *buf) { -#if HAVE_STAT64 && !RM_IS_APPLE - return lstat64(path, buf); -#else - return lstat(path, buf); -#endif -} +WARN_UNUSED_RESULT int rm_sys_lstat(const char *path, RmStat *buf); static inline gdouble rm_sys_stat_mtime_float(RmStat *stat) { #if RM_IS_APPLE @@ -111,40 +101,11 @@ static inline gdouble rm_sys_stat_mtime_float(RmStat *stat) { #endif } -static inline int rm_sys_open(const char *path, int mode) { -#if HAVE_STAT64 -#ifdef O_LARGEFILE - mode |= O_LARGEFILE; -#endif -#endif +int rm_sys_open(const char *path, int mode); - return open(path, mode, (S_IRUSR | S_IWUSR)); -} +void rm_sys_close(int fd); -static inline void rm_sys_close(int fd) { - if(close(fd) == -1) { - rm_log_perror("close(2) failed"); - 
} -} - -static inline gint64 rm_sys_preadv(int fd, const struct iovec *iov, int iovcnt, - RmOff offset) { -#if RM_IS_APPLE || RM_IS_CYGWIN - if(lseek(fd, offset, SEEK_SET) == -1) { - rm_log_perror("seek in emulated preadv failed"); - return 0; - } - return readv(fd, iov, iovcnt); -#elif RM_PLATFORM_32 - if(lseek64(fd, offset, SEEK_SET) == -1) { - rm_log_perror("seek in emulated preadv failed"); - return 0; - } - return readv(fd, iov, iovcnt); -#else - return preadv(fd, iov, iovcnt, offset); -#endif -} +gint64 rm_sys_preadv(int fd, const struct iovec *iov, int iovcnt, RmOff offset); ///////////////////////////////////// // UID/GID VALIDITY CHECKING // @@ -416,7 +377,7 @@ bool rm_mounts_can_reflink(RmMountTable *self, dev_t source, dev_t dest); * * @return the physical offset starting from the disk. */ -RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next, bool *is_last); +RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next, bool *is_last, bool* is_inline); /** * @brief Lookup the physical offset of a file path at any given offset. @@ -428,9 +389,16 @@ RmOff rm_offset_get_from_path(const char *path, RmOff file_offset, /** * @brief Test if two files have identical fiemaps. - * @retval see RmOffsetsMatchCode enum definition. + * @retval see RmLinkType enum definition. + */ +RmLinkType rm_util_link_type(const char *path1, const char *path2); + + +/** + * @brief Map RmLinkType to description. + * @retval Array of descriptions. 
*/ -RmLinkType rm_util_link_type(char *path1, char *path2); +const char** rm_link_type_to_desc(void); ////////////////////////////// // TIMESTAMP HELPERS // diff --git a/lib/xattr.c b/lib/xattr.c index dda586cf2..1e84f8cc0 100644 --- a/lib/xattr.c +++ b/lib/xattr.c @@ -25,6 +25,7 @@ #include "xattr.h" #include "config.h" +#include "logger.h" #include #include diff --git a/po/de.po b/po/de.po index 3612c4f4e..289570bd5 100644 --- a/po/de.po +++ b/po/de.po @@ -300,10 +300,6 @@ msgstr "mtime Zeitstempel von `%s` hat sich geändert. Ignoriere Datei." msgid "Loading json-results `%s'" msgstr "Lade JSON Cache: `%s'" -#: lib/replay.c -msgid "json-glib is needed for using --replay." -msgstr "json-glib wird zur Benutzung von `--replay` benötigt." - #: lib/replay.c msgid "Please recompile `rmlint` with it installed." msgstr "Bitte kompilieren Sie `rmlint` entsprechend neu." @@ -1033,10 +1029,6 @@ msgstr "Keine stamp Datei hier: `%s`. Es wird eine nach diesem Lauf erstellt." #~ msgid "%s%15" #~ msgstr "%s%15" -#~ msgid "caching is not supported due to missing json-glib library." -#~ msgstr "" -#~ "Caching ist nicht möglich, da rmlint ohne json-glib kompiliert wurde." - #~ msgid "Loading json-cache `%s'" #~ msgstr "Lade JSON Cache: `%s'" diff --git a/po/es.po b/po/es.po index 97e91acf8..09947fb26 100644 --- a/po/es.po +++ b/po/es.po @@ -296,10 +296,6 @@ msgstr "el tiempo de modificación de '%s' ha cambiado. Ignorando." msgid "Loading json-results `%s'" msgstr "Cargando json-results '%s'" -#: lib/replay.c -msgid "json-glib is needed for using --replay." -msgstr "json-glib es necesario para usar --replay." - #: lib/replay.c msgid "Please recompile `rmlint` with it installed." msgstr "Por favor recompile 'rmlint' con ello instalado." @@ -1005,10 +1001,6 @@ msgstr "" msgid "No stamp file at `%s`, will create one after this run." msgstr "" -#~ msgid "caching is not supported due to missing json-glib library." 
-#~ msgstr "" -#~ "el cacheo no es soportado debido a la librería inexistente json-glib" - #~ msgid "Loading json-cache `%s'" #~ msgstr "Cargando json-cache '%s" diff --git a/po/fr.po b/po/fr.po index 2c7175a76..5cb70c6fb 100644 --- a/po/fr.po +++ b/po/fr.po @@ -294,10 +294,6 @@ msgstr "la date de modification de `%s` a changé. Ignoré." msgid "Loading json-results `%s'" msgstr "Chargement de JSON Cache `%s'" -#: lib/replay.c -msgid "json-glib is needed for using --replay." -msgstr "json-lib est nécessaire pour utiliser l'argument --replay" - #: lib/replay.c msgid "Please recompile `rmlint` with it installed." msgstr "Merci de recompiler `rmlint` une fois celui ci installé" @@ -1001,9 +997,6 @@ msgstr "" msgid "No stamp file at `%s`, will create one after this run." msgstr "" -#~ msgid "caching is not supported due to missing json-glib library." -#~ msgstr "Cache non supporté, librairie json-glib manquante." - #~ msgid "Loading json-cache `%s'" #~ msgstr "Chargement de JSON Cache `%s'" diff --git a/src/rmlint.c b/src/rmlint.c index 4da5ecbfe..c5efd648d 100644 --- a/src/rmlint.c +++ b/src/rmlint.c @@ -26,43 +26,19 @@ #include #include #include +#include #include "../lib/api.h" #include "../lib/config.h" +#include "../lib/gui.h" +#include "../lib/logger.h" +#include "../lib/hash-utility.h" +#include "../lib/reflink.h" -#if HAVE_JSON_GLIB && !GLIB_CHECK_VERSION(2, 36, 0) +#if !GLIB_CHECK_VERSION(2, 36, 0) #include #endif -static char *remove_color_escapes(char *message) { - char *dst = message; - for(char *src = message; src && *src; src++) { - if(*src == '\x1b') { - src = strchr(src, 'm'); - } else { - *dst++ = *src; - } - } - - if(dst) { - *dst = 0; - } - return message; -} - -static void logging_callback(_UNUSED const gchar *log_domain, - GLogLevelFlags log_level, - const gchar *message, - gpointer user_data) { - RmSession *session = user_data; - if(session->cfg->verbosity >= log_level) { - if(!session->cfg->with_stderr_color) { - message = 
remove_color_escapes((char *)message); - } - fputs(message, stderr); - } -} - static void signal_handler(int signum) { switch(signum) { case SIGINT: @@ -101,18 +77,42 @@ static void i18n_init(void) { #endif } +static void maybe_run_alt_main(int argc, const char **argv, char *match_first, + char *alt_main_name, int (*alt_main)(int, const char **)) { + if(argc < 2) { + return; + } + if(g_strcmp0(match_first, argv[1]) == 0) { + argv[1] = alt_main_name; + exit(alt_main(argc - 1, &argv[1])); + } + for(int i = 2; i < argc; i++) { + if(g_strcmp0(match_first, argv[i]) == 0) { + rm_log_error_line("%s must be first argument", match_first); + exit(EXIT_FAILURE); + } + } +} + int main(int argc, const char **argv) { +#if !GLIB_CHECK_VERSION(2, 36, 0) + /* Very old glib. Debian, Im looking at you. */ + g_type_init(); +#endif + int exit_state = EXIT_FAILURE; + RM_LOG_INIT; + /* call logging_callback on every message */ + g_log_set_default_handler(rm_logger_callback, NULL); - RmCfg cfg; - rm_cfg_set_default(&cfg); + maybe_run_alt_main(argc, argv, "--gui", "shredder", &rm_gui_launch); - RmSession session; - rm_session_init(&session, &cfg); + maybe_run_alt_main(argc, argv, "--hash", "rmlint --hash", &rm_hasher_main); - /* call logging_callback on every message */ - g_log_set_default_handler(logging_callback, &session); + maybe_run_alt_main(argc, argv, "--is-reflink", "rmlint --is-reflink", &rm_is_reflink_main); + + maybe_run_alt_main(argc, argv, "--dedupe", "rmlint --dedupe", &rm_dedupe_main); i18n_init(); @@ -126,21 +126,17 @@ int main(int argc, const char **argv) { sigaction(SIGFPE, &sa, NULL); sigaction(SIGABRT, &sa, NULL); -#if !GLIB_CHECK_VERSION(2, 36, 0) - /* Very old glib. Debian, Im looking at you. 
*/ - g_type_init(); -#endif + + RmCfg cfg; + rm_cfg_set_default(&cfg); + RmSession session; + rm_session_init(&session, &cfg); + /* Parse commandline */ if(rm_cmd_parse_args(argc, (char **)argv, &session) != 0) { /* Do all the real work */ - if(cfg.dedupe) { - exit_state = rm_session_dedupe_main(&cfg); - } else if(cfg.is_reflink) { - exit_state = rm_session_is_reflink_main(&cfg); - } else { - exit_state = rm_cmd_main(&session); - } + exit_state = rm_cmd_main(&session); } rm_session_clear(&session); diff --git a/tests/test_mains/test_dedupe.py b/tests/test_mains/test_dedupe.py index 48404b71c..6f5a3c7f1 100644 --- a/tests/test_mains/test_dedupe.py +++ b/tests/test_mains/test_dedupe.py @@ -86,6 +86,22 @@ def test_equal_files(): with_json=False) +@needs_reflink_fs +@with_setup(usual_setup_func, usual_teardown_func) +def test_hardlinks(): + path_a = create_file('1234', 'a') + path_b = path_a + '_hardlink' + create_link('a', 'a_hardlink', symlink=False) + + with assert_exit_code(0): + run_rmlint( + '--dedupe', + path_a, path_b, + use_default_dir=False, + with_json=False, + verbosity="") + + @needs_reflink_fs @with_setup(usual_setup_func, usual_teardown_func) def test_different_files(): @@ -201,7 +217,11 @@ def test_clone_handler(): ) # parse output file for expected clone command - counts = pattern_count(sh_path, ["clone '", "skip_reflink '"]) + patterns = [ + "clone '", + "skip_reflink '"] + counts = pattern_count(sh_path, patterns) + print(counts) assert counts[0] == 1 assert counts[1] == 0 @@ -221,6 +241,6 @@ def test_clone_handler(): with_json=False ) - counts = pattern_count(sh_path, ["clone '", "skip_reflink '"]) + counts = pattern_count(sh_path, patterns) assert counts[0] == 0 assert counts[1] == 1 diff --git a/tests/test_options/test_cache.py b/tests/test_options/test_cache.py index 50c14d01d..bbd414af9 100644 --- a/tests/test_options/test_cache.py +++ b/tests/test_options/test_cache.py @@ -24,23 +24,29 @@ def create_files(): create_file('c' * 2, '4.c') 
create_file('c' * 2, '4.c') create_file('c' * 2, '4.d') - # duplicate_dirs + with --write_unfinished + # duplicate_dirs + with --hash-uniques create_file('x', 'dir_a/1') create_file('x', 'dir_b/1') def check(data, write_cache): - unfinished = [p['path'] for p in data if p['type'] == 'unique_file'] + unique = [p['path'] for p in data if p['type'] == 'unique_file'] dupe_files = [p['path'] for p in data if p['type'] == 'duplicate_file'] dupe_trees = [p['path'] for p in data if p['type'] == 'duplicate_dir'] + files_in_dupe_dirs = [p['path'] for p in data if p['type'] == 'part_of_directory'] path_in = lambda name, paths: os.path.join(TESTDIR_NAME, name) in paths if write_cache: - assert len(unfinished) == 3 - assert path_in('1.b', unfinished) - assert path_in('dir_a/1', unfinished) - assert path_in('dir_b/1', unfinished) + assert len(unique) == 3 + assert path_in('3.a', unique) + assert path_in('3.a_', unique) + assert path_in('1.b', unique) + + assert len(files_in_dupe_dirs) == 2 + assert path_in('dir_a/1', files_in_dupe_dirs) + assert path_in('dir_b/1', files_in_dupe_dirs) + assert len(dupe_trees) == 2 assert path_in('dir_a', dupe_trees) @@ -63,7 +69,7 @@ def test_xattr_basic(): for _ in range(2): for write_cache in True, False: if write_cache: - head, *data, footer = run_rmlint('-U -D -S pa --xattr-write') + head, *data, footer = run_rmlint('--hash-uniques -D -S pa --xattr-write') else: head, *data, footer = run_rmlint('-D -S pa --xattr-read') @@ -101,32 +107,37 @@ def test_xattr_detail(extra_opts): xattr_1 = must_read_xattr(path_1) xattr_2 = must_read_xattr(path_2) xattr_3 = must_read_xattr(path_3) + xattr_4 = must_read_xattr(path_4) assert xattr_1["user.rmlint.blake2b.cksum"] == \ b'ba80a53f981c4d0d6a2797b69f12f6e94c212f14685ac4b74b12bb6fdbffa2d17d87c5392aab792dc252d5de4533cc9518d38aa8dbf1925ab92386edd4009923\x00' assert xattr_1 == xattr_2 - # no --write-unfinished given. + # no --hash-unmatched given. assert xattr_3 == {} - # Repeating the caching option should have no effect on the output. 
+ # no --hash-uniques given. + assert xattr_4 == {} + + # Run several times with --hash-unmatched. for _ in range(10): - head, *data, footer = run_rmlint(base_options + ' --xattr') - # one more due to the unique_file + head, *data, footer = run_rmlint(base_options + ' --xattr --hash-unmatched') + # one more due to the size twin assert len(data) == 3 xattr_1 = must_read_xattr(path_1) xattr_2 = must_read_xattr(path_2) xattr_3 = must_read_xattr(path_3) + xattr_4 = must_read_xattr(path_4) assert xattr_1["user.rmlint.blake2b.cksum"] == \ b'ba80a53f981c4d0d6a2797b69f12f6e94c212f14685ac4b74b12bb6fdbffa2d17d87c5392aab792dc252d5de4533cc9518d38aa8dbf1925ab92386edd4009923\x00' assert xattr_1 == xattr_2 - # --write-unfinished will also write the unfinished one. + # size-twin with --hash-unmatched. xattr_3 = must_read_xattr(path_3) - assert xattr_3 == {} + assert xattr_3["user.rmlint.blake2b.cksum"] == \ + b'36badf2227521b798b78d1bd43c62520a35b9b541547ff223f35f74b1168da2cd3c8d102aaee1a0cc217b601258d80151067cdee3a6352517b8fc7f7106902d3\x00' - # unique file which was not hashed -> does not need to be touched. - xattr_4 = must_read_xattr(path_4) + # unique-length file which was not hashed -> does not need to be touched. assert xattr_4 == {} # Try clearing the attributes: @@ -136,3 +147,28 @@ def test_xattr_detail(extra_opts): assert must_read_xattr(path_2) == {} assert must_read_xattr(path_3) == {} assert must_read_xattr(path_4) == {} + + # Run several times with --hash-uniques. 
+ for _ in range(10): + head, *data, footer = run_rmlint(base_options + ' --xattr --hash-uniques') + # two more due to the 'longer' file and also the device image + assert len(data) == 5 + + xattr_1 = must_read_xattr(path_1) + xattr_2 = must_read_xattr(path_2) + xattr_3 = must_read_xattr(path_3) + xattr_4 = must_read_xattr(path_4) + assert xattr_1["user.rmlint.blake2b.cksum"] == \ + b'ba80a53f981c4d0d6a2797b69f12f6e94c212f14685ac4b74b12bb6fdbffa2d17d87c5392aab792dc252d5de4533cc9518d38aa8dbf1925ab92386edd4009923\x00' + assert xattr_1 == xattr_2 + + # size-twin is also hashed with --hash-uniques. + xattr_3 = must_read_xattr(path_3) + assert xattr_3["user.rmlint.blake2b.cksum"] == \ + b'36badf2227521b798b78d1bd43c62520a35b9b541547ff223f35f74b1168da2cd3c8d102aaee1a0cc217b601258d80151067cdee3a6352517b8fc7f7106902d3\x00' + + # unique-length file is hashed too with --hash-uniques. + xattr_4 = must_read_xattr(path_4) + assert xattr_4["user.rmlint.blake2b.cksum"] == \ + b'b8c25c0482c3323cd3fc544cd9e0fb05eee191aedce56e307d1ea1af96f96fe63d2ac82b0a3ba5c42b7b58da92cd438065b25a51170f183889651419a242d24f\x00' + diff --git a/tests/test_options/test_replay.py b/tests/test_options/test_replay.py index f72d1e164..1c99777a0 100644 --- a/tests/test_options/test_replay.py +++ b/tests/test_options/test_replay.py @@ -96,7 +96,10 @@ def test_replay_match_basename(): p=replay_path )) - assert len(data) == 3 + # second 'a' file should be kicked out + assert len(data) == 2 + paths = set([p['path'] for p in data]) + assert os.path.join(TESTDIR_NAME, 'test1/b') in paths @with_setup(usual_setup_func, usual_teardown_func) diff --git a/tests/test_options/test_stdin.py b/tests/test_options/test_stdin.py index 416629b5b..8c0931276 100644 --- a/tests/test_options/test_stdin.py +++ b/tests/test_options/test_stdin.py @@ -67,15 +67,16 @@ def test_path_starting_with_dash(): try: os.chdir(TESTDIR_NAME) - data = check_output( + proc = subprocess.Popen( [cwd + '/rmlint', '-o', 'json', '-S', 'a', '--', 
subdir], - stderr=STDOUT + stdin=subprocess.PIPE, + stdout=subprocess.PIPE ) + data, _ = proc.communicate("") + head, *data, footer = json.loads(data.decode('utf-8')) finally: os.chdir(cwd) - head, *data, footer = json.loads(data.decode('utf-8')) - assert data[0]['path'].endswith('a') assert data[1]['path'].endswith('b') assert footer['total_lint_size'] == 4