Skip to content

Commit

Permalink
Merge pull request #492 from SeeSpotRun/fix-mem-leaks
Browse files Browse the repository at this point in the history
Fix mem leaks
  • Loading branch information
SeeSpotRun authored Apr 23, 2021
2 parents a8c39c8 + 3c1a8e2 commit 3ff9809
Show file tree
Hide file tree
Showing 19 changed files with 248 additions and 163 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ The format follows [keepachangelog.com]. Please stick to it.
* Implement --rank-by f option to rank originals by directory full path
* Can now atomically clone from original to its hardlink via ``rmlint --dedupe``
* Option ``-c json:traversed`` to include list of fully-traversed dirs in json output
* Option ``--ignore-bad-paths`` to not abort run if one or more bad paths passed

### Changed

Expand Down
5 changes: 3 additions & 2 deletions lib/cfg.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ typedef struct RmCfg {
gboolean hash_uniques;
gboolean hash_unmatched;
gboolean keep_cached_originals;
gboolean ignore_bad_paths;

int permissions;

Expand Down Expand Up @@ -131,7 +132,7 @@ typedef struct RmCfg {
* supplied by the user, i.e., the sums of the lengths of
* the associated lists RmCfg::{paths,json_paths}, which is
* not meant to be a useful number to know, and is simply a
* byproduct of calculating path indices.
* byproduct of calculating path indicies.
*/
guint path_count;

Expand Down Expand Up @@ -174,7 +175,7 @@ typedef struct RmCfg {
/* If true, files are hold back to
* the end of the program run and printed then.
*/
gboolean cache_file_structs;
gboolean delay_output;

/* Instead of running in duplicate detection mode,
* check if the passed arguments are equal files
Expand Down
21 changes: 15 additions & 6 deletions lib/checksum.c
Original file line number Diff line number Diff line change
Expand Up @@ -698,10 +698,13 @@ static RmParanoid *rm_digest_paranoid_copy(RmParanoid *paranoid) {
}

static void rm_digest_paranoid_free(RmParanoid *paranoid) {
rm_digest_free(paranoid->shadow_hash);
rm_digest_paranoid_release_buffers(paranoid);
g_async_queue_unref(paranoid->incoming_twin_candidates);
g_slist_free(paranoid->rejects);
rm_digest_unref(paranoid->shadow_hash);
// check whether we are real or just a rm_digest_paranoid_copy():
if(paranoid->incoming_twin_candidates) {
rm_digest_paranoid_release_buffers(paranoid);
g_async_queue_unref(paranoid->incoming_twin_candidates);
g_slist_free(paranoid->rejects);
}
g_slice_free(RmParanoid, paranoid);
}

Expand Down Expand Up @@ -896,6 +899,8 @@ RmDigest *rm_digest_new(RmDigestType type, RmOff seed) {
interface->update(digest->state, (const unsigned char *)&seed, sizeof(seed));
}

digest->ref_count = 1;

return digest;
}

Expand All @@ -904,7 +909,10 @@ void rm_digest_release_buffers(RmDigest *digest) {
rm_digest_paranoid_release_buffers(digest->state);
}

void rm_digest_free(RmDigest *digest) {
void rm_digest_unref(RmDigest *digest) {
if(!g_atomic_int_dec_and_test(&digest->ref_count)) {
return;
}
const RmDigestInterface *interface = rm_digest_get_interface(digest->type);
interface->free(digest->state);
g_slice_free(RmDigest, digest);
Expand Down Expand Up @@ -934,6 +942,7 @@ RmDigest *rm_digest_copy(RmDigest *digest) {
g_assert(digest);

RmDigest *copy = g_slice_copy(sizeof(RmDigest), digest);
copy->ref_count = 1;

const RmDigestInterface *interface = rm_digest_get_interface(digest->type);
if(interface->copy == NULL) {
Expand Down Expand Up @@ -1066,7 +1075,7 @@ guint8 *rm_digest_sum(RmDigestType algo, const guint8 *data, gsize len, gsize *o
*out_len = digest->bytes;
}

rm_digest_free(digest);
rm_digest_unref(digest);
return buf;
}

Expand Down
17 changes: 15 additions & 2 deletions lib/checksum.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ typedef struct RmDigest {
/* digest output size in bytes */
gsize bytes;

/* reference count */
guint ref_count;

} RmDigest;

typedef struct RmSemaphore {
Expand Down Expand Up @@ -193,9 +196,19 @@ const char *rm_digest_type_to_string(RmDigestType type);
RmDigest *rm_digest_new(RmDigestType type, RmOff seed);

/**
* @brief Deallocate memory associated with a RmDigest.
* @brief Decrease reference count and free memory if zero.
*/
void rm_digest_unref(RmDigest *digest);

/**
* @brief Increase reference count.
*/
void rm_digest_free(RmDigest *digest);
inline static RmDigest* rm_digest_ref(RmDigest *digest) {
if(digest) {
g_atomic_int_inc (&digest->ref_count);
}
return digest;
}

/**
* @brief Hash a datablock and add it to the current checksum.
Expand Down
17 changes: 7 additions & 10 deletions lib/cmdline.c
Original file line number Diff line number Diff line change
Expand Up @@ -386,9 +386,6 @@ static gboolean rm_cmd_parse_merge_directories(_UNUSED const char *option_name,

cfg->find_hardlinked_dupes = true;

/* Keep RmFiles after shredder. */
cfg->cache_file_structs = true;

return true;
}

Expand Down Expand Up @@ -904,7 +901,7 @@ static gboolean rm_cmd_parse_sortby(_UNUSED const char *option_name,
strncpy(cfg->rank_criteria, criteria, sizeof(cfg->rank_criteria) - 1);

/* ranking the files depends on caching them to the end of the program */
cfg->cache_file_structs = true;
cfg->delay_output = true;

return true;
}
Expand Down Expand Up @@ -1141,6 +1138,7 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) {
{"no-followlinks" , 'F' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(no_follow_symlinks) , _("Ignore symlinks") , NULL} ,
{"paranoid" , 'p' , EMPTY , G_OPTION_ARG_CALLBACK , FUNC(paranoid) , _("Use more paranoid hashing") , NULL} ,
{"no-crossdev" , 'x' , DISABLE , G_OPTION_ARG_NONE , &cfg->crossdev , _("Do not cross mountpoints") , NULL} ,
{"ignore-bad-paths" , 0 , 0 , G_OPTION_ARG_NONE , &cfg->ignore_bad_paths , _("Don't abort run if bad path passed to rmlint") , NULL} ,
{"keep-all-tagged" , 'k' , 0 , G_OPTION_ARG_NONE , &cfg->keep_all_tagged , _("Keep all tagged files") , NULL} ,
{"keep-all-untagged" , 'K' , 0 , G_OPTION_ARG_NONE , &cfg->keep_all_untagged , _("Keep all untagged files") , NULL} ,
{"must-match-tagged" , 'm' , 0 , G_OPTION_ARG_NONE , &cfg->must_match_tagged , _("Must have twin in tagged dir") , NULL} ,
Expand Down Expand Up @@ -1213,7 +1211,7 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) {
{"without-fiemap" , 0 , DISABLE | HIDDEN , G_OPTION_ARG_NONE , &cfg->build_fiemap , "Do not use fiemap(2) in order to save memory" , NULL} ,
{"shred-always-wait" , 0 , HIDDEN , G_OPTION_ARG_NONE , &cfg->shred_always_wait , "Always waits for file increment to finish hashing" , NULL} ,
{"fake-pathindex-as-disk" , 0 , HIDDEN , G_OPTION_ARG_NONE , &cfg->fake_pathindex_as_disk , "Pretends each input path is a separate physical disk" , NULL} ,
{"fake-holdback" , 0 , HIDDEN , G_OPTION_ARG_NONE , &cfg->cache_file_structs , "Hold back all files to the end before outputting." , NULL} ,
{"fake-holdback" , 0 , HIDDEN , G_OPTION_ARG_NONE , &cfg->delay_output , "Hold back all files to the end before outputting." , NULL} ,
{"fake-fiemap" , 0 , HIDDEN , G_OPTION_ARG_NONE , &cfg->fake_fiemap , "Create faked fiemap data for all files" , NULL} ,
{"fake-abort" , 0 , HIDDEN , G_OPTION_ARG_NONE , &cfg->fake_abort , "Simulate interrupt after 10% shredder progress" , NULL} ,
{"buffered-read" , 0 , HIDDEN , G_OPTION_ARG_NONE , &cfg->use_buffered_read , "Default to buffered reading calls (fread) during reading." , NULL} ,
Expand Down Expand Up @@ -1295,7 +1293,7 @@ bool rm_cmd_parse_args(int argc, char **argv, RmSession *session) {
goto cleanup;
}

if(!rm_cmd_set_paths(session, paths)) {
if(!rm_cmd_set_paths(session, paths) && !cfg->ignore_bad_paths) {
error =
g_error_new(RM_ERROR_QUARK, 0, _("Not all given paths are valid. Aborting"));
goto cleanup;
Expand Down Expand Up @@ -1397,7 +1395,8 @@ int rm_cmd_main(RmSession *session) {
rm_log_debug_line("No mount table created.");
}

session->mds = rm_mds_new(cfg->threads, session->mounts, cfg->fake_pathindex_as_disk);
bool no_mds_mount_table = cfg->fake_pathindex_as_disk || session->mounts == NULL;
session->mds = rm_mds_new(cfg->threads, session->mounts, no_mds_mount_table);

rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_TRAVERSE);

Expand All @@ -1412,8 +1411,6 @@ int rm_cmd_main(RmSession *session) {
g_timer_elapsed(session->timer, NULL), session->total_files);

if(cfg->merge_directories) {
g_assert(cfg->cache_file_structs);

/* Currently we cannot use -D and the cloning on btrfs, since this assumes the
* same layout on two dupicate directories which is likely not a valid assumption.
* Emit a warning if the raw -D is used in conjunction with that.
Expand Down Expand Up @@ -1467,7 +1464,7 @@ int rm_cmd_main(RmSession *session) {
}
}

if(session->total_files >= 1) {
if(session->total_files + session->traversed_folders > 0) {
rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);
rm_preprocess(session);

Expand Down
78 changes: 55 additions & 23 deletions lib/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,25 @@ RmFile *rm_file_new(struct RmSession *session, const char *path, RmStat *statp,
}
}

RmOff file_size;
if(cfg->use_absolute_end_offset) {
file_size = CLAMP(actual_file_size, 1, cfg->skip_end_offset);
} else {
file_size = actual_file_size * cfg->skip_end_factor;
}

if(type == RM_LINT_TYPE_DUPE_CANDIDATE || type == RM_LINT_TYPE_PART_OF_DIRECTORY) {
/* Check if the actual slice the file will be > 0; we don't want empty files in
* shredder */
if((file_size - start_seek) == 0 && actual_file_size != 0) {
return NULL;
}
}
else {
// report other types as zero-size
actual_file_size = 0;
}

RmFile *self = g_slice_new0(RmFile);
self->session = session;

Expand All @@ -63,31 +82,15 @@ RmFile *rm_file_new(struct RmSession *session, const char *path, RmStat *statp,
self->node = node;

self->depth = depth;
self->file_size = 0;
self->actual_file_size = 0;
self->file_size = file_size;
self->actual_file_size = actual_file_size;
self->n_children = 0;

self->inode = statp->st_ino;
self->dev = statp->st_dev;
self->mtime = rm_sys_stat_mtime_float(statp);
self->is_new = (self->mtime >= cfg->min_mtime);

if(type == RM_LINT_TYPE_DUPE_CANDIDATE || type == RM_LINT_TYPE_PART_OF_DIRECTORY) {
if(cfg->use_absolute_end_offset) {
self->file_size = CLAMP(actual_file_size, 1, cfg->skip_end_offset);
} else {
self->file_size = actual_file_size * cfg->skip_end_factor;
}

/* Check if the actual slice the file will be > 0; we don't want empty files in
* shredder */
if((self->file_size - start_seek) == 0 && actual_file_size != 0) {
return NULL;
}

self->actual_file_size = actual_file_size;
}

self->hash_offset = start_seek;

self->lint_type = type;
Expand All @@ -97,6 +100,7 @@ RmFile *rm_file_new(struct RmSession *session, const char *path, RmStat *statp,
self->path_index = path_index;
self->outer_link_count = -1;

self->ref_count = 1;
return self;
}

Expand Down Expand Up @@ -127,14 +131,28 @@ RmFile *rm_file_copy(RmFile *file) {
copy->signal = NULL;
copy->parent_dir = NULL;
copy->n_children = 0;
copy->ref_count = 1;

return copy;
}

void rm_file_destroy(RmFile *file) {
static gint rm_file_unref_impl(RmFile *file, gboolean unref_hardlinks) {
if(!file) {
return 0;
}
if(!g_atomic_int_dec_and_test(&file->ref_count)) {
// somebody still loves me!
return 0;
}

gint freed = 0;
if(file->hardlinks) {
g_queue_remove(file->hardlinks, file);
if(file->hardlinks->length == 0) {
if(unref_hardlinks) {
freed += rm_util_queue_foreach_remove(file->hardlinks,
(RmRFunc)rm_file_unref_impl, GINT_TO_POINTER(FALSE));
}
else if(file->hardlinks->length==0) {
g_queue_free(file->hardlinks);
}
}
Expand All @@ -143,13 +161,27 @@ void rm_file_destroy(RmFile *file) {
g_free((char *)file->ext_cksum);
}

if(file->free_digest) {
rm_digest_free(file->digest);
if(file->digest) {
rm_digest_unref(file->digest);
}

g_slice_free(RmFile, file);
return 1 + freed;
}

gint rm_file_unref(RmFile *file) {
return rm_file_unref_impl(file, FALSE);
}

gint rm_file_unref_full(RmFile *file) {
return rm_file_unref_impl(file, TRUE);
}

RmFile *rm_file_ref(RmFile *file) {
g_atomic_int_inc (&file->ref_count);
return file;
}


// TODO: this is replicated in formats/pretty.c...
static const char *LINT_TYPES[] = {
[RM_LINT_TYPE_UNKNOWN] = "",
Expand Down
28 changes: 25 additions & 3 deletions lib/file.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ typedef struct RmFile {
* Only filled if type is RM_LINT_TYPE_PART_OF_DIRECTORY.
* */
size_t n_children;

guint ref_count;
} RmFile;

/* Defines a path variable containing the file's path */
Expand Down Expand Up @@ -314,10 +316,30 @@ RmFile *rm_file_new(struct RmSession *session, const char *path, RmStat *statp,
RmNode *node);

/**
* @brief Deallocate the memory allocated by rm_file_new.
* @note does not deallocate file->digest since this is handled by shredder.c
* @brief Decrease reference count to file;
* free resources if this is the last reference.
* @note threadsafe
* @note nullable
* @retval 1 if the file freed else 0
*/
int rm_file_unref(RmFile *file);

/**
* @brief unref file and all the files in its hardlink bundle;
* @note not threadsafe
* @note nullable
* @retval the number of files freed
*/
int rm_file_unref_full(RmFile *file);


/**
* @brief Increase reference count to file;
* @note threadsafe
*/
void rm_file_destroy(RmFile *file);
RmFile *rm_file_ref(RmFile *file);



/**
* @brief add link to head's hardlinks (create hardlinks queue if necessary)
Expand Down
Loading

0 comments on commit 3ff9809

Please sign in to comment.