Skip to content

Commit

Permalink
Now only compare ngrams of the same size
Browse files Browse the repository at this point in the history
  • Loading branch information
ryanpeach committed Nov 2, 2024
1 parent 3e25bf0 commit ca93eed
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub struct Config {
#[builder(default=r"-|_|\s".to_owned())]
pub filename_spacing_pattern: String,
/// See [`self::cli::Config::filename_match_threshold`]
#[builder(default = 100)]
#[builder(default = 95)]
pub filename_match_threshold: i64,
/// See [`self::cli::Config::exclude`]
#[builder(default=vec![])]
Expand Down
10 changes: 6 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,16 @@ pub fn lib(config: &config::Config) -> Result<OutputReport> {

// All our reports
// NOTE: Always use `filter_by_excludes` and `dedupe_by_code` on the reports
let similar_filenames = SimilarFilename::calculate(
let mut similar_filenames = SimilarFilename::calculate(
&file_ngrams,
config.filename_match_threshold,
&filename_spacing_regex,
)
.map_err(|e| miette!("From SimilarFilename: {e}"))?
.filter_by_excludes(config.exclude.clone())
.dedupe_by_code();
.map_err(|e| miette!("From SimilarFilename: {e}"))?;
similar_filenames.sort_by(|b, a| a.partial_cmp(b).expect("This never fails"));
let similar_filenames = similar_filenames
.filter_by_excludes(config.exclude.clone())
.dedupe_by_code();

let duplicate_aliases =
DuplicateAlias::calculate(get_files(config.directories.clone()), config)
Expand Down
13 changes: 13 additions & 0 deletions src/rules/similar_filename.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ pub struct SimilarFilename {
/// Used to identify the diagnostic and exclude it if needed
id: String,

score: i64,

#[source_code]
filepaths: String,

Expand All @@ -29,6 +31,12 @@ pub struct SimilarFilename {
advice: String,
}

/// Orders [`SimilarFilename`] diagnostics by their `score` field alone.
///
/// No other field takes part in the comparison, so two diagnostics whose
/// scores match compare as `Equal` here regardless of their `id`.
impl PartialOrd for SimilarFilename {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        // `score` is an `i64`, which is totally ordered, so the result is
        // always `Some`.
        let ordering = self.score.cmp(&rhs.score);
        Some(ordering)
    }
}

impl PartialEq for SimilarFilename {
fn eq(&self, other: &Self) -> bool {
self.id == other.id
Expand Down Expand Up @@ -109,6 +117,7 @@ impl SimilarFilename {
);
Ok(Self {
id,
score,
filepaths,
file1_ngram,
file2_ngram,
Expand All @@ -134,6 +143,10 @@ impl SimilarFilename {
for (i, (ngram, filepath)) in file_ngrams.clone().iter().enumerate() {
// We can start at i + 1 because we've already checked all previous files
for (other_ngram, other_filepath) in file_ngrams.iter().skip(i + 1) {
if ngram.nb_words() != other_ngram.nb_words() {
continue;
}

file_crosscheck_bar.inc(1);

// Skip if the same file
Expand Down

0 comments on commit ca93eed

Please sign in to comment.