diff --git a/app/services/language_filter.rb b/app/services/language_filter.rb index 2276f590..1bb2ab6e 100644 --- a/app/services/language_filter.rb +++ b/app/services/language_filter.rb @@ -10,6 +10,7 @@ class LanguageFilter def initialize(filter_file_path = Rails.root.join('config', 'language_filter.yml')) @filter_data = YAML.load_file(filter_file_path) @terms = @filter_data.keys.sort { |a, b| b.length <=> a.length } + @term_regexes = @terms.map { |term| [Regexp.new(Regexp.escape(term), Regexp::IGNORECASE), @filter_data[term]['replacement']] }.to_h end # Checks if the input is valid, i.e., doesn't need replacement. @@ -19,11 +20,7 @@ def initialize(filter_file_path = Rails.root.join('config', 'language_filter.yml def valid?(input) return true if input.blank? - @terms.each do |term| - return false if input.include?(term) - end - - true + @term_regexes.keys.none? { |regex| input.match?(regex) } end # Gets the filtered version of the input text. @@ -35,8 +32,8 @@ def filter(input) output = input.dup - @terms.each do |term| - output.gsub!(term, @filter_data[term]['replacement']) if output.include?(term) + @term_regexes.each do |regex, replacement| + output.gsub!(regex, replacement) end output diff --git a/config/language_filter.yml b/config/language_filter.yml index a9ba1992..5804a9e9 100644 --- a/config/language_filter.yml +++ b/config/language_filter.yml @@ -1,11 +1,11 @@ -"Gender identity disorder": +"gender identity disorder": replacement: "Gender dysphoria" rationale: "https://docs.google.com/spreadsheets/d/1uqiP5PPKXJt35uFLrG5ytsLbz2kgWXBjG__hyD12KlY/edit?gid=0#gid=0" -"Gender identity disorders": +"gender identity disorders": replacement: "Gender dysphoria" -"Gender identity disorders in children": +"gender identity disorders in children": replacement: "Gender dysphoria in children" -"Gender identity disorders in adolescence": +"gender identity disorders in adolescence": replacement: "Gender dysphoria in adolescence" "african american gays in literature": replacement: "African American gay people in literature" diff --git a/spec/services/language_filter_spec.rb b/spec/services/language_filter_spec.rb index ac78d6ac..d4bd901d 100644 --- a/spec/services/language_filter_spec.rb +++ b/spec/services/language_filter_spec.rb @@ -22,19 +22,21 @@ it 'returns true when input is valid' do input = "Georgia" - expect(filter.valid?(input)).to eq(true) end it 'returns false when input is invalid' do input = "Gender identity disorder" + expect(filter.valid?(input)).to eq(false) + end + it 'returns false when input is invalid with different case' do + input = "gEnDeR iDeNtItY dIsOrDeR" expect(filter.valid?(input)).to eq(false) end it 'returns true when input is nil' do input = nil - expect(filter.valid?(input)).to eq(true) end end @@ -59,7 +61,12 @@ it 'prioritizes terms with higher length during replacement' do input = 'Gender identity disorders' expected = 'Gender dysphoria' + expect(filter.filter(input)).to eq(expected) + end + it 'replaces harmful text with corresponding replacements regardless of case' do + input = 'gEnDeR iDeNtItY dIsOrDeR' + expected = 'Gender dysphoria' expect(filter.filter(input)).to eq(expected) end @@ -70,6 +77,12 @@ expect(filter.filter(input)).to eq(expected) end + it 'replaces harmful terms when multiple terms are combined regardless of case' do + input = 'GeNdEr IdEnTiTy DiSoRdErS--United States' + expected = 'Gender dysphoria--United States' + expect(filter.filter(input)).to eq(expected) + end + it 'returns nil when input is nil' do input = nil expected = nil