Skip to content

Commit

Permalink
Fix breaking changes (#87)
Browse files Browse the repository at this point in the history
* fix gsl warning spam

* wip fix breaking changes, to be not so breaking

* convert raise to warning
  • Loading branch information
Ch4s3 authored Dec 31, 2016
1 parent bb5726c commit 649a380
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 39 deletions.
4 changes: 3 additions & 1 deletion classifier-reborn.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ Gem::Specification.new do |s|

s.add_development_dependency('rake')
s.add_development_dependency('rdoc')
s.add_development_dependency('test-unit')
s.add_development_dependency('minitest')
s.add_development_dependency('minitest-reporters')
s.add_development_dependency('rubocop')
s.add_development_dependency('pry')
end
33 changes: 20 additions & 13 deletions lib/classifier-reborn/lsi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
begin
raise LoadError if ENV['NATIVE_VECTOR'] == 'true' # to test the native vector class, try `rake test NATIVE_VECTOR=true`

require 'gsl' # requires https://github.com/blackwinter/rb-gsl
require 'gsl' # requires https://github.com/SciRuby/rb-gsl
require_relative 'extensions/vector_serialize'
$GSL = true

rescue LoadError
$GSL = false
require_relative 'extensions/vector'
end

Expand Down Expand Up @@ -65,15 +66,16 @@ def needs_rebuild?
def add_item(item, *categories, &block)
clean_word_hash = Hasher.clean_word_hash((block ? block.call(item) : item.to_s), @language)
if clean_word_hash.empty?
raise "#{item} is composed entirely of stopwords and words that are 2 characters or less. Classifier-Reborn cannot handle this document properly, and thus summarily rejected it."
puts "Input: '#{item}' is entirely stopwords or words with 2 or fewer characters. Classifier-Reborn cannot handle this document properly."
else
@items[item] = if @cache_node_vectors
CachedContentNode.new(clean_word_hash, *categories)
else
ContentNode.new(clean_word_hash, *categories)
end
@version += 1
build_index if @auto_rebuild
end
@items[item] = if @cache_node_vectors
CachedContentNode.new(clean_word_hash, *categories)
else
ContentNode.new(clean_word_hash, *categories)
end
@version += 1
build_index if @auto_rebuild
end

# A less flexible shorthand for add_item that assumes
Expand Down Expand Up @@ -203,11 +205,14 @@ def proximity_norms_for_content(doc, &block)
return [] if needs_rebuild?

content_node = node_for_content(doc, &block)

if $GSL && content_node.raw_norm.isnan?.all?
raise "There are no documents that are similar to #{doc}"
puts "There are no documents that are similar to #{doc}"
else
content_node_norms(content_node)
end
end

def content_node_norms(content_node)
result =
@items.keys.collect do |item|
if $GSL
Expand All @@ -230,8 +235,10 @@ def proximity_norms_for_content(doc, &block)
def search(string, max_nearest = 3)
return [] if needs_rebuild?
carry = proximity_norms_for_content(string)
result = carry.collect { |x| x[0] }
result[0..max_nearest - 1]
unless carry.nil?
result = carry.collect { |x| x[0] }
result[0..max_nearest - 1]
end
end

# This function takes content and finds other documents
Expand Down
12 changes: 6 additions & 6 deletions test/bayes/bayesian_test.rb
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
# encoding: utf-8

require File.dirname(__FILE__) + '/../test_helper'
class BayesianTest < Test::Unit::TestCase
class BayesianTest < Minitest::Test
def setup
@classifier = ClassifierReborn::Bayes.new 'Interesting', 'Uninteresting'
end

def test_good_training
assert_nothing_raised { @classifier.train_interesting 'love' }
assert_equal ['love'], @classifier.train_interesting('love')
end

def test_training_with_utf8
assert_nothing_raised { @classifier.train_interesting 'Água' }
assert_equal ['Água'], @classifier.train_interesting('Água')
end

def test_stemming_enabled_by_default
assert @classifier.stemmer_enabled?
end

def test_bad_training
assert_raise(StandardError) { @classifier.train_no_category 'words' }
assert_raises(StandardError) { @classifier.train_no_category 'words' }
end

def test_bad_method
assert_raise(NoMethodError) { @classifier.forget_everything_you_know '' }
assert_raises(NoMethodError) { @classifier.forget_everything_you_know '' }
end

def test_categories
Expand Down Expand Up @@ -120,6 +120,6 @@ def test_untrain
classification_of_bad_data = @classifier.classify 'seven'
@classifier.untrain_colors 'seven'
classification_after_untrain = @classifier.classify 'seven'
assert_not_equal classification_of_bad_data, classification_after_untrain
refute_equal classification_of_bad_data, classification_after_untrain
end
end
14 changes: 7 additions & 7 deletions test/extensions/hasher_test.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
require_relative '../test_helper'
require 'tempfile'

class HasherTest < Test::Unit::TestCase
class HasherTest < Minitest::Test
def setup
@original_stopwords_path = Hasher::STOPWORDS_PATH.dup
end
Expand All @@ -22,8 +22,8 @@ def test_clean_word_hash_without_stemming
end

def test_default_stopwords
assert_not_empty Hasher::STOPWORDS['en']
assert_not_empty Hasher::STOPWORDS['fr']
refute_empty Hasher::STOPWORDS['en']
refute_empty Hasher::STOPWORDS['fr']
assert_empty Hasher::STOPWORDS['gibberish']
end

Expand All @@ -38,17 +38,17 @@ def test_loads_custom_stopwords

custom_english_stopwords = Hasher::STOPWORDS['en']

assert_not_equal default_english_stopwords, custom_english_stopwords
refute_equal default_english_stopwords, custom_english_stopwords
end

def test_add_custom_stopword_path
# Create stopword tempfile in current directory
temp_stopwords = Tempfile.new('xy', "#{File.dirname(__FILE__) + "/"}")

# Add some stopwords to tempfile
temp_stopwords << "this words fun"
temp_stopwords.close
temp_stopwords.close

# Get path of tempfile
temp_stopwords_path = File.dirname(temp_stopwords)

Expand Down
14 changes: 5 additions & 9 deletions test/lsi/lsi_test.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
require File.dirname(__FILE__) + '/../test_helper'

class LSITest < Test::Unit::TestCase
class LSITest < Minitest::Test
def setup
# we repeat principle words to help weight them.
# This test is rather delicate, since this system is mostly noise.
Expand Down Expand Up @@ -86,7 +86,7 @@ def test_external_classifying
# will fail here, but the LSI recognizes content.
tricky_case = 'This text revolves around dogs.'
assert_equal 'Dog', lsi.classify(tricky_case)
assert_not_equal 'Dog', bayes.classify(tricky_case)
refute_equal 'Dog', bayes.classify(tricky_case)
end

def test_recategorize_interface
Expand Down Expand Up @@ -189,16 +189,12 @@ def test_invalid_searching_when_using_gsl
lsi.add_item @str3, 'Cat'
lsi.add_item @str4, 'Cat'
lsi.add_item @str5, 'Bird'
assert_raises RuntimeError do
lsi.search('penguin')
end
assert_output(/There are no documents that are similar to penguin/) { lsi.search('penguin') }
end

def test_raise_error_when_adding_bad_document
def test_warn_when_adding_bad_document
lsi = ClassifierReborn::LSI.new
assert_raises RuntimeError do
lsi.add_item("i can")
end
assert_output(/Input: 'i can' is entirely stopwords or words with 2 or fewer characters. Classifier-Reborn cannot handle this document properly./) { lsi.add_item("i can") }
end

def test_summary
Expand Down
2 changes: 1 addition & 1 deletion test/lsi/word_list_test.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
require_relative '../test_helper'

class WordListTest < Test::Unit::TestCase
class WordListTest < Minitest::Test
def test_size_does_not_count_words_twice
list = ClassifierReborn::WordList.new
assert list.size == 0
Expand Down
6 changes: 4 additions & 2 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../lib')

require 'test/unit'
require 'minitest/autorun'
require 'minitest/reporters'
Minitest::Reporters.use!
require 'pry'
require 'classifier-reborn'

include ClassifierReborn

0 comments on commit 649a380

Please sign in to comment.