From cd5754a591e0da2cf71fd8f6a03f8535d5cdc2d0 Mon Sep 17 00:00:00 2001 From: Tenoke Date: Sun, 26 Jan 2020 11:54:07 +0100 Subject: [PATCH] Fix TypeError when using the -m flag Currently, if you attempt to use the script with the --min-article-character flag, you get an error because its value gets parsed as a string while the functions expect an int. This fix addresses the issue. ``` Traceback (most recent call last): File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main "__main__", mod_spec) File "/usr/lib/python3.6/runpy.py", line 85, in _run_code exec(code, run_globals) File "/usr/local/lib/python3.6/dist-packages/gensim/scripts/segment_wiki.py", line 385, in <module> include_interlinks=args.include_interlinks File "/usr/local/lib/python3.6/dist-packages/gensim/scripts/segment_wiki.py", line 141, in segment_and_write_all_articles for idx, article in enumerate(article_stream): File "/usr/local/lib/python3.6/dist-packages/gensim/scripts/segment_wiki.py", line 100, in segment_all_articles for article in wiki_sections_text: File "/usr/local/lib/python3.6/dist-packages/gensim/scripts/segment_wiki.py", line 332, in get_texts_with_sections if sum(len(body.strip()) for (_, body) in sections) < self.min_article_character: TypeError: '<' not supported between instances of 'int' and 'str'``` --- gensim/scripts/segment_wiki.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gensim/scripts/segment_wiki.py b/gensim/scripts/segment_wiki.py index 209b83424c..06f1500bcf 100644 --- a/gensim/scripts/segment_wiki.py +++ b/gensim/scripts/segment_wiki.py @@ -376,6 +376,7 @@ def get_texts_with_sections(self): parser.add_argument( '-m', '--min-article-character', help="Ignore articles with fewer characters than this (article stubs). Default: %(default)s.", + type=int, default=200 ) parser.add_argument(