From 4a807fcaae50adfc5bf07b70fddc71db2dca1df6 Mon Sep 17 00:00:00 2001
From: Bharat123Rox
Date: Mon, 25 Feb 2019 13:36:53 +0530
Subject: [PATCH 1/2] Fix #565 by raising ValueError for empty string

---
 flair/data.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/flair/data.py b/flair/data.py
index 2e5d91e054..ec944407c6 100644
--- a/flair/data.py
+++ b/flair/data.py
@@ -323,6 +323,9 @@ def __init__(self, text: str = None, use_tokenizer: bool = False, labels: Union[
 
             # otherwise assumes whitespace tokenized text
             else:
+                # catch the empty string case
+                if not text:
+                    raise ValueError("Cannot convert empty string to a Sentence object.")
                 # add each word in tokenized string as Token object to Sentence
                 word = ''
                 for index, char in enumerate(text):

From a07fb999d144d9f76ee1939ea2709755fc2d5db3 Mon Sep 17 00:00:00 2001
From: Bharat123Rox
Date: Sat, 2 Mar 2019 13:21:14 +0530
Subject: [PATCH 2/2] Added unit test for empty string case in PR #566

---
 tests/test_data.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/test_data.py b/tests/test_data.py
index b9152ab4cc..34e6b49d72 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -22,6 +22,13 @@ def test_get_head():
     assert (token1 == token2.get_head())
     assert (None == token1.get_head())
 
+def test_create_sentence_on_empty_string():
+
+    with pytest.raises(ValueError) as e:
+        sentence: Sentence = Sentence('')
+
+    assert (e.type is ValueError)
+    assert (e.value.args[0] == "Cannot convert empty string to a Sentence object.")
 
 def test_create_sentence_without_tokenizer():
     sentence: Sentence = Sentence('I love Berlin.')