NLP augmentation with the crop action is failing with the error "Sample larger than population or is negative".

aug = naw.RandomWordAug(action='crop', aug_p=0.5, aug_min=0)
train_st_data_crop_aug['sentence_aug'] = train_st_data_crop_aug.apply(lambda x: aug.augment(x['sentence']), axis=1)  # a contiguous set of words is removed randomly
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input> in <module>()
      7 print(augmented_text)
      8 train_st_data_crop_aug = train_st_data
----> 9 train_st_data_crop_aug['sentence_aug'] = train_st_data_crop_aug.apply(lambda x: aug.augment(x['sentence']), axis=1)  # a contiguous set of words is removed randomly

/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
   7550             kwds=kwds,
   7551         )
-> 7552         return op.get_result()
   7553
   7554     def applymap(self, func) -> "DataFrame":

/usr/local/lib/python3.6/dist-packages/pandas/core/apply.py in get_result(self)
    178             return self.apply_raw()
    179
--> 180         return self.apply_standard()
    181
    182     def apply_empty_result(self):

/usr/local/lib/python3.6/dist-packages/pandas/core/apply.py in apply_standard(self)
    269
    270     def apply_standard(self):
--> 271         results, res_index = self.apply_series_generator()
    272
    273         # wrap results

/usr/local/lib/python3.6/dist-packages/pandas/core/apply.py in apply_series_generator(self)
    298             for i, v in enumerate(series_gen):
    299                 # ignore SettingWithCopy here in case the user mutates
--> 300                 results[i] = self.f(v)
    301                 if isinstance(results[i], ABCSeries):
    302                     # If we have a view on v, we need to make a copy because

<ipython-input> in <lambda>(x)
      7 print(augmented_text)
      8 train_st_data_crop_aug = train_st_data
----> 9 train_st_data_crop_aug['sentence_aug'] = train_st_data_crop_aug.apply(lambda x: aug.augment(x['sentence']), axis=1)  # a contiguous set of words is removed randomly

/usr/local/lib/python3.6/dist-packages/nlpaug/base_augmenter.py in augment(self, data, n, num_thread)
    113         # Single input with/without multiple input
    114         else:
--> 115             augmented_results = self._parallel_augment(action_fx, clean_data, n=n, num_thread=num_thread)
    116
    117         if len(augmented_results) >= expected_output_num:

/usr/local/lib/python3.6/dist-packages/nlpaug/base_augmenter.py in _parallel_augment(cls, action_fx, data, n, num_thread)
    174     def _parallel_augment(cls, action_fx, data, n, num_thread=2):
    175         pool = ThreadPool(num_thread)
--> 176         results = pool.map(action_fx, [data] * n)
    177         pool.close()
    178         pool.join()

/usr/lib/python3.6/multiprocessing/pool.py in map(self, func, iterable, chunksize)
    264         in a list that is returned.
    265         '''
--> 266         return self._map_async(func, iterable, mapstar, chunksize).get()
    267
    268     def starmap(self, func, iterable, chunksize=None):

/usr/lib/python3.6/multiprocessing/pool.py in get(self, timeout)
    642             return self._value
    643         else:
--> 644             raise self._value
    645
    646     def _set(self, i, obj):

/usr/lib/python3.6/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
    117         job, i, func, args, kwds = task
    118         try:
--> 119             result = (True, func(*args, **kwds))
    120         except Exception as e:
    121             if wrap_exception and func is not _helper_reraises_exception:

/usr/lib/python3.6/multiprocessing/pool.py in mapstar(args)
     42
     43 def mapstar(args):
---> 44     return list(map(*args))
     45
     46 def starmapstar(args):

/usr/local/lib/python3.6/dist-packages/nlpaug/augmenter/word/random.py in crop(self, data)
    185         doc = Doc(data, self.tokenizer(data))
    186
--> 187         aug_idxes = self._get_aug_range_idxes(doc.get_original_tokens())
    188         aug_idxes.sort(reverse=True)
    189

/usr/local/lib/python3.6/dist-packages/nlpaug/augmenter/word/word_augmenter.py in _get_aug_range_idxes(self, tokens)
    105         word_idxes = [i for i, _ in enumerate(tokens[aug_cnt-1:])]
    106
--> 107         start_aug_idx = self.sample(word_idxes, 1)[0]
    108         aug_idxes = [start_aug_idx + _*direction for _ in range(aug_cnt)]
    109

/usr/local/lib/python3.6/dist-packages/nlpaug/base_augmenter.py in sample(cls, x, num)
    222     def sample(cls, x, num=None):
    223         if isinstance(x, list):
--> 224             return random.sample(x, num)
    225         elif isinstance(x, int):
    226             return np.random.randint(1, x-1)

/usr/lib/python3.6/random.py in sample(self, population, k)
    318         n = len(population)
    319         if not 0 <= k <= n:
--> 320             raise ValueError("Sample larger than population or is negative")
    321         result = [None] * k
    322         setsize = 21        # size of a small set minus size of an empty list

ValueError: Sample larger than population or is negative
It can be reproduced with the following code. The root cause is that at least one of the elements does not contain any text. It will be fixed in the next release; meanwhile, you can check the input to prevent this exception:

aug = naw.RandomWordAug(action='crop', aug_p=0.5, aug_min=0)
aug.augment([''])
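
Until the fix is released, one way to check the input is to skip empty or whitespace-only sentences before augmenting. A minimal sketch, reusing the DataFrame and column names from the snippet above (safe_augment is a hypothetical helper, not part of nlpaug):

import nlpaug.augmenter.word as naw

aug = naw.RandomWordAug(action='crop', aug_p=0.5, aug_min=0)

def safe_augment(sentence):
    # crop needs at least one token to sample a start index from,
    # so return empty/whitespace-only input unchanged
    if not isinstance(sentence, str) or not sentence.strip():
        return sentence
    return aug.augment(sentence)

train_st_data_crop_aug['sentence_aug'] = train_st_data_crop_aug.apply(
    lambda x: safe_augment(x['sentence']), axis=1)

Note that depending on the nlpaug version, augment may return a list rather than a string, in which case the result needs unwrapping.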