NLP augmentation with the crop action is failing with the error "Sample larger than population or is negative".

aug = naw.RandomWordAug(action='crop', aug_p=0.5, aug_min=0)
train_st_data_crop_aug['sentence_aug'] = train_st_data_crop_aug.apply(lambda x: aug.augment(x['sentence']), axis=1)  # a contiguous set of words is removed randomly
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input> in <module>()
      7 print(augmented_text)
      8 train_st_data_crop_aug = train_st_data
----> 9 train_st_data_crop_aug['sentence_aug'] = train_st_data_crop_aug.apply(lambda x: aug.augment(x['sentence']), axis=1)  # a contiguous set of words is removed randomly

/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
   7550             kwds=kwds,
   7551         )
-> 7552         return op.get_result()
   7553
   7554     def applymap(self, func) -> "DataFrame":

/usr/local/lib/python3.6/dist-packages/pandas/core/apply.py in get_result(self)
    178             return self.apply_raw()
    179
--> 180         return self.apply_standard()
    181
    182     def apply_empty_result(self):

/usr/local/lib/python3.6/dist-packages/pandas/core/apply.py in apply_standard(self)
    269
    270     def apply_standard(self):
--> 271         results, res_index = self.apply_series_generator()
    272
    273         # wrap results

/usr/local/lib/python3.6/dist-packages/pandas/core/apply.py in apply_series_generator(self)
    298             for i, v in enumerate(series_gen):
    299                 # ignore SettingWithCopy here in case the user mutates
--> 300                 results[i] = self.f(v)
    301                 if isinstance(results[i], ABCSeries):
    302                     # If we have a view on v, we need to make a copy because

<ipython-input> in <lambda>(x)
      7 print(augmented_text)
      8 train_st_data_crop_aug = train_st_data
----> 9 train_st_data_crop_aug['sentence_aug'] = train_st_data_crop_aug.apply(lambda x: aug.augment(x['sentence']), axis=1)  # a contiguous set of words is removed randomly

/usr/local/lib/python3.6/dist-packages/nlpaug/base_augmenter.py in augment(self, data, n, num_thread)
    113         # Single input with/without multiple input
    114         else:
--> 115             augmented_results = self._parallel_augment(action_fx, clean_data, n=n, num_thread=num_thread)
    116
    117         if len(augmented_results) >= expected_output_num:

/usr/local/lib/python3.6/dist-packages/nlpaug/base_augmenter.py in _parallel_augment(cls, action_fx, data, n, num_thread)
    174     def _parallel_augment(cls, action_fx, data, n, num_thread=2):
    175         pool = ThreadPool(num_thread)
--> 176         results = pool.map(action_fx, [data] * n)
    177         pool.close()
    178         pool.join()

/usr/lib/python3.6/multiprocessing/pool.py in map(self, func, iterable, chunksize)
    264         in a list that is returned.
    265         '''
--> 266         return self._map_async(func, iterable, mapstar, chunksize).get()
    267
    268     def starmap(self, func, iterable, chunksize=None):

/usr/lib/python3.6/multiprocessing/pool.py in get(self, timeout)
    642             return self._value
    643         else:
--> 644             raise self._value
    645
    646     def _set(self, i, obj):

/usr/lib/python3.6/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
    117         job, i, func, args, kwds = task
    118         try:
--> 119             result = (True, func(*args, **kwds))
    120         except Exception as e:
    121             if wrap_exception and func is not _helper_reraises_exception:

/usr/lib/python3.6/multiprocessing/pool.py in mapstar(args)
     42
     43 def mapstar(args):
---> 44     return list(map(*args))
     45
     46 def starmapstar(args):

/usr/local/lib/python3.6/dist-packages/nlpaug/augmenter/word/random.py in crop(self, data)
    185         doc = Doc(data, self.tokenizer(data))
    186
--> 187         aug_idxes = self._get_aug_range_idxes(doc.get_original_tokens())
    188         aug_idxes.sort(reverse=True)
    189

/usr/local/lib/python3.6/dist-packages/nlpaug/augmenter/word/word_augmenter.py in _get_aug_range_idxes(self, tokens)
    105         word_idxes = [i for i, _ in enumerate(tokens[aug_cnt-1:])]
    106
--> 107         start_aug_idx = self.sample(word_idxes, 1)[0]
    108         aug_idxes = [start_aug_idx + _*direction for _ in range(aug_cnt)]
    109

/usr/local/lib/python3.6/dist-packages/nlpaug/base_augmenter.py in sample(cls, x, num)
    222     def sample(cls, x, num=None):
    223         if isinstance(x, list):
--> 224             return random.sample(x, num)
    225         elif isinstance(x, int):
    226             return np.random.randint(1, x-1)

/usr/lib/python3.6/random.py in sample(self, population, k)
    318         n = len(population)
    319         if not 0 <= k <= n:
--> 320             raise ValueError("Sample larger than population or is negative")
    321         result = [None] * k
    322         setsize = 21        # size of a small set minus size of an empty list

ValueError: Sample larger than population or is negative
It can be reproduced with the following code. The root cause is that at least one of the elements does not contain any text. It will be fixed in the next release; meanwhile, you can check the input to prevent this exception:

aug = naw.RandomWordAug(action='crop', aug_p=0.5, aug_min=0)
aug.augment([''])
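
Until the fix is released, one way to check the input is to skip empty or whitespace-only sentences before augmenting. A minimal sketch, reusing the DataFrame and column names from the snippet above (safe_augment is a hypothetical helper, not part of nlpaug):

import nlpaug.augmenter.word as naw

aug = naw.RandomWordAug(action='crop', aug_p=0.5, aug_min=0)

def safe_augment(sentence):
    # crop needs at least one token to sample a start index from,
    # so return empty/whitespace-only input unchanged
    if not isinstance(sentence, str) or not sentence.strip():
        return sentence
    return aug.augment(sentence)

train_st_data_crop_aug['sentence_aug'] = train_st_data_crop_aug.apply(
    lambda x: safe_augment(x['sentence']), axis=1)

Note that depending on the nlpaug version, augment may return a list rather than a string, in which case the result needs unwrapping.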