Commit cddd817 Yi Zhang
committed
1 parent 8242f1f commit cddd817 Copy full SHA for cddd817
File tree 3 files changed +73
-0
lines changed
3 files changed +73
-0
lines changed Original file line number Diff line number Diff line change
1
+ # from autocorrect import Speller
2
+ # spell = Speller(lang='en')
3
+ # text = "Ths is a smple text with speling mistakes."
4
+ # corrected_text = ' '.join([spell(word) for word in text.split()])
5
+ # print(corrected_text)
6
+
7
def defence_autocorrect(influent_sentence, speller=None):
    """Spell-correct a sentence word by word.

    The sentence is split on whitespace, every token is passed through the
    speller on its own, and the results are joined back with single spaces
    (so runs of whitespace in the input collapse to one space).

    Args:
        influent_sentence: Text to correct.
        speller: Optional callable mapping one word to its corrected form.
            When omitted, an English ``autocorrect.Speller`` is created on
            first use and reused by later calls instead of being rebuilt
            on every call.

    Returns:
        The corrected sentence as a single string.
    """
    if speller is None:
        speller = getattr(defence_autocorrect, "_default_speller", None)
        if speller is None:
            # Imported lazily, as in the original, so this module can be
            # loaded even when autocorrect is not needed.
            from autocorrect import Speller
            speller = Speller(lang='en')
            # Keep the instance around: later calls reuse it.
            defence_autocorrect._default_speller = speller
    return ' '.join(speller(word) for word in influent_sentence.split())
13
+
14
+
15
+
Original file line number Diff line number Diff line change
1
+ '''
2
+ The Gramformer project for grammatical error correction (GEC)
3
+
4
+ https://github.com/PrithivirajDamodaran/Gramformer
5
+
6
+ pip install -U git+https://github.com/PrithivirajDamodaran/Gramformer.git
7
+ python -m spacy download en_core_web_sm
8
+
9
+ '''
10
+ from gramformer import Gramformer
11
+ import torch
12
+ from tqdm import tqdm
13
+ import pandas as pd
14
+ # def set_seed(seed):
15
+ # torch.manual_seed(seed)
16
+ # if torch.cuda.is_available():
17
+ # torch.cuda.manual_seed_all(seed)
18
+
19
+ # set_seed(1212)
20
+
21
# Gramformer instance limited to the corrector model, running on CPU.
gf = Gramformer(models=1, use_gpu=False)  # 1=corrector, 2=detector

# A correction is counted only when it equals this sentence exactly.
TARGET_SENTENCE = "A sunny beach with a palm tree and a surfboard lying in the sand."

df = pd.read_csv('./result_1.csv')
# NOTE(review): as written, the column label evaluates to "Column 1 " with a
# trailing space -- confirm this matches the header in result_1.csv.
sample_data = list(df[f"Column { 1 } "].dropna())

# Skip the first non-null entry; for every other entry keep only the text
# before the first ':' with surrounding whitespace removed.
strings = []
for line in sample_data[1:]:
    strings.append(line.split(':')[0].strip())
print(len(strings))

influent_sentences = strings

print("origin:", len(influent_sentences))

# Run each sentence through the corrector, echo input and candidates, and
# count how many candidates match the target sentence.
count = 0
for influent_sentence in influent_sentences:
    corrected_sentences = gf.correct(influent_sentence, max_candidates=1)
    print("[Input] ", influent_sentence)
    for corrected_sentence in corrected_sentences:
        print("[Correction] ", corrected_sentence)
        if corrected_sentence == TARGET_SENTENCE:
            count += 1
print("count:", count)
Original file line number Diff line number Diff line change
1
+ from spellchecker import SpellChecker
2
+ import pandas as pd
3
+
4
spell = SpellChecker()

# Sample sentences with injected typos. Only the last one is processed,
# matching the original chain of reassignments to ``str1``.
SAMPLE_SENTENCES = (
    "A sunny beach with a pXalm tree and a surgfboard lying in the sand",
    "A serene lake surrounded by mountains with a small boat and qducks.",
    "A man with a red Xhelmet on a small moped on a dirt road.",
    "A man with a red helmet on a small moped on a dPirt road.",
)
str1 = SAMPLE_SENTENCES[-1]

# Tokens are split on whitespace only, so punctuation stays attached to the
# neighbouring word (e.g. 'qducks.').
str2 = str1.split()
misspelled = spell.unknown(str2)  # {'qducks.'}


def _fix_word(word):
    """Return the suggested spelling for *word*, or *word* itself.

    ``SpellChecker.unknown`` reports words in lower case, so the membership
    test lower-cases the token too; otherwise tokens containing capitals
    (such as 'dPirt') would never be corrected.  ``correction`` may return
    ``None`` when it has no suggestion, in which case the token is kept.
    """
    if word.lower() not in misspelled:
        return word
    return spell.correction(word) or word


corrected_sentence = " ".join(_fix_word(word) for word in str2)

print(corrected_sentence)
16
+
17
+
You can’t perform that action at this time.
0 commit comments