Skip to content

Commit cddd817

Browse files
author
Yi Zhang
committed
update
1 parent 8242f1f commit cddd817

File tree

3 files changed

+73
-0
lines changed

3 files changed

+73
-0
lines changed

defence/autocorrector.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# from autocorrect import Speller
2+
# spell = Speller(lang='en')
3+
# text = "Ths is a smple text with speling mistakes."
4+
# corrected_text = ' '.join([spell(word) for word in text.split()])
5+
# print(corrected_text)
6+
7+
# Shared Speller instance, created on first use by _get_default_speller().
_DEFAULT_SPELLER = None


def _get_default_speller():
    """Return the shared English ``Speller``, creating it on first use."""
    global _DEFAULT_SPELLER
    if _DEFAULT_SPELLER is None:
        # Imported lazily so the autocorrect package is only needed when the
        # default speller is actually used.
        from autocorrect import Speller
        _DEFAULT_SPELLER = Speller(lang='en')
    return _DEFAULT_SPELLER


def defence_autocorrect(influent_sentence, spell=None):
    """Spell-correct a sentence word by word.

    Args:
        influent_sentence: The (possibly misspelled) input text.
        spell: Optional callable that maps a single word to its corrected
            form. When omitted, a shared ``autocorrect.Speller(lang='en')``
            is used; it is built once and reused across calls instead of
            being rebuilt for every sentence.

    Returns:
        The corrected words joined by single spaces. Because the input is
        split on whitespace, runs of whitespace collapse to one space and an
        empty or whitespace-only input yields ``""``.
    """
    if spell is None:
        spell = _get_default_speller()
    return ' '.join(spell(word) for word in influent_sentence.split())
13+
14+
15+

defence/gramformer_gec.py

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
'''
2+
The Gramformer project for GEC
3+
4+
https://github.com/PrithivirajDamodaran/Gramformer
5+
6+
pip install -U git+https://github.com/PrithivirajDamodaran/Gramformer.git
7+
python -m spacy download en_core_web_sm
8+
9+
'''
10+
from gramformer import Gramformer
11+
import torch
12+
from tqdm import tqdm
13+
import pandas as pd
14+
# def set_seed(seed):
15+
# torch.manual_seed(seed)
16+
# if torch.cuda.is_available():
17+
# torch.cuda.manual_seed_all(seed)
18+
19+
# set_seed(1212)
20+
21+
# Script: run Gramformer grammar correction over the sentences stored in
# ./result_1.csv and count how many corrections exactly equal TARGET_SENTENCE.

# The reference sentence a correction must match exactly to be counted.
TARGET_SENTENCE = "A sunny beach with a palm tree and a surfboard lying in the sand."

# Load only the corrector model, on CPU (models: 1=corrector, 2=detector).
gf = Gramformer(models=1, use_gpu=False)

df = pd.read_csv('./result_1.csv')
sample_data = list(df["Column 1"].dropna())
# Skip the first entry (presumably a header row -- confirm against the CSV)
# and keep only the text before the first ':' of each remaining entry.
strings = [line.split(':')[0].strip() for line in sample_data[1:]]
print(len(strings))

influent_sentences = strings

print("origin:", len(influent_sentences))

count = 0
for influent_sentence in influent_sentences:
    corrected_sentences = gf.correct(influent_sentence, max_candidates=1)
    print("[Input] ", influent_sentence)
    for corrected_sentence in corrected_sentences:
        print("[Correction] ", corrected_sentence)
        # Count only corrections that reproduce the reference sentence exactly.
        if corrected_sentence == TARGET_SENTENCE:
            count += 1
print("count:", count)

defence/spellchecker_gec.py

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from spellchecker import SpellChecker
2+
import pandas as pd
3+
4+
# Script: spell-correct one perturbed sample sentence with pyspellchecker and
# print the result.


def _split_affixes(token):
    """Split *token* into (leading punctuation, core word, trailing punctuation).

    The core is the span between the first and last alphanumeric character, so
    ``"road."`` becomes ``("", "road", ".")``. A token with no alphanumeric
    character yields an empty core.
    """
    start = 0
    end = len(token)
    while start < end and not token[start].isalnum():
        start += 1
    while end > start and not token[end - 1].isalnum():
        end -= 1
    return token[:start], token[start:end], token[end:]


def correct_sentence(sentence, checker):
    """Return *sentence* with every word unknown to *checker* replaced.

    Args:
        sentence: Text to correct; it is split on whitespace.
        checker: Object exposing ``unknown(words)`` and ``correction(word)``
            (a ``SpellChecker`` instance in this script).

    Returns:
        The words joined by single spaces. Surrounding punctuation is kept on
        each word, and a word is left untouched when the checker offers no
        correction for it.
    """
    tokens = sentence.split()
    pieces = [_split_affixes(token) for token in tokens]
    # Punctuation is removed before the lookup so that e.g. "road." is not
    # reported as misspelled merely because of the trailing full stop.
    misspelled = checker.unknown([core for _, core, _ in pieces if core])

    corrected = []
    for token, (lead, core, trail) in zip(tokens, pieces):
        # unknown() reports lower-cased words for a case-insensitive checker,
        # so the lower-cased form is compared as well; otherwise words such as
        # "Xhelmet" or "dPirt" would never be recognised as misspelled.
        is_unknown = bool(core) and (core in misspelled or core.lower() in misspelled)
        suggestion = checker.correction(core) if is_unknown else None
        # correction() may return None when it has no candidate; keep the
        # original token in that case instead of breaking the join below.
        corrected.append(lead + suggestion + trail if suggestion else token)
    return " ".join(corrected)


spell = SpellChecker()

# Perturbed sample sentences. Only the last one is processed, matching the
# original script in which each assignment to str1 overwrote the previous one.
SAMPLE_SENTENCES = (
    "A sunny beach with a pXalm tree and a surgfboard lying in the sand",
    "A serene lake surrounded by mountains with a small boat and qducks.",
    "A man with a red Xhelmet on a small moped on a dirt road.",
    "A man with a red helmet on a small moped on a dPirt road.",
)
str1 = SAMPLE_SENTENCES[-1]

corrected_sentence = correct_sentence(str1, spell)

print(corrected_sentence)
16+
17+

0 commit comments

Comments
 (0)