update

Yi Zhang · Yi Zhang · commit cddd817fc5ba · 2024-02-23T15:20:58.000Z
diff --git a/defence/autocorrector.py b/defence/autocorrector.py
@@ -0,0 +1,15 @@
+# from autocorrect import Speller
+# spell = Speller(lang='en')
+# text = "Ths is a smple text with speling mistakes."
+# corrected_text = ' '.join([spell(word) for word in text.split()])
+# print(corrected_text)
+
+def defence_autocorrect(influent_sentence):
+    """Return ``influent_sentence`` with every whitespace-separated word spell-corrected."""
+    speller = getattr(defence_autocorrect, "_speller", None)
+    if speller is None:
+        # Build the English Speller once and reuse it instead of recreating it per call.
+        from autocorrect import Speller
+        speller = defence_autocorrect._speller = Speller(lang='en')
+    # split() + ' '.join() normalises any run of whitespace to a single space.
+    return ' '.join(speller(word) for word in influent_sentence.split())
diff --git a/defence/gramformer_gec.py b/defence/gramformer_gec.py
@@ -0,0 +1,41 @@
+'''
+The Gramformer project for grammatical error correction (GEC)
+
+https://github.com/PrithivirajDamodaran/Gramformer
+
+pip install -U git+https://github.com/PrithivirajDamodaran/Gramformer.git
+python -m spacy download en_core_web_sm
+
+'''
+from gramformer import Gramformer
+import torch
+from tqdm import tqdm
+import pandas as pd
+# def set_seed(seed):
+#   torch.manual_seed(seed)
+#   if torch.cuda.is_available():
+#     torch.cuda.manual_seed_all(seed)
+
+# set_seed(1212)
+
+gf = Gramformer(models=1, use_gpu=False)  # models: 1=corrector, 2=detector
+
+# Inputs: non-null "Column 1" cells except the first, each cut at its first ':'.
+df = pd.read_csv('./result_1.csv')
+sample_data = list(df["Column 1"].dropna())
+strings = [line.split(':')[0].strip() for line in sample_data[1:]]
+print(len(strings))
+influent_sentences = strings
+print("origin:", len(influent_sentences))
+
+# Count the corrections that exactly reproduce this reference caption.
+reference = "A sunny beach with a palm tree and a surfboard lying in the sand."
+count = 0
+for influent_sentence in influent_sentences:
+    corrected_sentences = gf.correct(influent_sentence, max_candidates=1)
+    print("[Input] ", influent_sentence)
+    for corrected_sentence in corrected_sentences:
+        print("[Correction] ", corrected_sentence)
+        if corrected_sentence == reference:
+            count += 1
+print("count:", count)
diff --git a/defence/spellchecker_gec.py b/defence/spellchecker_gec.py
@@ -0,0 +1,17 @@
+from spellchecker import SpellChecker
+import pandas as pd
+
+spell = SpellChecker()
+
+# Sample captions with injected typos; each assignment replaces the previous one.
+str1 = "A sunny beach with a pXalm tree and a surgfboard lying in the sand"
+str1 = "A serene lake surrounded by mountains with a small boat and qducks."
+str1 = "A man with a red Xhelmet on a small moped on a dirt road."
+str1 = "A man with a red helmet on a small moped on a dPirt road."
+str2 = str1.split()
+# unknown() reports its words lowercased, so membership is tested on word.lower().
+misspelled = spell.unknown(str2)
+# correction() may return None when it finds no candidate; keep the word as-is then.
+corrected_sentence = " ".join((spell.correction(word) or word) if word.lower() in misspelled else word for word in str2)
+
+print(corrected_sentence)