Skip to content

Commit 8242f1f

Browse files
author
Yi Zhang
committed
update
1 parent 7376043 commit 8242f1f

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+302357
-0
lines changed

generate_AE/attack_test.py

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
2+
import torch
3+
import os
4+
# Set CUDA_VISIBLE_DEVICES to "0" before the CUDA availability check below
# (presumably to pin this script to a single GPU -- confirm for your machine).
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
5+
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
6+
import random
7+
seed_value = 42
8+
# Seeds Python's `random` module only; NumPy and torch RNGs are not seeded here.
random.seed(seed_value)
9+
# Character-level word-swap transformations provided by TextAttack.
from textattack.transformations import (
10+
WordSwapRandomCharacterInsertion,
11+
WordSwapRandomCharacterSubstitution,
12+
WordSwapRandomCharacterDeletion,
13+
WordSwapNeighboringCharacterSwap,
14+
WordSwapQWERTY, # typos that come from typing too quickly.
15+
)
16+
from textattack.transformations import CompositeTransformation
17+
from textattack.constraints.pre_transformation import RepeatModification
18+
# NOTE(review): StopwordModification is imported but not used in the visible code.
from textattack.constraints.pre_transformation import StopwordModification
19+
from textattack.augmentation import Augmenter
20+
# Alternative configuration combining all five character-level swaps (disabled):
# transformation = CompositeTransformation([WordSwapRandomCharacterInsertion(),WordSwapRandomCharacterSubstitution(),WordSwapRandomCharacterDeletion(),WordSwapNeighboringCharacterSwap(),WordSwapQWERTY()])
21+
# Active configuration: the QWERTY word swap alone, wrapped in a composite.
transformation = CompositeTransformation([WordSwapQWERTY()])
22+
# Constraint list handed to the Augmenter in the __main__ block.
constraints = [RepeatModification()]
23+
24+
import pandas as pd
25+
import clip
26+
import numpy as np
27+
from tqdm import tqdm
28+
# Loads the CLIP "ViT-B/32" model onto `device` at import time.
# NOTE(review): neither `model` nor `preprocess` is used by this script's
# __main__ block -- confirm whether the load is needed here.
model, preprocess = clip.load("ViT-B/32", device=device)
29+
30+
def get_data(path):
    """Read a text file into a dict keyed by 1-based line number.

    Args:
        path: Path of the text file to read (decoded as UTF-8).

    Returns:
        dict: Maps each line number (starting at 1) to that line with
        leading/trailing whitespace removed. Blank lines are kept as
        empty strings so the numbering always matches the file.
    """
    # An explicit encoding keeps the result independent of the platform's
    # default locale encoding.
    with open(path, 'r', encoding='utf-8') as file:
        return {
            number: line.strip()
            for number, line in enumerate(file, start=1)
        }
38+
39+
if __name__ == "__main__":
    # Smoke test: perturb one fixed prompt with the module-level
    # transformation/constraints and print the augmented variants.
    prompt = "A red ball on green grass under blue sky"

    augmenter = Augmenter(
        transformation=transformation,
        constraints=constraints,
        pct_words_to_swap=0.1,
        transformations_per_example=10,
    )

    prompt_2 = augmenter.augment(prompt)
    print("prompt_2:", prompt_2)

generate_AE/char_level.py

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
2+
import torch
3+
import os
4+
# Set CUDA_VISIBLE_DEVICES to "1" before the CUDA availability check below
# (presumably to pin this script to a single GPU -- confirm for your machine).
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
5+
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
6+
import random
7+
seed_value = 42
8+
# Seeds Python's `random` module only; NumPy and torch RNGs are not seeded here.
random.seed(seed_value)
9+
# Character-level word-swap transformations provided by TextAttack.
from textattack.transformations import (
10+
WordSwapRandomCharacterInsertion,
11+
WordSwapRandomCharacterSubstitution,
12+
WordSwapRandomCharacterDeletion,
13+
WordSwapNeighboringCharacterSwap,
14+
WordSwapQWERTY, # typos that come from typing too quickly.
15+
)
16+
from textattack.transformations import CompositeTransformation
17+
from textattack.constraints.pre_transformation import RepeatModification
18+
# NOTE(review): StopwordModification is imported but not used in the visible code.
from textattack.constraints.pre_transformation import StopwordModification
19+
from textattack.augmentation import Augmenter
20+
# Active configuration: a composite of all five character-level word swaps.
transformation = CompositeTransformation([WordSwapRandomCharacterInsertion(),WordSwapRandomCharacterSubstitution(),WordSwapRandomCharacterDeletion(),WordSwapNeighboringCharacterSwap(),WordSwapQWERTY()])
21+
# Alternative configuration using substitution only (disabled):
# transformation = CompositeTransformation([WordSwapRandomCharacterSubstitution()])
22+
# Constraint list handed to the Augmenter in the __main__ block.
constraints = [RepeatModification()]
23+
24+
import pandas as pd
25+
import clip
26+
import numpy as np
27+
from tqdm import tqdm
28+
# Loads the CLIP "ViT-B/32" model onto `device` at import time; `model` is
# used by the __main__ block to encode text.
# NOTE(review): `preprocess` is not used in the visible code.
model, preprocess = clip.load("ViT-B/32", device=device)
29+
30+
def get_data(path):
    """Read a text file into a dict keyed by 1-based line number.

    Args:
        path: Path of the text file to read (decoded as UTF-8).

    Returns:
        dict: Maps each line number (starting at 1) to that line with
        leading/trailing whitespace removed. Blank lines are kept as
        empty strings so the numbering always matches the file.
    """
    # An explicit encoding keeps the result independent of the platform's
    # default locale encoding.
    with open(path, 'r', encoding='utf-8') as file:
        return {
            number: line.strip()
            for number, line in enumerate(file, start=1)
        }
38+
39+
if __name__ == "__main__":
    # Generates character-level perturbations of a single prompt, scores each
    # one by CLIP text-embedding cosine similarity to the clean prompt, and
    # writes the perturbations above the similarity threshold to a CSV with
    # one column per swap rate.

    # 1-based line number (key produced by get_data) of the prompt to perturb.
    target_id = 5
    # A perturbed prompt is kept only if its similarity is strictly above this.
    similarity_threshold = 0.85

    prompts = get_data('./origin_prompts/coco.txt')

    # Nothing is done when the prompt file has no such line.
    if target_id in prompts:
        original_prompt = prompts[target_id]

        # The clean prompt does not depend on the swap rate, so embed it once
        # and reuse the CPU float32 vector and its norm for every candidate.
        with torch.no_grad():
            reference_features = model.encode_text(
                clip.tokenize([original_prompt]).to(device)
            )
        reference_vec = (
            reference_features.to('cpu', dtype=torch.float32).flatten().numpy()
        )
        reference_norm = np.linalg.norm(reference_vec)

        rate_columns = []
        for rate in range(1, 3):
            # rate 1 -> 0.1, rate 2 -> 0.2 of the words are swapped.
            augmenter = Augmenter(
                transformation=transformation,
                constraints=constraints,
                pct_words_to_swap=rate / 10,
                transformations_per_example=10000,
            )
            print("input:", original_prompt)
            candidates = augmenter.augment(original_prompt)
            print("len(prompt_2):", len(candidates))

            # Perturbed prompt -> cosine similarity with the clean prompt.
            similarities = {}
            # no_grad is required: .numpy() cannot be called on a tensor that
            # tracks gradients.
            with torch.no_grad():
                for candidate in tqdm(candidates):
                    tokens = clip.tokenize([candidate]).to(device)
                    candidate_vec = (
                        model.encode_text(tokens)
                        .to('cpu', dtype=torch.float32)
                        .flatten()
                        .numpy()
                    )
                    similarities[candidate] = np.dot(
                        reference_vec, candidate_vec
                    ) / (reference_norm * np.linalg.norm(candidate_vec))

            # Most similar first, then drop everything at or below the threshold.
            ranked = sorted(
                similarities.items(), key=lambda pair: pair[1], reverse=True
            )
            filtered_items = {
                text: score for text, score in ranked
                if score > similarity_threshold
            }
            print("len(filtered_items):", len(filtered_items))

            # First cell holds the count; the rest are "prompt: score" strings.
            column = [len(filtered_items)] + [
                f"{text}: {score}" for text, score in filtered_items.items()
            ]
            rate_columns.append(pd.DataFrame({f'Column {rate}': column}))

        # Columns may differ in length; concat pads the shorter ones with NaN.
        result_df = pd.concat(rate_columns, axis=1)
        out_file = f"./coco/char_AE/result_{target_id}.csv"
        # to_csv does not create missing directories; make sure the target
        # exists so the run does not fail after the expensive scoring step.
        os.makedirs(os.path.dirname(out_file), exist_ok=True)
        result_df.to_csv(out_file, index=False)
72+
73+
# df = pd.read_csv('your_data.csv')
74+
# d = df['Column 2'].dropna()
75+
# print(len(d))
76+
# for i, item in enumerate(d):
77+
# print(type(item), item)
78+
79+
80+
81+
82+

0 commit comments

Comments
 (0)