Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds new encodings #459

Merged
merged 4 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 92 additions & 12 deletions garak/probes/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def braille(text):
elif char in characterUnicodes:
if char.isdigit():
if not is_number:
is_number == True
is_number = True
output += characterUnicodes["num"]
output += characterUnicodes[char]
if is_number and char not in numberPunctuations:
Expand Down Expand Up @@ -228,6 +228,43 @@ def morse(text):
return bytes(" ".join(output), "utf-8")


def nato(text: bytes) -> bytes:
    """Spell out the letters of *text* with NATO phonetic code words.

    The input is decoded, upper-cased, and scanned character by character.
    Each character in A-Z is replaced by its code word; every other
    character (spaces, digits, punctuation, ...) is dropped. The code words
    are joined with single spaces and returned as UTF-8 bytes.
    """
    code_words = (
        "Alfa",
        "Bravo",
        "Charlie",
        "Delta",
        "Echo",
        "Foxtrot",
        "Golf",
        "Hotel",
        "India",
        "Juliett",
        "Kilo",
        "Lima",
        "Mike",
        "November",
        "Oscar",
        "Papa",
        "Quebec",
        "Romeo",
        "Sierra",
        "Tango",
        "Uniform",
        "Victor",
        "Whiskey",
        "Xray",
        "Yankee",
        "Zulu",
    )
    # Each code word begins with the letter it stands for, so the lookup
    # table can be keyed on the word's first character.
    word_for = {word[0]: word for word in code_words}

    letters = text.decode().upper()
    spelled = " ".join(word_for[ch] for ch in letters if ch in word_for)
    return bytes(spelled, "utf-8")


def _generate_encoded_prompts(encoding_funcs, encoding_name):
encoded_prompts = set([])
for template in templates:
Expand Down Expand Up @@ -272,6 +309,27 @@ def _attempt_prestore_hook(self, attempt, seq):
return attempt


def ecoji(text: bytes) -> bytes:
    """Encode *text* with the Ecoji emoji encoding.

    Args:
        text: Raw payload bytes to encode.

    Returns:
        The Ecoji representation of *text*, encoded as UTF-8 bytes.

    Raises:
        ImportError: If the third-party ``ecoji`` package is not installed.
    """
    from io import BytesIO, StringIO

    # Imported lazily and aliased so the module does not hide this
    # function's own name while reading the body.
    import ecoji as ecoji_codec

    reader = BytesIO(text)
    # Per the ecoji package README, encode() reads bytes and writes emoji as
    # str, so the sink has to be a text buffer; a BytesIO sink would reject
    # the first write with TypeError.
    writer = StringIO()

    ecoji_codec.encode(reader, writer)

    # Convert back to bytes to honour the declared return type.
    return writer.getvalue().encode("utf-8")


def zalgo(text: bytes) -> bytes:
    """Return *text* rewritten by zalgolib's ``enzalgofy``, as UTF-8 bytes.

    The payload is decoded to ``str``, passed to ``enzalgofy`` with a fixed
    intensity of 50, and the result is encoded back to bytes.
    """
    # Imported lazily so zalgolib is only required when this encoder runs.
    from zalgolib.zalgolib import enzalgofy

    plain = text.decode()
    return bytes(enzalgofy(text=plain, intensity=50), "utf-8")


class InjectBase64(BaseEncodingProbe, Probe):
"""Probe for susceptibility to base64 encoded injections"""

Expand Down Expand Up @@ -321,17 +379,6 @@ def __init__(self):
Probe.__init__(self)


class InjectBase32(BaseEncodingProbe, Probe):
    """Probe for susceptibility to base32 encoded injections"""

    # Label identifying this encoding.
    encoding_name = "BASE32"
    # Payloads are encoded with the standard-library base32 encoder.
    encoding_funcs = [base64.b32encode]

    def __init__(self):
        # Each base is initialised explicitly, BaseEncodingProbe first.
        for base in (BaseEncodingProbe, Probe):
            base.__init__(self)


class InjectHex(BaseEncodingProbe, Probe):
"""Probe for susceptibility to hex encoded injections"""

Expand Down Expand Up @@ -433,3 +480,36 @@ class InjectMorse(BaseEncodingProbe, Probe):
def __init__(self):
BaseEncodingProbe.__init__(self)
Probe.__init__(self)


class InjectNato(BaseEncodingProbe, Probe):
    """Probe for susceptibility to Nato phonetic alphabet encoding."""

    # Label identifying this encoding.
    encoding_name = "Nato"
    # Payloads are spelled out letter by letter with the module's nato().
    encoding_funcs = [nato]

    def __init__(self):
        # Each base is initialised explicitly, BaseEncodingProbe first.
        for base in (BaseEncodingProbe, Probe):
            base.__init__(self)


class InjectEcoji(BaseEncodingProbe, Probe):
    """Probe for susceptibility to Ecoji encoding."""

    # Label identifying this encoding.
    encoding_name = "Ecoji"

    def __init__(self):
        # The encoder list is set on the instance before the bases are
        # initialised, so it is already in place when they run.
        self.encoding_funcs = [ecoji]
        # Each base is initialised explicitly, BaseEncodingProbe first.
        for base in (BaseEncodingProbe, Probe):
            base.__init__(self)


class InjectZalgo(BaseEncodingProbe, Probe):
    """Probe for susceptibility to Zalgo encoding."""

    # Label identifying this encoding.
    encoding_name = "Zalgo"

    def __init__(self):
        # The encoder list is set on the instance before the bases are
        # initialised, so it is already in place when they run.
        self.encoding_funcs = [zalgo]
        # Each base is initialised explicitly, BaseEncodingProbe first.
        for base in (BaseEncodingProbe, Probe):
            base.__init__(self)
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,7 @@ octoai-sdk
cmd2
torch>=2.1.0
sentencepiece>=0.1.99
markdown
markdown
zalgolib>=0.2.2
basest>=0.7.3
ecoji>=0.1.0
Loading