From f8b05c0db8663d38a12c5a9af5e7659b068f2783 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Fri, 25 Oct 2024 17:19:06 +0200 Subject: [PATCH 01/13] base support for centralised useragent changes --- garak/_config.py | 8 +++++++- garak/generators/rest.py | 3 +++ garak/resources/garak.core.yaml | 1 + tests/generators/test_rest.py | 1 + 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/garak/_config.py b/garak/_config.py index f420d5484..b8fbc84be 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -23,7 +23,9 @@ DICT_CONFIG_AFTER_LOAD = False -version = -1 # eh why this is here? hm. who references it +from garak import __version__ + +version = __version__ system_params = ( "verbose narrow_output parallel_requests parallel_attempts skip_unknown".split() @@ -144,10 +146,13 @@ def _load_yaml_config(settings_filenames) -> dict: def _store_config(settings_files) -> None: + import garak + global system, run, plugins, reporting settings = _load_yaml_config(settings_files) system = _set_settings(system, settings["system"]) run = _set_settings(run, settings["run"]) + run.user_agent = run.user_agent.replace("{version}", garak.__version__) plugins = _set_settings(plugins, settings["plugins"]) reporting = _set_settings(reporting, settings["reporting"]) @@ -193,6 +198,7 @@ def load_config( logging.debug("Loading configs from: %s", ",".join(settings_files)) _store_config(settings_files=settings_files) + if DICT_CONFIG_AFTER_LOAD: _lock_config_as_dict() loaded = True diff --git a/garak/generators/rest.py b/garak/generators/rest.py index 746c8917f..500dae47c 100644 --- a/garak/generators/rest.py +++ b/garak/generators/rest.py @@ -67,6 +67,7 @@ def __init__(self, uri=None, config_root=_config): self.escape_function = self._json_escape self.retry_5xx = True self.key_env_var = self.ENV_VAR if hasattr(self, "ENV_VAR") else None + self.user_agent = _config.run.user_agent # load configuration since super.__init__ has not been called self._load_config(config_root) @@ -187,6 +188,8 @@ def _call_model( # serialized as parameters, in general a method could be created to add # the prompt data to a request via params or data based on the action verb data_kw = "params" if self.http_function == requests.get else "data" + if "User-Agent" not in request_headers: + request_headers["User-Agent"] = self.user_agent req_kArgs = { data_kw: request_data, "headers": request_headers, diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index 98a1fa2e9..57bad2e17 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -13,6 +13,7 @@ run: eval_threshold: 0.5 generations: 5 probe_tags: + user_agent: "garak LLM vulnerability scanner, v{version} https://garak.ai" plugins: model_type: diff --git a/tests/generators/test_rest.py b/tests/generators/test_rest.py index 932473ba8..1fcf67175 100644 --- a/tests/generators/test_rest.py +++ b/tests/generators/test_rest.py @@ -14,6 +14,7 @@ @pytest.fixture def set_rest_config(): + _config.run.user_agent = "test user agent, garak.ai" _config.plugins.generators["rest"] = {} _config.plugins.generators["rest"]["RestGenerator"] = { "name": DEFAULT_NAME, From 1c6f442bbcbd125827ae79d753b01c3d0f9456cc Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 28 Oct 2024 17:22:49 +0100 Subject: [PATCH 02/13] RFC compliant ua --- garak/resources/garak.core.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index 57bad2e17..d487594fc 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -13,7 +13,7 @@ run: eval_threshold: 0.5 generations: 5 probe_tags: - user_agent: "garak LLM vulnerability scanner, v{version} https://garak.ai" + user_agent: "garak/{version} , LLM vulnerability scanner https://garak.ai" plugins: model_type: From f0611d7cd00c712cf74c17c6c5d5f86386e427d0 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 28 Oct 2024 17:23:06 +0100 Subject: [PATCH 03/13] rm dupe setting of _config.version --- garak/cli.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/garak/cli.py b/garak/cli.py index 5894ce0dd..a337e49d4 100644 --- a/garak/cli.py +++ b/garak/cli.py @@ -10,13 +10,12 @@ def main(arguments=None) -> None: """Main entry point for garak runs invoked from the CLI""" import datetime - from garak import __version__, __description__ + from garak import __description__ from garak import _config from garak.exception import GarakException _config.transient.starttime = datetime.datetime.now() _config.transient.starttime_iso = _config.transient.starttime.isoformat() - _config.version = __version__ if arguments is None: arguments = [] From 19e26834650ebe4f0c1f24057bfdee7192048216 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 28 Oct 2024 17:23:20 +0100 Subject: [PATCH 04/13] Update garak/_config.py Co-authored-by: Jeffrey Martin Signed-off-by: Leon Derczynski --- garak/_config.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/garak/_config.py b/garak/_config.py index b8fbc84be..a734f6a5d 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -146,9 +146,7 @@ def _load_yaml_config(settings_filenames) -> dict: def _store_config(settings_files) -> None: - import garak - - global system, run, plugins, reporting + global system, run, plugins, reporting, version settings = _load_yaml_config(settings_files) system = _set_settings(system, settings["system"]) run = _set_settings(run, settings["run"]) From 9d347f0cc38efc9e0c10674c19ace82d2a8d4c54 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 28 Oct 2024 17:27:43 +0100 Subject: [PATCH 05/13] use local version var in _config --- garak/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/garak/_config.py b/garak/_config.py index a734f6a5d..72004b016 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -150,7 +150,7 @@ def _store_config(settings_files) -> None: settings = _load_yaml_config(settings_files) system = _set_settings(system, settings["system"]) run = _set_settings(run, settings["run"]) - run.user_agent = run.user_agent.replace("{version}", garak.__version__) + run.user_agent = run.user_agent.replace("{version}", version) plugins = _set_settings(plugins, settings["plugins"]) reporting = _set_settings(reporting, settings["reporting"]) From 68c8991f3b96db04d4c15539fc71b8cae62649ea Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 28 Oct 2024 17:42:53 +0100 Subject: [PATCH 06/13] set requests UA in config --- garak/_config.py | 3 +++ garak/generators/rest.py | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/garak/_config.py b/garak/_config.py index 72004b016..b232e5c8c 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -153,6 +153,9 @@ def _store_config(settings_files) -> None: run.user_agent = run.user_agent.replace("{version}", version) plugins = _set_settings(plugins, settings["plugins"]) reporting = _set_settings(reporting, settings["reporting"]) + from requests import utils + + utils.default_user_agent = run.user_agent def load_base_config() -> None: diff --git a/garak/generators/rest.py b/garak/generators/rest.py index 500dae47c..746c8917f 100644 --- a/garak/generators/rest.py +++ b/garak/generators/rest.py @@ -67,7 +67,6 @@ def __init__(self, uri=None, config_root=_config): self.escape_function = self._json_escape self.retry_5xx = True self.key_env_var = self.ENV_VAR if hasattr(self, "ENV_VAR") else None - self.user_agent = _config.run.user_agent # load configuration since super.__init__ has not been called self._load_config(config_root) @@ -188,8 +187,6 @@ def _call_model( # serialized as parameters, in general a method could be created to add # the prompt data to a request via params or data based on the action verb data_kw = "params" if self.http_function == requests.get else "data" - if "User-Agent" not in request_headers: - request_headers["User-Agent"] = self.user_agent req_kArgs = { data_kw: request_data, "headers": request_headers, From 36319c3b476a04e94553b1207902c6140773000b Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Tue, 29 Oct 2024 13:03:24 +0100 Subject: [PATCH 07/13] update get+set of http library agent values/methods --- garak/_config.py | 33 ++++++++++++++++++++++++++++++++- garak/harnesses/base.py | 11 +++++++++++ tests/test_config.py | 19 +++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/garak/_config.py b/garak/_config.py index b232e5c8c..a8d640d60 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -153,9 +153,40 @@ def _store_config(settings_files) -> None: run.user_agent = run.user_agent.replace("{version}", version) plugins = _set_settings(plugins, settings["plugins"]) reporting = _set_settings(reporting, settings["reporting"]) + + +def set_all_http_lib_agents(agent_string): + set_http_lib_agents( + {"requests": agent_string, "httpx": agent_string, "aiohttp": agent_string} + ) + + +def set_http_lib_agents(agent_strings: dict): + if "requests" in agent_strings: + from requests import utils + + utils.default_user_agent = lambda x=None: agent_strings["requests"] + if "httpx" in agent_strings: + import httpx + + httpx._client.USER_AGENT = agent_strings["httpx"] + if "aiohttp" in agent_strings: + import aiohttp + + aiohttp.client_reqrep.SERVER_SOFTWARE = agent_strings["aiohttp"] + + +def get_http_lib_agents(): from requests import utils + import httpx + import aiohttp + + agent_strings = {} + agent_strings["requests"] = utils.default_user_agent + agent_strings["httpx"] = httpx._client.USER_AGENT + agent_strings["aiohttp"] = aiohttp.client_reqrep.SERVER_SOFTWARE - utils.default_user_agent = run.user_agent + return agent_strings def load_base_config() -> None: diff --git a/garak/harnesses/base.py b/garak/harnesses/base.py index 79e9c63a3..898684bf6 100644 --- a/garak/harnesses/base.py +++ b/garak/harnesses/base.py @@ -64,6 +64,13 @@ def _load_buffs(self, buff_names: List) -> None: logging.warning(err_msg) continue + def _start_run_hook(self): + self._http_lib_user_agents = _config.get_http_lib_agents() + _config.set_all_http_lib_agents(_config.run.user_agent) + + def _end_run_hook(self): + _config.set_http_lib_agents(self._http_lib_user_agents) + def run(self, model, probes, detectors, evaluator, announce_probe=True) -> None: """Core harness method @@ -92,6 +99,8 @@ def run(self, model, probes, detectors, evaluator, announce_probe=True) -> None: print(msg) raise ValueError(msg) + self._start_run_hook() + for probe in probes: logging.debug("harness: probe start for %s", probe.probename) if not probe: @@ -135,4 +144,6 @@ def run(self, model, probes, detectors, evaluator, announce_probe=True) -> None: else: evaluator.evaluate(attempt_results) + self._end_run_hook() + logging.debug("harness: probe list iteration completed") diff --git a/tests/test_config.py b/tests/test_config.py index 3892e6774..c738dd763 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -9,6 +9,7 @@ import sys import tempfile +import aiohttp.client_reqrep import pytest from pathlib import Path @@ -764,3 +765,21 @@ def test_nested(): _config.plugins.generators["a"]["b"]["c"]["d"] = "e" assert _config.plugins.generators["a"]["b"]["c"]["d"] == "e" + + +def test_get_user_agents(): + agents = _config.get_http_lib_agents() + assert isinstance(agents, dict) + + +def test_set_agents(): + from requests import utils + import httpx + import aiohttp + + agent_test = "garak/9 - only simple tailors edition" + _config.set_all_http_lib_agents(agent_test) + + assert str(utils.default_user_agent()) == agent_test + assert httpx._client.USER_AGENT == agent_test + assert aiohttp.client_reqrep.SERVER_SOFTWARE == agent_test From bf5310b455f87deebf07476eea791a9f11ae22c9 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Tue, 29 Oct 2024 13:59:27 +0100 Subject: [PATCH 08/13] check user agents are actually used --- pyproject.toml | 3 ++- requirements.txt | 1 + tests/test_config.py | 58 ++++++++++++++++++++++++++++++++++++++------ 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 681ffc72b..e7d0ca3dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,8 @@ tests = [ "pytest>=8.0", "requests-mock==1.12.1", "respx>=0.21.1", - "pytest-cov>=5.0.0" + "pytest-cov>=5.0.0", + "pytest_httpserver>=1.1.0" ] lint = [ "black==24.4.2", diff --git a/requirements.txt b/requirements.txt index 8eb5a3ee0..8b30110a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -41,6 +41,7 @@ pytest>=8.0 requests-mock==1.12.1 respx>=0.21.1 pytest-cov>=5.0.0 +pytest_httpserver>=1.1.0 # lint black==24.4.2 pylint>=3.1.0 diff --git a/tests/test_config.py b/tests/test_config.py index c738dd763..60d675562 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -4,15 +4,15 @@ import importlib import json import os +from pathlib import Path +import pytest import re import shutil import sys import tempfile -import aiohttp.client_reqrep -import pytest +from pytest_httpserver import HTTPServer -from pathlib import Path from garak import _config import garak.cli @@ -772,14 +772,56 @@ def test_get_user_agents(): assert isinstance(agents, dict) +AGENT_TEST = "garak/9 - only simple tailors edition" + + def test_set_agents(): from requests import utils import httpx import aiohttp - agent_test = "garak/9 - only simple tailors edition" - _config.set_all_http_lib_agents(agent_test) + _config.set_all_http_lib_agents(AGENT_TEST) + + assert str(utils.default_user_agent()) == AGENT_TEST + assert httpx._client.USER_AGENT == AGENT_TEST + assert aiohttp.client_reqrep.SERVER_SOFTWARE == AGENT_TEST + +def httpserver(): + return HTTPServer() + + +def test_agent_is_used_requests(httpserver: HTTPServer): + import requests + + _config.set_http_lib_agents({"requests": AGENT_TEST}) + httpserver.expect_request( + "/", headers={"User-Agent": AGENT_TEST} + ).respond_with_data("") + assert requests.get(httpserver.url_for("/")).status_code == 200 + + +def test_agent_is_used_httpx(httpserver: HTTPServer): + import httpx + + _config.set_http_lib_agents({"httpx": AGENT_TEST}) + httpserver.expect_request( + "/", headers={"User-Agent": AGENT_TEST} + ).respond_with_data("") + assert httpx.get(httpserver.url_for("/")).status_code == 200 + + +def test_agent_is_used_aiohttp(httpserver: HTTPServer): + import aiohttp + import asyncio + + _config.set_http_lib_agents({"aiohttp": AGENT_TEST}) + + async def main(): + async with aiohttp.ClientSession() as session: + async with session.get(httpserver.url_for("/")) as response: + html = await response.text() - assert str(utils.default_user_agent()) == agent_test - assert httpx._client.USER_AGENT == agent_test - assert aiohttp.client_reqrep.SERVER_SOFTWARE == agent_test + httpserver.expect_request( + "/", headers={"User-Agent": AGENT_TEST} + ).respond_with_data("") + asyncio.run(main()) From 44e5d3036d45672febf1d173157ff2be47237fd3 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Wed, 30 Oct 2024 08:01:38 +0100 Subject: [PATCH 09/13] mv UA to own func --- garak/_config.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/garak/_config.py b/garak/_config.py index a8d640d60..d70449e73 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -155,6 +155,14 @@ def _store_config(settings_files) -> None: reporting = _set_settings(reporting, settings["reporting"]) +def _garak_user_agent(dummy=None): + global run + if hasattr(run, "user_agent"): + return run.user_agent + else: + return "garak" + + def set_all_http_lib_agents(agent_string): set_http_lib_agents( {"requests": agent_string, "httpx": agent_string, "aiohttp": agent_string} @@ -165,7 +173,7 @@ def set_http_lib_agents(agent_strings: dict): if "requests" in agent_strings: from requests import utils - utils.default_user_agent = lambda x=None: agent_strings["requests"] + utils.default_user_agent = _garak_user_agent if "httpx" in agent_strings: import httpx From f9ca2eae422e01816d18197fb6bbba0ee7c30d33 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 11 Nov 2024 14:42:44 +0100 Subject: [PATCH 10/13] decouple _config.run.user_agent from UA setter --- garak/_config.py | 19 ++++++++++++++----- garak/cli.py | 3 +++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/garak/_config.py b/garak/_config.py index d70449e73..53060ef97 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -155,12 +155,17 @@ def _store_config(settings_files) -> None: reporting = _set_settings(reporting, settings["reporting"]) +# not my favourite solution in this module, but if +# _config.set_http_lib_agents() to be predicated on a param instead of +# a _config.run value (i.e. user_agent) - which it needs to be if it can be +# used when the values are popped back to originals - then a separate way +# of passing the UA string to _garak_user_agent() needs to exist, outside of +# _config.run.user_agent +REQUESTS_AGENT = "" + + def _garak_user_agent(dummy=None): - global run - if hasattr(run, "user_agent"): - return run.user_agent - else: - return "garak" + return str(REQUESTS_AGENT) def set_all_http_lib_agents(agent_string): @@ -170,9 +175,13 @@ def set_all_http_lib_agents(agent_string): def set_http_lib_agents(agent_strings: dict): + + global REQUESTS_AGENT + if "requests" in agent_strings: from requests import utils + REQUESTS_AGENT = agent_strings["requests"] utils.default_user_agent = _garak_user_agent if "httpx" in agent_strings: import httpx diff --git a/garak/cli.py b/garak/cli.py index a337e49d4..02d0b9833 100644 --- a/garak/cli.py +++ b/garak/cli.py @@ -254,6 +254,7 @@ def main(arguments=None) -> None: # load site config before loading CLI config _cli_config_supplied = args.config is not None + prior_user_agents = _config.get_http_lib_agents() _config.load_config(run_config_filename=args.config) # extract what was actually passed on CLI; use a masking argparser @@ -553,3 +554,5 @@ def main(arguments=None) -> None: except (ValueError, GarakException) as e: logging.exception(e) print(e) + + _config.set_http_lib_agents(prior_user_agents) From 5376d0f46be0723e26b07c1d8a592f46c21817b4 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 11 Nov 2024 15:14:17 +0100 Subject: [PATCH 11/13] document new _config.run param --- docs/source/configurable.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/configurable.rst b/docs/source/configurable.rst index 69294ef62..35ed8fba4 100644 --- a/docs/source/configurable.rst +++ b/docs/source/configurable.rst @@ -101,6 +101,7 @@ such as ``show_100_pass_modules``. * ``deprefix`` - Remove the prompt from the start of the output (some models return the prompt as part of their output) * ``seed`` - An optional random seed * ``eval_threshold`` - At what point in the 0..1 range output by detectors does a result count as a successful attack / hit +* ``user_agent`` - What HTTP user agent string should garak use? ``{version}`` can be used to signify where garak version ID should go ``plugins`` config items """""""""""""""""""""""" From fd486e307f017b13514a63da20cbb4e99d951259 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Thu, 14 Nov 2024 10:52:35 +0100 Subject: [PATCH 12/13] consistent ua (with details in parentheses) Co-authored-by: Jeffrey Martin Signed-off-by: Leon Derczynski --- garak/resources/garak.core.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index d487594fc..a3d948935 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -13,7 +13,7 @@ run: eval_threshold: 0.5 generations: 5 probe_tags: - user_agent: "garak/{version} , LLM vulnerability scanner https://garak.ai" + user_agent: "garak/{version} (LLM vulnerability scanner https://garak.ai)" plugins: model_type: From a4844f4e16dab84e9a7931d9b4e561a3d4a72b88 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Thu, 14 Nov 2024 10:56:13 +0100 Subject: [PATCH 13/13] streamline version import & usage --- garak/_config.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/garak/_config.py b/garak/_config.py index 53060ef97..7e3480ae3 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -23,9 +23,7 @@ DICT_CONFIG_AFTER_LOAD = False -from garak import __version__ - -version = __version__ +from garak import __version__ as version system_params = ( "verbose narrow_output parallel_requests parallel_attempts skip_unknown".split()