Skip to content

Commit

Permalink
use XDG paths for configuration data and caching (#799)
Browse files Browse the repository at this point in the history
* use XDG paths for configuration data and caching

Support using [XDG ver 0.8](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) for project data.

Specifically support:
| ENV VAR          | DEFAULT            |
|------------------|--------------------|
| $XDG_DATA_HOME   | $HOME/.local/share |
| $XDG_CONFIG_HOME | $HOME/.config      |
| $XDG_CACHE_HOME  | $HOME/.cache       |

Project name `garak` is appended to each location.

This represents the following breaking changes to project expectations:
* report_prefix passed either at the command line or as a config file option
  * sets filename values only
  * no longer overrides report_dir
* report_dir passed as a config file option
  * when provided as a relative path, it will be prepended with `<xdg_data_home>/garak`
  * when provided as an absolute path, it will be used as the output directory
* default `user/site` configuration file `garak.site.yaml` has moved
  * previously `<basedir>/garak.site.yaml`
  * updated location `<xdg_config_home>/garak/garak.site.yaml`

Additional changes (not considered breaking changes):
* nltk data is placed in <xdg_cache_home>/garak if not already found in the environment
* visual_jailbreak downloaded artifacts are placed in <xdg_cache_home>/garak/resources
* generated data for beast/gcg/tap are placed in <xdg_cache_home>/garak/resources

Signed-off-by: Jeffrey Martin <[email protected]>

* document default site config path

Signed-off-by: Jeffrey Martin <[email protected]>

* rename variables for style/clarity & reduce hitlog location logic

Signed-off-by: Jeffrey Martin <[email protected]>

* default output location in gcg as cache_dir

Signed-off-by: Jeffrey Martin <[email protected]>

---------

Signed-off-by: Jeffrey Martin <[email protected]>
  • Loading branch information
jmartin-tech authored Jul 30, 2024
1 parent f2e5182 commit 4e30a3f
Show file tree
Hide file tree
Showing 35 changed files with 340 additions and 118 deletions.
2 changes: 1 addition & 1 deletion docs/source/basic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Config values are loaded in the following priority (lowest-first):

* Plugin defaults in the code
* Core config: from ``garak/resources/garak.core.yaml``; not to be overridden
* Site config: from ``garak/garak.site.yaml``
* Site config: from ``$HOME/.config/garak/garak.site.yaml``
* Runtime config: from an optional config file specified manually, via e.g. CLI parameter
* Command-line options

Expand Down
26 changes: 20 additions & 6 deletions garak/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
import pathlib
from typing import List
import yaml
from xdg_base_dirs import (
xdg_cache_home,
xdg_config_home,
xdg_data_home,
)

DICT_CONFIG_AFTER_LOAD = False

Expand All @@ -26,6 +31,7 @@
run_params = "seed deprefix eval_threshold generations probe_tags interactive".split()
plugins_params = "model_type model_name extended_detectors".split()
reporting_params = "taxonomy report_prefix".split()
project_dir_name = "garak"


loaded = False
Expand All @@ -52,10 +58,18 @@ class TransientConfig(GarakSubConfig):
hitlogfile = None
args = None # only access this when determining what was passed on CLI
run_id = None
basedir = pathlib.Path(__file__).parents[0]
package_dir = pathlib.Path(__file__).parents[0]
config_dir = xdg_config_home() / project_dir_name
data_dir = xdg_data_home() / project_dir_name
cache_dir = xdg_cache_home() / project_dir_name
starttime = None
starttime_iso = None

# initialize the user home and cache paths if they do not exist
config_dir.mkdir(mode=0o740, parents=True, exist_ok=True)
data_dir.mkdir(mode=0o740, parents=True, exist_ok=True)
cache_dir.mkdir(mode=0o740, parents=True, exist_ok=True)


transient = TransientConfig()

Expand Down Expand Up @@ -136,7 +150,7 @@ def _store_config(settings_files) -> None:

def load_base_config() -> None:
global loaded
settings_files = [str(transient.basedir / "resources" / "garak.core.yaml")]
settings_files = [str(transient.package_dir / "resources" / "garak.core.yaml")]
logging.debug("Loading configs from: %s", ",".join(settings_files))
_store_config(settings_files=settings_files)
loaded = True
Expand All @@ -149,9 +163,9 @@ def load_config(
# and then not have cli be upset when these are not given as cli params
global loaded

settings_files = [str(transient.basedir / "resources" / "garak.core.yaml")]
settings_files = [str(transient.package_dir / "resources" / "garak.core.yaml")]

fq_site_config_filename = str(transient.basedir / site_config_filename)
fq_site_config_filename = str(transient.config_dir / site_config_filename)
if os.path.isfile(fq_site_config_filename):
settings_files.append(fq_site_config_filename)
else:
Expand All @@ -163,10 +177,10 @@ def load_config(
if os.path.isfile(run_config_filename):
settings_files.append(run_config_filename)
elif os.path.isfile(
str(transient.basedir / "configs" / (run_config_filename + ".yaml"))
str(transient.package_dir / "configs" / (run_config_filename + ".yaml"))
):
settings_files.append(
str(transient.basedir / "configs" / (run_config_filename + ".yaml"))
str(transient.package_dir / "configs" / (run_config_filename + ".yaml"))
)
else:
message = f"run config not found: {run_config_filename}"
Expand Down
31 changes: 22 additions & 9 deletions garak/_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def default(self, obj):
return sorted(list(obj)) # allow set as list, assumes values can be sorted
if isinstance(obj, Path):
# relative path for now, may be better to suppress `Path` objects
return str(obj).replace(str(_config.transient.basedir), "")
return str(obj).replace(str(_config.transient.package_dir), "")
try:
return json.JSONEncoder.default(self, obj)
except TypeError as e:
Expand All @@ -35,8 +35,12 @@ def default(self, obj):


class PluginCache:
_plugin_cache_file = _config.transient.basedir / "resources" / "plugin_cache.json"
_user_plugin_cache_file = _plugin_cache_file
_plugin_cache_filename = (
_config.transient.package_dir / "resources" / "plugin_cache.json"
)
_user_plugin_cache_filename = (
_config.transient.cache_dir / "resources" / "plugin_cache.json"
)
_plugin_cache_dict = None

def __init__(self) -> None:
Expand All @@ -52,11 +56,16 @@ def _extract_modules_klasses(base_klass):
]

def _load_plugin_cache(self):
if not os.path.exists(self._plugin_cache_file):
if not os.path.exists(self._plugin_cache_filename):
self._build_plugin_cache()
if not os.path.exists(self._user_plugin_cache_file):
shutil.copy2(self._plugin_cache_file, self._user_plugin_cache_file)
with open(self._user_plugin_cache_file, "r", encoding="utf-8") as cache_file:
if not os.path.exists(self._user_plugin_cache_filename):
self._user_plugin_cache_filename.parent.mkdir(
mode=0o740, parents=True, exist_ok=True
)
shutil.copy2(self._plugin_cache_filename, self._user_plugin_cache_filename)
with open(
self._user_plugin_cache_filename, "r", encoding="utf-8"
) as cache_file:
local_cache = json.load(cache_file)
return local_cache

Expand All @@ -79,7 +88,9 @@ def _build_plugin_cache(self):
sorted_keys = sorted(list(plugin_dict.keys()))
local_cache[plugin_type] = {i: plugin_dict[i] for i in sorted_keys}

with open(self._user_plugin_cache_file, "w", encoding="utf-8") as cache_file:
with open(
self._user_plugin_cache_filename, "w", encoding="utf-8"
) as cache_file:
json.dump(local_cache, cache_file, cls=PluginEncoder, indent=2)

def _enumerate_plugin_klasses(self, category: str) -> List[Callable]:
Expand All @@ -93,7 +104,9 @@ def _enumerate_plugin_klasses(self, category: str) -> List[Callable]:

module_plugin_names = set()

for module_filename in sorted(os.listdir(_config.transient.basedir / category)):
for module_filename in sorted(
os.listdir(_config.transient.package_dir / category)
):
if not module_filename.endswith(".py"):
continue
if module_filename.startswith("__"):
Expand Down
4 changes: 2 additions & 2 deletions garak/analyze/report_digest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from garak import _config

templateLoader = jinja2.FileSystemLoader(
searchpath=_config.transient.basedir / "analyze" / "templates"
searchpath=_config.transient.package_dir / "analyze" / "templates"
)
templateEnv = jinja2.Environment(loader=templateLoader)

Expand All @@ -29,7 +29,7 @@


misp_resource_file = (
_config.transient.basedir / "garak" / "resources" / "misp_descriptions.tsv"
_config.transient.package_dir / "garak" / "resources" / "misp_descriptions.tsv"
)
misp_descriptions = {}
if os.path.isfile(misp_resource_file):
Expand Down
32 changes: 20 additions & 12 deletions garak/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@


def start_logging():
from garak import _config

logging.basicConfig(
filename="garak.log",
filename=_config.transient.data_dir / "garak.log",
level=logging.DEBUG,
format="%(asctime)s %(levelname)s %(message)s",
)
Expand All @@ -32,6 +34,7 @@ def start_run():
import os
import uuid

from pathlib import Path
from garak import _config

logging.info("started at %s", _config.transient.starttime_iso)
Expand All @@ -41,19 +44,24 @@ def start_run():
"⚠️ The current/default config is optimised for speed rather than thoroughness. Try e.g. --config full for a stronger test, or specify some probes."
)
_config.transient.run_id = str(uuid.uuid4()) # uuid1 is safe but leaks host info
report_path = Path(_config.reporting.report_dir)
if not report_path.is_absolute():
logging.debug("relative report dir provided")
report_path = _config.transient.data_dir / _config.reporting.report_dir
if not os.path.isdir(report_path):
try:
report_path.mkdir(mode=0o740, parents=True, exist_ok=True)
except PermissionError as e:
raise PermissionError(
f"Can't create logging directory {report_path}, quitting"
) from e

filename = f"garak.{_config.transient.run_id}.report.jsonl"
if not _config.reporting.report_prefix:
if not os.path.isdir(_config.reporting.report_dir):
try:
os.mkdir(_config.reporting.report_dir)
except PermissionError as e:
raise PermissionError(
f"Can't create logging directory {_config.reporting.report_dir}, quitting"
) from e
_config.transient.report_filename = f"{_config.reporting.report_dir}/garak.{_config.transient.run_id}.report.jsonl"
filename = f"garak.{_config.transient.run_id}.report.jsonl"
else:
_config.transient.report_filename = (
_config.reporting.report_prefix + ".report.jsonl"
)
filename = _config.reporting.report_prefix + ".report.jsonl"
_config.transient.report_filename = str(report_path / filename)
_config.transient.reportfile = open(
_config.transient.report_filename, "w", buffering=1, encoding="utf-8"
)
Expand Down
8 changes: 5 additions & 3 deletions garak/detectors/riskywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@

surge_list = defaultdict(list)
with open(
_config.transient.basedir / "resources" / "profanity_en.csv", "r", encoding="utf-8"
_config.transient.package_dir / "resources" / "profanity_en.csv",
"r",
encoding="utf-8",
) as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
Expand All @@ -24,7 +26,7 @@
MIN_OFCOM_STRENGTH = 2
ofcom_list = defaultdict(list)
with open(
_config.transient.basedir / "resources" / "ofcom-potentially-offensive.txt",
_config.transient.package_dir / "resources" / "ofcom-potentially-offensive.txt",
"r",
encoding="utf-8",
) as csvfile:
Expand Down Expand Up @@ -283,7 +285,7 @@ class LDNOOBW(StringDetector):
def __init__(self, config_root=_config):
substrings = []
with open(
_config.transient.basedir / "resources" / "ldnoobw-en.txt",
_config.transient.package_dir / "resources" / "ldnoobw-en.txt",
"r",
encoding="utf-8",
) as wordlist:
Expand Down
2 changes: 1 addition & 1 deletion garak/detectors/specialwords.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class SlursReclaimedSlurs(StringDetector):
def __init__(self, config_root=_config):
self.substrings = set([])
with open(
_config.transient.basedir / "resources" / "slurprompts.jsonl",
_config.transient.package_dir / "resources" / "slurprompts.jsonl",
"r",
encoding="utf-8",
) as slurfile:
Expand Down
10 changes: 5 additions & 5 deletions garak/evaluators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import json
import logging
from pathlib import Path
from typing import Iterable

from colorama import Fore, Style
Expand Down Expand Up @@ -72,12 +73,11 @@ def evaluate(self, attempts: Iterable[garak.attempt.Attempt]) -> None:
hitlog_mode = (
"w" if _config.transient.hitlogfile is None else "a"
)
if not _config.reporting.report_prefix:
hitlog_filename = f"{_config.reporting.report_dir}/garak.{_config.transient.run_id}.hitlog.jsonl"
else:
hitlog_filename = (
_config.reporting.report_prefix + ".hitlog.jsonl"
hitlog_filename = Path(
str(_config.transient.report_filename).replace(
".report.jsonl", ".hitlog.jsonl"
)
)
logging.info("hit log in %s", hitlog_filename)
_config.transient.hitlogfile = open(
hitlog_filename,
Expand Down
2 changes: 1 addition & 1 deletion garak/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def settings_ns_provider(self) -> argparse.Namespace:
def banner():
"""Display a random banner"""
with open(
_config.transient.basedir / "resources/banners", "r", encoding="utf-8"
_config.transient.package_dir / "resources" / "banners", "r", encoding="utf-8"
) as bannerfile:
banners = bannerfile.read().lstrip().split("\n\n")
print(Fore.LIGHTRED_EX + f"\n{random.choice(banners)}\n" + Fore.RESET)
Expand Down
2 changes: 1 addition & 1 deletion garak/probes/continuation.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(self, config_root=_config):
# append these to each of the prefixes for that term
slur_prefixes = defaultdict(set)
with open(
_config.transient.basedir / "resources" / "slurprompts_80.jsonl",
_config.transient.package_dir / "resources" / "slurprompts_80.jsonl",
"r",
encoding="utf-8",
) as slurfile:
Expand Down
4 changes: 2 additions & 2 deletions garak/probes/dan.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ class AutoDANCached(Probe):

def __init__(
self,
autodan_prompt_location: str = _config.transient.basedir
autodan_prompt_location: str = _config.transient.package_dir
/ "resources"
/ "autodan"
/ "data"
Expand Down Expand Up @@ -645,7 +645,7 @@ def __init__(self, config_root=_config):
super().__init__(config_root=config_root)

inthewild_path = str(
_config.transient.basedir / "resources" / "inthewild_jailbreak_llms.txt"
_config.transient.package_dir / "resources" / "inthewild_jailbreak_llms.txt"
)

with open(inthewild_path, "r", encoding="utf-8") as f:
Expand Down
2 changes: 1 addition & 1 deletion garak/probes/donotanswer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
def load_local_data(self):
# prompts
with open(
_config.transient.basedir
_config.transient.package_dir
/ "resources"
/ "donotanswer"
/ f"{self.dna_category}.txt",
Expand Down
2 changes: 1 addition & 1 deletion garak/probes/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _load_payloads():

payload_library["slur_terms"] = []
with open(
_config.transient.basedir / "resources" / "slurprompts_80.jsonl",
_config.transient.package_dir / "resources" / "slurprompts_80.jsonl",
"r",
encoding="utf-8",
) as slurfile:
Expand Down
2 changes: 1 addition & 1 deletion garak/probes/leakreplay.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from garak import _config
from garak.probes.base import Probe

resources_path = _config.transient.basedir / "resources"
resources_path = _config.transient.package_dir / "resources"


class LiteratureCloze(Probe):
Expand Down
2 changes: 1 addition & 1 deletion garak/probes/misleading.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def __init__(self, config_root=_config):

for prefix in prefixes:
with open(
_config.transient.basedir
_config.transient.package_dir
/ "resources"
/ "truefalse_falseclaims_50.txt",
"r",
Expand Down
2 changes: 1 addition & 1 deletion garak/probes/realtoxicityprompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class RTPBlank(Probe):
for probe_class in list(probe_tags.keys()):
# load filter lists
with open(
_config.transient.basedir
_config.transient.package_dir
/ "resources"
/ "realtoxicityprompts"
/ f"rtp_{probe_class}.txt",
Expand Down
6 changes: 3 additions & 3 deletions garak/probes/snowball.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class GraphConnectivity(Probe):
def __init__(self, config_root=_config):
super().__init__(config_root=config_root)
with open(
_config.transient.basedir / "resources" / "graph_connectivity.json",
_config.transient.package_dir / "resources" / "graph_connectivity.json",
"r",
encoding="utf-8",
) as f:
Expand Down Expand Up @@ -70,7 +70,7 @@ class Primes(Probe):
def __init__(self, config_root=_config):
super().__init__(config_root=config_root)
with open(
_config.transient.basedir / "resources" / "primality_testing.json",
_config.transient.package_dir / "resources" / "primality_testing.json",
"r",
encoding="utf-8",
) as f:
Expand Down Expand Up @@ -109,7 +109,7 @@ class Senators(Probe):
def __init__(self, config_root=_config):
super().__init__(config_root=config_root)
with open(
_config.transient.basedir / "resources" / "senator_search.json",
_config.transient.package_dir / "resources" / "senator_search.json",
"r",
encoding="utf-8",
) as f:
Expand Down
Loading

0 comments on commit 4e30a3f

Please sign in to comment.