# Rutgers-friendly first run: avoid bitsandbytes and keep the model small.
model = "meta-llama/Llama-3.2-1B-Instruct"
seed = 42
quantization = "none"

# NOTE(review): batch_size = 0 next to max_batch_size = 16 — presumably 0 means
# "auto"/dynamic batching up to the max; confirm against the consuming code.
batch_size = 0
max_batch_size = 16
max_response_length = 80

# Search budget: n_startup_trials is presumably the random-sampling warmup
# before the sampler kicks in (Optuna-style) — confirm with the trainer.
n_trials = 12
n_startup_trials = 4

orthogonalize_direction = true
row_normalization = "pre"
overrefusal_penalty = 0.35
direction_variance_floor = 1e-6
study_checkpoint_dir = "checkpoints_llama32_1b_rutgers"

# Prompt sets used to fit the refusal direction.
[good_prompts]
dataset = "mlabonne/harmless_alpaca"
split = "train[:160]"
column = "text"
residual_plot_label = '"Harmless" prompts'
residual_plot_color = "royalblue"

[bad_prompts]
dataset = "mlabonne/harmful_behaviors"
split = "train[:160]"
column = "text"
residual_plot_label = '"Harmful" prompts'
residual_plot_color = "darkorange"

# Held-out evaluation splits (no plot styling needed here).
[good_evaluation_prompts]
dataset = "mlabonne/harmless_alpaca"
split = "test[:48]"
column = "text"

[bad_evaluation_prompts]
dataset = "mlabonne/harmful_behaviors"
split = "test[:48]"
column = "text"