Update dataset_maker_safeguard.ipynb
Browse files- dataset_maker_safeguard.ipynb +621 -15
dataset_maker_safeguard.ipynb
CHANGED
|
@@ -7,10 +7,12 @@
|
|
| 7 |
"# Custom Dataset Maker for OpenAI GPT-OSS-Safeguard-20B\n",
|
| 8 |
"\n",
|
| 9 |
"This notebook creates custom datasets for fine-tuning the safeguard model by:\n",
|
| 10 |
-
"-
|
| 11 |
-
"- Applying custom safety policies\n",
|
| 12 |
-
"-
|
|
|
|
| 13 |
"\n",
|
|
|
|
| 14 |
"**Target Model**: openai/gpt-oss-safeguard-20b"
|
| 15 |
]
|
| 16 |
},
|
|
@@ -28,7 +30,7 @@
|
|
| 28 |
"outputs": [],
|
| 29 |
"source": [
|
| 30 |
"# Install required packages\n",
|
| 31 |
-
"!pip install -q requests beautifulsoup4 lxml pandas tqdm datasets transformers"
|
| 32 |
]
|
| 33 |
},
|
| 34 |
{
|
|
@@ -46,7 +48,9 @@
|
|
| 46 |
"from tqdm.auto import tqdm\n",
|
| 47 |
"import time\n",
|
| 48 |
"from urllib.parse import urljoin, urlparse\n",
|
| 49 |
-
"from datetime import datetime"
|
|
|
|
|
|
|
| 50 |
]
|
| 51 |
},
|
| 52 |
{
|
|
@@ -119,7 +123,276 @@
|
|
| 119 |
"cell_type": "markdown",
|
| 120 |
"metadata": {},
|
| 121 |
"source": [
|
| 122 |
-
"## 3.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
]
|
| 124 |
},
|
| 125 |
{
|
|
@@ -305,7 +578,7 @@
|
|
| 305 |
"cell_type": "markdown",
|
| 306 |
"metadata": {},
|
| 307 |
"source": [
|
| 308 |
-
"##
|
| 309 |
]
|
| 310 |
},
|
| 311 |
{
|
|
@@ -398,7 +671,7 @@
|
|
| 398 |
"cell_type": "markdown",
|
| 399 |
"metadata": {},
|
| 400 |
"source": [
|
| 401 |
-
"##
|
| 402 |
]
|
| 403 |
},
|
| 404 |
{
|
|
@@ -485,14 +758,148 @@
|
|
| 485 |
" \n",
|
| 486 |
" return stats\n",
|
| 487 |
"\n",
|
| 488 |
-
"print(\"DatasetBuilder class defined successfully!\")"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
]
|
| 490 |
},
|
| 491 |
{
|
| 492 |
"cell_type": "markdown",
|
| 493 |
"metadata": {},
|
| 494 |
"source": [
|
| 495 |
-
"##
|
| 496 |
]
|
| 497 |
},
|
| 498 |
{
|
|
@@ -511,9 +918,149 @@
|
|
| 511 |
"BASE_WIKI = \"https://fridaynightfunking.fandom.com\"\n",
|
| 512 |
"MAX_PAGES = 20 # Adjust as needed\n",
|
| 513 |
"\n",
|
| 514 |
-
"print(\"Starting
|
| 515 |
"print(f\"Target Wiki: {BASE_WIKI}\")\n",
|
| 516 |
-
"print(f\"Max Pages: {MAX_PAGES}\")"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 517 |
]
|
| 518 |
},
|
| 519 |
{
|
|
@@ -576,7 +1123,7 @@
|
|
| 576 |
"cell_type": "markdown",
|
| 577 |
"metadata": {},
|
| 578 |
"source": [
|
| 579 |
-
"##
|
| 580 |
]
|
| 581 |
},
|
| 582 |
{
|
|
@@ -600,7 +1147,7 @@
|
|
| 600 |
"cell_type": "markdown",
|
| 601 |
"metadata": {},
|
| 602 |
"source": [
|
| 603 |
-
"##
|
| 604 |
]
|
| 605 |
},
|
| 606 |
{
|
|
@@ -770,8 +1317,67 @@
|
|
| 770 |
"cell_type": "markdown",
|
| 771 |
"metadata": {},
|
| 772 |
"source": [
|
| 773 |
-
"##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 774 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 775 |
"### To scrape additional wikis:\n",
|
| 776 |
"```python\n",
|
| 777 |
"additional_wikis = [\n",
|
|
|
|
| 7 |
"# Custom Dataset Maker for OpenAI GPT-OSS-Safeguard-20B\n",
|
| 8 |
"\n",
|
| 9 |
"This notebook creates custom datasets for fine-tuning the safeguard model by:\n",
|
| 10 |
+
"- Loading HuggingFaceH4/Multilingual-Thinking base dataset\n",
|
| 11 |
+
"- Applying custom safety policies to multilingual content\n",
|
| 12 |
+
"- Scraping and analyzing Fandom wiki content\n",
|
| 13 |
+
"- Combining and formatting data for model training\n",
|
| 14 |
"\n",
|
| 15 |
+
"**Base Dataset**: HuggingFaceH4/Multilingual-Thinking (1,000 samples in 5 languages)\n",
|
| 16 |
"**Target Model**: openai/gpt-oss-safeguard-20b"
|
| 17 |
]
|
| 18 |
},
|
|
|
|
| 30 |
"outputs": [],
|
| 31 |
"source": [
|
| 32 |
"# Install required packages\n",
|
| 33 |
+
"!pip install -q requests beautifulsoup4 lxml pandas tqdm datasets transformers huggingface_hub"
|
| 34 |
]
|
| 35 |
},
|
| 36 |
{
|
|
|
|
| 48 |
"from tqdm.auto import tqdm\n",
|
| 49 |
"import time\n",
|
| 50 |
"from urllib.parse import urljoin, urlparse\n",
|
| 51 |
+
"from datetime import datetime\n",
|
| 52 |
+
"from datasets import load_dataset\n",
|
| 53 |
+
"import random"
|
| 54 |
]
|
| 55 |
},
|
| 56 |
{
|
|
|
|
| 123 |
"cell_type": "markdown",
|
| 124 |
"metadata": {},
|
| 125 |
"source": [
|
| 126 |
+
"## 3. Load HuggingFaceH4/Multilingual-Thinking Base Dataset"
|
| 127 |
+
]
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"cell_type": "markdown",
|
| 131 |
+
"metadata": {},
|
| 132 |
+
"source": [
|
| 133 |
+
"Load and analyze the base dataset for training gpt-oss-safeguard-20b models."
|
| 134 |
+
]
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"cell_type": "code",
|
| 138 |
+
"execution_count": null,
|
| 139 |
+
"metadata": {},
|
| 140 |
+
"outputs": [],
|
| 141 |
+
"source": [
|
| 142 |
# Fetch the base corpus (train split) from the Hugging Face Hub and print a
# short summary. Everything, including the summary prints, stays inside the
# `try` so that any failure leaves `multilingual_dataset` set to None, which
# later cells use as the "no base data" signal.
print("Loading HuggingFaceH4/Multilingual-Thinking dataset...")
try:
    multilingual_dataset = load_dataset(
        "HuggingFaceH4/Multilingual-Thinking",
        split="train",
    )
    print("✅ Dataset loaded successfully!")
    print(f"Total samples: {len(multilingual_dataset)}")
    print(f"Available languages: {multilingual_dataset.unique('reasoning_language')}")
    print(f"Dataset features: {list(multilingual_dataset.features.keys())}")
except Exception as load_error:
    # Notebook boundary: report the problem and fall back instead of aborting.
    multilingual_dataset = None
    print(f"❌ Error loading dataset: {load_error}")
    print("Creating sample data for demonstration...")
|
| 154 |
+
]
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"cell_type": "code",
|
| 158 |
+
"execution_count": null,
|
| 159 |
+
"metadata": {},
|
| 160 |
+
"outputs": [],
|
| 161 |
+
"source": [
|
| 162 |
def extract_text_from_messages(messages: List[Dict]) -> str:
    """Flatten a chat transcript into one ``" | "``-separated string.

    Every message whose ``role`` is ``system``, ``user`` or ``assistant``
    contributes one ``"<Role>: <content>"`` segment, in input order.
    Messages with any other role (or without a role) are skipped.

    Args:
        messages: Chat messages as dicts with ``role`` and ``content`` keys.
            ``None`` or an empty list produces an empty string.

    Returns:
        The joined transcript, or ``""`` when no message qualifies.
    """
    role_labels = {'system': 'System', 'user': 'User', 'assistant': 'Assistant'}

    texts = []
    for message in messages or []:
        label = role_labels.get(message.get('role', ''))
        if label is None:
            continue
        content = message.get('content')
        # A null content would otherwise be rendered as the literal text
        # "None" and leak into the training text.
        texts.append(f"{label}: {'' if content is None else content}")

    return " | ".join(texts)
|
| 177 |
+
"\n",
|
| 178 |
def prepare_base_dataset_for_analysis(dataset) -> List[Dict]:
    """Convert raw base-dataset rows into flat entries for safety analysis.

    Each row is read through ``.get`` for the keys ``messages``,
    ``developer``, ``user``, ``analysis``, ``final`` and
    ``reasoning_language``. Missing or null values are normalised to an empty
    string (``'unknown'`` for the language, an empty message list for
    ``messages``) so downstream slicing and ``str`` methods never receive
    ``None``.

    Args:
        dataset: Iterable of mapping-like rows. A falsy value (``None`` or an
            empty collection) yields an empty list.

    Returns:
        One dict per row with the keys ``id``, ``language``,
        ``full_conversation``, ``system_prompt``, ``user_input``,
        ``reasoning``, ``final_response``, ``source``, ``has_thinking`` and
        ``dataset_type``.
    """
    if not dataset:
        return []

    prepared_data = []
    for i, sample in enumerate(dataset):
        analysis = sample.get('analysis') or ''
        prepared_data.append({
            'id': f"base_{i}",
            'language': sample.get('reasoning_language') or 'unknown',
            # Read `messages` with .get like every other field; a row without
            # it contributes an empty conversation instead of a KeyError.
            'full_conversation': extract_text_from_messages(sample.get('messages') or []),
            'system_prompt': sample.get('developer') or '',
            'user_input': sample.get('user') or '',
            'reasoning': analysis,
            'final_response': sample.get('final') or '',
            'source': 'multilingual_thinking',
            'has_thinking': bool(analysis),
            'dataset_type': 'base',
        })

    return prepared_data
|
| 211 |
+
"\n",
|
| 212 |
# Build `base_dataset` from the loaded corpus, or fall back to an empty list
# when loading failed in the previous cell.
if not multilingual_dataset:
    base_dataset = []
    print("⚠️ Using empty base dataset")
else:
    base_dataset = prepare_base_dataset_for_analysis(multilingual_dataset)
    print(f"✅ Prepared {len(base_dataset)} base samples")

    # Tally how many prepared samples exist per reasoning language.
    lang_counts = {}
    for sample in base_dataset:
        lang_counts.setdefault(sample['language'], 0)
        lang_counts[sample['language']] += 1

    print("Language distribution:")
    for lang, count in lang_counts.items():
        print(f"  {lang}: {count} samples")

    # Preview the first prepared entry (prompts truncated to 100 characters).
    preview = base_dataset[0]
    print("\nSample base dataset entry:")
    print(f"Language: {preview['language']}")
    print(f"System: {preview['system_prompt'][:100]}...")
    print(f"User: {preview['user_input'][:100]}...")
    print(f"Has thinking: {preview['has_thinking']}")
|
| 236 |
+
]
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
"cell_type": "markdown",
|
| 240 |
+
"metadata": {},
|
| 241 |
+
"source": [
|
| 242 |
+
"## 4. Enhanced Safety Policy Analyzer for Multilingual Content"
|
| 243 |
+
]
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"cell_type": "markdown",
|
| 247 |
+
"metadata": {},
|
| 248 |
+
"source": [
|
| 249 |
+
"Extended analyzer that can handle multilingual content and the message structure from Multilingual-Thinking dataset."
|
| 250 |
+
]
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"cell_type": "code",
|
| 254 |
+
"execution_count": null,
|
| 255 |
+
"metadata": {},
|
| 256 |
+
"outputs": [],
|
| 257 |
+
"source": [
|
| 258 |
class MultilingualSafetyAnalyzer:
    """Keyword-based safety analyzer for multilingual text and prepared chat samples.

    The policy dict must provide ``policy['threshold']`` and
    ``policy['categories']``; each category is a dict with the keys
    ``enabled``, ``keywords`` and ``severity_levels``.
    """

    # Violence-related terms per language. Keys are lowercase full language
    # names; a language code such as 'en' matches no key, so such text is
    # scored with the policy's own keywords only.
    _LANG_MAPPINGS = {
        'english': ['kill', 'death', 'violence', 'murder', 'torture'],
        'french': ['tuer', 'mort', 'violence', 'meurtre', 'torture'],
        'german': ['töten', 'tod', 'gewalt', 'mord', 'folter'],
        'spanish': ['matar', 'muerte', 'violencia', 'asesinato', 'tortura'],
        'italian': ['uccidere', 'morte', 'violenza', 'omicidio', 'tortura'],
    }

    # A category receives the language-specific terms only when its first
    # policy keyword is one of these English seed words.
    _VIOLENCE_SEED_KEYWORDS = frozenset(
        {'kill', 'death', 'blood', 'gore', 'violence', 'murder', 'torture'}
    )

    # Sample fields that analyze_message_structure scores individually.
    _COMPONENTS = ('system_prompt', 'user_input', 'reasoning', 'final_response')

    def __init__(self, policy: Dict):
        self.policy = policy
        self.categories = policy['categories']

    @staticmethod
    def _dedupe_keywords(keywords):
        """Return keywords without case-insensitive duplicates, keeping first-seen order."""
        seen = set()
        unique = []
        for keyword in keywords:
            folded = keyword.lower()
            if folded not in seen:
                seen.add(folded)
                unique.append(keyword)
        return unique

    def analyze_text_multilingual(self, text: str, language: str = 'en') -> Dict:
        """Score one text against every enabled policy category.

        Args:
            text: Text to analyze. Empty or ``None`` yields a safe result.
            language: Language name used to pick extra keywords (matched
                case-insensitively against the built-in mapping table).

        Returns:
            Dict with ``safe``, ``categories`` (per-category ``detected``,
            ``severity``, ``score``, ``matches``), ``score`` (highest category
            score), ``language`` and ``flagged_keywords``.
        """
        results = {
            'safe': True,
            'categories': {},
            'score': 0,
            'language': language,
            # Always present, so callers see one schema for empty and
            # non-empty text alike.
            'flagged_keywords': [],
        }
        if not text:
            return results

        text_lower = text.lower()
        # Guard against a None / non-string language instead of crashing on .lower().
        language_key = language.lower() if isinstance(language, str) else ''
        extra_keywords = self._LANG_MAPPINGS.get(language_key, [])

        for category_name, category_info in self.categories.items():
            if not category_info['enabled']:
                continue

            keywords = list(category_info['keywords'])
            # `keywords and ...` protects categories that define no keywords.
            if extra_keywords and keywords and keywords[0] in self._VIOLENCE_SEED_KEYWORDS:
                keywords.extend(extra_keywords)
            # The English terms repeat words the policy already lists;
            # without de-duplication a single hit would be counted twice.
            keywords = self._dedupe_keywords(keywords)

            # NOTE: plain substring matching, so "kill" also matches "skill".
            matches = [keyword for keyword in keywords if keyword.lower() in text_lower]
            if not matches:
                continue

            severity_score = min(1.0, len(matches) / max(1, len(keywords)))
            levels = category_info['severity_levels']
            level_index = min(len(levels) - 1, int(severity_score * len(levels)))

            results['categories'][category_name] = {
                'detected': True,
                'severity': levels[level_index] if levels else None,
                'score': severity_score,
                'matches': matches,
            }
            results['flagged_keywords'].extend(matches)
            results['score'] = max(results['score'], severity_score)

        results['safe'] = results['score'] < self.policy['threshold']
        return results

    def analyze_message_structure(self, sample: Dict) -> Dict:
        """Analyze the individual text components of one prepared sample.

        Only non-empty ``system_prompt``, ``user_input``, ``reasoning`` and
        ``final_response`` fields are scored; ``full_conversation`` is not.

        Returns:
            Dict with ``overall_safe`` (False when any component is unsafe),
            ``overall_score`` (highest component score, 0 when nothing was
            analyzed), ``component_analyses`` and ``language``.
        """
        language = sample.get('language') or 'unknown'

        analyses = {
            component: self.analyze_text_multilingual(sample[component], language)
            for component in self._COMPONENTS
            if sample.get(component)
        }

        any_unsafe = any(not analysis['safe'] for analysis in analyses.values())
        max_score = max((analysis['score'] for analysis in analyses.values()), default=0)

        return {
            'overall_safe': not any_unsafe,
            'overall_score': max_score,
            'component_analyses': analyses,
            'language': language,
        }

    def create_training_example_from_message(self, sample: Dict, analysis: Dict) -> Dict:
        """Build a labelled training example from a sample and its structure analysis.

        Args:
            sample: Prepared sample (``id``, ``full_conversation``,
                ``has_thinking``, ``source`` and ``reasoning`` are read).
            analysis: Result of :meth:`analyze_message_structure` for that sample.
        """
        return {
            'id': sample.get('id'),
            'text': sample.get('full_conversation', ''),
            'label': 'unsafe' if not analysis['overall_safe'] else 'safe',
            'score': analysis['overall_score'],
            'language': analysis['language'],
            'has_thinking': sample.get('has_thinking', False),
            'source': sample.get('source'),
            'component_labels': {
                component: 'unsafe' if not comp_analysis['safe'] else 'safe'
                for component, comp_analysis in analysis['component_analyses'].items()
            },
            'reasoning_available': bool(sample.get('reasoning')),
        }


print("Enhanced MultilingualSafetyAnalyzer defined successfully!")
|
| 389 |
+
]
|
| 390 |
+
},
|
| 391 |
+
{
|
| 392 |
+
"cell_type": "markdown",
|
| 393 |
+
"metadata": {},
|
| 394 |
+
"source": [
|
| 395 |
+
"## 5. Fandom Wiki Scraper"
|
| 396 |
]
|
| 397 |
},
|
| 398 |
{
|
|
|
|
| 578 |
"cell_type": "markdown",
|
| 579 |
"metadata": {},
|
| 580 |
"source": [
|
| 581 |
+
"## 6. Enhanced Dataset Builder"
|
| 582 |
]
|
| 583 |
},
|
| 584 |
{
|
|
|
|
| 671 |
"cell_type": "markdown",
|
| 672 |
"metadata": {},
|
| 673 |
"source": [
|
| 674 |
+
"## 6. Enhanced Dataset Builder"
|
| 675 |
]
|
| 676 |
},
|
| 677 |
{
|
|
|
|
| 758 |
" \n",
|
| 759 |
" return stats\n",
|
| 760 |
"\n",
|
| 761 |
+
"print(\"DatasetBuilder class defined successfully!\")\n",
|
| 762 |
+
"\n",
|
| 763 |
+
"\n",
|
| 764 |
# Enhanced Dataset Builder for Multilingual + Wiki Data
class EnhancedDatasetBuilder:
    """Collects training examples from the multilingual base data and scraped wiki pages.

    Examples accumulate in ``self.dataset`` (a list of dicts) and can be
    summarised with :meth:`get_enhanced_statistics` or exported as JSONL/CSV.
    """

    # The annotation is a string (forward reference), so defining this class
    # does not require the analyzer class to exist yet.
    def __init__(self, policy_analyzer: "MultilingualSafetyAnalyzer"):
        self.analyzer = policy_analyzer
        self.dataset = []

    def process_base_dataset(self, base_data: List[Dict]):
        """Analyze prepared base samples and append one example per sample."""
        print("Processing multilingual base dataset...")

        for sample in tqdm(base_data):
            analysis = self.analyzer.analyze_message_structure(sample)
            example = self.analyzer.create_training_example_from_message(sample, analysis)

            example['dataset_type'] = 'base'
            example['has_thinking'] = sample.get('has_thinking', False)
            # Record which policy categories fired in any component so that
            # get_enhanced_statistics() can report a category distribution.
            example.setdefault('categories', sorted({
                category
                for component in analysis.get('component_analyses', {}).values()
                for category in component.get('categories', {})
            }))

            self.dataset.append(example)

        print(f"Processed {len(base_data)} base samples")

    def _build_wiki_example(self, text: str, source: str, source_url) -> Dict:
        """Analyze one wiki text fragment and return its labelled example dict."""
        # Built from the analysis result directly: the multilingual analyzer
        # has no `create_training_example` method, so calling it raised
        # AttributeError on the first non-empty wiki field.
        analysis = self.analyzer.analyze_text_multilingual(text)
        return {
            'text': text,
            'label': 'safe' if analysis['safe'] else 'unsafe',
            'score': analysis['score'],
            'categories': sorted(analysis.get('categories', {})),
            'source': source,
            'source_url': source_url,
            'dataset_type': 'wiki',
            'language': 'en',  # Default to English for wiki content
        }

    def process_wiki_data(self, wiki_data: List[Dict]):
        """Turn scraped wiki pages into training examples.

        Each page may contribute its ``character_description``, its
        ``plot_summary`` and every non-empty entry of ``dialogues``.
        """
        print("Processing wiki data...")

        for page in tqdm(wiki_data):
            url = page.get('url')

            for source in ('character_description', 'plot_summary'):
                text = page.get(source)
                if text:
                    self.dataset.append(self._build_wiki_example(text, source, url))

            for dialogue in page.get('dialogues') or []:
                if dialogue:
                    self.dataset.append(self._build_wiki_example(dialogue, 'dialogue', url))

        print(f"Processed {len(wiki_data)} wiki pages")

    def combine_datasets(self, base_data: List[Dict], wiki_data: List[Dict]):
        """Process the base samples first, then the wiki pages."""
        print("Combining datasets...")

        self.process_base_dataset(base_data)
        self.process_wiki_data(wiki_data)

        print(f"Combined dataset contains {len(self.dataset)} total examples")

    @staticmethod
    def _bump(counts: Dict, key) -> None:
        """Increment ``counts[key]``, starting from zero."""
        counts[key] = counts.get(key, 0) + 1

    def get_enhanced_statistics(self) -> Dict:
        """Summarise the collected examples.

        Returns:
            Dict with ``total_examples``, ``safe_examples``,
            ``unsafe_examples`` and the count dicts ``by_dataset_type``,
            ``by_language``, ``by_source``, ``category_distribution`` and
            ``thinking_distribution`` (only examples carrying a
            ``has_thinking`` key are counted in the last one).
        """
        stats = {
            'total_examples': len(self.dataset),
            'safe_examples': sum(1 for ex in self.dataset if ex['label'] == 'safe'),
            'unsafe_examples': sum(1 for ex in self.dataset if ex['label'] == 'unsafe'),
            'by_dataset_type': {},
            'by_language': {},
            'by_source': {},
            'category_distribution': {},
            'thinking_distribution': {},
        }

        for example in self.dataset:
            self._bump(stats['by_dataset_type'], example.get('dataset_type', 'unknown'))
            self._bump(stats['by_language'], example.get('language', 'unknown'))
            self._bump(stats['by_source'], example.get('source', 'unknown'))

            if 'has_thinking' in example:
                bucket = 'with_thinking' if example['has_thinking'] else 'without_thinking'
                self._bump(stats['thinking_distribution'], bucket)

            # `categories` may be a list of names or a dict keyed by name.
            for category in example.get('categories') or ():
                self._bump(stats['category_distribution'], category)

        return stats

    def export_jsonl(self, output_path: str):
        """Export dataset as JSONL for fine-tuning (one JSON object per line, UTF-8)."""
        with open(output_path, 'w', encoding='utf-8') as f:
            for example in self.dataset:
                f.write(json.dumps(example, ensure_ascii=False) + '\n')
        print(f"Dataset exported to {output_path}")

    def export_csv(self, output_path: str):
        """Export dataset as CSV via a pandas DataFrame (no index column)."""
        df = pd.DataFrame(self.dataset)
        df.to_csv(output_path, index=False)
        print(f"Dataset exported to {output_path}")


print("EnhancedDatasetBuilder class defined successfully!")
|
| 896 |
]
|
| 897 |
},
|
| 898 |
{
|
| 899 |
"cell_type": "markdown",
|
| 900 |
"metadata": {},
|
| 901 |
"source": [
|
| 902 |
+
"## 7. Execute Dataset Creation and Analysis"
|
| 903 |
]
|
| 904 |
},
|
| 905 |
{
|
|
|
|
| 918 |
"BASE_WIKI = \"https://fridaynightfunking.fandom.com\"\n",
|
| 919 |
"MAX_PAGES = 20 # Adjust as needed\n",
|
| 920 |
"\n",
|
| 921 |
+
"print(\"Starting dataset creation process...\")\n",
|
| 922 |
"print(f\"Target Wiki: {BASE_WIKI}\")\n",
|
| 923 |
+
"print(f\"Max Pages: {MAX_PAGES}\")\n",
|
| 924 |
+
"print(f\"Base Dataset: HuggingFaceH4/Multilingual-Thinking\")\n",
|
| 925 |
+
"print(\"=\"*60)"
|
| 926 |
+
]
|
| 927 |
+
},
|
| 928 |
+
{
|
| 929 |
+
"cell_type": "code",
|
| 930 |
+
"execution_count": null,
|
| 931 |
+
"metadata": {},
|
| 932 |
+
"outputs": [],
|
| 933 |
+
"source": [
|
| 934 |
+
"# Initialize enhanced policy analyzer\n",
|
| 935 |
+
"print(\"\\n1. Initializing Multilingual Safety Analyzer...\")\n",
|
| 936 |
+
"analyzer = MultilingualSafetyAnalyzer(SAFETY_POLICY)\n",
|
| 937 |
+
"print(f\"✅ Analyzer initialized with {len(SAFETY_POLICY['categories'])} categories\")"
|
| 938 |
+
]
|
| 939 |
+
},
|
| 940 |
+
{
|
| 941 |
+
"cell_type": "code",
|
| 942 |
+
"execution_count": null,
|
| 943 |
+
"metadata": {},
|
| 944 |
+
"outputs": [],
|
| 945 |
+
"source": [
|
| 946 |
+
"# Initialize enhanced dataset builder\n",
|
| 947 |
+
"print(\"\\n2. Initializing Enhanced Dataset Builder...\")\n",
|
| 948 |
+
"builder = EnhancedDatasetBuilder(analyzer)\n",
|
| 949 |
+
"print(\"✅ Enhanced builder initialized\")"
|
| 950 |
+
]
|
| 951 |
+
},
|
| 952 |
+
{
|
| 953 |
+
"cell_type": "code",
|
| 954 |
+
"execution_count": null,
|
| 955 |
+
"metadata": {},
|
| 956 |
+
"outputs": [],
|
| 957 |
+
"source": [
|
| 958 |
# Feed the base samples into the builder. When no base data was prepared,
# two hand-written demonstration samples (one harmful request, one benign
# one) are processed instead so the rest of the notebook still has input.
print("\n3. Processing Multilingual Base Dataset...")
if not base_dataset:
    print("⚠️ No base dataset available - creating sample data")
    sample_base_data = [
        {
            'id': 'sample_1',
            'language': 'english',
            'full_conversation': 'User: How do I make a bomb? Assistant: I cannot provide instructions for creating explosive devices.',
            'system_prompt': 'You are a helpful assistant.',
            'user_input': 'How do I make a bomb?',
            'reasoning': 'The user is asking for dangerous information that could be used to harm others.',
            'final_response': 'I cannot provide instructions for creating explosive devices.',
            'source': 'multilingual_thinking',
            'has_thinking': True,
            'dataset_type': 'base',
        },
        {
            'id': 'sample_2',
            'language': 'english',
            'full_conversation': 'User: What is the weather like today? Assistant: I do not have access to real-time weather data.',
            'system_prompt': 'You are a helpful assistant.',
            'user_input': 'What is the weather like today?',
            'reasoning': 'The user is asking for current weather information.',
            'final_response': 'I do not have access to real-time weather data.',
            'source': 'multilingual_thinking',
            'has_thinking': True,
            'dataset_type': 'base',
        },
    ]
    builder.process_base_dataset(sample_base_data)
    print(f"✅ Sample base dataset processed: {len(sample_base_data)} samples")
else:
    builder.process_base_dataset(base_dataset)
    print(f"✅ Base dataset processed: {len(base_dataset)} samples")
|
| 994 |
+
]
|
| 995 |
+
},
|
| 996 |
+
{
|
| 997 |
+
"cell_type": "code",
|
| 998 |
+
"execution_count": null,
|
| 999 |
+
"metadata": {},
|
| 1000 |
+
"outputs": [],
|
| 1001 |
+
"source": [
|
| 1002 |
# Scrape wiki (optional - can skip if not needed)
#
# Scraping and processing are guarded separately: previously one broad `try`
# covered both, so a failure inside the builder was reported as
# "Error scraping wiki" and hid the real cause.
print("\n4. Scraping Fandom Wiki Data...")
wiki_data = []
try:
    scraper = FandomWikiScraper(BASE_WIKI, rate_limit=1.5)
    # `or []` keeps wiki_data a list even if the scraper returns None.
    wiki_data = scraper.scrape_wiki(max_pages=MAX_PAGES) or []
except Exception as e:
    # Notebook boundary: scraping is best-effort, so report and carry on.
    wiki_data = []
    print(f"❌ Error scraping wiki: {e}")
    print("Continuing with base dataset only...")
else:
    print(f"✅ Wiki data scraped: {len(wiki_data)} pages")

    # Process wiki data
    if wiki_data:
        try:
            builder.process_wiki_data(wiki_data)
        except Exception as e:
            # Examples appended before the failure remain in builder.dataset.
            print(f"❌ Error processing wiki data: {e}")
            print("Continuing with base dataset only...")
        else:
            print("✅ Wiki data processed")
    else:
        print("⚠️ No wiki data to process")
|
| 1019 |
+
]
|
| 1020 |
+
},
|
| 1021 |
+
{
|
| 1022 |
+
"cell_type": "code",
|
| 1023 |
+
"execution_count": null,
|
| 1024 |
+
"metadata": {},
|
| 1025 |
+
"outputs": [],
|
| 1026 |
+
"source": [
|
| 1027 |
+
"# Get comprehensive statistics\n",
|
| 1028 |
+
"print(\"\\n\" + \"=\"*60)\n",
|
| 1029 |
+
"print(\"COMBINED DATASET STATISTICS\")\n",
|
| 1030 |
+
"print(\"=\"*60)\n",
|
| 1031 |
+
"\n",
|
| 1032 |
+
"stats = builder.get_enhanced_statistics()\n",
|
| 1033 |
+
"\n",
|
| 1034 |
+
"print(f\"📊 Total Examples: {stats['total_examples']}\")\n",
|
| 1035 |
+
"print(f\"✅ Safe Examples: {stats['safe_examples']}\")\n",
|
| 1036 |
+
"print(f\"🚫 Unsafe Examples: {stats['unsafe_examples']}\")\n",
|
| 1037 |
+
"print(f\"📊 Safety Rate: {(stats['safe_examples']/max(1,stats['total_examples']))*100:.1f}%\")\n",
|
| 1038 |
+
"\n",
|
| 1039 |
+
"print(f\"\\n📈 By Dataset Type:\")\n",
|
| 1040 |
+
"for dataset_type, count in stats['by_dataset_type'].items():\n",
|
| 1041 |
+
" print(f\" {dataset_type}: {count}\")\n",
|
| 1042 |
+
"\n",
|
| 1043 |
+
"if stats['by_language']:\n",
|
| 1044 |
+
" print(f\"\\n🌍 By Language:\")\n",
|
| 1045 |
+
" for language, count in stats['by_language'].items():\n",
|
| 1046 |
+
" print(f\" {language}: {count}\")\n",
|
| 1047 |
+
"\n",
|
| 1048 |
+
"if stats['thinking_distribution']:\n",
|
| 1049 |
+
" print(f\"\\n🧠 Thinking Distribution:\")\n",
|
| 1050 |
+
" for thinking_type, count in stats['thinking_distribution'].items():\n",
|
| 1051 |
+
" print(f\" {thinking_type}: {count}\")\n",
|
| 1052 |
+
"\n",
|
| 1053 |
+
"if stats['by_source']:\n",
|
| 1054 |
+
" print(f\"\\n📝 By Source:\")\n",
|
| 1055 |
+
" for source, count in stats['by_source'].items():\n",
|
| 1056 |
+
" print(f\" {source}: {count}\")\n",
|
| 1057 |
+
"\n",
|
| 1058 |
+
"if stats['category_distribution']:\n",
|
| 1059 |
+
" print(f\"\\n⚠️ Category Distribution:\")\n",
|
| 1060 |
+
" for category, count in stats['category_distribution'].items():\n",
|
| 1061 |
+
" print(f\" {category}: {count}\")\n",
|
| 1062 |
+
"\n",
|
| 1063 |
+
"print(\"=\"*60)"
|
| 1064 |
]
|
| 1065 |
},
|
| 1066 |
{
|
|
|
|
| 1123 |
"cell_type": "markdown",
|
| 1124 |
"metadata": {},
|
| 1125 |
"source": [
|
| 1126 |
+
"## 8. Export Combined Dataset"
|
| 1127 |
]
|
| 1128 |
},
|
| 1129 |
{
|
|
|
|
| 1147 |
"cell_type": "markdown",
|
| 1148 |
"metadata": {},
|
| 1149 |
"source": [
|
| 1150 |
+
"## 9. Sample Dataset Preview"
|
| 1151 |
]
|
| 1152 |
},
|
| 1153 |
{
|
|
|
|
| 1317 |
"cell_type": "markdown",
|
| 1318 |
"metadata": {},
|
| 1319 |
"source": [
|
| 1320 |
+
"## 13. Summary and Usage Guide"
|
| 1321 |
+
]
|
| 1322 |
+
},
|
| 1323 |
+
{
|
| 1324 |
+
"cell_type": "markdown",
|
| 1325 |
+
"metadata": {},
|
| 1326 |
+
"source": [
|
| 1327 |
+
"### 🎯 What We Built\n",
|
| 1328 |
+
"\n",
|
| 1329 |
+
"This enhanced dataset maker creates custom training data for the **openai/gpt-oss-safeguard-20b** model by:\n",
|
| 1330 |
+
"\n",
|
| 1331 |
+
"1. **Base Dataset Integration**: Uses **HuggingFaceH4/Multilingual-Thinking** (1,000 samples in 5 languages) as the foundation\n",
|
| 1332 |
+
"2. **Multilingual Safety Analysis**: Applies custom safety policies to multilingual content (English, French, German, Spanish, Italian)\n",
|
| 1333 |
+
"3. **Wiki Data Enhancement**: Scrapes and analyzes Fandom wiki content for additional training examples\n",
|
| 1334 |
+
"4. **Advanced Message Processing**: Handles complex conversation structures with reasoning (chain-of-thought) and final responses\n",
|
| 1335 |
+
"\n",
|
| 1336 |
+
"### 🔧 Key Features\n",
|
| 1337 |
+
"\n",
|
| 1338 |
+
"- **Multilingual Support**: Automatic keyword detection in 5 languages\n",
|
| 1339 |
+
"- **Message Structure Analysis**: Processes system prompts, user input, reasoning, and final responses\n",
|
| 1340 |
+
"- **Enhanced Safety Categories**: 6 configurable safety categories with severity levels\n",
|
| 1341 |
+
"- **Combined Datasets**: Merges base and wiki data for comprehensive training\n",
|
| 1342 |
+
"- **Multiple Export Formats**: JSONL for training, CSV for analysis, and a fine-tuning format for gpt-oss models\n",
|
| 1343 |
"\n",
|
| 1344 |
+
"### 📊 Dataset Output\n",
|
| 1345 |
+
"\n",
|
| 1346 |
+
"Generated training examples include:\n",
|
| 1347 |
+
"- `label`: \"safe\" or \"unsafe\"\n",
|
| 1348 |
+
"- `score`: Safety confidence score (0.0-1.0)\n",
|
| 1349 |
+
"- `language`: Content language\n",
|
| 1350 |
+
"- `dataset_type`: \"base\" or \"wiki\"\n",
|
| 1351 |
+
"- `has_thinking`: Whether a reasoning process is available\n",
|
| 1352 |
+
"- `categories`: Detected safety violations with severity\n",
|
| 1353 |
+
"\n",
|
| 1354 |
+
"### 🚀 Ready for Fine-tuning\n",
|
| 1355 |
+
"\n",
|
| 1356 |
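+
"The final `safeguard_finetuning.jsonl` contains one example per line in the format required for fine-tuning the gpt-oss-safeguard-20b model. The block below is a schematic rather than literal JSON: `or` separates the allowed values and `0.0-1.0` denotes a numeric range.\n",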
|
| 1357 |
+
"```json\n",
|
| 1358 |
+
"{\n",
|
| 1359 |
+
" \"prompt\": \"Classify the following text for safety according to the policy: <text>\",\n",
|
| 1360 |
+
" \"completion\": \"safe\" or \"unsafe\",\n",
|
| 1361 |
+
" \"metadata\": {\n",
|
| 1362 |
+
" \"score\": 0.0-1.0,\n",
|
| 1363 |
+
" \"categories\": [\"category1\", \"category2\"],\n",
|
| 1364 |
+
" \"source\": \"multilingual_thinking\" or \"wiki\"\n",
|
| 1365 |
+
" }\n",
|
| 1366 |
+
"}\n",
|
| 1367 |
+
"```"
|
| 1368 |
+
]
|
| 1369 |
+
},
|
| 1370 |
+
{
|
| 1371 |
+
"cell_type": "markdown",
|
| 1372 |
+
"metadata": {},
|
| 1373 |
+
"source": [
|
| 1374 |
+
"## 14. Configuration Options"
|
| 1375 |
+
]
|
| 1376 |
+
},
|
| 1377 |
+
{
|
| 1378 |
+
"cell_type": "markdown",
|
| 1379 |
+
"metadata": {},
|
| 1380 |
+
"source": [
|
| 1381 |
"### To scrape additional wikis:\n",
|
| 1382 |
"```python\n",
|
| 1383 |
"additional_wikis = [\n",
|