aparke21 committed on
Commit 9014afd · verified · 1 Parent(s): 08439af

Upload 106 files

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +3 -0
  2. README.md +19 -6
  3. __pycache__/utils.cpython-312.pyc +0 -0
  4. app.py +1026 -0
  5. app.sh +17 -0
  6. app_logs/app_5936040.out +18 -0
  7. app_logs/app_5936041.out +18 -0
  8. app_logs/app_5936047.out +19 -0
  9. app_logs/app_5936050.out +1 -0
  10. app_logs/app_5936052.out +57 -0
  11. assets/umd_logo.png +3 -0
  12. configs/prompts.yaml +100 -0
  13. configs/task1_demo.yaml +27 -0
  14. configs/task1_demo_sph.yaml +28 -0
  15. data/survey_responses_screened.csv +3 -0
  16. push.sh +22 -0
  17. requirements.txt +167 -0
  18. requirements_concise.txt +18 -0
  19. unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/README.md +202 -0
  20. unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/adapter_config.json +31 -0
  21. unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/adapter_model.safetensors +3 -0
  22. unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/added_tokens.json +3 -0
  23. unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/chat_template.json +3 -0
  24. unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/preprocessor_config.json +29 -0
  25. unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/processor_config.json +4 -0
  26. unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/special_tokens_map.json +33 -0
  27. unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/tokenizer.json +3 -0
  28. unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/tokenizer.model +3 -0
  29. unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/tokenizer_config.json +0 -0
  30. unsloth_compiled_cache/AqlmLoraLinear_peft_forward.py +67 -0
  31. unsloth_compiled_cache/AwqLoraLinear_peft_forward.py +66 -0
  32. unsloth_compiled_cache/BatchNorm1d.py +88 -0
  33. unsloth_compiled_cache/BatchNorm2d.py +88 -0
  34. unsloth_compiled_cache/BatchNorm3d.py +88 -0
  35. unsloth_compiled_cache/Conv1d.py +43 -0
  36. unsloth_compiled_cache/Conv2d.py +43 -0
  37. unsloth_compiled_cache/Conv3d.py +43 -0
  38. unsloth_compiled_cache/ConvTranspose1d.py +70 -0
  39. unsloth_compiled_cache/ConvTranspose2d.py +71 -0
  40. unsloth_compiled_cache/ConvTranspose3d.py +71 -0
  41. unsloth_compiled_cache/GPTQLoraLinear_peft_forward.py +73 -0
  42. unsloth_compiled_cache/GroupNorm.py +43 -0
  43. unsloth_compiled_cache/LayerNorm.py +45 -0
  44. unsloth_compiled_cache/Linear4bit_peft_forward.py +97 -0
  45. unsloth_compiled_cache/Linear8bitLt_peft_forward.py +90 -0
  46. unsloth_compiled_cache/Linear_peft_forward.py +89 -0
  47. unsloth_compiled_cache/LoraParallelLinear_peft_forward.py +87 -0
  48. unsloth_compiled_cache/RMSNorm.py +46 -0
  49. unsloth_compiled_cache/UnslothAlignPropTrainer.py +637 -0
  50. unsloth_compiled_cache/UnslothBCOTrainer.py +1824 -0
.gitattributes CHANGED
@@ -36,3 +36,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  src_hf_deploy[[:space:]]2/assets/umd_logo.png filter=lfs diff=lfs merge=lfs -text
37
  src_hf_deploy[[:space:]]2/data/survey_responses_screened.csv filter=lfs diff=lfs merge=lfs -text
38
  src_hf_deploy[[:space:]]2/unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ assets/umd_logo.png filter=lfs diff=lfs merge=lfs -text
40
+ data/survey_responses_screened.csv filter=lfs diff=lfs merge=lfs -text
41
+ unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,25 @@
1
  ---
2
- title: Newtest
3
- emoji: 📈
4
- colorFrom: pink
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 6.0.2
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
  ---
2
+ title: AI-Empowered Community Simulation (Beta)
3
+ emoji: 🧠
4
+ colorFrom: red
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.49.1
8
  app_file: app.py
9
  pinned: false
10
+ # hardware: "gpu-a100-large" # REQUESTS A100 80GB GPU
11
+ # hardware: "gpu-l40s" # Request 1x NVIDIA L40S (48GB VRAM)
12
+ # hardware: "zerogpu"
13
+ hardware: "t4-small"
14
  ---
15
 
16
+ # AI-Empowered Community Simulation (Beta)
17
+
18
+ This Space requires **at least 28 GB of GPU RAM** due to the size of the Unsloth long-context VLM used for inference and summarization.
19
+
20
+ If the hardware fails to start or your account does not have access to this tier,
21
+ please select the appropriate hardware from:
22
+
23
+ **Settings → Hardware → ZeroGPU**
24
+
25
+ ---
__pycache__/utils.cpython-312.pyc ADDED
Binary file (5.34 kB).
app.py ADDED
@@ -0,0 +1,1026 @@
1
+ """
2
+ Instruction Tuning of LLM for Trait-conditioned Style Impact Calibration
3
+ """
4
+ import unsloth
5
+ import yaml # type: ignore
6
+ import pandas as pd # type: ignore
7
+ import os
8
+ from PIL import Image # type: ignore
9
+ import gradio as gr
10
+
11
+ import torch # type: ignore
12
+ from langchain_community.chat_models import ChatOllama # type: ignore
13
+ from langchain_core.messages import SystemMessage, HumanMessage # type: ignore
14
+ from langchain_ollama import OllamaEmbeddings # type: ignore
15
+ from langchain_core.output_parsers import StrOutputParser # type: ignore
16
+ from pydantic import BaseModel # format LLM output as JSON # type: ignore
17
+ from unsloth import FastVisionModel, FastModel, FastLanguageModel # type: ignore
18
+ from transformers import TextStreamer # type: ignore
19
+ from unsloth.chat_templates import get_chat_template # type: ignore
20
+ from unsloth.chat_templates import standardize_sharegpt # type: ignore
21
+ from transformers import TextIteratorStreamer
22
+
23
+ from utils import convert_to_base64, load_config, process_trait_info # type: ignore
24
+ from tqdm import tqdm # type: ignore
25
+ from termcolor import colored # type: ignore
26
+ import threading
27
+ import random
28
+ import numpy as np
32
+ # generation_lock = threading.Lock()
33
+
34
+ # from transformers import StoppingCriteria, StoppingCriteriaList
35
+ # class StopGenerationCriteria(StoppingCriteria):
36
+ # def __init__(self, stop_event):
37
+ # self.stop_event = stop_event
38
+
39
+ # def __call__(self, input_ids, scores, **kwargs):
40
+ # return self.stop_event.is_set()
41
+
42
+
43
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
44
+
45
+ TRAIT_VALUES = {
46
+ "Gender": [
47
+ "Male", "Female", "Non-binary/third gender", "Leave Blank",
48
+ ],
49
+ "Age": [
50
+ "18–24", "25–34", "35–44", "45–54", "55–64", "65 or older", "Leave Blank",
51
+ ],
52
+ "Current Profession": [
53
+ "Healthcare/Medical", "Government/Public Service",
54
+ "Business/Finance",
55
+ "Technology/Engineering", "Education", "Arts/Entertainment",
56
+ "Retail/Hospitality/Food Service",
57
+ "Skilled Trades/Labor (e.g., construction, electrician, landscaper, house cleaner)",
58
+ "Student",
59
+ "Unemployed/Looking for work", "Retired",
60
+ "Other",
61
+ "Leave Blank",
62
+ ],
63
+ "Race/Ethnicity" : [
64
+ "Asian", "Black/African American", "Hispanic/Latino",
65
+ "Native American/Alaska Native", "Native Hawaiian/Other Pacific Islander",
66
+ "White/Caucasian", "Other", "Leave Blank",
67
+ ],
68
+ "Religious/Cultural Group": [
69
+ "Christianity", "Islam", "Hinduism", "Judaism", "Buddhism", "None of the above", "Leave Blank",
70
+ ],
71
+ "Political Affiliation": [
72
+ "Conservative", "Apolitical/Not involved in politics", "Independent",
73
+ "Libertarian", "Moderate", "Liberal", "Leave Blank",
74
+ ],
75
+ "Highest Education": [
76
+ "Less than high school", "High school diploma or equivalent", "Some college, no degree",
77
+ "Associate’s degree", "Bachelor’s degree",
78
+ "Master’s degree", "Doctoral or professional degree",
79
+ "Leave Blank",
80
+ ],
81
+ "Annual Household Income": [
82
+ "Less than $25,000", "$25,000–$49,999", "$50,000–$74,999",
83
+ "$75,000–$99,999", "$100,000–$149,999", "$150,000 or more",
84
+ "Leave Blank",
85
+ ],
86
+ "Family Status": [
87
+ "Single, living alone", "Single, living with family", "Single Parent with children",
88
+ "Married/Partnered, no children", "Married/Partnered, with children",
89
+ "Multi-generation family (e.g., with parents, grandparents, or extended family)",
90
+ "Leave Blank",
91
+ ],
92
+ }
93
+
94
+ HEALTH_TOPICS = {
95
+ "Chronic Obstructive Pulmonary Disease (COPD)": "COPD1.1",
96
+ "Heart Disease": "HD1",
97
+ "HIV": "HIV1.1",
98
+ "Mental Health": "MH1.1",
99
+ "Nutrition": "N2.1",
100
+ "Substance Abuse": "SA4.1",
101
+ "Sexual Practice": "SP7.1",
102
+ "Vaccination": "V7.1",
103
+ "Cystic Fibrosis": "CF1.1",
104
+ }
105
+
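+ # NOTE: each value is the Poster_id key used to look up that topic's Likert
+ # question set in the survey CSV at cfgs["data_path"] (see vlm_response below).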
106
+ health_topics = ""
107
+ for topic in HEALTH_TOPICS:
108
+ health_topics += topic + '\n'
109
+
110
+
111
+
112
+ ##########################################################
113
+ ### To increase style variability to avoid repetitiveness
114
+ ##########################################################
115
+ # * Style variants
116
+ style_variants = [
117
+ "Write with a slightly informal and reflective tone.",
118
+ "Write in a straightforward conversational tone.",
119
+ "Write with mild emotional coloring, but still natural.",
120
+ "Write in a calm, matter-of-fact tone.",
121
+ "Write in a slightly narrative, flowing tone.",
122
+ "Write in a concise but personable tone.",
123
+ "Write in an informal, pragmatic tone, focusing on clarity and utility.",
124
+ ]
125
+ # --- Add small lexical noise / synonym variation ---
126
+ lexical_flavors = [
127
+ "Feel free to vary sentence structures slightly.",
128
+ "Use a mix of simple and slightly complex sentences.",
129
+ "Use a light mix of paraphrasing expressions.",
130
+ "Feel free to choose different synonyms for common emotional words.",
131
+ "Introduce subtle variation in connectors like 'however', 'still', or 'overall'.",
132
+ ]
133
+ openers = [
134
+ "This message",
135
+ "From this message",
136
+ "Through the message",
137
+ "After seeing this message",
138
+ "Looking at this poster",
139
+ "Based on what this poster conveys",
140
+ "Hmmm I think that this message",
141
+ "Reflecting on the message here",
142
+ "Considering what this poster is trying to say",
143
+ "Seeing this message makes me think",
144
+ "Thinking about what this poster is communicating",
145
+ "After reading what's on here",
146
+ "Based on what’s written here",
147
+ "After I look at this whole thing",
148
+ ]
149
+ openers_generic = [
150
+ "Hmmm when thinking about",
151
+ "When I think about",
152
+ "My impression about",
153
+ "Off the top of my head",
154
+ "My general thoughts about",
155
+ "The way I see it,",
156
+ "From my point of view on",
157
+ "My initial take on",
158
+ "In my own words,",
159
+ "As I see things,",
160
+ "Just speaking for myself,",
161
+ "At a glance,",
162
+ ]
163
+ openers_poster_summary = [
164
+ "This poster",
165
+ "This poster seems to",
166
+ "My interpretation of the poster is",
167
+ "From what this poster shows, it seems to",
168
+ "Looking at the poster as a whole, it appears to",
169
+ "Based on the imagery and tone, the poster seems to",
170
+ "Visually, the poster comes across as trying to",
171
+ "To me, this poster is trying to",
172
+ "When I look at this poster, it feels like it aims to",
173
+ "The poster gives me the impression that it intends to",
174
+ ]
175
+ openers_explain = [
176
+ "The reason why I think that is because",
177
+ "To explain why I",
178
+ "Well, to explain my thoughts",
179
+ "To put it simply, I feel this way because",
180
+ "My reasoning behind that is",
181
+ "What leads me to that view is",
182
+ "A big part of why I think that is",
183
+ "To give some context for my view,",
184
+ "Here’s why I lean that way:",
185
+ "I see it that way mainly because",
186
+ "Let me explain why I think so",
187
+ "Thinking through it, I realize it's because",
188
+ "To unpack my thinking a bit,",
189
+ "I guess it’s because",
190
+ "The thing that really shapes my view is",
191
+ "It’s pretty much because",
192
+ "A lot of it comes down to",
193
+ "I feel that way mostly because",
194
+ "My thinking comes from the idea that",
195
+ ]
196
+
197
+
198
+
199
+ """
200
+ Generate LLM response given a single user prompt and input image
201
+ """
202
+ def vlm_response(user_input, history, health_topic,
203
+ gender, age, profession, race, religion,
204
+ political, education, income, family_status,
205
+ # extraversion, agreeableness, conscientiousness, neuroticism, openness,
206
+ ):
207
+ # # 1. Initialize Stop Event for this session
208
+ # stop_event = threading.Event()
209
+ # # Create the stopping criteria to pass to the model
210
+ # stopping_criteria = StoppingCriteriaList([StopGenerationCriteria(stop_event)])
211
+
212
+ # 1. Clear any lingering state
213
+ torch.cuda.empty_cache() # Clear GPU memory
214
+ # 2. Initialize Streamers LOCALLY (Fresh for every request)
215
+ # Note: We need to re-initialize these for every single generation call
216
+ # or just once per function call if we share them.
217
+ streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
218
+ # streamer_aux = TextIteratorStreamer(tokenizer_aux, skip_prompt=True, skip_special_tokens=True)
219
+
220
+ """ [NOTE] we do not use `history` for this generation """
221
+ # get uploaded image
222
+ image = Image.open(user_input['files'][0]) if user_input['files'] else None
223
+ image_uploaded = True
224
+ if image is None:
225
+ image = Image.new('RGB', (24,24))
226
+ image_uploaded = False
227
+ # image_b64 = convert_to_base64(image)
228
+ print(health_topic)
229
+ # print("Image uploaded:", image_uploaded)
230
+
231
+
232
+
233
+ #################################################
234
+ # 1. Construct traits from user inputs
235
+ #################################################
236
+ demo_dict = {
237
+ "Gender": gender,
238
+ "Age": age,
239
+ "Current Profession": profession,
240
+ "Race/Ethnicity": race,
241
+ "Religious/Cultural Group": religion,
242
+ "Political Affiliation": political,
243
+ "Highest Education": education,
244
+ "Annual Household Income": income,
245
+ "Family Status": family_status,
246
+ }
247
+ # big5_dict = {
248
+ # "Extraversion": extraversion,
249
+ # "Agreeableness": agreeableness,
250
+ # "Conscientiousness": conscientiousness,
251
+ # "Neuroticism": neuroticism,
252
+ # "Open-Mindedness": openness,
253
+ # }
254
+
255
+ demo_info = ""
256
+ for trait, value in demo_dict.items():
257
+ if value != "Leave Blank": # only add non-blank values
258
+ demo_info += f"{trait}: {value}\n"
259
+ else:
260
+ demo_info += f"{trait}: [Not specified]\n"
261
+ persona_score = ""
262
+ persona_score += "Big-Five Trait Scores:\n"
263
+ # for trait, value in big5_dict.items():
264
+ # persona_score += f"{trait}: {value}\n"
265
+ # no locus of control trait score
266
+ locus = None
267
+
268
+ ######################################################################################
269
+ # 1*. modify trait info based on trait selection settings
270
+ # demo_full: whether to include full demographic traits or only selected ones
271
+ # include_big5, include_facet, include_locus: include big5 / facet / locus of control traits or not
272
+ # format: <trait>: <value> if available; else <trait>: [Not specified]
273
+ ######################################################################################
274
+ demo_info, persona_score, locus = process_trait_info(
275
+ demo_info, persona_score, locus,
276
+ demo_full=False, include_big5=True,
277
+ include_facet=False, include_locus=False,
278
+ train_mode=False,
279
+ )
280
+ # print(demo_info)
281
+ # print(persona_score)
282
+
283
+ ###############################################
284
+ ### Add style variability ###
285
+ ###############################################
286
+ style_hint = random.choice(style_variants) # increase style variant
287
+ lexical_hint = random.choice(lexical_flavors) # increase lexical variant
288
+ opening_phrase = random.choice(openers) # increase opening variant
289
+ opening_generic = random.choice(openers_generic) # increase opening variant
290
+ opening_poster = random.choice(openers_poster_summary) # poster summary variation
291
+ opening_explain = random.choice(openers_explain) # thought explanation
292
+ print('Style:', style_hint)
293
+ print('Lexical:', lexical_hint)
294
+ print('Opening:', opening_phrase)
295
+ print('Generic opening:', opening_generic)
296
+
297
+
298
+ thread = None # ensure `thread` exists for the cleanup check in `finally`
+ # Wrap the GENERATION logic in try/finally to handle cleanup
299
+ try:
300
+ if image_uploaded:
301
+ """###############################################################
302
+ Case 1: a health poster is uploaded
303
+ => VLM-enabled response prediction to that specific poster
304
+ ###############################################################"""
305
+ ################################################
306
+ # * IMAGE UNDERSTANDING
307
+ ################################################
308
+ yield "Analyzing image content..." # UI Feedback
309
+
310
+ PROMPT = (
311
+ f"Describe the content and main message in the given health campaign poster and how it's related to {health_topic}. "
312
+ "Note that the message could be indirect or subtle (e.g. irony, fear-driven appeal without explicit text, etc.). Only provide the answer (in 2-4 sentences). "
313
+ f"Start the response with {opening_poster}"
314
+ )
315
+ messages = [
316
+ {"role": "user", "content": [
317
+ {"type": "image"},
318
+ {"type": "text", "text": PROMPT}
319
+ ]}
320
+ ]
321
+ input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
322
+ inputs = tokenizer(
323
+ image.convert("RGB"),
324
+ input_text,
325
+ add_special_tokens = False,
326
+ return_tensors = "pt",
327
+ ).to(device)
328
+ # Model inference
329
+ gen_tokens = model.generate(
330
+ **inputs,
331
+ max_new_tokens = 512,
332
+ use_cache = True,
333
+ # do_sample=cfgs["stochastic"],
334
+ # temperature=cfgs["temperature"],
335
+ # min_p=0.9,
336
+ # min_p=0.3,
337
+ top_k=15,
338
+ temperature=0.8,
339
+ do_sample=True, # cfgs["stochastic"]
340
+ )
341
+ outs = tokenizer.batch_decode(gen_tokens[:, inputs.input_ids.shape[1]:])[0]
342
+ image_desc = outs.replace(tokenizer.eos_token, "")
343
+ image_desc = image_desc.replace("<end_of_turn>", "")
344
+
345
+ ################################################
346
+ # 2. Construct SYSTEM and USER PROMPT
347
+ ################################################
348
+ SYSTEM_PROMPT = cfg_prompts["SYSTEM_SIM"]
349
+ SIM_PROMPT = ""
350
+ # prompt for role-playing information
351
+ SIM_PROMPT += f"You are: Demographics:\n{demo_info}\n"
352
+ # SIM_PROMPT += f"Your personality test shows you have (min score = 0; max score = 5):\nBig-Five Trait Scores:\n{persona_score}\n\n"
353
+ # SIM_PROMPT += f"You also have {locus}\n"
354
+ # situation description (role-playing)
355
+ SIM_PROMPT += cfg_prompts["SIMULATION_SIM"]
356
+
357
+ ################################################
358
+ # 3. Stage 1: VLM-enabled response prediction
359
+ # Predict Trait-aware Likert Scale Responses
360
+ ################################################
361
+ assert cfgs["infer_engine"] == "unsloth", "Only unsloth inference is supported"
362
+ assert cfgs["vision"] == True, "Must have vision input"
363
+ # load a sample row to extract Likert scale questions
364
+ df = pd.read_csv(os.path.expandvars(cfgs["data_path"]))
365
+ # extract sample with given health_topic for correct question set
366
+ sample = df[df['Poster_id'] == HEALTH_TOPICS[health_topic]].iloc[0]
367
+ del df # free memory
368
+ """ Iterate through each question"""
369
+ # answers_json = {}
370
+ answers_numeric = ""
371
+ # for question in [
372
+ # "This message makes me more concerned about the health risks in the poster - Scale: 1 (not at all) - 9 (extremely)",
373
+ # "The message motivates me to engage in healthier lifestyle and habit - Scale: 1 (not at all) - 9 (extremely)",
374
+ # "In your opinion, how harmful is ignoring the health risks in the poster? - Scale: 1 (not at all) - 9 (extremely",
375
+ # "How open are you to engaging in the activity in the poster? - Scale: 1 (not at all) - 9 (extremely)",
376
+ # ]:
377
+ for i in range(1,16,1):
378
+ # a. parse specific Likert score question
379
+ col = f"Q{i}"
380
+ if pd.isna(sample[col]):
381
+ continue
382
+ question = sample[col].replace("\n", " ")
383
+ # instruction prompt to answer in proper format
384
+ if "type in" in question.lower():
385
+ continue # skip free-text questions for demo
386
+ elif "make you feel" in question.lower():
387
+ continue # skip emotional questions: imprecise
388
+ elif "how open" in question.lower():
389
+ continue # skip intentional question: low-accuracy
390
+ # b. initialize USER PROMPT with SIMULATION PROMPT
391
+ # with full demographic+personality data
392
+ USER_PROMPT = SIM_PROMPT
393
+ USER_PROMPT += f"Question: {question}\n\n"
394
+ # instruction prompt to answer in proper format
395
+ USER_PROMPT += cfg_prompts['INSTRUCTION_MCQ']
396
+ # c. Construct LLM message: response prediction
397
+ messages = [
398
+ {"role": "user", "content": [
399
+ {"type": "image"},
400
+ {"type": "text", "text": SYSTEM_PROMPT + USER_PROMPT}
401
+ ]}
402
+ ]
403
+ input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
404
+ inputs = tokenizer(
405
+ image.convert("RGB"),
406
+ input_text,
407
+ add_special_tokens = False,
408
+ return_tensors = "pt",
409
+ ).to(device)
410
+ # d. Model inference
411
+ gen_tokens = model.generate(
412
+ **inputs,
413
+ max_new_tokens = 16,
414
+ use_cache = True,
415
+ do_sample=cfgs["stochastic"],
416
+ temperature=cfgs["temperature"],
417
+ min_p=0.9,
418
+ )
419
+ outs = tokenizer.batch_decode(gen_tokens[:, inputs.input_ids.shape[1]:])[0]
420
+ answer = outs.replace(tokenizer.eos_token, "")
421
+ answer = answer.replace("<end_of_turn>", "")
422
+ # answers_json[col] = answer
423
+ answers_numeric += f"{question}. Your answer: {answer}\n"
424
+ # print(answers_json)
425
+ print(answers_numeric)
426
+
427
+ ################################################
428
+ # 4. Stage 2: LLM Summarization of all answers
429
+ # => final response generation based on
430
+ # all Likert answers to the poster
431
+ # => one-shot prompting
432
+ ################################################
433
+ SYSTEM_PROMPT = "You are a helpful assistant."
434
+ # USER_PROMPT = f"Please convert these questions and answers into a concise and coherent \
435
+ # summary of your overall reactions, feelings, and perspectives about the poster: {answers_numeric} \
436
+ # Please provide the final response only."
437
+ # USER_PROMPT = f"Summarize the main points from questions and answers below into a concise and coherent overall reaction to the poster:\
438
+ # {answers_numeric}. Provide the final response only.\n"
439
+ USER_PROMPT = (
440
+ "Summarize the following survey responses into a short, natural paragraph that captures your overall sentiment, motivation, and thinking. "
441
+ "Write as if paraphrasing what a person might say in conversation. Adjust your style based on your demographic/personality traits. "
442
+ "Do NOT repeat numeric scores. "
443
+ "Preserve polarity: low scores → low concern/motivation/openness; high scores → high concern/motivation/openness. "
444
+ "If answers are mixed (e.g., believes something is harmful but isn't personally moved), reflect that nuance explicitly. "
445
+ "Keep to 1-5 sentences.\n\n"
446
+
447
+ "**STRICTLY FOLLOW THESE RULES:**\n"
448
+ "- Infer direction from each item's Scale description (e.g., 1-9: higher = more; 0-6: higher = more). "
449
+ "- Use calibrated wording: 1-2 = very low, 3-4 = low, 5 = moderate, 6-7 = high, 8-9 = very high; for 0-6: 0-1 = not/slight, 2-3 = somewhat, 4-5 = high, 6 = very. "
450
+ "- VERY IMPORTANT: provide ONLY the final summarized response, without anything else!\n"
451
+ f"- The response MUST have a consistent health topic: {health_topic}. Ground each sentence in the impact of the campaign message.\n"
452
+ "- Never invert sentiment. Prefer hedged phrases (e.g., “not particularly,” “only somewhat,” “very open,” “not open at all”).\n\n"
453
+ "- Mimic the talking style of the emulated demographic as realistically as possible.\n\n"
454
+
455
+ "**Example input 1:**\n"
456
+ "The message makes me more concerned about the health risks of poor eating habits - Scale: 1-9. Your answer: 9\n"
457
+ "The message motivates me to make healthy eating choices - Scale: 1-9. Your answer: 9\n"
458
+ "In your opinion, how harmful is neglecting proper nutrition and weight management to your overall health? - Scale: 0–6. Your answer: 5\n"
459
+ "How open are you to adopting healthier eating habits and lifestyle changes? - Scale: 1-9. Your answer: 9\n"
460
+ "**Example output 1:**\n"
461
+ "This message really heightened my awareness of how unhealthy eating can be. The content in the message strongly motivates me to make better choices, and I feel very ready to follow through.\n\n"
462
+
463
+ "**Example input 2:**\n"
464
+ "The message makes me more concerned about the health risks of COPD and smoking - Scale: 1-9. Your answer: 1\n"
465
+ "The message motivates me to not smoke. - Scale: 1-9. Your answer: 1\n"
466
+ "In your opinion, how harmful is smoking to your general health? - Scale: 0-6. Your answer: 6\n"
467
+ "How open are you to smoking in the future? - Scale: 1-9. Your answer: 1\n"
468
+ "**Example output 2:**\n"
469
+ "From this message, I recognize smoking is very harmful, but the content in the message didn't increase my concern or motivate me much. It does somewhat make me understand that smoking is harmful, however. Anyway, I'm not open to smoking in the future.\n\n"
470
+
471
+ "**Example input 3:**\n"
472
+ "The message makes me more concerned about the effects of lack of exercise - Scale: 1-9. Your answer: 4\n"
473
+ "The message motivates me to be more active - Scale: 1-9. Your answer: 3\n"
474
+ "How open are you to exercising regularly? - Scale: 1-9. Your answer: 4\n"
475
+ "**Example output 3:**\n"
476
+ "Through the message, I get that exercise matters and the message raised my awareness a bit, but the poster content itself didn't really motivate me. The content in the message has some small impact in motivating me to change my routine.\n\n"
477
+
478
+ # "**Example input 4:**\n"
479
+ # "The message makes me more concerned about the health risks of substance abuse - Scale: 1 (not at all) - 9 (extremely). Your answer: 6\n"
480
+ # "The message motivates me to not use substances. - Scale: 1 (not at all) - 9 (extremely). Your answer: 6\n"
481
+ # "In your opinion, how harmful is substance use to your general health? - Scale: 0 (not at all)-6 (extremely harmful). Your answer: 5\n"
482
+ # "How open are you to trying a substance in the future? - Scale: 1 (not at all)-9 (extremely). Your answer: 1\n"
483
+ # "**Example output 4:**\n"
484
+ # "This message somewhat makes me more concerned about the health risks of substance abuse motivates me not to use them. However, the message itself doesn't completely convince me that substance abuse is harmful. However, I'm not open to trying substance at all!!\n"
485
+ f"Start the response with '{opening_phrase}' (Style hint: {style_hint}; Lexical hint: {lexical_hint})\n"
486
+ f"Input: {answers_numeric}. "
487
+ )
488
+
489
+ # Construct LLM message
490
+ messages = [
491
+ {"role": "user", "content": [
492
+ # {"type": "image"},
493
+ {"type": "text", "text": SYSTEM_PROMPT + USER_PROMPT}
494
+ ]}
495
+ ]
496
+ # input_text = tokenizer_aux.apply_chat_template(messages, add_generation_prompt = True)
497
+ # inputs = tokenizer_aux(
498
+ # # image.convert("RGB"),
499
+ # input_text,
500
+ # add_special_tokens = False,
501
+ # return_tensors = "pt",
502
+ # ).to(device)
503
+ input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
504
+ inputs = tokenizer(
505
+ # image.convert("RGB"),
506
+ input_text,
507
+ add_special_tokens = False,
508
+ return_tensors = "pt",
509
+ ).to(device)
510
+
511
+ ############################
512
+ ### Text LLM Streaming ###
513
+ ############################
514
+ # generation with streamer
515
+ generate_kwargs = dict(
516
+ **inputs,
517
+ streamer=streamer, # streamer_aux,
518
+ max_new_tokens=512,
519
+ use_cache=True,
520
+ # min_p=0.3,
521
+ top_k=15,
522
+ temperature=0.8,
523
+ do_sample=True, # cfgs["stochastic"]
524
+ )
525
+ # separate thread to run generation
526
+ thread = threading.Thread(
527
+ target=model.generate, # model_aux.generate,
528
+ kwargs=generate_kwargs
529
+ )
530
+ thread.start()
531
+ # stream out generation
532
+ outputs = [
533
+ f"Emulated traits:\n {demo_info}\n" + '='*20 + "\n\n",
534
+ image_desc + "\n\n"
535
+ ]
536
+ for new_token in streamer: # streamer_aux:
537
+ outputs.append(new_token)
538
+ final_output = ''.join(outputs)
539
+ yield final_output
540
+
541
+ # Ensure thread finishes
542
+ thread.join()
543
+
544
+ # text representation of final response
545
+ response = "".join(outputs[2:]) # ignore trait summary & image description
546
+ print(colored('Traits', 'green'), demo_info)
547
+ print(colored('Emulated response:', 'green'), response)
548
+ print('='*100)
549
+
550
+
551
+ ################################################
552
+ # 5. Stage 3: provide explanation (demo purpose)
553
+ # => condition on {trait} AND {response}
554
+ ################################################
555
+ SYSTEM_PROMPT = cfg_prompts["SYSTEM_SIM"]
556
+ SIM_PROMPT = ""
557
+ # prompt for role-playing information
558
+ SIM_PROMPT += f"You are: Demographics:\n{demo_info}\n"
559
+ # SIM_PROMPT += f"Your personality test shows you have (min score = 0; max score = 5):\nBig-Five Trait Scores:\n{persona_score}\n\n"
560
+ # SIM_PROMPT += f"You also have {locus}\n"
561
+ # situation description (role-playing)
562
+ SIM_PROMPT += cfg_prompts["SIMULATION_SIM"]
563
+ SIM_PROMPT += (
564
+ f"After seeing the uploaded image, your response was {response}. "
565
+ "Briefly explain WHY you responded that way, based on your demographic background. "
566
+ f"Keep the explanation concise and direct. Start the response with '{opening_explain}' "
567
+ f"(Style hint: {style_hint}, concise; Lexical hint: {lexical_hint}). "
568
+ "Afterward, give a few *generic and succinct* suggestions to improve the poster's persuasiveness."
569
+ )
570
+ USER_PROMPT = SIM_PROMPT
571
+
572
+ # Construct LLM message
573
+ messages = [
574
+ {"role": "user", "content": [
575
+ {"type": "image"},
576
+ {"type": "text", "text": SYSTEM_PROMPT + USER_PROMPT}
577
+ ]}
578
+ ]
579
+ # input_text = tokenizer_aux.apply_chat_template(messages, add_generation_prompt = True)
580
+ # inputs = tokenizer_aux(
581
+ # image.convert("RGB"),
582
+ # input_text,
583
+ # add_special_tokens = False,
584
+ # return_tensors = "pt",
585
+ # ).to(device)
586
+ input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
587
+ inputs = tokenizer(
588
+ image.convert("RGB"),
589
+ input_text,
590
+ add_special_tokens = False,
591
+ return_tensors = "pt",
592
+ ).to(device)
593
+
594
+ ############################
595
+ ### Text LLM Streaming ###
596
+ ############################
597
+ # generation with streamer
598
+ generate_kwargs = dict(
599
+ **inputs,
600
+ streamer=streamer, # streamer_aux,
601
+ max_new_tokens=512,
602
+ use_cache=True,
603
+ min_p=0.85,
604
+ temperature=0.1,
605
+ do_sample=True, # cfgs["stochastic"]
606
+ )
607
+ # separate thread to run generation
608
+ thread = threading.Thread(
609
+ target=model.generate, # model_aux.generate,
610
+ kwargs=generate_kwargs
611
+ )
612
+ thread.start()
613
+ # stream out generation
614
+ # outputs = [image_desc + "\n\n"]
615
+ outputs += ["\n"]
616
+ for new_token in streamer: # streamer_aux:
617
+ outputs.append(new_token)
618
+ final_output = ''.join(outputs)
619
+ yield final_output
620
+
621
+ thread.join()
622
+
623
+
624
+ return answer
625
+ else:
626
+ """###############################################################
627
+ Case 2: no health poster is uploaded
628
+ => General Response to the health topic
629
+ => not conditioned on any particular health poster
630
+ ###############################################################"""
631
+ ################################################
632
+ # 2. Construct SYSTEM and USER PROMPT
633
+ ################################################
634
+ SYSTEM_PROMPT = (
635
+ "You are a person with unique demographic and personality traits. "
636
+ "Based on your background, you naturally have thoughts, feelings, and reactions to what you see."
637
+ )
638
+ SIM_PROMPT = ""
639
+ # prompt for role-playing information
640
+ SIM_PROMPT += f"You are: {demo_info}\n"
641
+ # SIM_PROMPT += f"Your personality test shows you have (min score = 0; max score = 5): {persona_score}\n"
642
+ # SIM_PROMPT += f"You also have {locus}\n"
643
+ # situation description (role-playing)
644
+ SIM_PROMPT += f"You are being asked a general question to share your *general* opinions and beliefs about a given health topic.\n"
645
+ ################################################
646
+ # 3. LLM-enabled response prediction
647
+ # Predict Trait-aware Likert Scale Responses
648
+ ################################################
649
+ assert cfgs["infer_engine"] == "unsloth", "Only unsloth inference is supported"
650
+ USER_PROMPT = SIM_PROMPT
651
+ USER_PROMPT += (
652
+ f"What are your *general* thoughts and opinions about the {health_topic} health topic? "
653
+ f" What's your attitude and feeling when talking about {health_topic} in general and why?"
654
+ f" How familiar are you with {health_topic}? How much do you care or know about it?"
655
+ f" Do you think {health_topic} is an important topic to talk about?"
656
+ f" What are the impacts and importance of {health_topic} in society and your life? Why?"
657
+ f" Do you have any strong opinions about it?"
658
+ f" Are you interested in learning more about it?"
659
+ )
660
+ # instruction prompt to answer in proper format
661
+ USER_PROMPT += (
662
+ "Your personality, locus of control, and demographic traits influence your response. Adjust your style based on your demographic/personality traits.\n"
663
+ "**STRICTLY FOLLOW THESE RULES:**\n"
664
+ "- Human-like, casual, everyday conversational response. Only answer the questions\n"
665
+ f"- The response MUST have a consistent health topic: {health_topic}.\n"
666
+ # "- Answer briefly in **5-7 sentences**.\n"
667
+ "- Only provide the answer. DO NOT REPEAT THE PROMPT!\n"
668
+ "- Condition your response on your *demographic/personality traits provided earlier, IGNORING the [Not specified] ones*.\n"
669
+ "- MUST provide *reasonable* and *informative* answers aligned with your background.\n"
670
+ f"- Start the response with '{opening_generic}' ; {style_hint} {lexical_hint}\n"
671
+ # f"- Start the answer some variations of \'About my personal thoughts on *{health_topic}*, I \' \n"
672
+ # f"- Start the answer with something like: When thinking about {health_topic}, I ..."
673
+ )
674
+ # c. Construct LLM message
675
+ # print("USER PROMPT:", USER_PROMPT)
676
+ messages = [
677
+ {"role": "user", "content": SYSTEM_PROMPT + USER_PROMPT}
678
+ ]
679
+ assert "gemma" in cfgs["model"], "Currently only gemma model is supported for no-image input"
680
+ input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
681
+ inputs = tokenizer(
682
+ input_text,
683
+ add_special_tokens = False,
684
+ return_tensors = "pt",
685
+ ).to(device)
686
+ ############################
687
+ ### Text LLM Streaming ###
688
+ ############################
689
+ # generation with streamer
690
+ generate_kwargs = dict(
691
+ **inputs,
692
+ streamer=streamer,
693
+ max_new_tokens=512,
694
+ use_cache=True,
695
+ # min_p=0.3,
696
+ top_k=15,
697
+ temperature=0.8,
698
+ do_sample=True, # cfgs["stochastic"]
699
+ )
700
+ # separate thread to run generation
701
+ thread = threading.Thread(
702
+ target=model.generate,
703
+ kwargs=generate_kwargs
704
+ )
705
+ thread.start()
706
+ # stream out generation
707
+ outputs = [f"Emulated traits:\n {demo_info}\n" + '='*20 + "\n\n"]
708
+ for new_token in streamer:
709
+ outputs.append(new_token)
710
+ final_output = ''.join(outputs)
711
+ yield final_output
712
+ thread.join()
713
+
714
+ except GeneratorExit:
715
+ print("User disconnected. Waiting for generation to complete...")
716
+ finally:
717
+ # Ensure cleanup happens even on normal finish or errors
718
+ if thread is not None and thread.is_alive():
719
+ thread.join()
720
+ torch.cuda.empty_cache()
721
+
722
+ """###########################################################################
723
+ Evaluate a given model (specified in model_cfgs)
724
+ on posters with given test_style
725
+
726
+ Args:
727
+ + cfgs : specify model type (e.g. gemma or llama),
728
+ data source, and export paths
729
+ + prompts : set of prompts
730
+
731
+ Outputs:
732
+ => save results to cfgs["export_path"] (CSV file)
733
+ + if cfgs["export_path"] not exists, initialize it with cfgs["data_path"]
734
+ => original survey data with ground-truth responses
735
+ + add column "<model>:<version>": store AI-simulated responses
736
+ + support concurrent evaluation on different jobs
737
+ ##########################################################################"""
738
+ if __name__ == '__main__':
739
+ """==========================================
740
+ 1. load model settings & prompts format
741
+ =========================================="""
742
+ ######################################
743
+ # Load model configs & prompts
744
+ ######################################
745
+ model_cfg = "./configs/task1_demo_sph.yaml"
746
+ prompt_cfg = "./configs/prompts.yaml"
747
+ cfgs = load_config(model_cfg)
748
+ cfg_prompts = load_config(prompt_cfg)
749
+
750
+ """==========================================
751
+ 2. Evaluate model defined in configs
752
+ =========================================="""
753
+ print(colored('MODEL USE:', 'green'), cfgs["model"])
754
+ # print(prompts['SYSTEM'])
755
+ # print(prompts['INSTRUCTION'])
756
+
757
+ """===============================
758
+ 3. Initialize model
759
+ => `model`, `tokenizer`
760
+ are initialized here
761
+ ==============================="""
762
+ assert cfgs["infer_engine"] == "unsloth", "Only unsloth inference is supported"
763
+ assert cfgs["vision"] == True, "Must have vision input"
764
+ if cfgs["vision"]:
765
+ #################################################
766
+ ### (1) MAIN MODEL
767
+ ### => response emulation, fine-tuned model
768
+ #################################################
769
+ # WITH VISUAL STIMULI
770
+ model, tokenizer = FastVisionModel.from_pretrained(
771
+ model_name=cfgs["model"],
772
+ load_in_4bit=True,
773
+ )
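+ # load_in_4bit=True loads 4-bit quantized weights so the 12B vision model
+ # fits within a single GPU's memory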
774
+ FastVisionModel.for_inference(model)
775
+ if "gemma" in cfgs["model"]:
776
+ # gemma-specific tokenizer chat template
777
+ tokenizer = get_chat_template(
778
+ tokenizer,
779
+ chat_template = "gemma-3",
780
+ )
781
+ #################################################
782
+ ### (2) AUXILIARY MODEL
783
+ ### => summarization model
784
+ ### => larger (12b) for better summarization
785
+ #################################################
786
+ # model_aux, tokenizer_aux = FastVisionModel.from_pretrained(
787
+ # model_name=cfgs["model_summarize"],
788
+ # load_in_4bit=True,
789
+ # )
790
+ # FastVisionModel.for_inference(model)
791
+ # if "gemma" in cfgs["model"]:
792
+ # # gemma-specific tokenizer chat template
793
+ # tokenizer_aux = get_chat_template(
794
+ # tokenizer_aux,
795
+ # chat_template = "gemma-3",
796
+ # )
797
+
798
+ # # initialize streamer tokens
799
+ # streamer = TextIteratorStreamer(
800
+ # tokenizer, skip_prompt=True, skip_special_tokens=True
801
+ # )
802
+ # streamer_aux = TextIteratorStreamer(
803
+ # tokenizer_aux, skip_prompt=True, skip_special_tokens=True
804
+ # )
805
+
806
+ """=============================================
807
+ 4. User-input Dropdown Traits
808
+ ============================================="""
809
+ #################################
810
+ ### Gradio Interface ###
811
+ #################################
812
+ with gr.Blocks(theme="gradio/dark") as interface:
813
+ # --- Title Page with Logo ---
814
+ LOGO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "assets/umd_logo.png"))
815
+ gr.Image(value=LOGO_PATH, show_label=False, interactive=False, height=100)
816
+ gr.Markdown(
817
+ """
818
+ <div style="text-align: center;">
819
+ <h1 style="margin-bottom: 0.5em;">
820
+ UMD AI-Empowered Response Prediction in Public Health Messaging
821
+ </h1>
822
+ </div>
823
+
824
+ <hr style="margin-top: 0.8em; margin-bottom: 0.8em;"> <!-- thinner spacing around line -->
825
+
826
+ <div style="text-align: center;">
827
+ <h2 style="margin-top: 0.3em; margin-bottom: 0.6em;">
828
+ User Guide
829
+ </h2>
830
+ </div>
831
+
832
+ <ul style="text-align: left; max-width: 800px; margin: auto;">
833
+ <li>This program emulates <b>demographic- and personality-conditioned responses</b> to public health posters using our trait-aligned Vision-Language Model (VLM).</li>
834
+ <li>To begin, (1) specify the target demographic traits, then (2) upload a public health poster to predict responses.</li>
835
+ <li>If a health poster is uploaded, the model first summarizes its understanding of the image.</li>
836
+ <li><b>Please note:</b>
837
+ <ul>
838
+ <li>Each interaction only uses the uploaded image and selected traits (no conversation history).</li>
839
+ <li>You don’t need to type any text prompt; just upload the Health Poster and click <b>Submit</b>.</li>
840
+ <li>If no poster or image is uploaded, the program automatically generates the emulated person’s <b>general opinion</b> on the selected Health Topic.</li>
841
+ <li>Please do not interrupt the generation process, as doing so can lead to unexpected results. If it happens, simply refresh the web app.</li>
842
+ <li><b>Limitation:</b> The model may generate less realistic emulations for some under-represented demographics in the survey dataset (e.g., Asian seniors). We are conducting a more comprehensive survey to address this limitation.</li>
843
+ </ul>
844
+ </li>
845
+ </ul>
846
+
847
+ <hr style="margin-top: 0.8em; margin-bottom: 1.2em;">
848
+ """,
849
+ elem_id="intro-section"
850
+ )
851
+
852
+ # Scroll to intro section on load
853
+ gr.HTML("""
854
+ <script>
855
+ window.onload = function() {
856
+ window.scrollTo({ top: 0, behavior: 'smooth' });
857
+ }
858
+ </script>
859
+ """)
860
+
861
+ ##########################
862
+ ### Demographic Traits ###
863
+ ##########################
864
+ gr.Markdown("## 1. Please specify the target demographic traits to be emulated here:")
865
+ # Dropdowns (single-select, no custom values)
866
+ with gr.Row():
867
+ gender = gr.Dropdown(
868
+ label="Gender",
869
+ choices=TRAIT_VALUES["Gender"],
870
+ allow_custom_value=False,
871
+ value="Female",
872
+ )
873
+ age = gr.Dropdown(
874
+ label="Age",
875
+ choices=TRAIT_VALUES["Age"],
876
+ allow_custom_value=False,
877
+ value="25–34",
878
+ )
879
+ profession = gr.Dropdown(
880
+ label="Current Profession",
881
+ choices=TRAIT_VALUES["Current Profession"], # keep given order
882
+ allow_custom_value=False,
883
+ value="Student",
884
+ )
885
+ with gr.Row():
886
+ race = gr.Dropdown(
887
+ label="Race/Ethnicity",
888
+ choices=TRAIT_VALUES["Race/Ethnicity"],
889
+ allow_custom_value=False,
890
+ value="White/Caucasian",
891
+ )
892
+ religion = gr.Dropdown(
893
+ label="Religious/Cultural Group",
894
+ choices=TRAIT_VALUES["Religious/Cultural Group"],
895
+ allow_custom_value=False,
896
+ value="Leave Blank",
897
+ )
898
+ political = gr.Dropdown(
899
+ label="Political Affiliation",
900
+ choices=TRAIT_VALUES["Political Affiliation"],
901
+ allow_custom_value=False,
902
+ value="Leave Blank",
903
+ )
904
+ with gr.Row():
905
+ education = gr.Dropdown(
906
+ label="Highest Education",
907
+ choices=TRAIT_VALUES["Highest Education"],
908
+ allow_custom_value=False,
909
+ value="Leave Blank",
910
+ )
911
+ income = gr.Dropdown(
912
+ label="Annual Household Income",
913
+ choices=TRAIT_VALUES["Annual Household Income"],
914
+ allow_custom_value=False,
915
+ value="$75,000–$99,999",
916
+ )
917
+ family_status = gr.Dropdown(
918
+ label="Family Status",
919
+ choices=TRAIT_VALUES["Family Status"],
920
+ allow_custom_value=False,
921
+ value="Leave Blank"
922
+ )
923
+ # ##########################
924
+ # ### Big Five Traits ###
925
+ # ##########################
926
+ # gr.Markdown("## 1.b) Please adjust the Big Five Personality Traits to be emulated:")
927
+ # with gr.Accordion("Big Five Personality Traits (1 = very low, 5 = very high)", open=True):
928
+ # gr.Markdown(
929
+ # "Adjust the sliders to represent the target personality profile. "
930
+ # "Leave them as-is if not applicable."
931
+ # )
932
+ # with gr.Row():
933
+ # with gr.Column(scale=1):
934
+ # openness = gr.Slider(
935
+ # label="Open-Mindedness",
936
+ # minimum=1, maximum=5, step=0.2, value=2.5,
937
+ # interactive=True
938
+ # )
939
+ # with gr.Column(scale=1):
940
+ # conscientiousness = gr.Slider(
941
+ # label="Conscientiousness",
942
+ # minimum=1, maximum=5, step=0.2, value=2.5,
943
+ # interactive=True
944
+ # )
945
+ # with gr.Column(scale=1):
946
+ # extraversion = gr.Slider(
947
+ # label="Extraversion",
948
+ # minimum=1, maximum=5, step=0.2, value=2.5,
949
+ # interactive=True
950
+ # )
951
+ # with gr.Row():
952
+ # with gr.Column(scale=1):
953
+ # neuroticism = gr.Slider(
954
+ # label="Neuroticism",
955
+ # minimum=1, maximum=5, step=0.2, value=2.5,
956
+ # interactive=True
957
+ # )
958
+ # with gr.Column(scale=1):
959
+ # agreeableness = gr.Slider(
960
+ # label="Agreeableness",
961
+ # minimum=1, maximum=5, step=0.2, value=2.5,
962
+ # interactive=True
963
+ # )
964
+ # gr.Column(scale=1) # right spacer
965
+
966
+ ##########################
967
+ ### Health Topic ###
968
+ ##########################
969
+ gr.Markdown("## 2. Please specify the main Health Topic of the poster here:")
970
+ # ---- dropdown at ~50% page width and centered ----
971
+ with gr.Row():
972
+ with gr.Column(scale=1):
973
+ health_topic = gr.Dropdown(
974
+ label="Health Topic",
975
+ choices=HEALTH_TOPICS,
976
+ allow_custom_value=False,
977
+ )
978
+ gr.Column(scale=1) # right spacer
979
+ ##########################
980
+ ### Chat interface ###
981
+ ##########################
982
+ gr.Markdown("## 3. Upload Public Health Poster here (if no poster is uploaded, the model emulates a General Response to the topic):")
983
+ gr.Markdown("""
984
+ #### ▶️ Use Case 1: Poster-Based Response
985
+ + Upload **only one** poster image — the first file is the one processed.
986
+ + The model has **no memory**, so re-upload the image for each new request.
987
+ + Must choose a **Health Topic** that matches the poster content for best results.
988
+ + No text prompt is needed: upload the poster and click **Submit**.
989
+ #### ▶️ Use Case 2: General Response (No Poster)
990
+ + Simply select a Health Topic and click **Send**.
991
+ """
992
+ )
993
+ gr.Markdown("""
994
+ ### 📘 Important Notes
995
+ - ⚠️ **Do not interrupt the generation process.** Stopping midway can cause backend issues. Please allow the response to complete.
996
+ - 🏷️ Before uploading a poster, select its **corresponding health topic**.
997
+ - 🎯 For the best experience, ensure the **topic accurately matches the poster content**.
998
+ - 🧩 If you choose not to upload a poster, the model will produce a **general, trait-conditioned response** for the selected topic.
999
+ """)
1000
+ chat = gr.ChatInterface(
1001
+ fn=vlm_response,
1002
+ multimodal=True, # text + image
1003
+ title=f"Vision-Language Model: Trait-Conditioned Response Emulation",
1004
+ type="messages",
1005
+ additional_inputs=[
1006
+ health_topic, gender, age, profession, race, religion,
1007
+ political, education, income, family_status,
1008
+ # extraversion, agreeableness, conscientiousness, neuroticism, openness,
1009
+ ],
1010
+ chatbot=gr.Chatbot(height=500), # height=330
1011
+ autofocus=False,
1012
+ )
1013
+
1014
+ """=============================================
1015
+ 5. Chat Interface Launch
1016
+ ============================================="""
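+ # Queue settings serialize requests: at most one generation runs at a time
+ # on the single GPU, with up to 20 requests waiting in the queue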
1017
+ interface.queue(
1018
+ max_size=20,
1019
+ default_concurrency_limit=1,
1020
+ ).launch(
1021
+ share=True,
1022
+ max_threads=1,
1023
+ # show_error=True,
1024
+ # prevent_thread_lock=False,
1025
+ # debug=True,
1026
+ )
app.sh ADDED
@@ -0,0 +1,17 @@
1
+ #!/bin/bash
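+ # SLURM batch script: submit with `sbatch app.sh` to launch the Gradio app on a GPU node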
2
+ #SBATCH -c 16 # 16 CPUs
3
+ #SBATCH --mem=32g # 32 GB RAM
4
+ #SBATCH --gres=gpu:rtxa5000:1 # 1 GPU (RTX A5000)
4
+ #SBATCH --time=3-00:00:00 # 3 days
6
+ #SBATCH --account=gamma
7
+ #SBATCH --partition=gamma
8
+ #SBATCH --qos=gamma-huge-long
9
+ #SBATCH --output=/fs/nexus-projects/health_sim_ai/src_hf_deploy/app_logs/app_%j.out
10
+
11
+ export HOME=/fs/nexus-projects/health_sim_ai
12
+ cd /fs/nexus-projects/health_sim_ai
13
+ source venvs/llm/bin/activate
14
+ cd src_hf_deploy
15
+ python -u app.py
16
+ # python inference_pred_llm.py
17
+ # python inference_rec_llm.py
app_logs/app_5936040.out ADDED
@@ -0,0 +1,18 @@
1
+ 🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
2
+ 🦥 Unsloth Zoo will now patch everything to make training faster!
3
+ MODEL USE: unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits
4
+ ==((====))== Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.0.
5
+ \\ /| NVIDIA RTX A5000. Num GPUs = 1. Max memory: 23.547 GB. Platform: Linux.
6
+ O^O/ \_/ \ Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
7
+ \ / Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
8
+ "-____-" Free license: http://github.com/unslothai/unsloth
9
+ Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
10
+
11
+ Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
12
+ /fs/nexus-projects/health_sim_ai/venvs/llm/lib/python3.12/site-packages/gradio/blocks.py:1069: UserWarning: Cannot load gradio/dark. Caught Exception: The space gradio/dark does not exist
13
+ warnings.warn(f"Cannot load {theme}. Caught Exception: {str(e)}")
14
+ /fs/nexus-projects/health_sim_ai/src_hf_deploy/app.py:1010: UserWarning: You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style dictionaries with 'role' and 'content' keys.
15
+ chatbot=gr.Chatbot(height=500), # height=330
16
+ /fs/nexus-projects/health_sim_ai/venvs/llm/lib/python3.12/site-packages/gradio/chat_interface.py:323: UserWarning: The type of the gr.Chatbot does not match the type of the gr.ChatInterface.The type of the gr.ChatInterface, 'messages', will be used.
17
+ warnings.warn(
18
+ slurmstepd: error: *** JOB 5936040 ON gammagpu09 CANCELLED AT 2025-12-08T03:01:34 ***
app_logs/app_5936041.out ADDED
@@ -0,0 +1,18 @@
1
+ 🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
2
+ 🦥 Unsloth Zoo will now patch everything to make training faster!
3
+ MODEL USE: unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits
4
+ ==((====))== Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.0.
5
+ \\ /| NVIDIA RTX A5000. Num GPUs = 1. Max memory: 23.547 GB. Platform: Linux.
6
+ O^O/ \_/ \ Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
7
+ \ / Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
8
+ "-____-" Free license: http://github.com/unslothai/unsloth
9
+ Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
10
+
11
+ Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
12
+ /fs/nexus-projects/health_sim_ai/venvs/llm/lib/python3.12/site-packages/gradio/blocks.py:1069: UserWarning: Cannot load gradio/dark. Caught Exception: The space gradio/dark does not exist
13
+ warnings.warn(f"Cannot load {theme}. Caught Exception: {str(e)}")
14
+ /fs/nexus-projects/health_sim_ai/src_hf_deploy/app.py:1010: UserWarning: You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style dictionaries with 'role' and 'content' keys.
15
+ chatbot=gr.Chatbot(height=500), # height=330
16
+ /fs/nexus-projects/health_sim_ai/venvs/llm/lib/python3.12/site-packages/gradio/chat_interface.py:323: UserWarning: The type of the gr.Chatbot does not match the type of the gr.ChatInterface.The type of the gr.ChatInterface, 'messages', will be used.
17
+ warnings.warn(
18
+ slurmstepd: error: *** JOB 5936041 ON gammagpu09 CANCELLED AT 2025-12-08T03:07:56 ***
app_logs/app_5936047.out ADDED
@@ -0,0 +1,19 @@
1
+ 🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
2
+ 🦥 Unsloth Zoo will now patch everything to make training faster!
3
+ MODEL USE: unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits
4
+ ==((====))== Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.0.
5
+ \\ /| NVIDIA RTX A5000. Num GPUs = 1. Max memory: 23.547 GB. Platform: Linux.
6
+ O^O/ \_/ \ Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
7
+ \ / Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
8
+ "-____-" Free license: http://github.com/unslothai/unsloth
9
+ Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
10
+
11
+ Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
12
+ /fs/nexus-projects/health_sim_ai/venvs/llm/lib/python3.12/site-packages/gradio/blocks.py:1069: UserWarning: Cannot load gradio/dark. Caught Exception: The space gradio/dark does not exist
13
+ warnings.warn(f"Cannot load {theme}. Caught Exception: {str(e)}")
14
+ /fs/nexus-projects/health_sim_ai/src_hf_deploy/app.py:1010: UserWarning: You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style dictionaries with 'role' and 'content' keys.
15
+ chatbot=gr.Chatbot(height=500), # height=330
16
+ /fs/nexus-projects/health_sim_ai/venvs/llm/lib/python3.12/site-packages/gradio/chat_interface.py:323: UserWarning: The type of the gr.Chatbot does not match the type of the gr.ChatInterface.The type of the gr.ChatInterface, 'messages', will be used.
17
+ warnings.warn(
18
+ grep: gradio_output.log: No such file or directory
19
+ Gradio Public URL:
app_logs/app_5936050.out ADDED
@@ -0,0 +1 @@
1
+ slurmstepd: error: *** JOB 5936050 ON gammagpu09 CANCELLED AT 2025-12-08T03:23:42 ***
app_logs/app_5936052.out ADDED
@@ -0,0 +1,57 @@
1
+ 🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
2
+ 🦥 Unsloth Zoo will now patch everything to make training faster!
3
+ MODEL USE: unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits
4
+ ==((====))== Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.0.
5
+ \\ /| NVIDIA RTX A5000. Num GPUs = 1. Max memory: 23.547 GB. Platform: Linux.
6
+ O^O/ \_/ \ Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
7
+ \ / Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
8
+ "-____-" Free license: http://github.com/unslothai/unsloth
9
+ Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
10
+
11
+ Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
12
+ /fs/nexus-projects/health_sim_ai/venvs/llm/lib/python3.12/site-packages/gradio/blocks.py:1069: UserWarning: Cannot load gradio/dark. Caught Exception: The space gradio/dark does not exist
13
+ warnings.warn(f"Cannot load {theme}. Caught Exception: {str(e)}")
14
+ /fs/nexus-projects/health_sim_ai/src_hf_deploy/app.py:1010: UserWarning: You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style dictionaries with 'role' and 'content' keys.
15
+ chatbot=gr.Chatbot(height=500), # height=330
16
+ /fs/nexus-projects/health_sim_ai/venvs/llm/lib/python3.12/site-packages/gradio/chat_interface.py:323: UserWarning: The type of the gr.Chatbot does not match the type of the gr.ChatInterface.The type of the gr.ChatInterface, 'messages', will be used.
17
+ warnings.warn(
18
+ * Running on local URL: http://127.0.0.1:7860
19
+ * Running on public URL: https://8a035fb4eb42d29651.gradio.live
20
+
21
+ This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
22
+ Chronic Obstructive Pulmonary Disease (COPD)
23
+ Style: Write in a informal, pragmatic tone, focusing on clarity and utility.
24
+ Lexical: Feel free to vary sentence structures slightly.
25
+ Opening: Through the message
26
+ Generic opening: My initial take on
27
+ Chronic Obstructive Pulmonary Disease (COPD)
28
+ Style: Write in a slightly narrative, flowing tone.
29
+ Lexical: Use a light mix of paraphrasing expressions.
30
+ Opening: Through the message
31
+ Generic opening: On top of my head
32
+ The message makes me more concerned about the health risks of COPD and smoking - Scale: 1 (not at all) - 9 (extremely). Your answer: 8
33
+ The message motivates me to not smoke. - Scale: 1 (not at all) - 9 (extremely). Your answer: 8
34
+ In your opinion, how harmful is smoking to your general health? - Scale: 0 (not at all)-6 (extremely harmful). Your answer: 6
35
+
36
+ Nutrition
37
+ Style: Write in a slightly narrative, flowing tone.
38
+ Lexical: Use a mix of simple and slightly complex sentences.
39
+ Opening: Reflecting on the message here
40
+ Generic opening: Just speaking for myself,
41
+ The message makes me more concerned about the health risks of poor eating habits - Scale: 1 (not at all) - 9 (extremely). Your answer: 8
42
+ The message motivates me to make healthy eating choices - Scale: 1 (not at all) - 9 (extremely). Your answer: 8
43
+ In your opinion, how harmful is neglecting proper nutrition and weight management to your overall health? - Scale: 0 (not at all)-6 (extremely harmful). Your answer: 6
44
+
45
+ Traits Demographics:
46
+ Gender: Female
47
+ Age: 25–34
48
+ Current Profession: Student
49
+ Race/Ethnicity: White/Caucasian
50
+ Religious/Cultural Group: [Not specified]
51
+ Political Affiliation: [Not specified]
52
+ Highest Education: [Not specified]
53
+ Annual Household Income: $75,000–$99,999
54
+ Family Status: [Not specified]
55
+
56
+ Emulated response: Reflecting on the message here, I'm now very concerned about the health consequences of poor eating. The message really motivates me to make healthy choices - I feel more determined than ever to prioritize my nutrition and maintain a healthy weight. It's made me realize the importance of mindful eating and making informed food choices.
57
+ ====================================================================================================
assets/umd_logo.png ADDED

Git LFS Details

  • SHA256: 6163ef79b6fa3772492de058d477a5852cb7b5d920a32d25c764270a917802e6
  • Pointer size: 131 Bytes
  • Size of remote file: 300 kB
configs/prompts.yaml ADDED
@@ -0,0 +1,100 @@
1
+ #########################################################
2
+ ### TASK 1: COMMUNITY SIMULATION ###
3
+ #########################################################
4
+ # SYSTEM PROMPT FOR COMMUNITY RESPONSE PREDICTION
5
+ SYSTEM_SIM: >
6
+ You are a person with unique demographic and personality traits.
7
+ During an online study, you are shown a public health campaign poster.
8
+ Based on your background, you naturally have thoughts, feelings, and reactions to what you see.
9
+ # SIMULATION PROMPT FOR COMMUNITY RESPONSE PREDICTION
10
+ SIMULATION_SIM: >
11
+ You are now being shown a public health campaign poster, followed by a survey question
12
+ designed to capture your thoughts, feelings, and emotions in response to the image.
13
+ # TASK 1: RESPONSE PREDICTION -> MCQ (SENTIMENT, BEHAVIORAL, EMOTIONAL)
14
+ INSTRUCTION_MCQ: |
15
+ Please respond to the survey question authentically, as if you are completing a real online survey. Your personality, locus of control, and demographic traits influence your reactions.
16
+ **CRITICAL INSTRUCTIONS - FOLLOW THESE EXACTLY:**
17
+ - **BE REALISTIC and HUMAN-LIKE. Only answer the questions. Imagine you're quickly filling out a survey.**
18
+ - Answer with **ONLY the valid number** to realistically express your emotions/feelings.
19
+ - Only provide the answer. DO NOT REPEAT THE QUESTION NOR PROVIDE REASONING.
20
+ # TASK 1: RESPONSE PREDICTION -> OPEN-TEXT
21
+ INSTRUCTION_FREE: |
22
+ Please respond to the survey question authentically, as if you are completing a real online survey. Your personality, locus of control, and demographic traits influence your reactions.
23
+ **CRITICAL INSTRUCTIONS - FOLLOW THESE EXACTLY:**
24
+ - **BE REALISTIC and HUMAN-LIKE. Only answer the questions. Imagine you're quickly filling out a survey.**
25
+ - Answer very briefly in **1-3 sentences**.
26
+ - Only provide the answer. DO NOT REPEAT THE QUESTION NOR PROVIDE REASONING.
27
+
28
+
29
+ #########################################################
30
+ ### TASK 2: COMMUNICATION STRATEGY SUGGESTION ###
31
+ #########################################################
32
+ # SYSTEM PROMPT FOR COMMUNICATION STRATEGY SUGGESTION
33
+ SYSTEM_REC: >
34
+ You are not an AI or language model. You are an expert with PhD-level training in demography, behavioral science, psychology, marketing, and public health.
35
+ You have deep knowledge of how individuals and communities with varying personality traits and demographic profiles respond to different campaign messaging strategies and visual stimuli.
36
+ # SIMULATION PROMPT FOR COMMUNICATION STRATEGY SUGGESTION
37
+ SIMULATION_REC: |
38
+ You need to evaluate campaign messaging strategies intended to influence a specific individual or community, characterized by known demographic and personality traits.
39
+ There are 3 main types of communication strategies:
40
+ (1) Informational/Neutral
41
+ (2) Self-Efficacy
42
+ (3) Threatening/Fear-driven
43
+ # TASK 2: STRATEGY EVALUATION
44
+ INSTRUCTION_REC: |
45
+ Evaluate the likely impact of the above communication strategy on the specified individual/community.
46
+ (1) Negative impact (expected response score of 1–3 out of 9)
47
+ (2) No impact (expected response score of 4–6 out of 9)
48
+ (3) Positive impact (expected response score of 7–9 out of 9)
49
+
50
+ Please answer with exactly one of the following 3 labels: "positive", "negative", or "no impact".
51
+
52
+ # # TASK 2: STRATEGY SUGGESTION
53
+ # INSTRUCTION_REC_NO_IMPACT: |
54
+ # There are 3 main types of communication strategies:
55
+ # (1) Informational/Neutral
56
+ # (2) Self-Efficacy
57
+ # (3) Threatening/Fear-driven
58
+
59
+ # Based on your expertise, which strategy is most likely to have LITTLE IMPACT (i.e., an expected response score of 4–6 out of 9) on the target individual or community?
60
+ # Suggest only ONE and provide only the strategy name.
61
+ # INSTRUCTION_REC_POSITIVE: |
62
+ # There are 3 main types of communication strategies:
63
+ # (1) Informational/Neutral
64
+ # (2) Self-Efficacy
65
+ # (3) Threatening/Fear-driven
66
+
67
+ # Based on your expertise, which strategy is most likely to have a POSITIVE IMPACT (i.e., an expected response score of 7–9 out of 9) on the target individual or community?
68
+ # Suggest only ONE and provide only the strategy name.
69
+ # INSTRUCTION_REC_NEGATIVE: |
70
+ # There are 3 main types of communication strategies:
71
+ # (1) Informational/Neutral
72
+ # (2) Self-Efficacy
73
+ # (3) Threatening/Fear-driven
74
+
75
+ # Based on your expertise, which strategy is most likely to have a NEGATIVE IMPACT (i.e., an expected response score of 1–3 out of 9) on the target individual or community?
76
+ # Suggest only ONE and provide only the strategy name.
77
+
78
+
79
+
80
+ #########################################################
81
+ ### TASK 3: COMMUNICATION STRATEGY CLASSIFICATION ###
82
+ #########################################################
83
+ # SYSTEM PROMPT FOR COMMUNICATION STRATEGY CLASSIFICATION
84
+ SYSTEM_CLS: >
85
+ You are an expert with PhD qualifications in 5 areas: demography, behavioral science, psychology, marketing, and public health.
86
+ # SIMULATION PROMPT FOR COMMUNICATION STRATEGY CLASSIFICATION
87
+ SIMULATION_CLS: >
88
+ You are now being shown a public health campaign poster.
89
+ # TASK 3: STRATEGY CLASSIFICATION
90
+ INSTRUCTION_STRAT: |
91
+ There are <?> main types of communication strategies:
92
+ (1)
93
+ (2)
94
+ (3)
95
+
96
+ Based on your experience and expertise, what is the communication strategy of the poster? Choose only one and include only the strategy name.
97
+
98
+ JSON_CONVERSION: >
99
+ Extract the content in this answer to JSON with format: <Q1>: \"<Answer to Q1>\"
100
+ Ensure all questions are properly included (13 questions in total).
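A minimal sketch (not taken from app.py) of how these prompt blocks could be loaded and composed into a Task 1 system/user turn, assuming PyYAML and the key names defined above; the exact message layout used by the app may differ.

```python
# Minimal sketch: load configs/prompts.yaml and compose the Task 1 open-text turn.
# Assumes PyYAML is installed; key names come from the file above.
import yaml

with open("configs/prompts.yaml") as f:
    prompts = yaml.safe_load(f)

system_text = prompts["SYSTEM_SIM"]
user_text = prompts["SIMULATION_SIM"] + "\n" + prompts["INSTRUCTION_FREE"]

messages = [
    {"role": "system", "content": system_text},
    {"role": "user", "content": user_text},
]
print(messages[0]["content"][:80])  # sanity check: prints the start of SYSTEM_SIM
```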
configs/task1_demo.yaml ADDED
@@ -0,0 +1,27 @@
1
+ temperature: 0.
2
+ top_p: 1.0
3
+ stochastic: False # deterministic
4
+ seed: 99
5
+ infer_engine: "unsloth"
6
+ data_path: "data/survey_responses_screened.csv" # make sure to export HOME to project path
7
+ # export_path: "$HOME/src/evals/task1_ai_responses.csv"
8
+
9
+ #########################
10
+ ### Emulation Model ###
11
+ #########################
12
+ # model: "unsloth/Llama-3.2-11B-Vision-Instruct"
13
+ # model: "unsloth/Llama-3.2-11B-Vision-Instruct_task1_1_epochs_test_train_on_all"
14
+ # model: "unsloth/gemma-3-4b-it_task1_1_epochs_test_train_on_all"
15
+ # model: "unsloth/gemma-3-4b-it_task1_1_epochs_test_neutral"
16
+ # model: "unsloth/gemma-3-4b-it_task1_1_epochs_test_efficacy"
17
+
18
+ # model: "unsloth/gemma-3-12b-it"
19
+ # model: "unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral"
20
+ # model: "unsloth/gemma-3-12b-it_task1_1_epochs_test_threatening_partialTraits"
21
+ model: "unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits"
22
+ vision: true # default
23
+ trait: true # default
24
+ version: ""
25
+
26
+
27
+ model_summarize: "unsloth/gemma-3-12b-it"
configs/task1_demo_sph.yaml ADDED
@@ -0,0 +1,28 @@
1
+ temperature: 0.
2
+ top_p: 1.0
3
+ stochastic: False # deterministic
4
+ seed: 99
5
+ infer_engine: "unsloth"
6
+ data_path: "data/survey_responses_screened.csv" # make sure to export HOME to project path
7
+ # export_path: "$HOME/src/evals/task1_ai_responses.csv"
8
+
9
+ #########################
10
+ ### Emulation Model ###
11
+ #########################
12
+ # model: "unsloth/Llama-3.2-11B-Vision-Instruct"
13
+ # model: "unsloth/Llama-3.2-11B-Vision-Instruct_task1_1_epochs_test_train_on_all"
14
+ # model: "unsloth/gemma-3-4b-it_task1_1_epochs_test_train_on_all"
15
+ # model: "unsloth/gemma-3-4b-it_task1_1_epochs_test_neutral"
16
+ # model: "unsloth/gemma-3-4b-it_task1_1_epochs_test_efficacy"
17
+
18
+ # model: "unsloth/gemma-3-12b-it"
19
+ # model: "unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral"
20
+ # model: "unsloth/gemma-3-12b-it_task1_1_epochs_test_threatening_partialTraits"
21
+ model: "unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits"
22
+ # model: "unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits_sphTraits"
23
+ vision: true # default
24
+ trait: true # default
25
+ version: ""
26
+
27
+
28
+ model_summarize: "unsloth/gemma-3-12b-it"
data/survey_responses_screened.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eb0e96347a8739c4d6b138a9395feeec591b8dd64fd0f6a74b857b49bb47b2c
3
+ size 18465749
push.sh ADDED
@@ -0,0 +1,22 @@
1
+ git init
2
+ git lfs install
3
+
4
+ git add app.py configs data requirements.txt unsloth utils.py
5
+ git commit -m "Initial commit"
6
+
7
+ git branch -M main
8
+
9
+ git lfs migrate import --include-ref=refs/heads/main --above=10MB -y
10
+
11
+ git remote add huggingface https://huggingface.co/spaces/anh-nn01/ai_empowered_community_simulation_beta
12
+
13
+ git push -u huggingface main --
14
+
15
+ # Notes:
16
+ # 1. use module load for lfs
17
+ # 2. use only launch(), not launch(share=True, max_threads=1,)
18
+ # 3. export full requirements.txt using pip freeze > requirements.txt
19
+ # => comment out `ipython` and `ollama` dependencies
20
+ # 4. Manually upload the LoRA weights to the HF repo due to potential file corruption
21
+ # 5. Manual upload of /app/assets/umd_logo.png
22
+ # 6. Perhaps manually uploading everything is more stable for now :))
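Notes 4-6 fall back to manual uploads. A hedged sketch of that route via the huggingface_hub API instead of git-lfs; the ignore patterns are an assumption, and the repo id is the one in the remote URL above.

```python
# Hedged sketch of the "manual upload" route mentioned in notes 4-6, using the
# huggingface_hub client instead of git-lfs. Run `huggingface-cli login`
# (or pass token=...) before calling this.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path=".",  # local working directory with app.py, configs/, unsloth/, ...
    repo_id="anh-nn01/ai_empowered_community_simulation_beta",
    repo_type="space",
    ignore_patterns=["app_logs/*", "*.pyc"],  # assumption: logs/bytecode need not be pushed
)
```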
requirements.txt ADDED
@@ -0,0 +1,167 @@
1
+ accelerate==1.6.0
2
+ aiofiles==24.1.0
3
+ aiohappyeyeballs==2.6.1
4
+ aiohttp==3.11.16
5
+ aiosignal==1.3.2
6
+ annotated-types==0.7.0
7
+ anyio==4.9.0
8
+ asttokens==3.0.1
9
+ attrs==25.3.0
10
+ bitsandbytes==0.45.5
11
+ Brotli==1.1.0
12
+ certifi==2025.1.31
13
+ charset-normalizer==3.4.1
14
+ click==8.1.8
15
+ colored==2.3.0
16
+ contourpy==1.3.2
17
+ cut-cross-entropy==25.1.1
18
+ cycler==0.12.1
19
+ Cython==3.1.2
20
+ dataclasses-json==0.6.7
21
+ datasets==3.5.0
22
+ decorator==5.2.1
23
+ diffusers @ git+https://github.com/huggingface/diffusers.git@ee40088fe5437f8ed65ec96a22250149e4f334cc
24
+ dill==0.3.8
25
+ docker-pycreds==0.4.0
26
+ docstring_parser==0.16
27
+ executing==2.2.1
28
+ fastapi==0.119.0
29
+ ffmpeg==1.4
30
+ ffmpy==0.6.3
31
+ filelock==3.18.0
32
+ fonttools==4.58.0
33
+ frozenlist==1.5.0
34
+ fsspec==2024.12.0
35
+ gitdb==4.0.12
36
+ GitPython==3.1.44
37
+ gradio==5.49.1
38
+ gradio_client==1.13.3
39
+ greenlet==3.2.0
40
+ groovy==0.1.2
41
+ h11==0.14.0
42
+ hf-xet==1.1.10
43
+ hf_transfer==0.1.9
44
+ httpcore==1.0.8
45
+ httpx==0.27.2
46
+ httpx-sse==0.4.0
47
+ huggingface-hub==0.35.3
48
+ idna==3.10
49
+ importlib_metadata==8.6.1
50
+ # ipython==9.8.0
51
+ # ipython_pygments_lexers==1.1.1
52
+ jedi==0.19.2
53
+ Jinja2==3.1.6
54
+ jsonpatch==1.33
55
+ jsonpointer==3.0.0
56
+ kiwisolver==1.4.8
57
+ langchain==0.3.23
58
+ langchain-community==0.3.21
59
+ langchain-core==0.3.52
60
+ langchain-ollama==0.2.1
61
+ langchain-text-splitters==0.3.8
62
+ langsmith==0.3.31
63
+ markdown-it-py==3.0.0
64
+ MarkupSafe==3.0.2
65
+ marshmallow==3.26.1
66
+ matplotlib==3.10.3
67
+ matplotlib-inline==0.2.1
68
+ mdurl==0.1.2
69
+ mpmath==1.3.0
70
+ multidict==6.4.3
71
+ multiprocess==0.70.16
72
+ mypy-extensions==1.0.0
73
+ networkx==3.4.2
74
+ numpy==2.2.4
75
+ nvidia-cublas-cu12==12.4.5.8
76
+ nvidia-cuda-cupti-cu12==12.4.127
77
+ nvidia-cuda-nvrtc-cu12==12.4.127
78
+ nvidia-cuda-runtime-cu12==12.4.127
79
+ nvidia-cudnn-cu12==9.1.0.70
80
+ nvidia-cufft-cu12==11.2.1.3
81
+ nvidia-curand-cu12==10.3.5.147
82
+ nvidia-cusolver-cu12==11.6.1.9
83
+ nvidia-cusparse-cu12==12.3.1.170
84
+ nvidia-cusparselt-cu12==0.6.2
85
+ nvidia-nccl-cu12==2.21.5
86
+ nvidia-nvjitlink-cu12==12.4.127
87
+ nvidia-nvtx-cu12==12.4.127
88
+ # ollama==0.4.2
89
+ orjson==3.10.16
90
+ packaging==24.2
91
+ pandas==2.2.3
92
+ parso==0.8.5
93
+ peft==0.15.2
94
+ pexpect==4.9.0
95
+ pillow==11.2.1
96
+ platformdirs==4.3.7
97
+ prompt_toolkit==3.0.52
98
+ propcache==0.3.1
99
+ protobuf==3.20.3
100
+ psutil==7.0.0
101
+ ptyprocess==0.7.0
102
+ pure_eval==0.2.3
103
+ pyarrow==19.0.1
104
+ pydantic==2.11.3
105
+ pydantic-settings==2.8.1
106
+ pydantic_core==2.33.1
107
+ pydub==0.25.1
108
+ Pygments==2.19.1
109
+ pyparsing==3.2.3
110
+ python-dateutil==2.9.0.post0
111
+ python-dotenv==1.1.0
112
+ python-multipart==0.0.20
113
+ pytz==2025.2
114
+ PyYAML==6.0.2
115
+ regex==2024.11.6
116
+ requests==2.32.3
117
+ requests-toolbelt==1.0.0
118
+ rich==14.0.0
119
+ ruff==0.14.0
120
+ safehttpx==0.1.6
121
+ safetensors==0.5.3
122
+ seaborn==0.13.2
123
+ semantic-version==2.10.0
124
+ sentencepiece==0.2.0
125
+ sentry-sdk==2.27.0
126
+ setproctitle==1.3.5
127
+ setuptools==79.0.0
128
+ shellingham==1.5.4
129
+ shtab==1.7.2
130
+ six==1.17.0
131
+ smmap==5.0.2
132
+ sniffio==1.3.1
133
+ SQLAlchemy==2.0.40
134
+ stack-data==0.6.3
135
+ starlette==0.48.0
136
+ sympy==1.13.1
137
+ tenacity==9.1.2
138
+ termcolor==3.0.1
139
+ tokenizers==0.21.4
140
+ tomlkit==0.13.3
141
+ torch==2.6.0
142
+ torchvision==0.21.0
143
+ tqdm==4.67.1
144
+ traitlets==5.14.3
145
+ transformers==4.50.0
146
+ triton==3.2.0
147
+ trl==0.15.2
148
+ typeguard==4.4.2
149
+ typer==0.19.2
150
+ typing-inspect==0.9.0
151
+ typing-inspection==0.4.0
152
+ typing_extensions==4.13.2
153
+ tyro==0.9.19
154
+ tzdata==2025.2
155
+ unsloth==2025.3.19
156
+ unsloth_zoo==2025.3.17
157
+ urllib3==2.4.0
158
+ uvicorn==0.37.0
159
+ wandb==0.19.10
160
+ wcwidth==0.2.14
161
+ websockets==15.0.1
162
+ wheel==0.45.1
163
+ xformers==0.0.29.post3
164
+ xxhash==3.5.0
165
+ yarl==1.19.0
166
+ zipp==3.21.0
167
+ zstandard==0.23.0
requirements_concise.txt ADDED
@@ -0,0 +1,18 @@
1
+ numpy==2.2.4
2
+ pandas==2.2.3
3
+ pillow==11.2.1
4
+ langchain==0.3.23
5
+ langchain-core==0.3.52
6
+ langchain-community==0.3.21
7
+ langchain-ollama==0.2.1
8
+ # ollama==0.4.2
9
+ tqdm
10
+ torch
11
+ unsloth==2025.3.19
12
+ termcolor
13
+ python-dotenv
14
+ transformers==4.50.0
15
+ wandb
16
+
17
+ # Image Generation
18
+ # git+https://github.com/huggingface/diffusers.git
unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/README.md ADDED
@@ -0,0 +1,202 @@
1
+ ---
2
+ base_model: unsloth/gemma-3-12b-it-unsloth-bnb-4bit
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.15.2
unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/adapter_config.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 8,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 8,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": "(?:.*?(?:language|text).*?(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense).*?(?:k_proj|v_proj|q_proj|out_proj|fc1|fc2|o_proj|gate_proj|up_proj|down_proj).*?)|(?:\\bmodel\\.layers\\.[\\d]{1,}\\.(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense)\\.(?:(?:k_proj|v_proj|q_proj|out_proj|fc1|fc2|o_proj|gate_proj|up_proj|down_proj)))",
27
+ "task_type": "CAUSAL_LM",
28
+ "trainable_token_indices": null,
29
+ "use_dora": false,
30
+ "use_rslora": false
31
+ }
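For illustration only, a sketch of loading this r=8, alpha=8 LoRA adapter with transformers + PEFT rather than the Unsloth loader the app itself uses; it assumes bitsandbytes is available for the 4-bit base model named in base_model_name_or_path above.

```python
# Illustrative sketch only (the app loads through Unsloth): attach this LoRA
# adapter to the 4-bit Gemma-3 base with PEFT. Requires bitsandbytes and a GPU
# large enough for the 12B base.
from transformers import Gemma3ForConditionalGeneration, AutoProcessor
from peft import PeftModel

base_id = "unsloth/gemma-3-12b-it-unsloth-bnb-4bit"
adapter_dir = "unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits"

base = Gemma3ForConditionalGeneration.from_pretrained(base_id, device_map="auto")
model = PeftModel.from_pretrained(base, adapter_dir)    # applies the r=8, alpha=8 adapter
processor = AutoProcessor.from_pretrained(adapter_dir)  # tokenizer + Gemma3 image processor
model.eval()
```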
unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7108dca92843322a12e503ab99cbd70a5f676fa25c54e6a11d88473f65143ee3
3
+ size 131040264
unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/added_tokens.json ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/chat_template.json ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{ '<start_of_turn>model\n' }}\n{%- endif -%}\n"
3
+ }
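A hedged sketch of rendering this Gemma-3 chat template from the local adapter folder; tokenize=False returns the prompt string so the <start_of_turn>/<start_of_image> markup is visible. The message content is made up for illustration.

```python
# Sketch: render the chat template above into a prompt string. The example
# messages are placeholders, not the app's real prompts.
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained(
    "unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits"  # local folder above
)
messages = [
    {"role": "system", "content": [{"type": "text", "text": "You are a person with unique traits."}]},
    {"role": "user", "content": [{"type": "image"},
                                 {"type": "text", "text": "How does this poster make you feel?"}]},
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
print(prompt)  # shows <start_of_turn>user ... <start_of_image> ... <start_of_turn>model
```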
unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_pan_and_scan": null,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
+ ],
12
+ "image_processor_type": "Gemma3ImageProcessor",
13
+ "image_seq_length": 256,
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "pan_and_scan_max_num_crops": null,
20
+ "pan_and_scan_min_crop_size": null,
21
+ "pan_and_scan_min_ratio_to_activate": null,
22
+ "processor_class": "Gemma3Processor",
23
+ "resample": 2,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 896,
27
+ "width": 896
28
+ }
29
+ }
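A quick check of what this preprocessor does to a pixel value: rescale_factor is 1/255 and mean = std = 0.5, so 0-255 inputs map onto roughly [-1, 1] before the 896x896 image is encoded into image_seq_length = 256 soft tokens.

```python
# Quick arithmetic check of the config above:
# rescale_factor == 1/255, and (x * rescale - mean) / std with mean = std = 0.5
# maps pixel values 0..255 onto roughly -1..1.
rescale = 0.00392156862745098  # == 1 / 255
for pixel in (0, 128, 255):
    normalized = (pixel * rescale - 0.5) / 0.5
    print(pixel, round(normalized, 3))  # -> -1.0, 0.004, 1.0
```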
unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/processor_config.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "image_seq_length": 256,
3
+ "processor_class": "Gemma3Processor"
4
+ }
unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<end_of_turn>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7666402c0617d170e6b0a985b3130c3fb0795393aa0970600994a5d9aae12351
3
+ size 33384822
unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/tokenizer.model ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
unsloth/gemma-3-12b-it_task1_1_epochs_test_neutral_partialTraits/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
unsloth_compiled_cache/AqlmLoraLinear_peft_forward.py ADDED
@@ -0,0 +1,67 @@
1
+ """
2
+ 2025.3.17
3
+ 2025.3.19
4
+ 4.50.0
5
+ 0.15.2
6
+ __UNSLOTH_VERSIONING__
7
+ """
8
+
9
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
10
+ from torch import Tensor
11
+ import torch
12
+ import torch.nn as nn
13
+ from torch.nn import functional as F
14
+ from peft.tuners.lora.aqlm import (torch)
15
+
16
+
17
+ torch_addmm = torch.addmm
18
+ torch_add = torch.add
19
+ # @torch.compile(fullgraph = False, dynamic = True, options = torch_compile_options)
20
+ def lora_forward(result, lora_A, lora_B, dropout, x, scaling):
21
+ xA = dropout(x) @ lora_A.weight.t()
22
+ # output = result + scaling * xA @ lora_B.weight.t()
23
+ shape = result.shape
24
+ output = torch_addmm(
25
+ result.view(-1, shape[-1]),
26
+ xA.view(-1, xA.shape[-1]),
27
+ lora_B.weight.t(),
28
+ alpha = scaling,
29
+ beta = 1,
30
+ ).view(shape)
31
+
32
+ bias = lora_B.bias
33
+ if bias is not None:
34
+ output = torch_add(
35
+ output,
36
+ bias,
37
+ alpha = scaling,
38
+ )
39
+ return output
40
+ pass
41
+
42
+ def unsloth_forward(self, x: torch.Tensor):
43
+ # note: logic differs from default Linear because merging is not supported
44
+ result = self.base_layer(x)
45
+
46
+ if self.disable_adapters:
47
+ return result
48
+
49
+ for active_adapter in self.active_adapters:
50
+ if active_adapter not in self.lora_A.keys():
51
+ continue
52
+ lora_A = self.lora_A[active_adapter]
53
+ lora_B = self.lora_B[active_adapter]
54
+ dropout = self.lora_dropout[active_adapter]
55
+ scaling = self.scaling[active_adapter]
56
+
57
+ requires_conversion = not torch.is_autocast_enabled()
58
+ if requires_conversion:
59
+ expected_dtype = result.dtype
60
+ x = self._cast_input_dtype(x, lora_A.weight.dtype)
61
+
62
+ output = lora_B(lora_A(dropout(x)))
63
+ if requires_conversion:
64
+ output = output.to(expected_dtype)
65
+ output = output * scaling
66
+ result += output
67
+ return result
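A standalone sanity check (not part of the generated cache) that the fused torch.addmm call in lora_forward above matches the naive result + scaling * (x @ A.T) @ B.T formulation when lora_B has no bias; shapes are small made-up examples.

```python
# Standalone check: torch.addmm(result, xA, B.T, beta=1, alpha=scaling) equals
# result + scaling * (x @ A.T) @ B.T, i.e. the fused path in lora_forward above.
import torch

torch.manual_seed(0)
x = torch.randn(4, 16)        # input           (batch, in_features)
A = torch.randn(8, 16)        # lora_A.weight   (r, in_features)
B = torch.randn(32, 8)        # lora_B.weight   (out_features, r)
result = torch.randn(4, 32)   # base layer output
scaling = 0.5

naive = result + scaling * (x @ A.t()) @ B.t()
fused = torch.addmm(result, x @ A.t(), B.t(), beta=1, alpha=scaling)
print(torch.allclose(naive, fused, atol=1e-6))  # True
```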
unsloth_compiled_cache/AwqLoraLinear_peft_forward.py ADDED
@@ -0,0 +1,66 @@
1
+ """
2
+ 2025.3.17
3
+ 2025.3.19
4
+ 4.50.0
5
+ 0.15.2
6
+ __UNSLOTH_VERSIONING__
7
+ """
8
+
9
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
10
+ from torch import Tensor
11
+ import torch
12
+ import torch.nn as nn
13
+ from torch.nn import functional as F
14
+ from peft.tuners.lora.awq import (torch)
15
+
16
+
17
+ torch_addmm = torch.addmm
18
+ torch_add = torch.add
19
+ # @torch.compile(fullgraph = False, dynamic = True, options = torch_compile_options)
20
+ def lora_forward(result, lora_A, lora_B, dropout, x, scaling):
21
+ xA = dropout(x) @ lora_A.weight.t()
22
+ # output = result + scaling * xA @ lora_B.weight.t()
23
+ shape = result.shape
24
+ output = torch_addmm(
25
+ result.view(-1, shape[-1]),
26
+ xA.view(-1, xA.shape[-1]),
27
+ lora_B.weight.t(),
28
+ alpha = scaling,
29
+ beta = 1,
30
+ ).view(shape)
31
+
32
+ bias = lora_B.bias
33
+ if bias is not None:
34
+ output = torch_add(
35
+ output,
36
+ bias,
37
+ alpha = scaling,
38
+ )
39
+ return output
40
+ pass
41
+
42
+ def unsloth_forward(self, x: torch.Tensor):
43
+ result = self.quant_linear_module(x)
44
+
45
+ if self.disable_adapters:
46
+ return result
47
+
48
+ for active_adapter in self.active_adapters:
49
+ if active_adapter not in self.lora_A.keys():
50
+ continue
51
+ lora_A = self.lora_A[active_adapter]
52
+ lora_B = self.lora_B[active_adapter]
53
+ dropout = self.lora_dropout[active_adapter]
54
+ scaling = self.scaling[active_adapter]
55
+
56
+ requires_conversion = not torch.is_autocast_enabled()
57
+ if requires_conversion:
58
+ expected_dtype = result.dtype
59
+ x = self._cast_input_dtype(x, lora_A.weight.dtype)
60
+
61
+ output = lora_B(lora_A(dropout(x)))
62
+ if requires_conversion:
63
+ output = output.to(expected_dtype)
64
+ output = output * scaling
65
+ result = result + output
66
+ return result
unsloth_compiled_cache/BatchNorm1d.py ADDED
@@ -0,0 +1,88 @@
1
+ """
2
+ 2025.3.17
3
+ 2025.3.19
4
+ 4.50.0
5
+ 0.15.2
6
+ __UNSLOTH_VERSIONING__
7
+ """
8
+
9
+ # Unsloth Zoo - Utilities for Unsloth
10
+ # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
11
+ #
12
+ # This program is free software: you can redistribute it and/or modify
13
+ # it under the terms of the GNU Lesser General Public License as published by
14
+ # the Free Software Foundation, either version 3 of the License, or
15
+ # (at your option) any later version.
16
+ #
17
+ # This program is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
+ # GNU General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public License
23
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
24
+
25
+ import os
26
+ import importlib.util
27
+ if importlib.util.find_spec("unsloth_studio") is None:
28
+ UNSLOTH_STUDIO_ENABLED = False
29
+ else:
30
+ UNSLOTH_STUDIO_ENABLED = os.environ.get("UNSLOTH_STUDIO_DISABLED", "0") == "0"
31
+ pass
32
+ from typing import List, Dict, Tuple, Optional, Any, Callable
33
+ import math
34
+
35
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
36
+ from torch import Tensor
37
+ import torch
38
+ import torch.nn as nn
39
+ from torch.nn import functional as F
40
+ from transformers.models.gemma3.modeling_gemma3 import (nn)
41
+
42
+ def forward(self, input: Tensor) -> Tensor:
43
+ self._check_input_dim(input)
44
+
45
+ # exponential_average_factor is set to self.momentum
46
+ # (when it is available) only so that it gets updated
47
+ # in ONNX graph when this node is exported to ONNX.
48
+ if self.momentum is None:
49
+ exponential_average_factor = 0.0
50
+ else:
51
+ exponential_average_factor = self.momentum
52
+
53
+ if self.training and self.track_running_stats:
54
+ # TODO: if statement only here to tell the jit to skip emitting this when it is None
55
+ if self.num_batches_tracked is not None: # type: ignore[has-type]
56
+ self.num_batches_tracked.add_(1) # type: ignore[has-type]
57
+ if self.momentum is None: # use cumulative moving average
58
+ exponential_average_factor = 1.0 / float(self.num_batches_tracked)
59
+ else: # use exponential moving average
60
+ exponential_average_factor = self.momentum
61
+
62
+ r"""
63
+ Decide whether the mini-batch stats should be used for normalization rather than the buffers.
64
+ Mini-batch stats are used in training mode, and in eval mode when buffers are None.
65
+ """
66
+ if self.training:
67
+ bn_training = True
68
+ else:
69
+ bn_training = (self.running_mean is None) and (self.running_var is None)
70
+
71
+ r"""
72
+ Buffers are only updated if they are to be tracked and we are in training mode. Thus they only need to be
73
+ passed when the update should occur (i.e. in training mode when they are tracked), or when buffer stats are
74
+ used for normalization (i.e. in eval mode when buffers are not None).
75
+ """
76
+ return F.batch_norm(
77
+ input,
78
+ # If buffers are not to be tracked, ensure that they won't be updated
79
+ self.running_mean
80
+ if not self.training or self.track_running_stats
81
+ else None,
82
+ self.running_var if not self.training or self.track_running_stats else None,
83
+ self.weight,
84
+ self.bias,
85
+ bn_training,
86
+ exponential_average_factor,
87
+ self.eps,
88
+ ).to(input.dtype).to(input.dtype)
unsloth_compiled_cache/BatchNorm2d.py ADDED
@@ -0,0 +1,88 @@
1
+ """
2
+ 2025.3.17
3
+ 2025.3.19
4
+ 4.50.0
5
+ 0.15.2
6
+ __UNSLOTH_VERSIONING__
7
+ """
8
+
9
+ # Unsloth Zoo - Utilities for Unsloth
10
+ # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
11
+ #
12
+ # This program is free software: you can redistribute it and/or modify
13
+ # it under the terms of the GNU Lesser General Public License as published by
14
+ # the Free Software Foundation, either version 3 of the License, or
15
+ # (at your option) any later version.
16
+ #
17
+ # This program is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
+ # GNU General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public License
23
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
24
+
25
+ import os
26
+ import importlib.util
27
+ if importlib.util.find_spec("unsloth_studio") is None:
28
+ UNSLOTH_STUDIO_ENABLED = False
29
+ else:
30
+ UNSLOTH_STUDIO_ENABLED = os.environ.get("UNSLOTH_STUDIO_DISABLED", "0") == "0"
31
+ pass
32
+ from typing import List, Dict, Tuple, Optional, Any, Callable
33
+ import math
34
+
35
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
36
+ from torch import Tensor
37
+ import torch
38
+ import torch.nn as nn
39
+ from torch.nn import functional as F
40
+ from transformers.models.gemma3.modeling_gemma3 import (nn)
41
+
42
+ def forward(self, input: Tensor) -> Tensor:
43
+ self._check_input_dim(input)
44
+
45
+ # exponential_average_factor is set to self.momentum
46
+ # (when it is available) only so that it gets updated
47
+ # in ONNX graph when this node is exported to ONNX.
48
+ if self.momentum is None:
49
+ exponential_average_factor = 0.0
50
+ else:
51
+ exponential_average_factor = self.momentum
52
+
53
+ if self.training and self.track_running_stats:
54
+ # TODO: if statement only here to tell the jit to skip emitting this when it is None
55
+ if self.num_batches_tracked is not None: # type: ignore[has-type]
56
+ self.num_batches_tracked.add_(1) # type: ignore[has-type]
57
+ if self.momentum is None: # use cumulative moving average
58
+ exponential_average_factor = 1.0 / float(self.num_batches_tracked)
59
+ else: # use exponential moving average
60
+ exponential_average_factor = self.momentum
61
+
62
+ r"""
63
+ Decide whether the mini-batch stats should be used for normalization rather than the buffers.
64
+ Mini-batch stats are used in training mode, and in eval mode when buffers are None.
65
+ """
66
+ if self.training:
67
+ bn_training = True
68
+ else:
69
+ bn_training = (self.running_mean is None) and (self.running_var is None)
70
+
71
+ r"""
72
+ Buffers are only updated if they are to be tracked and we are in training mode. Thus they only need to be
73
+ passed when the update should occur (i.e. in training mode when they are tracked), or when buffer stats are
74
+ used for normalization (i.e. in eval mode when buffers are not None).
75
+ """
76
+ return F.batch_norm(
77
+ input,
78
+ # If buffers are not to be tracked, ensure that they won't be updated
79
+ self.running_mean
80
+ if not self.training or self.track_running_stats
81
+ else None,
82
+ self.running_var if not self.training or self.track_running_stats else None,
83
+ self.weight,
84
+ self.bias,
85
+ bn_training,
86
+ exponential_average_factor,
87
+ self.eps,
88
+ ).to(input.dtype).to(input.dtype)
unsloth_compiled_cache/BatchNorm3d.py ADDED
@@ -0,0 +1,88 @@
1
+ """
2
+ 2025.3.17
3
+ 2025.3.19
4
+ 4.50.0
5
+ 0.15.2
6
+ __UNSLOTH_VERSIONING__
7
+ """
8
+
9
+ # Unsloth Zoo - Utilities for Unsloth
10
+ # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
11
+ #
12
+ # This program is free software: you can redistribute it and/or modify
13
+ # it under the terms of the GNU Lesser General Public License as published by
14
+ # the Free Software Foundation, either version 3 of the License, or
15
+ # (at your option) any later version.
16
+ #
17
+ # This program is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
+ # GNU General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public License
23
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
24
+
25
+ import os
26
+ import importlib.util
27
+ if importlib.util.find_spec("unsloth_studio") is None:
28
+ UNSLOTH_STUDIO_ENABLED = False
29
+ else:
30
+ UNSLOTH_STUDIO_ENABLED = os.environ.get("UNSLOTH_STUDIO_DISABLED", "0") == "0"
31
+ pass
32
+ from typing import List, Dict, Tuple, Optional, Any, Callable
33
+ import math
34
+
35
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
36
+ from torch import Tensor
37
+ import torch
38
+ import torch.nn as nn
39
+ from torch.nn import functional as F
40
+ from transformers.models.gemma3.modeling_gemma3 import (nn)
41
+
42
+ def forward(self, input: Tensor) -> Tensor:
43
+ self._check_input_dim(input)
44
+
45
+ # exponential_average_factor is set to self.momentum
46
+ # (when it is available) only so that it gets updated
47
+ # in ONNX graph when this node is exported to ONNX.
48
+ if self.momentum is None:
49
+ exponential_average_factor = 0.0
50
+ else:
51
+ exponential_average_factor = self.momentum
52
+
53
+ if self.training and self.track_running_stats:
54
+ # TODO: if statement only here to tell the jit to skip emitting this when it is None
55
+ if self.num_batches_tracked is not None: # type: ignore[has-type]
56
+ self.num_batches_tracked.add_(1) # type: ignore[has-type]
57
+ if self.momentum is None: # use cumulative moving average
58
+ exponential_average_factor = 1.0 / float(self.num_batches_tracked)
59
+ else: # use exponential moving average
60
+ exponential_average_factor = self.momentum
61
+
62
+ r"""
63
+ Decide whether the mini-batch stats should be used for normalization rather than the buffers.
64
+ Mini-batch stats are used in training mode, and in eval mode when buffers are None.
65
+ """
66
+ if self.training:
67
+ bn_training = True
68
+ else:
69
+ bn_training = (self.running_mean is None) and (self.running_var is None)
70
+
71
+ r"""
72
+ Buffers are only updated if they are to be tracked and we are in training mode. Thus they only need to be
73
+ passed when the update should occur (i.e. in training mode when they are tracked), or when buffer stats are
74
+ used for normalization (i.e. in eval mode when buffers are not None).
75
+ """
76
+ return F.batch_norm(
77
+ input,
78
+ # If buffers are not to be tracked, ensure that they won't be updated
79
+ self.running_mean
80
+ if not self.training or self.track_running_stats
81
+ else None,
82
+ self.running_var if not self.training or self.track_running_stats else None,
83
+ self.weight,
84
+ self.bias,
85
+ bn_training,
86
+ exponential_average_factor,
87
+ self.eps,
88
+ ).to(input.dtype).to(input.dtype)
unsloth_compiled_cache/Conv1d.py ADDED
@@ -0,0 +1,43 @@
1
+ """
2
+ 2025.3.17
3
+ 2025.3.19
4
+ 4.50.0
5
+ 0.15.2
6
+ __UNSLOTH_VERSIONING__
7
+ """
8
+
9
+ # Unsloth Zoo - Utilities for Unsloth
10
+ # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
11
+ #
12
+ # This program is free software: you can redistribute it and/or modify
13
+ # it under the terms of the GNU Lesser General Public License as published by
14
+ # the Free Software Foundation, either version 3 of the License, or
15
+ # (at your option) any later version.
16
+ #
17
+ # This program is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
+ # GNU General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public License
23
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
24
+
25
+ import os
26
+ import importlib.util
27
+ if importlib.util.find_spec("unsloth_studio") is None:
28
+ UNSLOTH_STUDIO_ENABLED = False
29
+ else:
30
+ UNSLOTH_STUDIO_ENABLED = os.environ.get("UNSLOTH_STUDIO_DISABLED", "0") == "0"
31
+ pass
32
+ from typing import List, Dict, Tuple, Optional, Any, Callable
33
+ import math
34
+
35
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
36
+ from torch import Tensor
37
+ import torch
38
+ import torch.nn as nn
39
+ from torch.nn import functional as F
40
+
41
+
42
+ def forward(self, input: Tensor) -> Tensor:
43
+ return self._conv_forward(input, self.weight, self.bias).to(input.dtype).to(input.dtype)
unsloth_compiled_cache/Conv2d.py ADDED
@@ -0,0 +1,43 @@
1
+ """
2
+ 2025.3.17
3
+ 2025.3.19
4
+ 4.50.0
5
+ 0.15.2
6
+ __UNSLOTH_VERSIONING__
7
+ """
8
+
9
+ # Unsloth Zoo - Utilities for Unsloth
10
+ # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
11
+ #
12
+ # This program is free software: you can redistribute it and/or modify
13
+ # it under the terms of the GNU Lesser General Public License as published by
14
+ # the Free Software Foundation, either version 3 of the License, or
15
+ # (at your option) any later version.
16
+ #
17
+ # This program is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
+ # GNU General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public License
23
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
24
+
25
+ import os
26
+ import importlib.util
27
+ if importlib.util.find_spec("unsloth_studio") is None:
28
+ UNSLOTH_STUDIO_ENABLED = False
29
+ else:
30
+ UNSLOTH_STUDIO_ENABLED = os.environ.get("UNSLOTH_STUDIO_DISABLED", "0") == "0"
31
+ pass
32
+ from typing import List, Dict, Tuple, Optional, Any, Callable
33
+ import math
34
+
35
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
36
+ from torch import Tensor
37
+ import torch
38
+ import torch.nn as nn
39
+ from torch.nn import functional as F
40
+
41
+
42
+ def forward(self, input: Tensor) -> Tensor:
43
+ return self._conv_forward(input, self.weight, self.bias).to(input.dtype).to(input.dtype)
unsloth_compiled_cache/Conv3d.py ADDED
@@ -0,0 +1,43 @@
1
+ """
2
+ 2025.3.17
3
+ 2025.3.19
4
+ 4.50.0
5
+ 0.15.2
6
+ __UNSLOTH_VERSIONING__
7
+ """
8
+
9
+ # Unsloth Zoo - Utilities for Unsloth
10
+ # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
11
+ #
12
+ # This program is free software: you can redistribute it and/or modify
13
+ # it under the terms of the GNU Lesser General Public License as published by
14
+ # the Free Software Foundation, either version 3 of the License, or
15
+ # (at your option) any later version.
16
+ #
17
+ # This program is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
+ # GNU General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public License
23
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
24
+
25
+ import os
26
+ import importlib.util
27
+ if importlib.util.find_spec("unsloth_studio") is None:
28
+ UNSLOTH_STUDIO_ENABLED = False
29
+ else:
30
+ UNSLOTH_STUDIO_ENABLED = os.environ.get("UNSLOTH_STUDIO_DISABLED", "0") == "0"
31
+ pass
32
+ from typing import List, Dict, Tuple, Optional, Any, Callable
33
+ import math
34
+
35
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
36
+ from torch import Tensor
37
+ import torch
38
+ import torch.nn as nn
39
+ from torch.nn import functional as F
40
+
41
+
42
+ def forward(self, input: Tensor) -> Tensor:
43
+ return self._conv_forward(input, self.weight, self.bias).to(input.dtype).to(input.dtype)
unsloth_compiled_cache/ConvTranspose1d.py ADDED
@@ -0,0 +1,70 @@
+ """
+ 2025.3.17
+ 2025.3.19
+ 4.50.0
+ 0.15.2
+ __UNSLOTH_VERSIONING__
+ """
+
+ # Unsloth Zoo - Utilities for Unsloth
+ # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ import os
+ import importlib.util
+ if importlib.util.find_spec("unsloth_studio") is None:
+     UNSLOTH_STUDIO_ENABLED = False
+ else:
+     UNSLOTH_STUDIO_ENABLED = os.environ.get("UNSLOTH_STUDIO_DISABLED", "0") == "0"
+ pass
+ from typing import List, Dict, Tuple, Optional, Any, Callable
+ import math
+
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
+ from torch import Tensor
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+ from transformers.models.gemma3.modeling_gemma3 import (List, Optional, Tuple, nn)
+
+ def forward(self, input: Tensor, output_size: Optional[List[int]] = None) -> Tensor:
+     if self.padding_mode != "zeros":
+         raise ValueError(
+             "Only `zeros` padding mode is supported for ConvTranspose1d"
+         )
+
+     assert isinstance(self.padding, tuple)
+     # One cannot replace List by Tuple or Sequence in "_output_padding" because
+     # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`.
+     num_spatial_dims = 1
+     output_padding = self._output_padding(
+         input,
+         output_size,
+         self.stride, # type: ignore[arg-type]
+         self.padding, # type: ignore[arg-type]
+         self.kernel_size, # type: ignore[arg-type]
+         num_spatial_dims,
+         self.dilation, # type: ignore[arg-type]
+     )
+     return F.conv_transpose1d(
+         input,
+         self.weight,
+         self.bias,
+         self.stride,
+         self.padding,
+         output_padding,
+         self.groups,
+         self.dilation,
+     ).to(input.dtype).to(input.dtype)
unsloth_compiled_cache/ConvTranspose2d.py ADDED
@@ -0,0 +1,71 @@
+ """
+ 2025.3.17
+ 2025.3.19
+ 4.50.0
+ 0.15.2
+ __UNSLOTH_VERSIONING__
+ """
+
+ # Unsloth Zoo - Utilities for Unsloth
+ # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ import os
+ import importlib.util
+ if importlib.util.find_spec("unsloth_studio") is None:
+     UNSLOTH_STUDIO_ENABLED = False
+ else:
+     UNSLOTH_STUDIO_ENABLED = os.environ.get("UNSLOTH_STUDIO_DISABLED", "0") == "0"
+ pass
+ from typing import List, Dict, Tuple, Optional, Any, Callable
+ import math
+
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
+ from torch import Tensor
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+ from transformers.models.gemma3.modeling_gemma3 import (List, Optional, Tuple, nn)
+
+ def forward(self, input: Tensor, output_size: Optional[List[int]] = None) -> Tensor:
+     if self.padding_mode != "zeros":
+         raise ValueError(
+             "Only `zeros` padding mode is supported for ConvTranspose2d"
+         )
+
+     assert isinstance(self.padding, tuple)
+     # One cannot replace List by Tuple or Sequence in "_output_padding" because
+     # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`.
+     num_spatial_dims = 2
+     output_padding = self._output_padding(
+         input,
+         output_size,
+         self.stride, # type: ignore[arg-type]
+         self.padding, # type: ignore[arg-type]
+         self.kernel_size, # type: ignore[arg-type]
+         num_spatial_dims,
+         self.dilation, # type: ignore[arg-type]
+     )
+
+     return F.conv_transpose2d(
+         input,
+         self.weight,
+         self.bias,
+         self.stride,
+         self.padding,
+         output_padding,
+         self.groups,
+         self.dilation,
+     ).to(input.dtype).to(input.dtype)
unsloth_compiled_cache/ConvTranspose3d.py ADDED
@@ -0,0 +1,71 @@
+ """
+ 2025.3.17
+ 2025.3.19
+ 4.50.0
+ 0.15.2
+ __UNSLOTH_VERSIONING__
+ """
+
+ # Unsloth Zoo - Utilities for Unsloth
+ # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ import os
+ import importlib.util
+ if importlib.util.find_spec("unsloth_studio") is None:
+     UNSLOTH_STUDIO_ENABLED = False
+ else:
+     UNSLOTH_STUDIO_ENABLED = os.environ.get("UNSLOTH_STUDIO_DISABLED", "0") == "0"
+ pass
+ from typing import List, Dict, Tuple, Optional, Any, Callable
+ import math
+
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
+ from torch import Tensor
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+ from transformers.models.gemma3.modeling_gemma3 import (List, Optional, Tuple, nn)
+
+ def forward(self, input: Tensor, output_size: Optional[List[int]] = None) -> Tensor:
+     if self.padding_mode != "zeros":
+         raise ValueError(
+             "Only `zeros` padding mode is supported for ConvTranspose3d"
+         )
+
+     assert isinstance(self.padding, tuple)
+     # One cannot replace List by Tuple or Sequence in "_output_padding" because
+     # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`.
+     num_spatial_dims = 3
+     output_padding = self._output_padding(
+         input,
+         output_size,
+         self.stride, # type: ignore[arg-type]
+         self.padding, # type: ignore[arg-type]
+         self.kernel_size, # type: ignore[arg-type]
+         num_spatial_dims,
+         self.dilation, # type: ignore[arg-type]
+     )
+
+     return F.conv_transpose3d(
+         input,
+         self.weight,
+         self.bias,
+         self.stride,
+         self.padding,
+         output_padding,
+         self.groups,
+         self.dilation,
+     ).to(input.dtype).to(input.dtype)
unsloth_compiled_cache/GPTQLoraLinear_peft_forward.py ADDED
@@ -0,0 +1,73 @@
+ """
+ 2025.3.17
+ 2025.3.19
+ 4.50.0
+ 0.15.2
+ __UNSLOTH_VERSIONING__
+ """
+
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
+ from torch import Tensor
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+ from peft.tuners.lora.gptq import (torch)
+
+
+ torch_addmm = torch.addmm
+ torch_add = torch.add
+ # @torch.compile(fullgraph = False, dynamic = True, options = torch_compile_options)
+ def lora_forward(result, lora_A, lora_B, dropout, x, scaling):
+     xA = dropout(x) @ lora_A.weight.t()
+     # output = result + scaling * xA @ lora_B.weight.t()
+     shape = result.shape
+     output = torch_addmm(
+         result.view(-1, shape[-1]),
+         xA.view(-1, xA.shape[-1]),
+         lora_B.weight.t(),
+         alpha = scaling,
+         beta = 1,
+     ).view(shape)
+
+     bias = lora_B.bias
+     if bias is not None:
+         output = torch_add(
+             output,
+             bias,
+             alpha = scaling,
+         )
+     return output
+ pass
+
+ def unsloth_forward(self, x: torch.Tensor):
+     # note: logic differs from default Linear because merging is not supported
+     result = self.quant_linear_module(x)
+
+     if self.disable_adapters:
+         return result
+
+     lora_A_keys = self.lora_A.keys()
+     for active_adapter in self.active_adapters:
+         if active_adapter not in lora_A_keys:
+             continue
+
+         lora_A = self.lora_A[active_adapter]
+         lora_B = self.lora_B[active_adapter]
+         dropout = self.lora_dropout[active_adapter]
+         scaling = self.scaling[active_adapter]
+
+         requires_conversion = not torch.is_autocast_enabled()
+         if requires_conversion:
+             expected_dtype = result.dtype
+             x = self._cast_input_dtype(x, lora_A.weight.dtype)
+
+         output = lora_B(lora_A(dropout(x)))
+
+         if requires_conversion:
+             output = output.to(expected_dtype)
+
+         if scaling != 1: # skip scaling == 1 no-op
+             output = output * scaling
+
+         result += output
+     return result
unsloth_compiled_cache/GroupNorm.py ADDED
@@ -0,0 +1,43 @@
+ """
+ 2025.3.17
+ 2025.3.19
+ 4.50.0
+ 0.15.2
+ __UNSLOTH_VERSIONING__
+ """
+
+ # Unsloth Zoo - Utilities for Unsloth
+ # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ import os
+ import importlib.util
+ if importlib.util.find_spec("unsloth_studio") is None:
+     UNSLOTH_STUDIO_ENABLED = False
+ else:
+     UNSLOTH_STUDIO_ENABLED = os.environ.get("UNSLOTH_STUDIO_DISABLED", "0") == "0"
+ pass
+ from typing import List, Dict, Tuple, Optional, Any, Callable
+ import math
+
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
+ from torch import Tensor
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+
+
+ def forward(self, input: Tensor) -> Tensor:
+     return F.group_norm(input, self.num_groups, self.weight, self.bias, self.eps).to(input.dtype).to(input.dtype)
unsloth_compiled_cache/LayerNorm.py ADDED
@@ -0,0 +1,45 @@
+ """
+ 2025.3.17
+ 2025.3.19
+ 4.50.0
+ 0.15.2
+ __UNSLOTH_VERSIONING__
+ """
+
+ # Unsloth Zoo - Utilities for Unsloth
+ # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ import os
+ import importlib.util
+ if importlib.util.find_spec("unsloth_studio") is None:
+     UNSLOTH_STUDIO_ENABLED = False
+ else:
+     UNSLOTH_STUDIO_ENABLED = os.environ.get("UNSLOTH_STUDIO_DISABLED", "0") == "0"
+ pass
+ from typing import List, Dict, Tuple, Optional, Any, Callable
+ import math
+
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
+ from torch import Tensor
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+
+
+ def forward(self, input: Tensor) -> Tensor:
+     return F.layer_norm(
+         input, self.normalized_shape, self.weight, self.bias, self.eps
+     ).to(input.dtype).to(input.dtype)
unsloth_compiled_cache/Linear4bit_peft_forward.py ADDED
@@ -0,0 +1,97 @@
+ """
+ 2025.3.17
+ 2025.3.19
+ 4.50.0
+ 0.15.2
+ __UNSLOTH_VERSIONING__
+ """
+
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
+ from torch import Tensor
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+ from peft.tuners.lora.bnb import (torch)
+
+
+ torch_addmm = torch.addmm
+ torch_add = torch.add
+ # @torch.compile(fullgraph = False, dynamic = True, options = torch_compile_options)
+ def lora_forward(result, lora_A, lora_B, dropout, x, scaling):
+     xA = dropout(x) @ lora_A.weight.t()
+     # output = result + scaling * xA @ lora_B.weight.t()
+     shape = result.shape
+     output = torch_addmm(
+         result.view(-1, shape[-1]),
+         xA.view(-1, xA.shape[-1]),
+         lora_B.weight.t(),
+         alpha = scaling,
+         beta = 1,
+     ).view(shape)
+
+     bias = lora_B.bias
+     if bias is not None:
+         output = torch_add(
+             output,
+             bias,
+             alpha = scaling,
+         )
+     return output
+ pass
+
+ def unsloth_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+
+     adapter_names = kwargs.pop("adapter_names", None)
+
+     if self.disable_adapters:
+         if self.merged:
+             self.unmerge()
+         result = self.base_layer(x, *args, **kwargs)
+     elif adapter_names is not None:
+         result = self._mixed_batch_forward(x, *args, adapter_names=adapter_names, **kwargs)
+     elif self.merged:
+         result = self.base_layer(x, *args, **kwargs)
+     else:
+         result = self.base_layer(x, *args, **kwargs)
+         # As per Tim Dettmers, for 4bit, we need to defensively clone here.
+         # The reason is that in some cases, an error can occur that backprop
+         # does not work on a manipulated view. This issue may be solved with
+         # newer PyTorch versions but this would need extensive testing to be
+         # sure.
+
+
+         for active_adapter in self.active_adapters:
+             if active_adapter not in self.lora_A.keys():
+                 continue
+             lora_A = self.lora_A[active_adapter]
+             lora_B = self.lora_B[active_adapter]
+             dropout = self.lora_dropout[active_adapter]
+             scaling = self.scaling[active_adapter]
+
+             requires_conversion = not torch.is_autocast_enabled()
+             if requires_conversion:
+                 expected_dtype = result.dtype
+                 x = self._cast_input_dtype(x, lora_A.weight.dtype)
+
+             if not self.use_dora[active_adapter]:
+                 return lora_forward(result, lora_A, lora_B, dropout, x, scaling)
+             else:
+                 if isinstance(dropout, torch.nn.Identity) or not self.training:
+                     base_result = result
+                 else:
+                     x = dropout(x)
+                     base_result = None
+
+                 output = self.lora_magnitude_vector[active_adapter](
+                     x,
+                     lora_A=lora_A,
+                     lora_B=lora_B,
+                     scaling=scaling,
+                     base_layer=self.get_base_layer(),
+                     base_result=base_result,
+                 )
+                 if requires_conversion:
+                     output = output.to(expected_dtype)
+                 result = result + output
+
+     return result
unsloth_compiled_cache/Linear8bitLt_peft_forward.py ADDED
@@ -0,0 +1,90 @@
+ """
+ 2025.3.17
+ 2025.3.19
+ 4.50.0
+ 0.15.2
+ __UNSLOTH_VERSIONING__
+ """
+
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
+ from torch import Tensor
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+ from peft.tuners.lora.bnb import (torch)
+
+
+ torch_addmm = torch.addmm
+ torch_add = torch.add
+ # @torch.compile(fullgraph = False, dynamic = True, options = torch_compile_options)
+ def lora_forward(result, lora_A, lora_B, dropout, x, scaling):
+     xA = dropout(x) @ lora_A.weight.t()
+     # output = result + scaling * xA @ lora_B.weight.t()
+     shape = result.shape
+     output = torch_addmm(
+         result.view(-1, shape[-1]),
+         xA.view(-1, xA.shape[-1]),
+         lora_B.weight.t(),
+         alpha = scaling,
+         beta = 1,
+     ).view(shape)
+
+     bias = lora_B.bias
+     if bias is not None:
+         output = torch_add(
+             output,
+             bias,
+             alpha = scaling,
+         )
+     return output
+ pass
+
+ def unsloth_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+
+     adapter_names = kwargs.pop("adapter_names", None)
+
+     if self.disable_adapters:
+         if self.merged:
+             self.unmerge()
+         result = self.base_layer(x, *args, **kwargs)
+     elif adapter_names is not None:
+         result = self._mixed_batch_forward(x, *args, adapter_names=adapter_names, **kwargs)
+     elif self.merged:
+         result = self.base_layer(x, *args, **kwargs)
+     else:
+         result = self.base_layer(x, *args, **kwargs)
+         for active_adapter in self.active_adapters:
+             if active_adapter not in self.lora_A.keys():
+                 continue
+             lora_A = self.lora_A[active_adapter]
+             lora_B = self.lora_B[active_adapter]
+             dropout = self.lora_dropout[active_adapter]
+             scaling = self.scaling[active_adapter]
+
+             requires_conversion = not torch.is_autocast_enabled()
+             if requires_conversion:
+                 expected_dtype = result.dtype
+                 x = self._cast_input_dtype(x, lora_A.weight.dtype)
+
+             if not self.use_dora[active_adapter]:
+                 return lora_forward(result, lora_A, lora_B, dropout, x, scaling)
+             else:
+                 if isinstance(dropout, torch.nn.Identity) or not self.training:
+                     base_result = result
+                 else:
+                     x = dropout(x)
+                     base_result = None
+
+                 output = self.lora_magnitude_vector[active_adapter](
+                     x,
+                     lora_A=lora_A,
+                     lora_B=lora_B,
+                     scaling=scaling,
+                     base_layer=self.get_base_layer(),
+                     base_result=base_result,
+                 )
+                 if requires_conversion:
+                     output = output.to(expected_dtype)
+                 result = result + output
+
+     return result
unsloth_compiled_cache/Linear_peft_forward.py ADDED
@@ -0,0 +1,89 @@
+ """
+ 2025.3.17
+ 2025.3.19
+ 4.50.0
+ 0.15.2
+ __UNSLOTH_VERSIONING__
+ """
+
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
+ from torch import Tensor
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+ from peft.tuners.lora.layer import (Any, F, nn, torch)
+
+
+ torch_addmm = torch.addmm
+ torch_add = torch.add
+ # @torch.compile(fullgraph = False, dynamic = True, options = torch_compile_options)
+ def lora_forward(result, lora_A, lora_B, dropout, x, scaling):
+     xA = dropout(x) @ lora_A.weight.t()
+     # output = result + scaling * xA @ lora_B.weight.t()
+     shape = result.shape
+     output = torch_addmm(
+         result.view(-1, shape[-1]),
+         xA.view(-1, xA.shape[-1]),
+         lora_B.weight.t(),
+         alpha = scaling,
+         beta = 1,
+     ).view(shape)
+
+     bias = lora_B.bias
+     if bias is not None:
+         output = torch_add(
+             output,
+             bias,
+             alpha = scaling,
+         )
+     return output
+ pass
+
+ def unsloth_forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
+
+     adapter_names = kwargs.pop("adapter_names", None)
+
+     if self.disable_adapters:
+         if self.merged:
+             self.unmerge()
+         result = self.base_layer(x, *args, **kwargs)
+     elif adapter_names is not None:
+         result = self._mixed_batch_forward(x, *args, adapter_names=adapter_names, **kwargs)
+     elif self.merged:
+         result = self.base_layer(x, *args, **kwargs)
+     else:
+         result = self.base_layer(x, *args, **kwargs)
+         torch_result_dtype = result.dtype
+
+         lora_A_keys = self.lora_A.keys()
+         for active_adapter in self.active_adapters:
+             if active_adapter not in lora_A_keys:
+                 continue
+
+             lora_A = self.lora_A[active_adapter]
+             lora_B = self.lora_B[active_adapter]
+             dropout = self.lora_dropout[active_adapter]
+             scaling = self.scaling[active_adapter]
+             if not torch.is_autocast_enabled(): result, x = result.to(lora_A.weight.dtype), x.to(lora_A.weight.dtype)
+
+             if not self.use_dora[active_adapter]:
+                 return lora_forward(result, lora_A, lora_B, dropout, x, scaling)
+             else:
+                 if isinstance(dropout, nn.Identity) or not self.training:
+                     base_result = result
+                 else:
+                     x = dropout(x)
+                     base_result = None
+
+                 result = result + self.lora_magnitude_vector[active_adapter](
+                     x,
+                     lora_A=lora_A,
+                     lora_B=lora_B,
+                     scaling=scaling,
+                     base_layer=self.get_base_layer(),
+                     base_result=base_result,
+                 )
+
+         result = result.to(torch_result_dtype)
+
+     return result
unsloth_compiled_cache/LoraParallelLinear_peft_forward.py ADDED
@@ -0,0 +1,87 @@
+ """
+ 2025.3.17
+ 2025.3.19
+ 4.50.0
+ 0.15.2
+ __UNSLOTH_VERSIONING__
+ """
+
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
+ from torch import Tensor
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+ from peft.tuners.lora.tp_layer import (Any, __name__, nn, torch)
+
+
+ torch_addmm = torch.addmm
+ torch_add = torch.add
+ # @torch.compile(fullgraph = False, dynamic = True, options = torch_compile_options)
+ def lora_forward(result, lora_A, lora_B, dropout, x, scaling):
+     xA = dropout(x) @ lora_A.weight.t()
+     # output = result + scaling * xA @ lora_B.weight.t()
+     shape = result.shape
+     output = torch_addmm(
+         result.view(-1, shape[-1]),
+         xA.view(-1, xA.shape[-1]),
+         lora_B.weight.t(),
+         alpha = scaling,
+         beta = 1,
+     ).view(shape)
+
+     bias = lora_B.bias
+     if bias is not None:
+         output = torch_add(
+             output,
+             bias,
+             alpha = scaling,
+         )
+     return output
+ pass
+
+ def unsloth_forward(self, x: torch.Tensor, *args: Any, **kwargs: Any):
+
+     adapter_names = kwargs.pop("adapter_names", None)
+     # If weight is used for matrix multiplication here, the final aggregation operation of the original
+     # parallel_linear layer will be missing, so we need to directly call its forward function to obtain the
+     # output of the original parallel_linear layer.
+     if self.disable_adapters:
+         if self.merged:
+             self.unmerge()
+         result, bias = self.base_layer(x, *args, **kwargs)
+     elif adapter_names is not None:
+         raise ValueError(f"{self.__class__.__name__} does not support mixed_batch_forward yet.")
+     elif self.merged:
+         result, bias = self.base_layer(x, *args, **kwargs)
+     else:
+         result, bias = self.base_layer(x, *args, **kwargs)
+         torch_result_dtype = result.dtype
+         for active_adapter in self.active_adapters:
+             if active_adapter not in self.lora_A.keys():
+                 continue
+             lora_A = self.lora_A[active_adapter]
+             lora_B = self.lora_B[active_adapter]
+             dropout = self.lora_dropout[active_adapter]
+             scaling = self.scaling[active_adapter]
+             if not torch.is_autocast_enabled(): result, x = result.to(lora_A.weight.dtype), x.to(lora_A.weight.dtype)
+
+             if not self.use_dora[active_adapter]:
+                 return lora_forward(result, lora_A, lora_B, dropout, x, scaling)
+             else:
+                 if isinstance(dropout, torch.nn.Identity) or not self.training:
+                     base_result = result
+                 else:
+                     x = dropout(x)
+                     base_result = None
+
+                 result = result + self.lora_magnitude_vector[active_adapter](
+                     x,
+                     lora_A=lora_A,
+                     lora_B=lora_B,
+                     scaling=scaling,
+                     base_layer=self.get_base_layer(),
+                     base_result=base_result,
+                 )
+
+         result = result.to(torch_result_dtype)
+     return result, bias
unsloth_compiled_cache/RMSNorm.py ADDED
@@ -0,0 +1,46 @@
+ """
+ 2025.3.17
+ 2025.3.19
+ 4.50.0
+ 0.15.2
+ __UNSLOTH_VERSIONING__
+ """
+
+ # Unsloth Zoo - Utilities for Unsloth
+ # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ import os
+ import importlib.util
+ if importlib.util.find_spec("unsloth_studio") is None:
+     UNSLOTH_STUDIO_ENABLED = False
+ else:
+     UNSLOTH_STUDIO_ENABLED = os.environ.get("UNSLOTH_STUDIO_DISABLED", "0") == "0"
+ pass
+ from typing import List, Dict, Tuple, Optional, Any, Callable
+ import math
+
+ torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}
+ from torch import Tensor
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+ from transformers.models.gemma3.modeling_gemma3 import (torch)
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+     """
+     Runs forward pass.
+     """
+     return F.rms_norm(x, self.normalized_shape, self.weight, self.eps).to(x.dtype).to(x.dtype)
unsloth_compiled_cache/UnslothAlignPropTrainer.py ADDED
@@ -0,0 +1,637 @@
1
+ """
2
+ 2025.3.17
3
+ 2025.3.19
4
+ 4.50.0
5
+ 0.15.2
6
+ __UNSLOTH_VERSIONING__
7
+ """
8
+ from torch import Tensor
9
+ import torch
10
+ import torch.nn as nn
11
+ from torch.nn import functional as F
12
+ from trl.trainer.alignprop_trainer import (Accelerator, AlignPropConfig, AlignPropTrainer, Any, Callable, DDPOStableDiffusionPipeline, Optional, ProjectConfiguration, PyTorchModelHubMixin, Union, defaultdict, generate_model_card, get_comet_experiment_url, is_wandb_available, logger, os, set_seed, textwrap, torch, wandb, warn)
13
+
14
+
15
+ import os
16
+ from typing import *
17
+ from dataclasses import dataclass, field
18
+ from packaging.version import Version
19
+ import torch
20
+ import numpy as np
21
+ from contextlib import nullcontext
22
+ from torch.nn import functional as F
23
+ from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling
24
+
25
+ torch_compile_options = {
26
+ "epilogue_fusion" : True,
27
+ "max_autotune" : False,
28
+ "shape_padding" : True,
29
+ "trace.enabled" : False,
30
+ "triton.cudagraphs" : False,
31
+ }
32
+
33
+ @torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,)
34
+ def selective_log_softmax(logits, index):
35
+ logits = logits.to(torch.float32)
36
+ selected_logits = torch.gather(logits, dim = -1, index = index.unsqueeze(-1)).squeeze(-1)
37
+ # loop to reduce peak mem consumption
38
+ # logsumexp_values = torch.stack([torch.logsumexp(lg, dim=-1) for lg in logits])
39
+ logsumexp_values = torch.logsumexp(logits, dim = -1)
40
+ per_token_logps = selected_logits - logsumexp_values # log_softmax(x_i) = x_i - logsumexp(x)
41
+ return per_token_logps
42
+ @dataclass
43
+ class UnslothAlignPropConfig(AlignPropConfig):
44
+ """
45
+
46
+ Configuration class for the [`AlignPropTrainer`].
47
+
48
+ Using [`~transformers.HfArgumentParser`] we can turn this class into
49
+ [argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
50
+ command line.
51
+
52
+ Parameters:
53
+ exp_name (`str`, *optional*, defaults to `os.path.basename(sys.argv[0])[: -len(".py")]`):
54
+ Name of this experiment (defaults to the file name without the extension).
55
+ run_name (`str`, *optional*, defaults to `""`):
56
+ Name of this run.
57
+ seed (`int`, *optional*, defaults to `0`):
58
+ Random seed for reproducibility.
59
+ log_with (`str` or `None`, *optional*, defaults to `None`):
60
+ Log with either `"wandb"` or `"tensorboard"`. Check
61
+ [tracking](https://huggingface.co/docs/accelerate/usage_guides/tracking) for more details.
62
+ log_image_freq (`int`, *optional*, defaults to `1`):
63
+ Frequency for logging images.
64
+ tracker_kwargs (`dict[str, Any]`, *optional*, defaults to `{}`):
65
+ Keyword arguments for the tracker (e.g., `wandb_project`).
66
+ accelerator_kwargs (`dict[str, Any]`, *optional*, defaults to `{}`):
67
+ Keyword arguments for the accelerator.
68
+ project_kwargs (`dict[str, Any]`, *optional*, defaults to `{}`):
69
+ Keyword arguments for the accelerator project config (e.g., `logging_dir`).
70
+ tracker_project_name (`str`, *optional*, defaults to `"trl"`):
71
+ Name of project to use for tracking.
72
+ logdir (`str`, *optional*, defaults to `"logs"`):
73
+ Top-level logging directory for checkpoint saving.
74
+ num_epochs (`int`, *optional*, defaults to `100`):
75
+ Number of epochs to train.
76
+ save_freq (`int`, *optional*, defaults to `1`):
77
+ Number of epochs between saving model checkpoints.
78
+ num_checkpoint_limit (`int`, *optional*, defaults to `5`):
79
+ Number of checkpoints to keep before overwriting old ones.
80
+ mixed_precision (`str`, *optional*, defaults to `"fp16"`):
81
+ Mixed precision training.
82
+ allow_tf32 (`bool`, *optional*, defaults to `True`):
83
+ Allow `tf32` on Ampere GPUs.
84
+ resume_from (`str`, *optional*, defaults to `""`):
85
+ Path to resume training from a checkpoint.
86
+ sample_num_steps (`int`, *optional*, defaults to `50`):
87
+ Number of sampler inference steps.
88
+ sample_eta (`float`, *optional*, defaults to `1.0`):
89
+ Eta parameter for the DDIM sampler.
90
+ sample_guidance_scale (`float`, *optional*, defaults to `5.0`):
91
+ Classifier-free guidance weight.
92
+ train_batch_size (`int`, *optional*, defaults to `1`):
93
+ Batch size for training.
94
+ train_use_8bit_adam (`bool`, *optional*, defaults to `False`):
95
+ Whether to use the 8bit Adam optimizer from `bitsandbytes`.
96
+ train_learning_rate (`float`, *optional*, defaults to `1e-3`):
97
+ Learning rate.
98
+ train_adam_beta1 (`float`, *optional*, defaults to `0.9`):
99
+ Beta1 for Adam optimizer.
100
+ train_adam_beta2 (`float`, *optional*, defaults to `0.999`):
101
+ Beta2 for Adam optimizer.
102
+ train_adam_weight_decay (`float`, *optional*, defaults to `1e-4`):
103
+ Weight decay for Adam optimizer.
104
+ train_adam_epsilon (`float`, *optional*, defaults to `1e-8`):
105
+ Epsilon value for Adam optimizer.
106
+ train_gradient_accumulation_steps (`int`, *optional*, defaults to `1`):
107
+ Number of gradient accumulation steps.
108
+ train_max_grad_norm (`float`, *optional*, defaults to `1.0`):
109
+ Maximum gradient norm for gradient clipping.
110
+ negative_prompts (`str` or `None`, *optional*, defaults to `None`):
111
+ Comma-separated list of prompts to use as negative examples.
112
+ truncated_backprop_rand (`bool`, *optional*, defaults to `True`):
113
+ If `True`, randomized truncation to different diffusion timesteps is used.
114
+ truncated_backprop_timestep (`int`, *optional*, defaults to `49`):
115
+ Absolute timestep to which the gradients are backpropagated. Used only if `truncated_backprop_rand=False`.
116
+ truncated_rand_backprop_minmax (`tuple[int, int]`, *optional*, defaults to `(0, 50)`):
117
+ Range of diffusion timesteps for randomized truncated backpropagation.
118
+ push_to_hub (`bool`, *optional*, defaults to `False`):
119
+ Whether to push the final model to the Hub.
120
+
121
+ """
122
+ vllm_sampling_params: Optional[Any] = field(
123
+ default = None,
124
+ metadata = {'help': 'vLLM SamplingParams'},
125
+ )
126
+ unsloth_num_chunks : Optional[int] = field(
127
+ default = -1,
128
+ metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
129
+ )
130
+ def __init__(
131
+ self,
132
+ exp_name = 'app',
133
+ run_name = '',
134
+ seed = 3407,
135
+ log_with = None,
136
+ log_image_freq = 1,
137
+ tracker_project_name = 'trl',
138
+ logdir = 'logs',
139
+ num_epochs = 100,
140
+ save_freq = 1,
141
+ num_checkpoint_limit = 5,
142
+ mixed_precision = 'fp16',
143
+ allow_tf32 = True,
144
+ resume_from = '',
145
+ sample_num_steps = 50,
146
+ sample_eta = 1.0,
147
+ sample_guidance_scale = 5.0,
148
+ train_batch_size = 1,
149
+ train_use_8bit_adam = False,
150
+ train_learning_rate = 5e-05,
151
+ train_adam_beta1 = 0.9,
152
+ train_adam_beta2 = 0.999,
153
+ train_adam_weight_decay = 0.01,
154
+ train_adam_epsilon = 1e-08,
155
+ train_gradient_accumulation_steps = 2,
156
+ train_max_grad_norm = 1.0,
157
+ negative_prompts = None,
158
+ truncated_backprop_rand = True,
159
+ truncated_backprop_timestep = 49,
160
+ push_to_hub = False,
161
+ vllm_sampling_params = None,
162
+ unsloth_num_chunks = -1,
163
+ **kwargs,
164
+ ):
165
+
166
+ super().__init__(
167
+ exp_name = exp_name,
168
+ run_name = run_name,
169
+ seed = seed,
170
+ log_with = log_with,
171
+ log_image_freq = log_image_freq,
172
+ tracker_project_name = tracker_project_name,
173
+ logdir = logdir,
174
+ num_epochs = num_epochs,
175
+ save_freq = save_freq,
176
+ num_checkpoint_limit = num_checkpoint_limit,
177
+ mixed_precision = mixed_precision,
178
+ allow_tf32 = allow_tf32,
179
+ resume_from = resume_from,
180
+ sample_num_steps = sample_num_steps,
181
+ sample_eta = sample_eta,
182
+ sample_guidance_scale = sample_guidance_scale,
183
+ train_batch_size = train_batch_size,
184
+ train_use_8bit_adam = train_use_8bit_adam,
185
+ train_learning_rate = train_learning_rate,
186
+ train_adam_beta1 = train_adam_beta1,
187
+ train_adam_beta2 = train_adam_beta2,
188
+ train_adam_weight_decay = train_adam_weight_decay,
189
+ train_adam_epsilon = train_adam_epsilon,
190
+ train_gradient_accumulation_steps = train_gradient_accumulation_steps,
191
+ train_max_grad_norm = train_max_grad_norm,
192
+ negative_prompts = negative_prompts,
193
+ truncated_backprop_rand = truncated_backprop_rand,
194
+ truncated_backprop_timestep = truncated_backprop_timestep,
195
+ push_to_hub = push_to_hub,**kwargs)
196
+ self.vllm_sampling_params = vllm_sampling_params
197
+ self.unsloth_num_chunks = unsloth_num_chunks
198
+ pass
199
+
200
+ class _UnslothAlignPropTrainer(PyTorchModelHubMixin):
201
+ """"""
202
+
203
+ _tag_names = ["trl", "alignprop"]
204
+
205
+ def __init__(
206
+ self,
207
+ config: AlignPropConfig,
208
+ reward_function: Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor],
209
+ prompt_function: Callable[[], tuple[str, Any]],
210
+ sd_pipeline: DDPOStableDiffusionPipeline,
211
+ image_samples_hook: Optional[Callable[[Any, Any, Any], Any]] = None,
212
+ ):
213
+ if image_samples_hook is None:
214
+ warn("No image_samples_hook provided; no images will be logged")
215
+
216
+ self.prompt_fn = prompt_function
217
+ self.reward_fn = reward_function
218
+ self.config = config
219
+ self.image_samples_callback = image_samples_hook
220
+
221
+ accelerator_project_config = ProjectConfiguration(**self.config.project_kwargs)
222
+
223
+ if self.config.resume_from:
224
+ self.config.resume_from = os.path.normpath(os.path.expanduser(self.config.resume_from))
225
+ if "checkpoint_" not in os.path.basename(self.config.resume_from):
226
+ # get the most recent checkpoint in this directory
227
+ checkpoints = list(
228
+ filter(
229
+ lambda x: "checkpoint_" in x,
230
+ os.listdir(self.config.resume_from),
231
+ )
232
+ )
233
+ if len(checkpoints) == 0:
234
+ raise ValueError(f"No checkpoints found in {self.config.resume_from}")
235
+ checkpoint_numbers = sorted([int(x.split("_")[-1]) for x in checkpoints])
236
+ self.config.resume_from = os.path.join(
237
+ self.config.resume_from,
238
+ f"checkpoint_{checkpoint_numbers[-1]}",
239
+ )
240
+
241
+ accelerator_project_config.iteration = checkpoint_numbers[-1] + 1
242
+
243
+ self.accelerator = Accelerator(
244
+ log_with=self.config.log_with,
245
+ mixed_precision=self.config.mixed_precision,
246
+ project_config=accelerator_project_config,
247
+ # we always accumulate gradients across timesteps; we want config.train.gradient_accumulation_steps to be the
248
+ # number of *samples* we accumulate across, so we need to multiply by the number of training timesteps to get
249
+ # the total number of optimizer steps to accumulate across.
250
+ gradient_accumulation_steps=self.config.train_gradient_accumulation_steps,
251
+ **self.config.accelerator_kwargs,
252
+ )
253
+
254
+ is_using_tensorboard = config.log_with is not None and config.log_with == "tensorboard"
255
+
256
+ if self.accelerator.is_main_process:
257
+ self.accelerator.init_trackers(
258
+ self.config.tracker_project_name,
259
+ config=dict(alignprop_trainer_config=config.to_dict())
260
+ if not is_using_tensorboard
261
+ else config.to_dict(),
262
+ init_kwargs=self.config.tracker_kwargs,
263
+ )
264
+
265
+ logger.info(f"\n{config}")
266
+
267
+ set_seed(self.config.seed, device_specific=True)
268
+
269
+ self.sd_pipeline = sd_pipeline
270
+
271
+ self.sd_pipeline.set_progress_bar_config(
272
+ position=1,
273
+ disable=not self.accelerator.is_local_main_process,
274
+ leave=False,
275
+ desc="Timestep",
276
+ dynamic_ncols=True,
277
+ )
278
+
279
+ # For mixed precision training we cast all non-trainable weights (vae, non-lora text_encoder and non-lora unet) to half-precision
280
+ # as these weights are only used for inference, keeping weights in full precision is not required.
281
+ if self.accelerator.mixed_precision == "fp16":
282
+ inference_dtype = torch.float16
283
+ elif self.accelerator.mixed_precision == "bf16":
284
+ inference_dtype = torch.bfloat16
285
+ else:
286
+ inference_dtype = torch.float32
287
+
288
+ self.sd_pipeline.vae.to(self.accelerator.device, dtype=inference_dtype)
289
+ self.sd_pipeline.text_encoder.to(self.accelerator.device, dtype=inference_dtype)
290
+ self.sd_pipeline.unet.to(self.accelerator.device, dtype=inference_dtype)
291
+
292
+ trainable_layers = self.sd_pipeline.get_trainable_layers()
293
+
294
+ self.accelerator.register_save_state_pre_hook(self._save_model_hook)
295
+ self.accelerator.register_load_state_pre_hook(self._load_model_hook)
296
+
297
+ # Enable TF32 for faster training on Ampere GPUs,
298
+ # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices
299
+ if self.config.allow_tf32:
300
+ torch.backends.cuda.matmul.allow_tf32 = True
301
+
302
+ self.optimizer = self._setup_optimizer(
303
+ trainable_layers.parameters() if not isinstance(trainable_layers, list) else trainable_layers
304
+ )
305
+
306
+ self.neg_prompt_embed = self.sd_pipeline.text_encoder(
307
+ self.sd_pipeline.tokenizer(
308
+ [""] if self.config.negative_prompts is None else self.config.negative_prompts,
309
+ return_tensors="pt",
310
+ padding="max_length",
311
+ truncation=True,
312
+ max_length=self.sd_pipeline.tokenizer.model_max_length,
313
+ ).input_ids.to(self.accelerator.device)
314
+ )[0]
315
+
316
+ # NOTE: for some reason, autocast is necessary for non-lora training but for lora training it isn't necessary and it uses
317
+ # more memory
318
+ self.autocast = self.sd_pipeline.autocast or self.accelerator.autocast
319
+
320
+ if hasattr(self.sd_pipeline, "use_lora") and self.sd_pipeline.use_lora:
321
+ unet, self.optimizer = self.accelerator.prepare(trainable_layers, self.optimizer)
322
+ self.trainable_layers = list(filter(lambda p: p.requires_grad, unet.parameters()))
323
+ else:
324
+ self.trainable_layers, self.optimizer = self.accelerator.prepare(trainable_layers, self.optimizer)
325
+
326
+ if config.resume_from:
327
+ logger.info(f"Resuming from {config.resume_from}")
328
+ self.accelerator.load_state(config.resume_from)
329
+ self.first_epoch = int(config.resume_from.split("_")[-1]) + 1
330
+ else:
331
+ self.first_epoch = 0
332
+
333
+ def compute_rewards(self, prompt_image_pairs):
334
+ reward, reward_metadata = self.reward_fn(
335
+ prompt_image_pairs["images"], prompt_image_pairs["prompts"], prompt_image_pairs["prompt_metadata"]
336
+ )
337
+ return reward
338
+
339
+ def step(self, epoch: int, global_step: int):
340
+ """
341
+ Perform a single step of training.
342
+
343
+ Args:
344
+ epoch (int): The current epoch.
345
+ global_step (int): The current global step.
346
+
347
+ Side Effects:
348
+ - Model weights are updated
349
+ - Logs the statistics to the accelerator trackers.
350
+ - If `self.image_samples_callback` is not None, it will be called with the prompt_image_pairs, global_step, and the accelerator tracker.
351
+
352
+ Returns:
353
+ global_step (int): The updated global step.
354
+ """
355
+ info = defaultdict(list)
356
+
357
+ self.sd_pipeline.unet.train()
358
+
359
+ for _ in range(self.config.train_gradient_accumulation_steps):
360
+ with self.accelerator.accumulate(self.sd_pipeline.unet), self.autocast(), torch.enable_grad():
361
+ prompt_image_pairs = self._generate_samples(
362
+ batch_size=self.config.train_batch_size,
363
+ )
364
+
365
+ rewards = self.compute_rewards(prompt_image_pairs)
366
+
367
+ prompt_image_pairs["rewards"] = rewards
368
+
369
+ rewards_vis = self.accelerator.gather(rewards).detach().cpu().numpy()
370
+
371
+ loss = self.calculate_loss(rewards)
372
+
373
+ self.accelerator.backward(loss)
374
+
375
+ if self.accelerator.sync_gradients:
376
+ self.accelerator.clip_grad_norm_(
377
+ self.trainable_layers.parameters()
378
+ if not isinstance(self.trainable_layers, list)
379
+ else self.trainable_layers,
380
+ self.config.train_max_grad_norm,
381
+ )
382
+
383
+ self.optimizer.step()
384
+ self.optimizer.zero_grad()
385
+
386
+ info["reward_mean"].append(rewards_vis.mean())
387
+ info["reward_std"].append(rewards_vis.std())
388
+ info["loss"].append(loss.item())
389
+
390
+ # Checks if the accelerator has performed an optimization step behind the scenes
391
+ if self.accelerator.sync_gradients:
392
+ # log training-related stuff
393
+ info = {k: torch.mean(torch.tensor(v)) for k, v in info.items()}
394
+ info = self.accelerator.reduce(info, reduction="mean")
395
+ info.update({"epoch": epoch})
396
+ self.accelerator.log(info, step=global_step)
397
+ global_step += 1
398
+ info = defaultdict(list)
399
+ else:
400
+ raise ValueError(
401
+ "Optimization step should have been performed by this point. Please check calculated gradient accumulation settings."
402
+ )
403
+ # Logs generated images
404
+ if self.image_samples_callback is not None and global_step % self.config.log_image_freq == 0:
405
+ self.image_samples_callback(prompt_image_pairs, global_step, self.accelerator.trackers[0])
406
+
407
+ if epoch != 0 and epoch % self.config.save_freq == 0 and self.accelerator.is_main_process:
408
+ self.accelerator.save_state()
409
+
410
+ return global_step
411
+
412
+ def calculate_loss(self, rewards):
413
+ """
414
+ Calculate the loss for a batch of an unpacked sample
415
+
416
+ Args:
417
+ rewards (torch.Tensor):
418
+ Differentiable reward scalars for each generated image, shape: [batch_size]
419
+
420
+ Returns:
421
+ loss (torch.Tensor)
422
+ (all of these are of shape (1,))
423
+ """
424
+ # Loss is specific to Aesthetic Reward function used in AlignProp (https://huggingface.co/papers/2310.03739)
425
+ loss = 10.0 - (rewards).mean()
426
+ return loss
427
+
428
+ def loss(
429
+ self,
430
+ advantages: torch.Tensor,
431
+ clip_range: float,
432
+ ratio: torch.Tensor,
433
+ ):
434
+ unclipped_loss = -advantages * ratio
435
+ clipped_loss = -advantages * torch.clamp(
436
+ ratio,
437
+ 1.0 - clip_range,
438
+ 1.0 + clip_range,
439
+ )
440
+ return torch.mean(torch.maximum(unclipped_loss, clipped_loss))
441
+
442
+ def _setup_optimizer(self, trainable_layers_parameters):
443
+ if self.config.train_use_8bit_adam:
444
+ import bitsandbytes
445
+
446
+ optimizer_cls = bitsandbytes.optim.AdamW8bit
447
+ else:
448
+ optimizer_cls = torch.optim.AdamW
449
+
450
+ return optimizer_cls(
451
+ trainable_layers_parameters,
452
+ lr=self.config.train_learning_rate,
453
+ betas=(self.config.train_adam_beta1, self.config.train_adam_beta2),
454
+ weight_decay=self.config.train_adam_weight_decay,
455
+ eps=self.config.train_adam_epsilon,
456
+ )
457
+
458
+ def _save_model_hook(self, models, weights, output_dir):
459
+ self.sd_pipeline.save_checkpoint(models, weights, output_dir)
460
+ weights.pop() # ensures that accelerate doesn't try to handle saving of the model
461
+
462
+ def _load_model_hook(self, models, input_dir):
463
+ self.sd_pipeline.load_checkpoint(models, input_dir)
464
+ models.pop() # ensures that accelerate doesn't try to handle loading of the model
465
+
466
+ def _generate_samples(self, batch_size, with_grad=True, prompts=None):
467
+ """
468
+ Generate samples from the model
469
+
470
+ Args:
471
+ batch_size (int): Batch size to use for sampling
472
+ with_grad (bool): Whether the generated RGBs should have gradients attached to it.
473
+
474
+ Returns:
475
+ prompt_image_pairs (dict[Any])
476
+ """
477
+ prompt_image_pairs = {}
478
+
479
+ sample_neg_prompt_embeds = self.neg_prompt_embed.repeat(batch_size, 1, 1)
480
+
481
+ if prompts is None:
482
+ prompts, prompt_metadata = zip(*[self.prompt_fn() for _ in range(batch_size)])
483
+ else:
484
+ prompt_metadata = [{} for _ in range(batch_size)]
485
+
486
+ prompt_ids = self.sd_pipeline.tokenizer(
487
+ prompts,
488
+ return_tensors="pt",
489
+ padding="max_length",
490
+ truncation=True,
491
+ max_length=self.sd_pipeline.tokenizer.model_max_length,
492
+ ).input_ids.to(self.accelerator.device)
493
+
494
+ prompt_embeds = self.sd_pipeline.text_encoder(prompt_ids)[0]
495
+
496
+ if with_grad:
497
+ sd_output = self.sd_pipeline.rgb_with_grad(
498
+ prompt_embeds=prompt_embeds,
499
+ negative_prompt_embeds=sample_neg_prompt_embeds,
500
+ num_inference_steps=self.config.sample_num_steps,
501
+ guidance_scale=self.config.sample_guidance_scale,
502
+ eta=self.config.sample_eta,
503
+ truncated_backprop_rand=self.config.truncated_backprop_rand,
504
+ truncated_backprop_timestep=self.config.truncated_backprop_timestep,
505
+ truncated_rand_backprop_minmax=self.config.truncated_rand_backprop_minmax,
506
+ output_type="pt",
507
+ )
508
+ else:
509
+ sd_output = self.sd_pipeline(
510
+ prompt_embeds=prompt_embeds,
511
+ negative_prompt_embeds=sample_neg_prompt_embeds,
512
+ num_inference_steps=self.config.sample_num_steps,
513
+ guidance_scale=self.config.sample_guidance_scale,
514
+ eta=self.config.sample_eta,
515
+ output_type="pt",
516
+ )
517
+
518
+ images = sd_output.images
519
+
520
+ prompt_image_pairs["images"] = images
521
+ prompt_image_pairs["prompts"] = prompts
522
+ prompt_image_pairs["prompt_metadata"] = prompt_metadata
523
+
524
+ return prompt_image_pairs
525
+
526
+ def train(self, epochs: Optional[int] = None):
527
+ """
528
+ Train the model for a given number of epochs
529
+ """
530
+ global_step = 0
531
+ if epochs is None:
532
+ epochs = self.config.num_epochs
533
+ for epoch in range(self.first_epoch, epochs):
534
+ global_step = self.step(epoch, global_step)
535
+
536
+ def _save_pretrained(self, save_directory):
537
+ self.sd_pipeline.save_pretrained(save_directory)
538
+ self.create_model_card()
539
+
540
+ def create_model_card(
541
+ self,
542
+ model_name: Optional[str] = None,
543
+ dataset_name: Optional[str] = None,
544
+ tags: Union[str, list[str], None] = None,
545
+ ):
546
+ """
547
+ Creates a draft of a model card using the information available to the `Trainer`.
548
+
549
+ Args:
550
+ model_name (`str` or `None`, *optional*, defaults to `None`):
551
+ Name of the model.
552
+ dataset_name (`str` or `None`, *optional*, defaults to `None`):
553
+ Name of the dataset used for training.
554
+ tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
555
+ Tags to be associated with the model card.
556
+ """
557
+ if not self.is_world_process_zero():
558
+ return
559
+
560
+ if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path):
561
+ base_model = self.model.config._name_or_path
562
+ else:
563
+ base_model = None
564
+
565
+ tags = tags or []
566
+ if isinstance(tags, str):
567
+ tags = [tags]
568
+
569
+ if hasattr(self.model.config, "unsloth_version"):
570
+ tags.append("unsloth")
571
+
572
+ citation = textwrap.dedent("""\
573
+ @article{prabhudesai2024aligning,
574
+ title = {{Aligning Text-to-Image Diffusion Models with Reward Backpropagation}},
575
+ author = {Mihir Prabhudesai and Anirudh Goyal and Deepak Pathak and Katerina Fragkiadaki},
576
+ year = 2024,
577
+ eprint = {arXiv:2310.03739}
578
+ }""")
579
+
580
+ model_card = generate_model_card(
581
+ base_model=base_model,
582
+ model_name=model_name,
583
+ hub_model_id=self.hub_model_id,
584
+ dataset_name=dataset_name,
585
+ tags=tags,
586
+ wandb_url=wandb.run.get_url() if is_wandb_available() and wandb.run is not None else None,
587
+ comet_url=get_comet_experiment_url(),
588
+ trainer_name="AlignProp",
589
+ trainer_citation=citation,
590
+ paper_title="Aligning Text-to-Image Diffusion Models with Reward Backpropagation",
591
+ paper_id="2310.03739",
592
+ )
593
+
594
+ model_card.save(os.path.join(self.args.output_dir, "README.md"))
595
+ class UnslothAlignPropTrainer(_UnslothAlignPropTrainer):
596
+ """
597
+
598
+ The AlignPropTrainer uses Deep Diffusion Policy Optimization to optimise diffusion models.
599
+ Note, this trainer is heavily inspired by the work here: https://github.com/mihirp1998/AlignProp/
600
+ As of now only Stable Diffusion based pipelines are supported
601
+
602
+ Attributes:
603
+ config (`AlignPropConfig`):
604
+ Configuration object for AlignPropTrainer. Check the documentation of `PPOConfig` for more details.
605
+ reward_function (`Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor]`):
606
+ Reward function to be used
607
+ prompt_function (`Callable[[], tuple[str, Any]]`):
608
+ Function to generate prompts to guide model
609
+ sd_pipeline (`DDPOStableDiffusionPipeline`):
610
+ Stable Diffusion pipeline to be used for training.
611
+ image_samples_hook (`Optional[Callable[[Any, Any, Any], Any]]`):
612
+ Hook to be called to log images
613
+
614
+ """
615
+ def __init__(
616
+ self,
617
+ config,
618
+ reward_function,
619
+ prompt_function,
620
+ sd_pipeline,
621
+ image_samples_hook = None,
622
+ **kwargs
623
+ ):
624
+ if args is None: args = UnslothAlignPropConfig()
625
+ other_metrics = []
626
+
627
+ from unsloth_zoo.logging_utils import PatchRLStatistics
628
+ PatchRLStatistics('alignprop_trainer', other_metrics)
629
+
630
+ super().__init__(
631
+ config = config,
632
+ reward_function = reward_function,
633
+ prompt_function = prompt_function,
634
+ sd_pipeline = sd_pipeline,
635
+ image_samples_hook = image_samples_hook,**kwargs)
636
+
637
+ pass
unsloth_compiled_cache/UnslothBCOTrainer.py ADDED
@@ -0,0 +1,1824 @@
1
+ """
2
+ 2025.3.17
3
+ 2025.3.19
4
+ 4.50.0
5
+ 0.15.2
6
+ __UNSLOTH_VERSIONING__
7
+ """
8
+ from torch import Tensor
9
+ import torch
10
+ import torch.nn as nn
11
+ from torch.nn import functional as F
12
+ from trl.trainer.bco_trainer import (Any, AutoModelForCausalLM, BCOConfig, BCOTrainer, BaseImageProcessor, CLF_NAME, Callable, DPODataCollatorWithPadding, DataCollator, DataLoader, Dataset, EvalLoopOutput, F, FeatureExtractionMixin, Literal, Optional, PartialState, PeftModel, PreTrainedModel, PreTrainedModelWrapper, PreTrainedTokenizerBase, ProcessorMixin, RUNNING_NAME, RunningMoments, SequentialSampler, Trainer, TrainerCallback, TrainingArguments, Union, _process_tokens, _tokenize, amp, contextmanager, create_reference_model, deepcopy, defaultdict, disable_dropout_in_model, generate_model_card, get_comet_experiment_url, has_length, inspect, is_comet_available, is_peft_available, is_sklearn_available, is_wandb_available, itemgetter, log_table_to_comet_experiment, maybe_apply_chat_template, nn, np, nullcontext, os, pad_to_length, pd, peft_module_casting_to_bf16, prepare_model_for_kbit_training, random, textwrap, torch, tqdm, transformers, version, wandb, warnings, F, Optional, PeftModel, PreTrainedModel, Trainer, is_peft_available, os, torch)
+ # `LogisticRegression` is used below for the density-ratio classifier but is not re-exported by the
+ # import above, so import it directly when scikit-learn is available (availability is checked in __init__).
+ if is_sklearn_available():
+ from sklearn.linear_model import LogisticRegression
13
+
14
+
15
+ import os
16
+ from typing import *
17
+ from dataclasses import dataclass, field
18
+ from packaging.version import Version
19
+ import torch
20
+ import numpy as np
21
+ from contextlib import nullcontext
22
+ from torch.nn import functional as F
23
+ from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling
24
+
25
+ torch_compile_options = {
26
+ "epilogue_fusion" : True,
27
+ "max_autotune" : False,
28
+ "shape_padding" : True,
29
+ "trace.enabled" : False,
30
+ "triton.cudagraphs" : False,
31
+ }
32
+
33
+ @torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,)
34
+ def selective_log_softmax(logits, index):
35
+ logits = logits.to(torch.float32)
36
+ selected_logits = torch.gather(logits, dim = -1, index = index.unsqueeze(-1)).squeeze(-1)
37
+ # loop to reduce peak mem consumption
38
+ # logsumexp_values = torch.stack([torch.logsumexp(lg, dim=-1) for lg in logits])
39
+ logsumexp_values = torch.logsumexp(logits, dim = -1)
40
+ per_token_logps = selected_logits - logsumexp_values # log_softmax(x_i) = x_i - logsumexp(x)
41
+ return per_token_logps
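+ # Illustrative sketch (added for clarity; not part of the generated file): selective_log_softmax
+ # gathers the log-probability of one chosen token id per position without materialising the full
+ # log_softmax tensor. Assuming small toy tensors:
+ #
+ #   logits = torch.randn(2, 4, 8)                # (batch, seq_len, vocab)
+ #   index  = torch.randint(0, 8, (2, 4))         # token ids whose log-probs we want
+ #   logps  = selective_log_softmax(logits, index)
+ #   # logps[i, j] == torch.log_softmax(logits[i, j].float(), dim=-1)[index[i, j]], shape (2, 4)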
42
+ @dataclass
43
+ class UnslothBCOConfig(BCOConfig):
44
+ """
45
+
46
+ Configuration class for the [`BCOTrainer`].
47
+
48
+ Using [`~transformers.HfArgumentParser`] we can turn this class into
49
+ [argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
50
+ command line.
51
+
52
+ Parameters:
53
+ max_length (`int` or `None`, *optional*, defaults to `1024`):
54
+ Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want
55
+ to use the default data collator.
56
+ max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
57
+ Maximum length of the prompt. This argument is required if you want to use the default data collator.
58
+ max_completion_length (`int` or `None`, *optional*, defaults to `None`):
59
+ Maximum length of the completion. This argument is required if you want to use the default data collator
60
+ and your model is an encoder-decoder.
61
+ beta (`float`, *optional*, defaults to `0.1`):
62
+ Parameter controlling the deviation from the reference model. Higher β means less deviation from the
63
+ reference model.
64
+ label_pad_token_id (`int`, *optional*, defaults to `-100`):
65
+ Label pad token id. This argument is required if you want to use the default data collator.
66
+ padding_value (`int` or `None`, *optional*, defaults to `None`):
67
+ Padding value to use. If `None`, the padding value of the tokenizer is used.
68
+ truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
69
+ Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
70
+ This argument is required if you want to use the default data collator.
71
+ disable_dropout (`bool`, *optional*, defaults to `True`):
72
+ Whether to disable dropout in the model and reference model.
73
+ generate_during_eval (`bool`, *optional*, defaults to `False`):
74
+ If `True`, generates and logs completions from both the model and the reference model to W&B or Comet during
75
+ evaluation.
76
+ is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
77
+ When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
78
+ you need to specify if the model returned by the callable is an encoder-decoder model.
79
+ precompute_ref_log_probs (`bool`, *optional*, defaults to `False`):
80
+ Whether to precompute reference model log probabilities for training and evaluation datasets. This is
81
+ useful when training without the reference model to reduce the total GPU memory needed.
82
+ model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
83
+ Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
84
+ string.
85
+ ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
86
+ Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the reference model
87
+ from a string.
88
+ dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
89
+ Number of processes to use for processing the dataset.
90
+ prompt_sample_size (`int`, *optional*, defaults to `1024`):
91
+ Number of prompts that are fed to density ratio classifier.
92
+ min_density_ratio (`float`, *optional*, defaults to `0.5`):
93
+ Minimum value of the density ratio. The estimated density ratio is clamped to this value.
94
+ max_density_ratio (`float`, *optional*, defaults to `10.0`):
95
+ Maximum value of the density ratio. The estimated density ratio is clamped to this value.
96
+
97
+ """
98
+ vllm_sampling_params: Optional[Any] = field(
99
+ default = None,
100
+ metadata = {'help': 'vLLM SamplingParams'},
101
+ )
102
+ unsloth_num_chunks : Optional[int] = field(
103
+ default = -1,
104
+ metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
105
+ )
106
+ def __init__(
107
+ self,
108
+ output_dir = None,
109
+ overwrite_output_dir = None,
110
+ do_train = False,
111
+ do_eval = False,
112
+ do_predict = False,
113
+ eval_strategy = 'no',
114
+ prediction_loss_only = False,
115
+ per_device_train_batch_size = 4,
116
+ per_device_eval_batch_size = 4,
117
+ per_gpu_train_batch_size = None,
118
+ per_gpu_eval_batch_size = None,
119
+ gradient_accumulation_steps = 2,
120
+ eval_accumulation_steps = 2,
121
+ eval_delay = 0,
122
+ torch_empty_cache_steps = 250,
123
+ learning_rate = 5e-05,
124
+ weight_decay = 0.01,
125
+ adam_beta1 = 0.9,
126
+ adam_beta2 = 0.999,
127
+ adam_epsilon = 1e-08,
128
+ max_grad_norm = 1.0,
129
+ num_train_epochs = 3.0,
130
+ max_steps = -1,
131
+ lr_scheduler_type = 'linear',
132
+ warmup_ratio = 0.1,
133
+ warmup_steps = 0,
134
+ log_level = 'passive',
135
+ log_level_replica = 'warning',
136
+ log_on_each_node = True,
137
+ logging_dir = None,
138
+ logging_strategy = 'steps',
139
+ logging_first_step = False,
140
+ logging_steps = 1,
141
+ logging_nan_inf_filter = False,
142
+ save_strategy = 'steps',
143
+ save_steps = 500,
144
+ save_total_limit = None,
145
+ save_safetensors = True,
146
+ save_on_each_node = False,
147
+ save_only_model = False,
148
+ restore_callback_states_from_checkpoint = False,
149
+ no_cuda = False,
150
+ use_cpu = False,
151
+ use_mps_device = False,
152
+ seed = 3407,
153
+ data_seed = 3407,
154
+ jit_mode_eval = False,
155
+ use_ipex = False,
156
+ bf16 = False,
157
+ fp16 = False,
158
+ fp16_opt_level = 'O1',
159
+ half_precision_backend = 'auto',
160
+ bf16_full_eval = False,
161
+ fp16_full_eval = False,
162
+ tf32 = None,
163
+ local_rank = -1,
164
+ ddp_backend = None,
165
+ tpu_num_cores = None,
166
+ tpu_metrics_debug = False,
167
+ debug = '',
168
+ dataloader_drop_last = False,
169
+ eval_steps = None,
170
+ dataloader_num_workers = 0,
171
+ dataloader_prefetch_factor = None,
172
+ past_index = -1,
173
+ run_name = None,
174
+ disable_tqdm = None,
175
+ remove_unused_columns = True,
176
+ label_names = None,
177
+ load_best_model_at_end = False,
178
+ metric_for_best_model = None,
179
+ greater_is_better = None,
180
+ ignore_data_skip = False,
181
+ fsdp = '',
182
+ fsdp_min_num_params = 0,
183
+ fsdp_config = None,
184
+ tp_size = 0,
185
+ fsdp_transformer_layer_cls_to_wrap = None,
186
+ accelerator_config = None,
187
+ deepspeed = None,
188
+ label_smoothing_factor = 0.0,
189
+ optim = 'adamw_8bit',
190
+ optim_args = None,
191
+ adafactor = False,
192
+ group_by_length = False,
193
+ length_column_name = 'length',
194
+ report_to = None,
195
+ ddp_find_unused_parameters = None,
196
+ ddp_bucket_cap_mb = None,
197
+ ddp_broadcast_buffers = None,
198
+ dataloader_pin_memory = True,
199
+ dataloader_persistent_workers = False,
200
+ skip_memory_metrics = True,
201
+ use_legacy_prediction_loop = False,
202
+ push_to_hub = False,
203
+ resume_from_checkpoint = None,
204
+ hub_model_id = None,
205
+ hub_strategy = 'every_save',
206
+ hub_token = None,
207
+ hub_private_repo = None,
208
+ hub_always_push = False,
209
+ gradient_checkpointing = False,
210
+ gradient_checkpointing_kwargs = None,
211
+ include_inputs_for_metrics = False,
212
+ eval_do_concat_batches = True,
213
+ fp16_backend = 'auto',
214
+ evaluation_strategy = None,
215
+ push_to_hub_model_id = None,
216
+ push_to_hub_organization = None,
217
+ push_to_hub_token = None,
218
+ mp_parameters = '',
219
+ auto_find_batch_size = False,
220
+ full_determinism = False,
221
+ torchdynamo = None,
222
+ ray_scope = 'last',
223
+ ddp_timeout = 1800,
224
+ torch_compile = False,
225
+ torch_compile_backend = None,
226
+ torch_compile_mode = None,
227
+ dispatch_batches = None,
228
+ split_batches = None,
229
+ include_tokens_per_second = False,
230
+ include_num_input_tokens_seen = False,
231
+ neftune_noise_alpha = None,
232
+ optim_target_modules = None,
233
+ batch_eval_metrics = False,
234
+ eval_on_start = False,
235
+ use_liger_kernel = False,
236
+ eval_use_gather_object = False,
237
+ average_tokens_across_devices = False,
238
+ max_length = 1024,
239
+ max_prompt_length = 512,
240
+ max_completion_length = None,
241
+ beta = 0.1,
242
+ label_pad_token_id = -100,
243
+ padding_value = None,
244
+ truncation_mode = 'keep_end',
245
+ disable_dropout = True,
246
+ generate_during_eval = False,
247
+ is_encoder_decoder = None,
248
+ precompute_ref_log_probs = False,
249
+ model_init_kwargs = None,
250
+ ref_model_init_kwargs = None,
251
+ dataset_num_proc = None,
252
+ prompt_sample_size = 1024,
253
+ min_density_ratio = 0.5,
254
+ max_density_ratio = 10.0,
255
+ vllm_sampling_params = None,
256
+ unsloth_num_chunks = -1,
257
+ **kwargs,
258
+ ):
259
+ if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small (less than 1e-7)! Consider increasing it, otherwise gradient updates will be close to 0!')
260
+ if learning_rate > 1: raise OverflowError(f'Unsloth: Your learning rate of `{learning_rate}` is way too large (> 1)! Consider decreasing it to 1e-1, otherwise gradient updates will explode!')
261
+ if output_dir is None and save_strategy == 'steps' and save_steps == 500:
262
+ output_dir = 'unsloth_training_checkpoints'
263
+ save_strategy = 'no'
264
+ if dataset_num_proc is None:
265
+ from multiprocessing import cpu_count
266
+ dataset_num_proc = cpu_count()
267
+
268
+ super().__init__(
269
+ output_dir = output_dir,
270
+ overwrite_output_dir = overwrite_output_dir,
271
+ do_train = do_train,
272
+ do_eval = do_eval,
273
+ do_predict = do_predict,
274
+ eval_strategy = eval_strategy,
275
+ prediction_loss_only = prediction_loss_only,
276
+ per_device_train_batch_size = per_device_train_batch_size,
277
+ per_device_eval_batch_size = per_device_eval_batch_size,
278
+ per_gpu_train_batch_size = per_gpu_train_batch_size,
279
+ per_gpu_eval_batch_size = per_gpu_eval_batch_size,
280
+ gradient_accumulation_steps = gradient_accumulation_steps,
281
+ eval_accumulation_steps = eval_accumulation_steps,
282
+ eval_delay = eval_delay,
283
+ torch_empty_cache_steps = torch_empty_cache_steps,
284
+ learning_rate = learning_rate,
285
+ weight_decay = weight_decay,
286
+ adam_beta1 = adam_beta1,
287
+ adam_beta2 = adam_beta2,
288
+ adam_epsilon = adam_epsilon,
289
+ max_grad_norm = max_grad_norm,
290
+ num_train_epochs = num_train_epochs,
291
+ max_steps = max_steps,
292
+ lr_scheduler_type = lr_scheduler_type,
293
+ warmup_ratio = warmup_ratio,
294
+ warmup_steps = warmup_steps,
295
+ log_level = log_level,
296
+ log_level_replica = log_level_replica,
297
+ log_on_each_node = log_on_each_node,
298
+ logging_dir = logging_dir,
299
+ logging_strategy = logging_strategy,
300
+ logging_first_step = logging_first_step,
301
+ logging_steps = logging_steps,
302
+ logging_nan_inf_filter = logging_nan_inf_filter,
303
+ save_strategy = save_strategy,
304
+ save_steps = save_steps,
305
+ save_total_limit = save_total_limit,
306
+ save_safetensors = save_safetensors,
307
+ save_on_each_node = save_on_each_node,
308
+ save_only_model = save_only_model,
309
+ restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint,
310
+ no_cuda = no_cuda,
311
+ use_cpu = use_cpu,
312
+ use_mps_device = use_mps_device,
313
+ seed = seed,
314
+ data_seed = data_seed,
315
+ jit_mode_eval = jit_mode_eval,
316
+ use_ipex = use_ipex,
317
+ bf16 = bf16,
318
+ fp16 = fp16,
319
+ fp16_opt_level = fp16_opt_level,
320
+ half_precision_backend = half_precision_backend,
321
+ bf16_full_eval = bf16_full_eval,
322
+ fp16_full_eval = fp16_full_eval,
323
+ tf32 = tf32,
324
+ local_rank = local_rank,
325
+ ddp_backend = ddp_backend,
326
+ tpu_num_cores = tpu_num_cores,
327
+ tpu_metrics_debug = tpu_metrics_debug,
328
+ debug = debug,
329
+ dataloader_drop_last = dataloader_drop_last,
330
+ eval_steps = eval_steps,
331
+ dataloader_num_workers = dataloader_num_workers,
332
+ dataloader_prefetch_factor = dataloader_prefetch_factor,
333
+ past_index = past_index,
334
+ run_name = run_name,
335
+ disable_tqdm = disable_tqdm,
336
+ remove_unused_columns = remove_unused_columns,
337
+ label_names = label_names,
338
+ load_best_model_at_end = load_best_model_at_end,
339
+ metric_for_best_model = metric_for_best_model,
340
+ greater_is_better = greater_is_better,
341
+ ignore_data_skip = ignore_data_skip,
342
+ fsdp = fsdp,
343
+ fsdp_min_num_params = fsdp_min_num_params,
344
+ fsdp_config = fsdp_config,
345
+ tp_size = tp_size,
346
+ fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap,
347
+ accelerator_config = accelerator_config,
348
+ deepspeed = deepspeed,
349
+ label_smoothing_factor = label_smoothing_factor,
350
+ optim = optim,
351
+ optim_args = optim_args,
352
+ adafactor = adafactor,
353
+ group_by_length = group_by_length,
354
+ length_column_name = length_column_name,
355
+ report_to = report_to,
356
+ ddp_find_unused_parameters = ddp_find_unused_parameters,
357
+ ddp_bucket_cap_mb = ddp_bucket_cap_mb,
358
+ ddp_broadcast_buffers = ddp_broadcast_buffers,
359
+ dataloader_pin_memory = dataloader_pin_memory,
360
+ dataloader_persistent_workers = dataloader_persistent_workers,
361
+ skip_memory_metrics = skip_memory_metrics,
362
+ use_legacy_prediction_loop = use_legacy_prediction_loop,
363
+ push_to_hub = push_to_hub,
364
+ resume_from_checkpoint = resume_from_checkpoint,
365
+ hub_model_id = hub_model_id,
366
+ hub_strategy = hub_strategy,
367
+ hub_token = hub_token,
368
+ hub_private_repo = hub_private_repo,
369
+ hub_always_push = hub_always_push,
370
+ gradient_checkpointing = gradient_checkpointing,
371
+ gradient_checkpointing_kwargs = gradient_checkpointing_kwargs,
372
+ include_inputs_for_metrics = include_inputs_for_metrics,
373
+ eval_do_concat_batches = eval_do_concat_batches,
374
+ fp16_backend = fp16_backend,
375
+ evaluation_strategy = evaluation_strategy,
376
+ push_to_hub_model_id = push_to_hub_model_id,
377
+ push_to_hub_organization = push_to_hub_organization,
378
+ push_to_hub_token = push_to_hub_token,
379
+ mp_parameters = mp_parameters,
380
+ auto_find_batch_size = auto_find_batch_size,
381
+ full_determinism = full_determinism,
382
+ torchdynamo = torchdynamo,
383
+ ray_scope = ray_scope,
384
+ ddp_timeout = ddp_timeout,
385
+ torch_compile = torch_compile,
386
+ torch_compile_backend = torch_compile_backend,
387
+ torch_compile_mode = torch_compile_mode,
388
+ dispatch_batches = dispatch_batches,
389
+ split_batches = split_batches,
390
+ include_tokens_per_second = include_tokens_per_second,
391
+ include_num_input_tokens_seen = include_num_input_tokens_seen,
392
+ neftune_noise_alpha = neftune_noise_alpha,
393
+ optim_target_modules = optim_target_modules,
394
+ batch_eval_metrics = batch_eval_metrics,
395
+ eval_on_start = eval_on_start,
396
+ use_liger_kernel = use_liger_kernel,
397
+ eval_use_gather_object = eval_use_gather_object,
398
+ average_tokens_across_devices = average_tokens_across_devices,
399
+ max_length = max_length,
400
+ max_prompt_length = max_prompt_length,
401
+ max_completion_length = max_completion_length,
402
+ beta = beta,
403
+ label_pad_token_id = label_pad_token_id,
404
+ padding_value = padding_value,
405
+ truncation_mode = truncation_mode,
406
+ disable_dropout = disable_dropout,
407
+ generate_during_eval = generate_during_eval,
408
+ is_encoder_decoder = is_encoder_decoder,
409
+ precompute_ref_log_probs = precompute_ref_log_probs,
410
+ model_init_kwargs = model_init_kwargs,
411
+ ref_model_init_kwargs = ref_model_init_kwargs,
412
+ dataset_num_proc = dataset_num_proc,
413
+ prompt_sample_size = prompt_sample_size,
414
+ min_density_ratio = min_density_ratio,
415
+ max_density_ratio = max_density_ratio,**kwargs)
416
+ self.vllm_sampling_params = vllm_sampling_params
417
+ self.unsloth_num_chunks = unsloth_num_chunks
418
+ pass
419
+
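+ # Illustrative sketch (added for clarity; not part of the generated file): the config above is a
+ # drop-in replacement for `BCOConfig`, so a typical, hedged construction looks like:
+ #
+ #   training_args = UnslothBCOConfig(
+ #       output_dir = "bco_output",        # hypothetical path
+ #       per_device_train_batch_size = 4,
+ #       learning_rate = 5e-5,
+ #       beta = 0.1,                       # strength of the penalty for drifting from the reference model
+ #       max_length = 1024,
+ #       max_prompt_length = 512,
+ #   )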
420
+ class _UnslothBCOTrainer(Trainer):
421
+ r""""""
422
+
423
+ _tag_names = ["trl", "bco"]
424
+
425
+ def __init__(
426
+ self,
427
+ model: Union[PreTrainedModel, nn.Module, str] = None,
428
+ ref_model: Optional[Union[PreTrainedModel, nn.Module, str]] = None,
429
+ args: BCOConfig = None,
430
+ train_dataset: Optional[Dataset] = None,
431
+ eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None,
432
+ processing_class: Optional[
433
+ Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin]
434
+ ] = None,
435
+ data_collator: Optional[DataCollator] = None,
436
+ model_init: Optional[Callable[[], PreTrainedModel]] = None,
437
+ callbacks: Optional[list[TrainerCallback]] = None,
438
+ optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
439
+ preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
440
+ peft_config: Optional[dict] = None,
441
+ compute_metrics: Optional[Callable[[EvalLoopOutput], dict]] = None,
442
+ model_adapter_name: Optional[str] = None,
443
+ ref_adapter_name: Optional[str] = None,
444
+ embedding_func: Optional[Callable] = None,
445
+ embedding_tokenizer: Optional[PreTrainedTokenizerBase] = None,
446
+ ):
447
+ if not is_sklearn_available():
448
+ raise ImportError(
449
+ "BCOTrainer requires the scikit-learn library. Please install it with `pip install scikit-learn`."
450
+ )
451
+
452
+ if type(args) is TrainingArguments:
453
+ raise ValueError("Please use `BCOConfig` instead `TrainingArguments`.")
454
+
455
+ if not isinstance(model, str) and ref_model is model:
456
+ raise ValueError(
457
+ "`model` and `ref_model` cannot be the same object. If you want `ref_model` to be the "
458
+ "same as `model`, you must mass a copy of it, or `None` if you use peft."
459
+ )
460
+
461
+ if args.model_init_kwargs is None:
462
+ model_init_kwargs = {}
463
+ elif not isinstance(model, str):
464
+ raise ValueError("You passed model_kwargs to the BCOTrainer. But your model is already instantiated.")
465
+ else:
466
+ model_init_kwargs = args.model_init_kwargs
467
+ torch_dtype = model_init_kwargs.get("torch_dtype")
468
+ if torch_dtype is not None:
469
+ # Convert to `torch.dtype` if an str is passed
470
+ if isinstance(torch_dtype, str) and torch_dtype != "auto":
471
+ torch_dtype = getattr(torch, torch_dtype)
472
+ if torch_dtype != "auto" and not isinstance(torch_dtype, torch.dtype):
473
+ raise ValueError(
474
+ f"Invalid `torch_dtype` passed to the BCOConfig. Expected a string with either `torch.dtype` or 'auto', but got {torch_dtype}."
475
+ )
476
+ model_init_kwargs["torch_dtype"] = torch_dtype
477
+
478
+ if args.ref_model_init_kwargs is None:
479
+ ref_model_init_kwargs = {}
480
+ elif not isinstance(ref_model, str):
481
+ raise ValueError(
482
+ "You passed ref_model_kwargs to the BCOTrainer. But your ref_model is already instantiated."
483
+ )
484
+ else:
485
+ ref_model_init_kwargs = args.ref_model_init_kwargs
486
+ torch_dtype = ref_model_init_kwargs.get("torch_dtype")
487
+ if torch_dtype is not None:
488
+ # Convert to `torch.dtype` if an str is passed
489
+ if isinstance(torch_dtype, str) and torch_dtype != "auto":
490
+ torch_dtype = getattr(torch, torch_dtype)
491
+ if torch_dtype != "auto" and not isinstance(torch_dtype, torch.dtype):
492
+ raise ValueError(
493
+ f"Invalid `torch_dtype` passed to the BCOConfig. Expected a string with either `torch.dtype` or 'auto', but got {torch_dtype}."
494
+ )
495
+ ref_model_init_kwargs["torch_dtype"] = torch_dtype
496
+
497
+ if isinstance(model, str):
498
+ model = AutoModelForCausalLM.from_pretrained(model, **model_init_kwargs)
499
+
500
+ if isinstance(ref_model, str):
501
+ ref_model = AutoModelForCausalLM.from_pretrained(ref_model, **ref_model_init_kwargs)
502
+
503
+ # Initialize this variable to False. This helps tracking the case when `peft_module_casting_to_bf16`
504
+ # has been called in order to properly call autocast if needed.
505
+ self._peft_has_been_casted_to_bf16 = False
506
+
507
+ if not is_peft_available() and peft_config is not None:
508
+ raise ValueError(
509
+ "PEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it with `pip install peft` to use the PEFT models"
510
+ )
511
+ elif is_peft_available() and peft_config is not None:
512
+ # if model is a peft model and we have a peft_config, we merge and unload it first
513
+ if isinstance(model, PeftModel):
514
+ model = model.merge_and_unload()
515
+
516
+ if getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False):
517
+ _support_gc_kwargs = hasattr(
518
+ args, "gradient_checkpointing_kwargs"
519
+ ) and "gradient_checkpointing_kwargs" in list(
520
+ inspect.signature(prepare_model_for_kbit_training).parameters
521
+ )
522
+
523
+ prepare_model_kwargs = {"use_gradient_checkpointing": args.gradient_checkpointing}
524
+
525
+ if _support_gc_kwargs:
526
+ prepare_model_kwargs["gradient_checkpointing_kwargs"] = args.gradient_checkpointing_kwargs
527
+
528
+ model = prepare_model_for_kbit_training(model, **prepare_model_kwargs)
529
+ elif getattr(args, "gradient_checkpointing", False):
530
+ # For backward compatibility with older versions of transformers
531
+ if hasattr(model, "enable_input_require_grads"):
532
+ model.enable_input_require_grads()
533
+ else:
534
+
535
+ def make_inputs_require_grad(module, input, output):
536
+ output.requires_grad_(True)
537
+
538
+ model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)
539
+
540
+ # Unsloth applies the PEFT config elsewhere, so the model is intentionally left as-is here
541
+ model = model
542
+ if args.bf16 and getattr(model, "is_loaded_in_4bit", False):
543
+ peft_module_casting_to_bf16(model)
544
+ # If args.bf16 we need to explicitly call `generate` with torch amp autocast context manager
545
+ self._peft_has_been_casted_to_bf16 = True
546
+
547
+ # For models that use gradient_checkpointing, we need to attach a hook that enables input
548
+ # to explicitly have `requires_grad=True`, otherwise training will either silently
549
+ # fail or completely fail.
550
+ elif getattr(args, "gradient_checkpointing", False):
551
+ # For backward compatibility with older versions of transformers
552
+ if hasattr(model, "enable_input_require_grads"):
553
+ model.enable_input_require_grads()
554
+ else:
555
+
556
+ def make_inputs_require_grad(module, input, output):
557
+ output.requires_grad_(True)
558
+
559
+ model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)
560
+
561
+ if args.generate_during_eval and not (is_wandb_available() or is_comet_available()):
562
+ raise ValueError(
563
+ "`generate_during_eval=True` requires Weights and Biases or Comet to be installed."
564
+ " Please install `wandb` or `comet-ml` to resolve."
565
+ )
566
+
567
+ if model is not None:
568
+ self.is_encoder_decoder = model.config.is_encoder_decoder
569
+ elif args.is_encoder_decoder is None:
570
+ raise ValueError("When no model is provided, you need to pass the parameter is_encoder_decoder.")
571
+ else:
572
+ self.is_encoder_decoder = args.is_encoder_decoder
573
+
574
+ self.is_peft_model = is_peft_available() and isinstance(model, PeftModel)
575
+ self.model_adapter_name = model_adapter_name
576
+ self.ref_adapter_name = ref_adapter_name
577
+
578
+ if ref_model:
579
+ self.ref_model = ref_model
580
+ elif self.is_peft_model or args.precompute_ref_log_probs:
581
+ # The `model` with adapters turned off will be used as the reference model
582
+ self.ref_model = None
583
+ else:
584
+ self.ref_model = create_reference_model(model)
585
+
586
+ if processing_class is None:
587
+ raise ValueError(
588
+ "max_length or a processing_class must be specified when using the default DPODataCollatorWithPadding"
589
+ )
590
+ if args.max_length is None:
591
+ warnings.warn(
592
+ "When using DPODataCollatorWithPadding, you should set `max_length` in the `BCOConfig`. "
593
+ "It will be set to `512` by default, but you should do it yourself in the future.",
594
+ UserWarning,
595
+ )
596
+ max_length = 512
597
+ if args.max_length is not None:
598
+ max_length = args.max_length
599
+
600
+ if args.max_prompt_length is None:
601
+ warnings.warn(
602
+ "When using DPODataCollatorWithPadding, you should set `max_prompt_length` in the `BCOConfig`. "
603
+ "It will be set to `128` by default, but you should do it yourself in the future.",
604
+ UserWarning,
605
+ )
606
+ max_prompt_length = 128
607
+ if args.max_prompt_length is not None:
608
+ max_prompt_length = args.max_prompt_length
609
+
610
+ max_completion_length = None
611
+ if args.max_completion_length is None and self.is_encoder_decoder:
612
+ warnings.warn(
613
+ "When using DPODataCollatorWithPadding with an encoder decoder architecture, you should set `max_completion_length` in the BCOTrainer's init"
614
+ " it will be set to `128` by default, but you should do it yourself in the future.",
615
+ UserWarning,
616
+ )
617
+ max_completion_length = 128
618
+ if args.max_completion_length is not None and self.is_encoder_decoder:
619
+ max_completion_length = args.max_completion_length
620
+
621
+ if data_collator is None:
622
+ data_collator = DPODataCollatorWithPadding(
623
+ pad_token_id=processing_class.pad_token_id,
624
+ label_pad_token_id=args.label_pad_token_id,
625
+ is_encoder_decoder=self.is_encoder_decoder,
626
+ )
627
+
628
+ if args.remove_unused_columns:
629
+ args.remove_unused_columns = False
630
+ # warn users
631
+ warnings.warn(
632
+ "When using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your BCOConfig"
633
+ " we have set it for you, but you should do it yourself in the future.",
634
+ UserWarning,
635
+ )
636
+
637
+ self.use_dpo_data_collator = True
638
+ else:
639
+ self.use_dpo_data_collator = False
640
+
641
+ # Disable dropout in the model and reference model
642
+ if args.disable_dropout:
643
+ disable_dropout_in_model(model)
644
+ if self.ref_model is not None:
645
+ disable_dropout_in_model(self.ref_model)
646
+
647
+ self.max_length = max_length
648
+ self.generate_during_eval = args.generate_during_eval
649
+ self.label_pad_token_id = args.label_pad_token_id
650
+ self.padding_value = args.padding_value if args.padding_value is not None else processing_class.pad_token_id
651
+ self.max_prompt_length = max_prompt_length
652
+ self.truncation_mode = args.truncation_mode
653
+ self.max_completion_length = max_completion_length
654
+ self.precompute_ref_log_probs = args.precompute_ref_log_probs
655
+
656
+ # Since ref_logs are precomputed on the first call to get_train/eval_dataloader
657
+ # keep track of the first call to avoid recomputing them on later calls
658
+ self._precomputed_train_ref_log_probs = False
659
+ self._precomputed_eval_ref_log_probs = False
660
+
661
+ # metric
662
+ self._stored_metrics = defaultdict(lambda: defaultdict(list))
663
+
664
+ # BCO parameter
665
+ self.beta = args.beta
666
+ self.aux_loss_enabled = getattr(model.config, "output_router_logits", False)
667
+ self.aux_loss_coef = getattr(model.config, "router_aux_loss_coef", 0.0)
668
+ if self.aux_loss_enabled and self.aux_loss_coef == 0.0:
669
+ warnings.warn(
670
+ "You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to "
671
+ "`0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value "
672
+ "greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary "
673
+ "loss.",
674
+ UserWarning,
675
+ )
676
+
677
+ # Underlying Distribution Matching argument
678
+ self.embedding_func = embedding_func
679
+ self.embedding_tokenizer = embedding_tokenizer
680
+
681
+ # The trainer estimates the number of FLOPs (floating-point operations) using the number of elements in the
682
+ # input tensor associated with the key "input_ids". However, in BCO, the sampled data does not include the
683
+ # "input_ids" key. Instead, the available keys are "prompt_input_ids" and "completion_input_ids". As a result,
684
+ # the trainer issues the warning: "Could not estimate the number of tokens of the input, floating-point
685
+ # operations will not be computed." To suppress this warning, we set the "estimate_tokens" key in the model's
686
+ # "warnings_issued" dictionary to True. This acts as a flag to indicate that the warning has already been
687
+ # issued.
688
+ model.warnings_issued["estimate_tokens"] = True
689
+
690
+ with PartialState().local_main_process_first():
691
+ # Apply the chat template if needed
692
+ train_dataset = train_dataset.map(
693
+ maybe_apply_chat_template, fn_kwargs={"tokenizer": processing_class}, num_proc=args.dataset_num_proc
694
+ )
695
+ if eval_dataset is not None:
696
+ eval_dataset = eval_dataset.map(
697
+ maybe_apply_chat_template,
698
+ fn_kwargs={"tokenizer": processing_class},
699
+ num_proc=args.dataset_num_proc,
700
+ )
701
+ # Shuffle the datasets
702
+ train_dataset = train_dataset.shuffle(seed=args.data_seed)
703
+ if eval_dataset is not None:
704
+ eval_dataset = eval_dataset.shuffle(seed=args.data_seed)
705
+ # Tokenize and prepare the training datasets
706
+ train_dataset = train_dataset.map(
707
+ _tokenize,
708
+ batched=True,
709
+ fn_kwargs={"tokenizer": processing_class, "embedding_tokenizer": self.embedding_tokenizer},
710
+ num_proc=args.dataset_num_proc,
711
+ desc="Tokenizing train dataset",
712
+ )
713
+
714
+ # Prepare the datasets
715
+ fn_kwargs = {
716
+ "prefix": "",
717
+ "is_encoder_decoder": self.is_encoder_decoder,
718
+ "tokenizer": processing_class,
719
+ "max_length": self.max_length,
720
+ "truncation_mode": self.truncation_mode,
721
+ "label_pad_token_id": self.label_pad_token_id,
722
+ "max_prompt_length": self.max_prompt_length,
723
+ "max_completion_length": self.max_completion_length,
724
+ }
725
+ train_dataset = train_dataset.map(
726
+ _process_tokens,
727
+ fn_kwargs=fn_kwargs,
728
+ num_proc=args.dataset_num_proc,
729
+ desc="Processing tokenized train dataset",
730
+ )
731
+
732
+ if eval_dataset is not None:
733
+ # Tokenize
734
+ eval_dataset = eval_dataset.map(
735
+ _tokenize,
736
+ fn_kwargs={"tokenizer": processing_class, "embedding_tokenizer": self.embedding_tokenizer},
737
+ batched=True,
738
+ num_proc=args.dataset_num_proc,
739
+ desc="Tokenizing eval dataset",
740
+ )
741
+
742
+ # Process
743
+ fn_kwargs = {
744
+ "prefix": "",
745
+ "is_encoder_decoder": self.is_encoder_decoder,
746
+ "tokenizer": processing_class,
747
+ "max_length": self.max_length,
748
+ "truncation_mode": self.truncation_mode,
749
+ "label_pad_token_id": self.label_pad_token_id,
750
+ "max_prompt_length": self.max_prompt_length,
751
+ "max_completion_length": self.max_completion_length,
752
+ }
753
+ eval_dataset = eval_dataset.map(
754
+ _process_tokens,
755
+ fn_kwargs=fn_kwargs,
756
+ num_proc=args.dataset_num_proc,
757
+ desc="Processing tokenized eval dataset",
758
+ )
759
+
760
+ desirable = train_dataset.filter(
761
+ lambda x: x["label"], num_proc=args.dataset_num_proc, desc="Filtering desirable examples"
762
+ )
763
+ undesirable = train_dataset.filter(
764
+ lambda x: not x["label"], num_proc=args.dataset_num_proc, desc="Filtering undesirable examples"
765
+ )
766
+
767
+ desirable = desirable.shuffle(seed=args.data_seed)
768
+ undesirable = undesirable.shuffle(seed=args.data_seed)
769
+
770
+ super().__init__(
771
+ model=model,
772
+ args=args,
773
+ data_collator=data_collator,
774
+ train_dataset=train_dataset,
775
+ eval_dataset=eval_dataset,
776
+ processing_class=processing_class,
777
+ model_init=model_init,
778
+ compute_metrics=compute_metrics,
779
+ callbacks=callbacks,
780
+ optimizers=optimizers,
781
+ preprocess_logits_for_metrics=preprocess_logits_for_metrics,
782
+ )
783
+
784
+ # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the
785
+ # model accepts loss-related kwargs. Since we compute our own loss, this check is irrelevant. We set
786
+ # self.model_accepts_loss_kwargs to False to enable scaling.
787
+ self.model_accepts_loss_kwargs = False
788
+
789
+ # Add tags for models that have been loaded with the correct transformers version
790
+ if hasattr(self.model, "add_model_tags"):
791
+ self.model.add_model_tags(self._tag_names)
792
+
793
+ if not hasattr(self, "accelerator"):
794
+ raise AttributeError(
795
+ "Your `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`."
796
+ )
797
+
798
+ # Deepspeed Zero-3 does not support precompute_ref_log_probs
799
+ if self.is_deepspeed_enabled:
800
+ if self.accelerator.state.deepspeed_plugin.zero_stage == 3 and self.precompute_ref_log_probs:
801
+ raise ValueError(
802
+ "You cannot use `precompute_ref_log_probs=True` with Deepspeed ZeRO-3. Please set `precompute_ref_log_probs=False`."
803
+ )
804
+
805
+ if self.ref_model is None:
806
+ if not (self.is_peft_model or self.precompute_ref_log_probs):
807
+ raise ValueError(
808
+ "No reference model and model is not a Peft model. Try setting `precompute_ref_log_probs=True`"
809
+ )
810
+ else:
811
+ if self.is_deepspeed_enabled:
812
+ self.ref_model = self._prepare_deepspeed(self.ref_model)
813
+ else:
814
+ self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)
815
+
816
+ self.running = RunningMoments(accelerator=self.accelerator)
817
+
818
+ if self.embedding_func is None:
819
+ return
820
+
821
+ chosen_embeddings = self._get_sample_prompt_embeddings(desirable, sample_size=self.args.prompt_sample_size)
822
+ rejected_embeddings = self._get_sample_prompt_embeddings(undesirable, sample_size=self.args.prompt_sample_size)
823
+
824
+ embeddings = torch.cat((chosen_embeddings, rejected_embeddings), dim=0)
825
+ labels = torch.cat(
826
+ (torch.ones_like(chosen_embeddings[:, 0]), torch.zeros_like(rejected_embeddings[:, 0])), dim=0
827
+ )
828
+
829
+ self.clf = LogisticRegression(class_weight="balanced").fit(
830
+ embeddings.cpu().float().numpy(), labels.cpu().numpy()
831
+ )
832
+
833
+ @property
834
+ def match_underlying_distribution(self):
835
+ return self.embedding_func is not None and self.embedding_tokenizer is not None
836
+
837
+ def _get_chosen_prob(self, prompt_embeddings: torch.FloatTensor) -> torch.FloatTensor:
838
+ """
839
+ Calculates the probability that the given prompt embedding comes from the desirable dataset.
840
+ The probability is computed on each process and then ensembled (averaged) across processes.
841
+ """
842
+ dtype = prompt_embeddings.dtype
843
+ device = prompt_embeddings.device
844
+ rank = self.accelerator.process_index
845
+
846
+ padded_prompt_embeddings = self.accelerator.pad_across_processes(
847
+ prompt_embeddings, pad_index=self.embedding_tokenizer.pad_token_id
848
+ )
849
+ sample_size = padded_prompt_embeddings.shape[0]
850
+ nonzero = padded_prompt_embeddings.mean(dim=1) != self.embedding_tokenizer.pad_token_id
851
+ prompt_embeddings = self.accelerator.gather(padded_prompt_embeddings)
852
+
853
+ # cannot predict for all empty values
854
+ if prompt_embeddings.shape[0] == 0:
855
+ return torch.tensor([], device=device, dtype=dtype)
856
+
857
+ prob = self.clf.predict_proba(prompt_embeddings.cpu().float().numpy())[:, 1]
858
+ prob = torch.as_tensor(prob, dtype=dtype, device=device)
859
+ prob = self.accelerator.reduce(prob, reduction="mean")
860
+
861
+ prob = prob[sample_size * rank : sample_size * (rank + 1)]
862
+ prob = prob[nonzero]
863
+
864
+ return prob
865
+
866
+ def _vectorize_prompt(self, input_ids: torch.LongTensor, attention_mask: torch.LongTensor) -> torch.FloatTensor:
867
+ """
868
+ Replaces processing_class.pad_token_id with embedding_tokenizer.pad_token_id
869
+ and applies self.embedding_func.
870
+ """
871
+ input_ids = torch.where(
872
+ input_ids == self.processing_class.pad_token_id,
873
+ self.embedding_tokenizer.pad_token_id,
874
+ input_ids,
875
+ )
876
+
877
+ with torch.no_grad():
878
+ embeddings = self.embedding_func(
879
+ input_ids=input_ids,
880
+ attention_mask=attention_mask,
881
+ )
882
+
883
+ return embeddings
884
+
885
+ def _get_prompt_embeddings(
886
+ self, batch: dict[str, Union[list, torch.LongTensor]]
887
+ ) -> tuple[torch.FloatTensor, torch.FloatTensor]:
888
+ """Extract embeddings from frozen embedding model"""
889
+
890
+ if not self.match_underlying_distribution:
891
+ return None, None
892
+
893
+ embeddings = self._vectorize_prompt(
894
+ input_ids=batch["embedding_input_ids"],
895
+ attention_mask=batch["embedding_attention_mask"],
896
+ )
897
+
898
+ chosen_idx = [i for i in range(len(batch["label"])) if batch["label"][i] is True]
899
+ rejected_idx = [i for i in range(len(batch["label"])) if batch["label"][i] is False]
900
+
901
+ chosen_embeddings = embeddings[chosen_idx, ...]
902
+ rejected_embeddings = embeddings[rejected_idx, ...]
903
+
904
+ return (chosen_embeddings, rejected_embeddings)
905
+
906
+ def _get_sample_prompt_embeddings(self, dataset: Dataset, sample_size: int = 512) -> torch.FloatTensor:
907
+ """
908
+ Sample instances from dataset and get prompt embeddings.
909
+ Used for density ratio classifier training.
910
+ """
911
+ n_samples = min(len(dataset), sample_size)
912
+ rand_indices = np.random.choice(len(dataset), size=(n_samples,))
913
+
914
+ embedding_dataset = dataset.select(rand_indices)
915
+
916
+ dataloader_params = {
917
+ "batch_size": self.args.per_device_train_batch_size,
918
+ "collate_fn": self.data_collator,
919
+ "num_workers": self.args.dataloader_num_workers,
920
+ "pin_memory": self.args.dataloader_pin_memory,
921
+ "shuffle": False,
922
+ }
923
+
924
+ # prepare dataloader
925
+ data_loader = self.accelerator.prepare(DataLoader(embedding_dataset, **dataloader_params))
926
+
927
+ with torch.no_grad():
928
+ all_embeddings = torch.empty(0)
929
+ for padded_batch in tqdm(iterable=data_loader, desc="Building sample prompt embeddings"):
930
+ embeddings = self._vectorize_prompt(
931
+ input_ids=padded_batch["embedding_input_ids"],
932
+ attention_mask=padded_batch["embedding_attention_mask"],
933
+ )
934
+ embeddings = self.accelerator.gather_for_metrics(embeddings)
935
+ all_embeddings = torch.cat((all_embeddings, embeddings.cpu()))
936
+
937
+ return all_embeddings
938
+
939
+ def _prepare_deepspeed(self, model: PreTrainedModelWrapper):
940
+ # Adapted from accelerate: https://github.com/huggingface/accelerate/blob/739b135f8367becb67ffaada12fe76e3aa60fefd/src/accelerate/accelerator.py#L1473
941
+ deepspeed_plugin = self.accelerator.state.deepspeed_plugin
942
+ config_kwargs = deepcopy(deepspeed_plugin.deepspeed_config)
943
+
944
+ if model is not None:
945
+ if hasattr(model, "config"):
946
+ hidden_size = (
947
+ max(model.config.hidden_sizes)
948
+ if getattr(model.config, "hidden_sizes", None)
949
+ else getattr(model.config, "hidden_size", None)
950
+ )
951
+ if hidden_size is not None and config_kwargs["zero_optimization"]["stage"] == 3:
952
+ # Note that `stage3_prefetch_bucket_size` can produce DeepSpeed messages like: `Invalidate trace cache @ step 0: expected module 1, but got module 0`
953
+ # This is expected and is not an error, see: https://github.com/microsoft/DeepSpeed/discussions/4081
954
+ config_kwargs.update(
955
+ {
956
+ "zero_optimization.reduce_bucket_size": hidden_size * hidden_size,
957
+ "zero_optimization.stage3_param_persistence_threshold": 10 * hidden_size,
958
+ "zero_optimization.stage3_prefetch_bucket_size": 0.9 * hidden_size * hidden_size,
959
+ }
960
+ )
961
+
962
+ # If ZeRO-3 is used, we shard both the active and reference model.
963
+ # Otherwise, we assume the reference model fits in memory and is initialized on each device with ZeRO disabled (stage 0)
964
+ if config_kwargs["zero_optimization"]["stage"] != 3:
965
+ config_kwargs["zero_optimization"]["stage"] = 0
966
+ model, *_ = deepspeed.initialize(model=model, config=config_kwargs)
967
+ model.eval()
968
+ return model
969
+
970
+ def _save_optimizer_and_scheduler(self, output_dir):
971
+ super()._save_optimizer_and_scheduler(output_dir)
972
+
973
+ # When saving optimizer and scheduler to checkpoint, save also the running delta object.
974
+ output_dir = output_dir if output_dir is not None else self.args.output_dir
975
+
976
+ self.running.save_to_json(os.path.join(output_dir, RUNNING_NAME))
977
+
978
+ if self.match_underlying_distribution:
979
+ torch.save(self.clf.get_params(), os.path.join(output_dir, CLF_NAME))
980
+
981
+ def _load_optimizer_and_scheduler(self, checkpoint):
982
+ super()._load_optimizer_and_scheduler(checkpoint)
983
+
984
+ if checkpoint is None:
985
+ return
986
+ # when loading optimizer and scheduler from checkpoint, also load the running delta object.
987
+ running_file = os.path.join(checkpoint, RUNNING_NAME)
988
+ if os.path.isfile(running_file):
989
+ self.running = RunningMoments.load_from_json(self.accelerator, running_file)
990
+
991
+ if self.match_underlying_distribution:
992
+ clf_file = os.path.join(checkpoint, CLF_NAME)
993
+ if os.path.isfile(clf_file):  # check the classifier file itself, not the running-moments file
994
+ self.clf.set_params(**torch.load(clf_file, weights_only=True, map_location="cpu"))
995
+
996
+ @contextmanager
997
+ def null_ref_context(self):
998
+ """Context manager for handling null reference model (that is, peft adapter manipulation)."""
999
+ with (
1000
+ self.accelerator.unwrap_model(self.model).disable_adapter()
1001
+ if self.is_peft_model and not self.ref_adapter_name
1002
+ else nullcontext()
1003
+ ):
1004
+ if self.ref_adapter_name:
1005
+ self.model.set_adapter(self.ref_adapter_name)
1006
+ yield
1007
+ if self.ref_adapter_name:
1008
+ self.model.set_adapter(self.model_adapter_name or "default")
1009
+
1010
+ def get_train_dataloader(self) -> DataLoader:
1011
+ """
1012
+ Returns the training [`~torch.utils.data.DataLoader`].
1013
+
1014
+ Overrides `transformers.Trainer.get_train_dataloader` to precompute `ref_log_probs`.
1015
+ """
1016
+
1017
+ if self.precompute_ref_log_probs and not self._precomputed_train_ref_log_probs:
1018
+ dataloader_params = {
1019
+ "batch_size": self.args.per_device_train_batch_size,
1020
+ "collate_fn": self.data_collator,
1021
+ "num_workers": self.args.dataloader_num_workers,
1022
+ "pin_memory": self.args.dataloader_pin_memory,
1023
+ "shuffle": False,
1024
+ }
1025
+
1026
+ # prepare dataloader
1027
+ data_loader = self.accelerator.prepare(DataLoader(self.train_dataset, **dataloader_params))
1028
+ reference_completion_logps = []
1029
+
1030
+ for padded_batch in tqdm(iterable=data_loader, desc="Train dataset reference log probs"):
1031
+ reference_completion_logp = self.compute_reference_log_probs(padded_batch)
1032
+
1033
+ reference_completion_logp = self.accelerator.gather_for_metrics(reference_completion_logp)
1034
+ reference_completion_logps.append(reference_completion_logp.cpu())
1035
+
1036
+ self.train_dataset = self.train_dataset.add_column(
1037
+ name="reference_logps", column=torch.cat(reference_completion_logps).float().numpy()
1038
+ )
1039
+
1040
+ self._precomputed_train_ref_log_probs = True
1041
+
1042
+ return super().get_train_dataloader()
1043
+
1044
+ def get_eval_dataloader(self, eval_dataset: Optional[Dataset] = None) -> DataLoader:
1045
+ """
1046
+ Returns the evaluation [`~torch.utils.data.DataLoader`].
1047
+
1048
+ Overrides `transformers.Trainer.get_eval_dataloader` to precompute `ref_log_probs`.
1049
+
1050
+ Args:
1051
+ eval_dataset (`torch.utils.data.Dataset`, *optional*):
1052
+ If provided, will override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns not accepted
1053
+ by the `model.forward()` method are automatically removed. It must implement `__len__`.
1054
+ """
1055
+ if eval_dataset is None and self.eval_dataset is None:
1056
+ raise ValueError("Trainer: evaluation requires an eval_dataset.")
1057
+ eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
1058
+
1059
+ if self.precompute_ref_log_probs and not self._precomputed_eval_ref_log_probs:
1060
+ dataloader_params = {
1061
+ "batch_size": self.args.per_device_eval_batch_size,
1062
+ "collate_fn": self.data_collator,
1063
+ "num_workers": self.args.dataloader_num_workers,
1064
+ "pin_memory": self.args.dataloader_pin_memory,
1065
+ "shuffle": False,
1066
+ }
1067
+
1068
+ # prepare dataloader
1069
+ data_loader = self.accelerator.prepare(DataLoader(eval_dataset, **dataloader_params))
1070
+
1071
+ reference_completion_logps = []
1072
+
1073
+ for padded_batch in tqdm(iterable=data_loader, desc="Eval dataset reference log probs"):
1074
+ reference_completion_logp = self.compute_reference_log_probs(padded_batch)
1075
+
1076
+ reference_completion_logp = self.accelerator.gather_for_metrics(reference_completion_logp)
1077
+ reference_completion_logps.append(reference_completion_logp.cpu())
1078
+
1079
+ eval_dataset = eval_dataset.add_column(
1080
+ name="reference_logps", column=torch.cat(reference_completion_logps).float().numpy()
1081
+ )
1082
+
1083
+ # Save the computed reference_logps to self.eval_dataset so subsequent runs can reuse them
1084
+ if self.eval_dataset is not None:
1085
+ self.eval_dataset = eval_dataset
1086
+ self._precomputed_eval_ref_log_probs = True
1087
+
1088
+ return super().get_eval_dataloader(eval_dataset=eval_dataset)
1089
+
1090
+ def compute_reference_log_probs(self, padded_batch: dict) -> dict:
1091
+ """Computes log probabilities of the reference model for a single padded batch of a BCO specific dataset."""
1092
+ with torch.no_grad():
1093
+ if self.ref_model is None:
1094
+ with self.null_ref_context():
1095
+ if self.is_encoder_decoder:
1096
+ completion_logits = self.model(
1097
+ padded_batch["prompt_input_ids"],
1098
+ attention_mask=padded_batch["prompt_attention_mask"],
1099
+ decoder_input_ids=padded_batch.get("completion_decoder_input_ids"),
1100
+ labels=padded_batch["completion_labels"],
1101
+ ).logits
1102
+
1103
+ else:
1104
+ completion_logits = self.model(
1105
+ padded_batch["completion_input_ids"],
1106
+ attention_mask=padded_batch["completion_attention_mask"],
1107
+ ).logits
1108
+
1109
+ else:
1110
+ if self.is_encoder_decoder:
1111
+ completion_logits = self.ref_model(
1112
+ padded_batch["prompt_input_ids"],
1113
+ attention_mask=padded_batch["prompt_attention_mask"],
1114
+ decoder_input_ids=padded_batch.get("completion_decoder_input_ids"),
1115
+ labels=padded_batch["completion_labels"],
1116
+ ).logits
1117
+
1118
+ else:
1119
+ completion_logits = self.ref_model(
1120
+ padded_batch["completion_input_ids"], attention_mask=padded_batch["completion_attention_mask"]
1121
+ ).logits
1122
+
1123
+ completion_logps = self.get_batch_logps(
1124
+ completion_logits,
1125
+ padded_batch["completion_labels"],
1126
+ average_log_prob=False,
1127
+ is_encoder_decoder=self.is_encoder_decoder,
1128
+ label_pad_token_id=self.label_pad_token_id,
1129
+ )
1130
+
1131
+ return completion_logps
1132
+
1133
+ @staticmethod
1134
+ def get_batch_logps(
1135
+ logits: torch.FloatTensor,
1136
+ labels: torch.LongTensor,
1137
+ average_log_prob: bool = False,
1138
+ label_pad_token_id: int = -100,
1139
+ is_encoder_decoder: bool = False,
1140
+ ) -> torch.FloatTensor:
1141
+ """Compute the log probabilities of the given labels under the given logits.
1142
+
1143
+ Args:
1144
+ logits: Logits of the model (unnormalized). Shape: (batch_size, sequence_length, vocab_size)
1145
+ labels: Labels for which to compute the log probabilities. Label tokens with a value of label_pad_token_id are ignored. Shape: (batch_size, sequence_length)
1146
+ average_log_prob: If True, return the average log probability per (non-masked) token. Otherwise, return the sum of the log probabilities of the (non-masked) tokens.
1147
+
1148
+ Returns:
1149
+ A tensor of shape (batch_size,) containing the average/sum log probabilities of the given labels under the given logits.
1150
+ """
1151
+ if logits.shape[:-1] != labels.shape:
1152
+ raise ValueError("Logits (batch and sequence length dim) and labels must have the same shape.")
1153
+
1154
+ if not is_encoder_decoder:
1155
+ labels = labels[:, 1:].clone()
1156
+ logits = logits[:, :-1, :]
1157
+ else:
1158
+ # Fixes enc-dec (encoder-decoder) RuntimeError
1159
+ labels = labels.clone()
1160
+
1161
+ loss_mask = labels != label_pad_token_id
1162
+
1163
+ # dummy token; we'll ignore the losses on these tokens later
1164
+ labels[labels == label_pad_token_id] = 0
1165
+
1166
+ per_token_logps = selective_log_softmax(logits, labels)
1167
+
1168
+ if average_log_prob:
1169
+ return (per_token_logps * loss_mask).sum(-1) / loss_mask.sum(-1)
1170
+ else:
1171
+ return (per_token_logps * loss_mask).sum(-1)
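+ # Illustrative sketch (added for clarity; not part of the generated file): with the default
+ # label_pad_token_id = -100, padded positions are masked out of the per-sequence sum, e.g.
+ #
+ #   logits = torch.randn(1, 5, 8)                      # (batch, seq_len, vocab)
+ #   labels = torch.tensor([[3, 7, 2, -100, -100]])     # last two positions are padding
+ #   logps  = _UnslothBCOTrainer.get_batch_logps(logits, labels)
+ #   # For decoder-only models the labels are shifted by one internally, so only the
+ #   # non-padded, shifted positions contribute to the returned (batch_size,) sums.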
1172
+
1173
+ def forward(
1174
+ self, model: nn.Module, batch: dict[str, Union[list, torch.LongTensor]]
1175
+ ) -> tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]:
1176
+ model_kwargs = (
1177
+ {
1178
+ "labels": batch["completion_labels"],
1179
+ "decoder_input_ids": batch.get("completion_decoder_input_ids"),
1180
+ }
1181
+ if self.is_encoder_decoder
1182
+ else {}
1183
+ )
1184
+ if self.aux_loss_enabled:
1185
+ model_kwargs["output_router_logits"] = True
1186
+
1187
+ outputs = model(
1188
+ batch["completion_input_ids"],
1189
+ attention_mask=batch["completion_attention_mask"],
1190
+ **model_kwargs,
1191
+ )
1192
+ completion_logits = outputs.logits
1193
+
1194
+ completion_logps = self.get_batch_logps(
1195
+ completion_logits,
1196
+ batch["completion_labels"],
1197
+ average_log_prob=False,
1198
+ is_encoder_decoder=self.is_encoder_decoder,
1199
+ label_pad_token_id=self.label_pad_token_id,
1200
+ )
1201
+
1202
+ if completion_logps.shape[0] != len(batch["label"]):
1203
+ raise ValueError(
1204
+ "There is a mismatch between the number of examples in this batch and the number of "
1205
+ "examples for which an output sequence was predicted."
1206
+ )
1207
+
1208
+ chosen_idx = [i for i in range(completion_logps.shape[0]) if batch["label"][i] is True]
1209
+ rejected_idx = [i for i in range(completion_logps.shape[0]) if batch["label"][i] is False]
1210
+
1211
+ chosen_logps = completion_logps[chosen_idx, ...]
1212
+ rejected_logps = completion_logps[rejected_idx, ...]
1213
+
1214
+ chosen_logits = completion_logits[chosen_idx, ...]
1215
+ rejected_logits = completion_logits[rejected_idx, ...]
1216
+
1217
+ if self.aux_loss_enabled:
1218
+ return (chosen_logps, rejected_logps, chosen_logits, rejected_logits, outputs.aux_loss)
1219
+ else:
1220
+ return (chosen_logps, rejected_logps, chosen_logits, rejected_logits)
1221
+
1222
+ def _get_udm_weight(self, rejected_embeddings: torch.FloatTensor) -> torch.FloatTensor:
1223
+ prob_desirable = self._get_chosen_prob(rejected_embeddings)
1224
+ min_ratio = self.args.min_density_ratio
1225
+ max_ratio = self.args.max_density_ratio
1226
+
1227
+ weight = (prob_desirable / (1 - prob_desirable + 1e-8)).clamp(min=min_ratio, max=max_ratio)
1228
+
1229
+ return weight
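+ # Illustrative note (added for clarity; not part of the generated file): the UDM weight is the
+ # clamped density ratio p / (1 - p), where p is the classifier's probability that a rejected
+ # prompt comes from the desirable distribution. With the default min/max ratios of 0.5 and 10.0:
+ #
+ #   p = torch.tensor([0.05, 0.50, 0.99])
+ #   w = (p / (1 - p + 1e-8)).clamp(min=0.5, max=10.0)
+ #   # w -> tensor([0.5000, 1.0000, 10.0000])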
1230
+
1231
+ def bco_loss(
1232
+ self,
1233
+ policy_chosen_logps: torch.FloatTensor,
1234
+ policy_rejected_logps: torch.FloatTensor,
1235
+ reference_chosen_logps: torch.FloatTensor,
1236
+ reference_rejected_logps: torch.FloatTensor,
1237
+ chosen_embeddings: Optional[torch.FloatTensor],
1238
+ rejected_embeddings: Optional[torch.FloatTensor],
1239
+ ) -> tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]:
1240
+ """Compute the BCO loss for a batch of policy and reference model log probabilities.
1241
+
1242
+ Args:
1243
+ policy_chosen_logps: Log probabilities of the policy model for the chosen responses. Shape: (num(chosen) in batch_size,)
1244
+ policy_rejected_logps: Log probabilities of the policy model for the rejected responses. Shape: (num(rejected) in batch_size,)
1245
+ reference_chosen_logps: Log probabilities of the reference model for the chosen responses. Shape: (num(chosen) in batch_size,)
1246
+ reference_rejected_logps: Log probabilities of the reference model for the rejected responses. Shape: (num(rejected) in batch_size,)
1247
+ chosen_embeddings: embeddings of desirable prompts
1248
+ rejected_embeddings: embeddings of undesirable prompts
1249
+
1250
+ Returns:
1251
+ A tuple of four tensors: (losses, chosen_rewards, rejected_rewards, delta).
1252
+ The losses tensor contains the BCO loss for each example in the batch.
1253
+ The chosen_rewards and rejected_rewards tensors contain the rewards for the chosen and rejected responses, respectively.
1254
+ The delta value contains the moving average of all implicit rewards.
1255
+ """
1256
+
1257
+ if policy_chosen_logps.shape[0] != 0 or reference_chosen_logps.shape[0] != 0:
1258
+ chosen_logratios = policy_chosen_logps - reference_chosen_logps
1259
+ chosen_rewards = self.beta * chosen_logratios
1260
+ else:
1261
+ # lists can't be empty -- if they are, then accelerate.gather will hang
1262
+ chosen_losses = torch.Tensor([]).to(self.accelerator.device)
1263
+ chosen_rewards = torch.Tensor([]).to(self.accelerator.device)
1264
+
1265
+ if policy_rejected_logps.shape[0] != 0 or reference_rejected_logps.shape[0] != 0:
1266
+ rejected_logratios = policy_rejected_logps - reference_rejected_logps
1267
+ rejected_rewards = self.beta * rejected_logratios
1268
+ else:
1269
+ # lists can't be empty -- if they are, then accelerate.gather will hang
1270
+ rejected_losses = torch.Tensor([]).to(self.accelerator.device)
1271
+ rejected_rewards = torch.Tensor([]).to(self.accelerator.device)
1272
+
1273
+ rewards = torch.cat((chosen_rewards, rejected_rewards), 0).mean().detach()
1274
+ self.running.update(rewards)
1275
+ delta = self.running.mean
1276
+
1277
+ if policy_chosen_logps.shape[0] != 0 or reference_chosen_logps.shape[0] != 0:
1278
+ chosen_losses = -F.logsigmoid(chosen_rewards - delta)
1279
+
1280
+ if policy_rejected_logps.shape[0] != 0 or reference_rejected_logps.shape[0] != 0:
1281
+ rejected_losses = -F.logsigmoid(-(rejected_rewards - delta))
1282
+
1283
+ if self.match_underlying_distribution:
1284
+ chosen_weight = torch.ones_like(chosen_losses)
1285
+ rejected_weight = self._get_udm_weight(rejected_embeddings)
1286
+
1287
+ losses = torch.cat((chosen_weight * chosen_losses, rejected_weight * rejected_losses), dim=0)
1288
+ else:
1289
+ losses = torch.cat((chosen_losses, rejected_losses), dim=0)
1290
+
1291
+ return losses, chosen_rewards, rejected_rewards, torch.as_tensor(delta)
1292
+
1293
+ def get_batch_loss_metrics(
1294
+ self,
1295
+ model,
1296
+ batch: dict[str, Union[list, torch.LongTensor]],
1297
+ ):
1298
+ """Compute the BCO loss and other metrics for the given batch of inputs for train or test."""
1299
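+ # Flow: (1) policy forward pass on the batch, (2) reference log-probs taken from the batch
+ # if precomputed, otherwise recomputed with the reference model (or the adapter-disabled
+ # policy), (3) optional prompt embeddings for UDM weighting, (4) BCO loss and summed metrics.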
+ metrics = {}
1300
+ batch = {k: (v.to(self.accelerator.device) if isinstance(v, torch.Tensor) else v) for k, v in batch.items()}
1301
+
1302
+ forward_output = self.forward(model, batch)
1303
+ (
1304
+ policy_chosen_logps,
1305
+ policy_rejected_logps,
1306
+ policy_chosen_logits,
1307
+ policy_rejected_logits,
1308
+ ) = forward_output[:4]
1309
+ if self.aux_loss_enabled:
1310
+ aux_loss = forward_output[4]
1311
+
1312
+ # if reference_logps in batch use them, otherwise use the reference model
1313
+ if "reference_logps" in batch:
1314
+ chosen_idx = [i for i in range(batch["reference_logps"].shape[0]) if batch["label"][i] is True]
1315
+ rejected_idx = [i for i in range(batch["reference_logps"].shape[0]) if batch["label"][i] is False]
1316
+
1317
+ reference_chosen_logps = batch["reference_logps"][chosen_idx, ...]
1318
+ reference_rejected_logps = batch["reference_logps"][rejected_idx, ...]
1319
+ else:
1320
+ with torch.no_grad():
1321
+ if self.ref_model is None:
1322
+ with self.null_ref_context():
1323
+ (
1324
+ reference_chosen_logps,
1325
+ reference_rejected_logps,
1326
+ _,
1327
+ _,
1328
+ ) = self.forward(self.model, batch)[:4]
1329
+ else:
1330
+ (
1331
+ reference_chosen_logps,
1332
+ reference_rejected_logps,
1333
+ _,
1334
+ _,
1335
+ ) = self.forward(self.ref_model, batch)[:4]
1336
+
1337
+ chosen_embeddings, rejected_embeddings = self._get_prompt_embeddings(batch)
1338
+
1339
+ losses, chosen_rewards, rejected_rewards, delta = self.bco_loss(
1340
+ policy_chosen_logps,
1341
+ policy_rejected_logps,
1342
+ reference_chosen_logps,
1343
+ reference_rejected_logps,
1344
+ chosen_embeddings,
1345
+ rejected_embeddings,
1346
+ )
1347
+ metrics["delta"] = self.accelerator.gather_for_metrics(delta).mean().item()
1348
+
1349
+ num_chosen = torch.Tensor([len(chosen_rewards)]).to(self.accelerator.device)
1350
+ num_rejected = torch.Tensor([len(rejected_rewards)]).to(self.accelerator.device)
1351
+
1352
+ all_num_chosen = self.accelerator.gather_for_metrics(num_chosen).sum().item()
1353
+ all_num_rejected = self.accelerator.gather_for_metrics(num_rejected).sum().item()
1354
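+ # Only per-process sums and counts are stored here; `log()` later divides the gathered
+ # sums by the gathered counts to recover mean rewards/logps/logits across processes.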
+
1355
+ if all_num_chosen > 0:
1356
+ metrics["rewards/chosen_sum"] = (
1357
+ self.accelerator.gather_for_metrics(chosen_rewards.nansum()).nansum().item()
1358
+ )
1359
+ metrics["logps/chosen_sum"] = (
1360
+ self.accelerator.gather_for_metrics(policy_chosen_logps.nansum()).nansum().item()
1361
+ )
1362
+ metrics["logits/chosen_sum"] = (
1363
+ self.accelerator.gather_for_metrics(policy_chosen_logits.nansum()).nansum().item()
1364
+ )
1365
+ metrics["count/chosen"] = all_num_chosen
1366
+
1367
+ if all_num_rejected > 0:
1368
+ metrics["rewards/rejected_sum"] = (
1369
+ self.accelerator.gather_for_metrics(rejected_rewards.nansum()).nansum().item()
1370
+ )
1371
+ metrics["logps/rejected_sum"] = (
1372
+ self.accelerator.gather_for_metrics(policy_rejected_logps.nansum()).nansum().item()
1373
+ )
1374
+ metrics["logits/rejected_sum"] = (
1375
+ self.accelerator.gather_for_metrics(policy_rejected_logits.nansum()).nansum().item()
1376
+ )
1377
+ metrics["count/rejected"] = all_num_rejected
1378
+
1379
+ loss = losses.nanmean()
1380
+ if self.aux_loss_enabled:
1381
+ loss += self.aux_loss_coef * aux_loss
1382
+
1383
+ return loss, metrics
1384
+
1385
+ def compute_loss(
1386
+ self,
1387
+ model: Union[PreTrainedModel, nn.Module],
1388
+ inputs: dict[str, Union[torch.Tensor, Any]],
1389
+ return_outputs=False,
1390
+ num_items_in_batch=None,
1391
+ ) -> Union[torch.Tensor, tuple[torch.Tensor, dict[str, torch.Tensor]]]:
1392
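+ # When the PEFT adapters were cast to bf16, wrap the loss computation in autocast;
+ # otherwise this context manager is a no-op.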
+ compute_loss_context_manager = amp.autocast("cuda") if self._peft_has_been_casted_to_bf16 else nullcontext()
1393
+
1394
+ with compute_loss_context_manager:
1395
+ loss, metrics = self.get_batch_loss_metrics(model, inputs)
1396
+
1397
+ # Make sure to move the loss to the device the original accumulating loss is at back in the `Trainer` class:
1398
+ loss = loss.to(self.args.device)
1399
+ # force log the metrics
1400
+ if self.accelerator.is_main_process:
1401
+ self.store_metrics(metrics, train_eval="train")
1402
+
1403
+ if return_outputs:
1404
+ return (loss, metrics)
1405
+ return loss
1406
+
1407
+ def store_metrics(self, metrics: dict[str, float], train_eval: Literal["train", "eval"] = "train") -> None:
1408
+ for key, value in metrics.items():
1409
+ self._stored_metrics[train_eval][key].append(value)
1410
+
1411
+ def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
1412
+ if self.train_dataset is None or not has_length(self.train_dataset):
1413
+ return None
1414
+ return SequentialSampler(self.train_dataset)
1415
+
1416
+ def generate_from_model_and_ref(self, model, batch: dict[str, torch.LongTensor]) -> tuple[str, str]:
1417
+ """Generate samples from the model and reference model for the given batch of inputs."""
1418
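+ # Completions are sampled from both the policy and the (possibly adapter-disabled)
+ # reference model so that `evaluation_loop` can log them side by side.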
+
1419
+ # If one uses `generate_during_eval` with peft + bf16, we need to explicitly call generate with
1420
+ # the torch cuda amp context manager as some hidden states are silently cast to full precision.
1421
+ generate_context_manager = amp.autocast("cuda") if self._peft_has_been_casted_to_bf16 else nullcontext()
1422
+ with generate_context_manager:
1423
+ policy_output = model.generate(
1424
+ input_ids=batch["prompt_input_ids"],
1425
+ attention_mask=batch["prompt_attention_mask"],
1426
+ max_length=self.max_length,
1427
+ do_sample=True,
1428
+ pad_token_id=self.processing_class.pad_token_id,
1429
+ )
1430
+
1431
+ # if reference_output in batch use that otherwise use the reference model
1432
+ if "reference_output" in batch:
1433
+ reference_output = batch["reference_output"]
1434
+ else:
1435
+ if self.ref_model is None:
1436
+ with self.null_ref_context():
1437
+ reference_output = self.model.generate(
1438
+ input_ids=batch["prompt_input_ids"],
1439
+ attention_mask=batch["prompt_attention_mask"],
1440
+ max_length=self.max_length,
1441
+ do_sample=True,
1442
+ pad_token_id=self.processing_class.pad_token_id,
1443
+ )
1444
+ else:
1445
+ reference_output = self.ref_model.generate(
1446
+ input_ids=batch["prompt_input_ids"],
1447
+ attention_mask=batch["prompt_attention_mask"],
1448
+ max_length=self.max_length,
1449
+ do_sample=True,
1450
+ pad_token_id=self.processing_class.pad_token_id,
1451
+ )
1452
+
1453
+ policy_output = pad_to_length(policy_output, self.max_length, self.processing_class.pad_token_id)
1454
+ policy_output_decoded = self.processing_class.batch_decode(policy_output, skip_special_tokens=True)
1455
+
1456
+ reference_output = pad_to_length(reference_output, self.max_length, self.processing_class.pad_token_id)
1457
+ reference_output_decoded = self.processing_class.batch_decode(reference_output, skip_special_tokens=True)
1458
+
1459
+ return policy_output_decoded, reference_output_decoded
1460
+
1461
+ def prediction_step(
1462
+ self,
1463
+ model: Union[PreTrainedModel, nn.Module],
1464
+ inputs: dict[str, Union[torch.Tensor, Any]],
1465
+ prediction_loss_only: bool,
1466
+ ignore_keys: Optional[list[str]] = None,
1467
+ ):
1468
+ if ignore_keys is None:
1469
+ if hasattr(model, "config"):
1470
+ ignore_keys = getattr(model.config, "keys_to_ignore_at_inference", [])
1471
+ else:
1472
+ ignore_keys = []
1473
+
1474
+ prediction_context_manager = amp.autocast("cuda") if self._peft_has_been_casted_to_bf16 else nullcontext()
1475
+ with torch.no_grad(), prediction_context_manager:
1476
+ loss, metrics = self.get_batch_loss_metrics(model, inputs)
1477
+
1478
+ # force log the metrics
1479
+ if self.accelerator.is_main_process:
1480
+ self.store_metrics(metrics, train_eval="eval")
1481
+
1482
+ if prediction_loss_only:
1483
+ return (loss.detach(), None, None)
1484
+
1485
+ # logits for the chosen and rejected samples from model
1486
+ logits_dict = {
1487
+ "eval_logits/chosen": metrics["logits/chosen"],
1488
+ "eval_logits/rejected": metrics["logits/rejected"],
1489
+ }
1490
+ logits = tuple(v.unsqueeze(dim=0) for k, v in logits_dict.items() if k not in ignore_keys)
1491
+ logits = torch.stack(logits).mean(axis=1).to(self.accelerator.device)
1492
+ labels = torch.zeros(logits.shape[0], device=self.accelerator.device)
1493
+
1494
+ return (loss.detach(), logits, labels)
1495
+
1496
+ def evaluation_loop(
1497
+ self,
1498
+ dataloader: DataLoader,
1499
+ description: str,
1500
+ prediction_loss_only: Optional[bool] = None,
1501
+ ignore_keys: Optional[list[str]] = None,
1502
+ metric_key_prefix: str = "eval",
1503
+ ) -> EvalLoopOutput:
1504
+ """
1505
+ Overriding built-in evaluation loop to store metrics for each batch.
1506
+ Prediction/evaluation loop, shared by `Trainer.evaluate()` and `Trainer.predict()`.
1507
+
1508
+ Works both with or without labels.
1509
+ """
1510
+
1511
+ # Sample and save to game log if requested (for one batch to save time)
1512
+ if self.generate_during_eval:
1513
+ # Generate random indices within the range of the total number of samples
1514
+ num_samples = len(dataloader.dataset)
1515
+ random_indices = random.sample(range(num_samples), k=self.args.eval_batch_size)
1516
+
1517
+ # Use dataloader.dataset.select to get the random batch without iterating over the DataLoader
1518
+ random_batch_dataset = dataloader.dataset.select(random_indices)
1519
+ random_batch = self.data_collator(random_batch_dataset)
1520
+ random_batch = self._prepare_inputs(random_batch)
1521
+
1522
+ target_indices = [i for i in range(len(random_batch["label"])) if random_batch["label"][i] is False]
1523
+ target_batch = {
1524
+ "prompt_input_ids": random_batch["prompt_input_ids"][target_indices],
1525
+ "prompt_attention_mask": random_batch["prompt_attention_mask"][target_indices],
1526
+ "prompt": itemgetter(*target_indices)(random_batch["prompt"]),
1527
+ }
1528
+ policy_output_decoded, ref_output_decoded = self.generate_from_model_and_ref(self.model, target_batch)
1529
+
1530
+ table = pd.DataFrame(
1531
+ columns=["Prompt", "Policy", "Ref Model"],
1532
+ data=[
1533
+ [prompt, pol[len(prompt) :], ref[len(prompt) :]]
1534
+ for prompt, pol, ref in zip(target_batch["prompt"], policy_output_decoded, ref_output_decoded)
1535
+ ],
1536
+ )
1537
+ if "wandb" in self.args.report_to:
1538
+ wandb.log({"game_log": wandb.Table(data=table)})
1539
+
1540
+ if "comet_ml" in self.args.report_to:
1541
+ log_table_to_comet_experiment(
1542
+ name="game_log.csv",
1543
+ table=table,
1544
+ )
1545
+
1546
+ # Base evaluation
1547
+ initial_output = super().evaluation_loop(
1548
+ dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix
1549
+ )
1550
+
1551
+ return initial_output
1552
+
1553
+ def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None:
1554
+ """
1555
+ Log `logs` on the various objects watching training, including stored metrics.
1556
+
1557
+ Args:
1558
+ logs (`dict[str, float]`):
1559
+ The values to log.
1560
+ start_time (`float` or `None`, *optional*, defaults to `None`):
1561
+ Start time of the training.
1562
+ """
1563
+ # logs either has 'loss' or 'eval_loss'
1564
+ train_eval = "train" if "loss" in logs else "eval"
1565
+ # train metrics should have no prefix, eval should have 'eval_'
1566
+ prefix = "eval_" if train_eval == "eval" else ""
1567
+ # accumulate average metrics from sums and lengths
1568
+ for split in ["chosen", "rejected"]:
1569
+ if f"count/{split}" in self._stored_metrics[train_eval]:
1570
+ count_sum = torch.Tensor(self._stored_metrics[train_eval][f"count/{split}"]).sum().item()
1571
+ for metric in ["rewards", "logps", "logits"]:
1572
+ logs[f"{prefix}{metric}/{split}"] = (
1573
+ torch.Tensor(self._stored_metrics[train_eval][f"{metric}/{split}_sum"]).sum().item()
1574
+ / count_sum
1575
+ )
1576
+ # delete obsolete metric
1577
+ del self._stored_metrics[train_eval][f"{metric}/{split}_sum"]
1578
+ del self._stored_metrics[train_eval][f"count/{split}"]
1579
+ # calculate reward margin
1580
+ if f"{prefix}rewards/chosen" in logs and f"{prefix}rewards/rejected" in logs:
1581
+ logs[f"{prefix}rewards/margins"] = logs[f"{prefix}rewards/chosen"] - logs[f"{prefix}rewards/rejected"]
1582
+ # Add averaged stored metrics to logs
1583
+ for key, metrics in self._stored_metrics[train_eval].items():
1584
+ logs[f"{prefix}{key}"] = torch.Tensor(metrics).mean().item()
1585
+ del self._stored_metrics[train_eval]
1586
+
1587
+ if version.parse(transformers.__version__) >= version.parse("4.47.0.dev0"):
1588
+ return super().log(logs, start_time)
1589
+ else: # transformers<=4.46
1590
+ return super().log(logs)
1591
+
1592
+ def create_model_card(
1593
+ self,
1594
+ model_name: Optional[str] = None,
1595
+ dataset_name: Optional[str] = None,
1596
+ tags: Union[str, list[str], None] = None,
1597
+ ):
1598
+ """
1599
+ Creates a draft of a model card using the information available to the `Trainer`.
1600
+
1601
+ Args:
1602
+ model_name (`str` or `None`, *optional*, defaults to `None`):
1603
+ Name of the model.
1604
+ dataset_name (`str` or `None`, *optional*, defaults to `None`):
1605
+ Name of the dataset used for training.
1606
+ tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
1607
+ Tags to be associated with the model card.
1608
+ """
1609
+ if not self.is_world_process_zero():
1610
+ return
1611
+
1612
+ if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path):
1613
+ base_model = self.model.config._name_or_path
1614
+ else:
1615
+ base_model = None
1616
+
1617
+ tags = tags or []
1618
+ if isinstance(tags, str):
1619
+ tags = [tags]
1620
+
1621
+ if hasattr(self.model.config, "unsloth_version"):
1622
+ tags.append("unsloth")
1623
+
1624
+ citation = textwrap.dedent("""\
1625
+ @article{jung2024binary,
1626
+ title = {{Binary Classifier Optimization for Large Language Model Alignment}},
1627
+ author = {Seungjae Jung and Gunsoo Han and Daniel Wontae Nam and Kyoung{-}Woon On},
1628
+ year = 2024,
1629
+ eprint = {arXiv:2404.04656}
1630
+ }""")
1631
+
1632
+ model_card = generate_model_card(
1633
+ base_model=base_model,
1634
+ model_name=model_name,
1635
+ hub_model_id=self.hub_model_id,
1636
+ dataset_name=dataset_name,
1637
+ tags=tags,
1638
+ wandb_url=wandb.run.get_url() if is_wandb_available() and wandb.run is not None else None,
1639
+ comet_url=get_comet_experiment_url(),
1640
+ trainer_name="BCO",
1641
+ trainer_citation=citation,
1642
+ paper_title="Binary Classifier Optimization for Large Language Model Alignment",
1643
+ paper_id="2404.04656",
1644
+ )
1645
+
1646
+ model_card.save(os.path.join(self.args.output_dir, "README.md"))
1647
+ class UnslothBCOTrainer(_UnslothBCOTrainer):
1648
+ """
1649
+
1650
+ Initialize BCOTrainer from [BCO](https://huggingface.co/papers/2404.04656) paper.
1651
+
1652
+ Args:
1653
+ model (`transformers.PreTrainedModel`):
1654
+ The model to train, preferably an `AutoModelForCausalLM`.
1655
+ ref_model (`PreTrainedModelWrapper`):
1656
+ Hugging Face transformer model with a causal language modelling head. Used for implicit reward computation and loss. If no
1657
+ reference model is provided, the trainer will create a reference model with the same architecture as the model to be optimized.
1658
+ args (`BCOConfig`):
1659
+ The arguments to use for training.
1660
+ train_dataset (`datasets.Dataset`):
1661
+ The dataset to use for training.
1662
+ eval_dataset (`datasets.Dataset`):
1663
+ The dataset to use for evaluation.
1664
+ processing_class (`PreTrainedTokenizerBase` or `BaseImageProcessor` or `FeatureExtractionMixin` or `ProcessorMixin`, *optional*):
1665
+ Processing class used to process the data. If provided, will be used to automatically process the inputs
1666
+ for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
1667
+ reuse the fine-tuned model.
1668
+ data_collator (`transformers.DataCollator`, *optional*, defaults to `None`):
1669
+ The data collator to use for training. If None is specified, the default data collator (`DPODataCollatorWithPadding`) will be used
1670
+ which will pad the sequences to the maximum length of the sequences in the batch, given a dataset of paired sequences.
1671
+ model_init (`Callable[[], transformers.PreTrainedModel]`):
1672
+ The model initializer to use for training. If None is specified, the default model initializer will be used.
1673
+ callbacks (`list[transformers.TrainerCallback]`):
1674
+ The callbacks to use for training.
1675
+ optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
1676
+ The optimizer and scheduler to use for training.
1677
+ preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
1678
+ The function to use to preprocess the logits before computing the metrics.
1679
+ peft_config (`dict`, defaults to `None`):
1680
+ The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in a PEFT model.
1681
+ compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
1682
+ The function to use to compute the metrics. Must take an `EvalPrediction` and return
1683
+ a dictionary string to metric values.
1684
+ model_adapter_name (`str`, defaults to `None`):
1685
+ Name of the train target PEFT adapter, when using LoRA with multiple adapters.
1686
+ ref_adapter_name (`str`, defaults to `None`):
1687
+ Name of the reference PEFT adapter, when using LoRA with multiple adapters.
1688
+
1689
+ """
1690
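+ # The wrapper below only normalizes precision, evaluation and data-collator settings,
+ # patches RL statistics logging, and then delegates to `_UnslothBCOTrainer.__init__`.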
+ def __init__(
1691
+ self,
1692
+ model = None,
1693
+ ref_model = None,
1694
+ args = None,
1695
+ train_dataset = None,
1696
+ eval_dataset = None,
1697
+ processing_class = None,
1698
+ data_collator = None,
1699
+ model_init = None,
1700
+ callbacks = None,
1701
+ preprocess_logits_for_metrics = None,
1702
+ peft_config = None,
1703
+ compute_metrics = None,
1704
+ model_adapter_name = None,
1705
+ ref_adapter_name = None,
1706
+ embedding_func = None,
1707
+ embedding_tokenizer = None,
1708
+ **kwargs
1709
+ ):
1710
+ if args is None: args = UnslothBCOConfig()
1711
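+ # Precision handling: UNSLOTH_FORCE_FLOAT32=1 disables mixed precision entirely;
+ # otherwise fp16/bf16 flags are aligned with the model's dtype and
+ # ACCELERATE_MIXED_PRECISION is set to match.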
+ use_bf16 = getattr(args, 'bf16', False)
1712
+ use_fp16 = getattr(args, 'fp16', False)
1713
+ force_float32 = False
1714
+ if os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1':
1715
+ print('Unsloth: Switching to float32 training since model cannot work with float16')
1716
+ force_float32 = True
1717
+ mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
1718
+ dtype = getattr(model.config, 'torch_dtype', None)
1719
+ if dtype is None: dtype = model.get_input_embeddings().dtype
1720
+ from unsloth_zoo.utils import _get_dtype
1721
+ dtype = _get_dtype(dtype)
1722
+ float16 = dtype == torch.float16
1723
+ if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`')
1724
+ if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`')
1725
+ if force_float32:
1726
+ args.fp16 = False
1727
+ args.bf16 = False
1728
+ os.environ['ACCELERATE_MIXED_PRECISION'] = 'no'
1729
+ elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32':
1730
+ args.fp16 = float16
1731
+ args.bf16 = not float16
1732
+ os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16'
1733
+ if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no':
1734
+ args.eval_strategy = 'steps'
1735
+ if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1
1736
+ ga_steps = getattr(args, 'gradient_accumulation_steps', None)
1737
+ if ga_steps is not None and ga_steps > 1:
1738
+ from transformers import __version__ as transformers_version
1739
+ if Version(transformers_version) <= Version('4.45.2'):
1740
+ print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n'
1741
+ '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`')
1742
+ if getattr(args, 'eval_strategy', 'no') != 'no':
1743
+ eval_bsz = getattr(args, 'per_device_eval_batch_size', 8)
1744
+ if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size
1745
+ if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps
1746
+ fp16_full_eval = getattr(args, 'fp16_full_eval', False)
1747
+ bf16_full_eval = getattr(args, 'bf16_full_eval', False)
1748
+ if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True
1749
+ if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False
1750
+ if force_float32:
1751
+ args.bf16_full_eval = False
1752
+ args.fp16_full_eval = False
1753
+ elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16':
1754
+ args.bf16_full_eval = True
1755
+ args.fp16_full_eval = False
1756
+ elif not bf16_full_eval and not fp16_full_eval:
1757
+ args.bf16_full_eval = args.bf16
1758
+ args.fp16_full_eval = args.fp16
1759
+ _output_logits = False
1760
+ if locals().get('compute_metrics', None) is not None: _output_logits = True
1761
+ if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True
1762
+ if _output_logits:
1763
+ os.environ['UNSLOTH_RETURN_LOGITS'] = '1'
1764
+ if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'):
1765
+ pass
1766
+ else:
1767
+ model_max_seq_length = getattr(model, 'max_seq_length', None)
1768
+ args_max_seq_length = getattr(args, 'max_seq_length', None)
1769
+ if args_max_seq_length is None and model_max_seq_length is not None:
1770
+ max_seq_length = model.max_seq_length
1771
+ if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length
1772
+ if model is not None and hasattr(model, 'for_training'):
1773
+ model.for_training()
1774
+ if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right'
1775
+ if 'processing_class' in locals():
1776
+ if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right'
1777
+ if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right'
1778
+ __tokenizer = processing_class if 'processing_class' in locals() else tokenizer
1779
+ from unsloth_zoo.vision_utils import UnslothVisionDataCollator
1780
+ if not isinstance(data_collator, UnslothVisionDataCollator):
1781
+ if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
1782
+ data_collator = DataCollatorForLanguageModeling(__tokenizer, mlm = False)
1783
+ elif isinstance(data_collator, DataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
1784
+ data_collator = DataCollatorForSeq2Seq(__tokenizer)
1785
+ else:
1786
+ if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
1787
+ if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
1788
+ if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True}
1789
+ if not isinstance(data_collator, UnslothVisionDataCollator):
1790
+ if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
1791
+ if isinstance(data_collator, DataCollatorForSeq2Seq):
1792
+ data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
1793
+ else:
1794
+ data_collator = DataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False)
1795
+ other_metrics = []
1796
+
1797
+ from unsloth_zoo.logging_utils import PatchRLStatistics
1798
+ PatchRLStatistics('bco_trainer', other_metrics)
1799
+
1800
+ super().__init__(
1801
+ model = model,
1802
+ ref_model = ref_model,
1803
+ args = args,
1804
+ train_dataset = train_dataset,
1805
+ eval_dataset = eval_dataset,
1806
+ processing_class = processing_class,
1807
+ data_collator = data_collator,
1808
+ model_init = model_init,
1809
+ callbacks = callbacks,
1810
+ preprocess_logits_for_metrics = preprocess_logits_for_metrics,
1811
+ peft_config = peft_config,
1812
+ compute_metrics = compute_metrics,
1813
+ model_adapter_name = model_adapter_name,
1814
+ ref_adapter_name = ref_adapter_name,
1815
+ embedding_func = embedding_func,
1816
+ embedding_tokenizer = embedding_tokenizer,**kwargs)
1817
+ if hasattr(self, 'neftune_hook_handle'):
1818
+ self.neftune_hook_handle.remove()
1819
+ if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle
1820
+ if getattr(args, 'neftune_noise_alpha', None) is not None:
1821
+ model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha
1822
+ pass
1823
+
1824
+ pass
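+
+ # Usage sketch, kept as a comment so importing this compiled module stays side-effect free.
+ # Model name, dataset and hyperparameters below are illustrative placeholders, not values
+ # used elsewhere in this repo:
+ #
+ #   from unsloth import FastLanguageModel
+ #   from datasets import load_dataset
+ #
+ #   model, tokenizer = FastLanguageModel.from_pretrained("unsloth/gemma-3-12b-it", load_in_4bit = True)
+ #   # BCO expects unpaired preference data with "prompt", "completion" and "label" columns.
+ #   dataset = load_dataset("trl-lib/kto-mix-14k", split = "train")
+ #   trainer = UnslothBCOTrainer(
+ #       model = model,
+ #       args = UnslothBCOConfig(output_dir = "outputs", per_device_train_batch_size = 2),
+ #       train_dataset = dataset,
+ #       processing_class = tokenizer,
+ #   )
+ #   trainer.train()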