Update app.py
app.py CHANGED
@@ -6,7 +6,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Load model and tokenizer if a GPU is available
 if torch.cuda.is_available():
-    model_id = "allenai/OLMo-7B-
+    model_id = "allenai/OLMo-7B-hf"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 else:
@@ -16,13 +16,12 @@ else:
 @spaces.GPU
 def generate_response(passage: str, question: str) -> str:
     # Prepare the input text by combining the passage and question
-
+    message = [f"Passage: {passage}\nQuestion: {question}"]
+    inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
 
-
-    inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
-    response = model.generate(input_ids=inputs.to(model.device), max_new_tokens=100)
+    response = model.generate(**inputs, max_new_tokens=100)
 
-    response = tokenizer.batch_decode(response, skip_special_tokens=True)[0]
+    response = tokenizer.batch_decode(response, skip_special_tokens=True)[0]
 
 
     return response
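
For reference, the updated function can be exercised outside the Space roughly as follows. This is a minimal sketch, not the committed code: it assumes a CUDA GPU is available, drops the @spaces.GPU decorator (which only applies inside a ZeroGPU Space), and adds an explicit move of the input tensors to model.device, a step the committed code leaves to device_map="auto".

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "allenai/OLMo-7B-hf"
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

def generate_response(passage: str, question: str) -> str:
    # Combine the passage and question into a single prompt, as in the diff.
    message = [f"Passage: {passage}\nQuestion: {question}"]
    inputs = tokenizer(message, return_tensors="pt", return_token_type_ids=False)
    # Assumption: move inputs to the model's device explicitly (not in the diff).
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    output_ids = model.generate(**inputs, max_new_tokens=100)
    # batch_decode returns the prompt followed by the generated continuation.
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]

A note on the change itself: calling the tokenizer directly instead of tokenizer.encode returns an attention_mask alongside input_ids, both of which generate consumes via **inputs, and return_token_type_ids=False drops a field that OLMo's forward pass does not accept.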