Spaces:

burtenshaw
/

coworking_agent

Runtime error

App Files Files Community

coworking_agent / app.py

burtenshaw HF Staff

Update app.py

b92c058 verified 10 months ago

raw

history blame contribute delete

2.82 kB

	from pathlib import Path
	from typing import List, Dict

	from dotenv import load_dotenv
	from fastrtc import (
	get_stt_model,
	get_tts_model,
	Stream,
	ReplyOnPause,
	get_twilio_turn_credentials,
	)
	from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool

	# Load environment variables (python-dotenv; presumably from a local .env
	# file -- confirm). This runs before any model or agent object below is
	# constructed, so anything they read from the environment is already set.
	load_dotenv()

	# Initialize file paths: directory that contains this script.
	# NOTE(review): curr_dir is not referenced anywhere in the visible code.
	curr_dir = Path(__file__).parent

	# Initialize models: speech-to-text and text-to-speech, both created with
	# fastrtc's default arguments. process_response uses them to transcribe the
	# caller's audio and to synthesize the spoken reply.
	stt_model = get_stt_model()
	tts_model = get_tts_model()

	# Conversation state to maintain history
	# NOTE(review): this list is never read or appended to in the visible code;
	# process_response builds each agent input from the system prompt and the
	# current utterance only.
	conversation_state: List[Dict[str, str]] = []

	# System prompt for agent.
	# process_response prepends this text verbatim to every transcribed user
	# request before calling agent.run, so any edit here changes what the model
	# sees on every turn. It lists the ranking criteria, restricts the answer
	# format, and gives three few-shot examples (two in-scope, one refusal).
	# NOTE(review): the text contains the typos "can helps" and "workremotely";
	# they are left untouched here because the string is runtime input to the
	# model -- fix deliberately if desired.
	system_prompt = """You are a helpful assistant that can helps with finding places to
	workremotely from. You should specifically check against reviews and ratings of the
	place. You should use this criteria to find the best place to work from:
	- Price
	- Reviews
	- Ratings
	- Location
	- WIFI
	Only return the name, address of the place, and a short description of the place.
	Always search for real places.
	Only return real places, not fake ones.
	If you receive anything other than a location, you should ask for a location.
	<example>
	User: I am in Paris, France. Can you find me a place to work from?
	Assistant: I found a place called "Le Café de la Paix" at 123 Rue de la Paix,
	Paris, France. It has good reviews and is in a great location.
	</example>
	<example>
	User: I am in London, UK. Can you find me a place to work from?
	Assistant: I found a place called "The London Coffee Company".
	</example>
	<example>
	User: How many people are in the room?
	Assistant: I only respond to requests about finding places to work from.
	</example>

	"""

	# Hosted chat model that drives the agent: Qwen2.5-Coder-32B-Instruct,
	# requested through the "together" provider.
	model = HfApiModel(model="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together")

	# Code-writing agent whose only tool is DuckDuckGo web search.
	# max_steps / verbosity_level are passed through to smolagents unchanged
	# (presumably a two-step cap per run and verbose logging -- confirm against
	# the smolagents documentation).
	agent = CodeAgent(
	    model=model,
	    tools=[DuckDuckGoSearchTool()],
	    description="Search the web for cafes to work from.",
	    max_steps=2,
	    verbosity_level=2,
	)


	def process_response(audio):
	    """Transcribe a spoken request, run the agent on it, and stream a spoken reply.

	    This generator is registered as the ``ReplyOnPause`` handler of the
	    module-level ``stream``. It relies on the module-level ``stt_model``,
	    ``tts_model``, ``agent`` and ``system_prompt``.

	    Args:
	        audio: The captured audio handed over by the stream handler; it is
	            forwarded unchanged to ``stt_model.stt``.

	    Yields:
	        Audio chunks produced by ``tts_model.stream_tts_sync`` for the agent's
	        reply. Nothing is yielded when the transcript is empty or when the
	        agent returns no usable answer.
	    """
	    # Convert speech to text using the STT model.
	    text = stt_model.stt(audio)
	    if not text or not text.strip():
	        # Silence or an unintelligible utterance: do not call the agent.
	        return

	    # The agent keeps no system prompt of its own here, so the instructions
	    # are prepended to every request.
	    input_text = f"{system_prompt}\n\n{text}"
	    response_content = agent.run(input_text)

	    # The agent's final answer is not guaranteed to be a string (it can be
	    # whatever the generated code returned), while the TTS model needs text.
	    # Coerce truthy answers with str() and treat falsy ones as "no reply".
	    reply_text = str(response_content) if response_content else ""
	    if not reply_text.strip():
	        # Nothing to say: skip synthesis instead of feeding TTS an empty string.
	        return

	    # Stream the synthesized reply back chunk by chunk.
	    yield from tts_model.stream_tts_sync(reply_text)


	# Two-way audio stream: incoming audio is sampled at 16 kHz and handed to
	# process_response through ReplyOnPause; the yielded audio chunks are sent
	# back to the caller. TURN credentials are obtained from Twilio at import
	# time, right after the handler is constructed.
	stream = Stream(
	    modality="audio",
	    mode="send-receive",
	    handler=ReplyOnPause(process_response, input_sample_rate=16000),
	    rtc_configuration=get_twilio_turn_credentials(),
	    # Cosmetic settings for the built-in web UI.
	    ui_args={
	        "pulse_color": "rgb(255, 255, 255)",
	        "icon_button_color": "rgb(255, 255, 255)",
	        "title": "🧑‍💻The Coworking Agent",
	    },
	)

	# Script entry point: serve the stream's built-in UI on port 7860 when the
	# file is executed directly (not when it is imported).
	if __name__ == "__main__":
	    demo_ui = stream.ui
	    demo_ui.launch(server_port=7860)