# FixMyEnv / app.py
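"""Gradio entry point for the FixMyEnv Package Upgrade Advisor.

Flow: the user asks a question (optionally attaching a pyproject.toml or
requirements file); the chat history is summarized, the question is
optionally rewritten, a PackageDiscoveryAgent gathers context through MCP
tools (PyPI, GitHub, file upload), and a final document-QA pass answers it.
"""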
import logging
import shutil
from datetime import datetime
from pathlib import Path
import gradio as gr
from mcp import StdioServerParameters
from smolagents import InferenceClientModel, MCPClient
from config import (
AGENT_MODEL,
CHAT_HISTORY_TURNS_CUTOFF,
CHAT_HISTORY_WORD_CUTOFF,
GITHUB_TOOLSETS,
HF_TOKEN,
)
from config import GITHUB_PAT as GITHUB_TOKEN
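
# config.py is expected to expose the constants imported above; a minimal
# sketch (the values below are illustrative placeholders, not the real
# settings):
#
#   AGENT_MODEL = "Qwen/Qwen2.5-72B-Instruct"  # any HF Inference model id
#   CHAT_HISTORY_TURNS_CUTOFF = 6              # turns kept before summarizing
#   CHAT_HISTORY_WORD_CUTOFF = 500             # words kept before summarizing
#   GITHUB_TOOLSETS = "repos"                  # toolsets for the GitHub MCP server
#   HF_TOKEN = os.environ["HF_TOKEN"]
#   GITHUB_PAT = os.environ["GITHUB_PAT"]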
from src.upgrade_advisor.agents.package import PackageDiscoveryAgent
from src.upgrade_advisor.chat.chat import (
qn_rewriter,
run_document_qa,
summarize_chat_history,
)
from src.upgrade_advisor.misc import (
get_example_pyproject_question,
get_example_requirements_question,
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())
# staging directory for uploads; also passed to the gradio upload-mcp server
uploads_dir = Path("uploads")
uploads_dir.mkdir(exist_ok=True)
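# Resolve to an absolute path so shutil.copy and the upload-mcp server agree
# on the destination regardless of the current working directory.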
uploads_dir = uploads_dir.resolve()
def _monkeypatch_gradio_save_history():
"""Guard against non-int indices in Gradio's chat history saver.
Gradio 5.49.1 occasionally passes a component (e.g., Textbox) as the
conversation index when save_history=True, which raises a TypeError. We
coerce unexpected index types to None so Gradio inserts a new conversation
instead of erroring.
"""
if getattr(gr.ChatInterface, "_ua_safe_patch", False):
return
original = gr.ChatInterface._save_conversation
def _safe_save_conversation(self, index, conversation, saved_conversations):
if not isinstance(index, int):
index = None
try:
return original(self, index, conversation, saved_conversations)
except Exception:
logger.exception("Failed to save chat history; leaving history unchanged.")
return index, saved_conversations
gr.ChatInterface._save_conversation = _safe_save_conversation
gr.ChatInterface._ua_safe_patch = True
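# Apply the patch at import time, before any ChatInterface saves history.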
_monkeypatch_gradio_save_history()
async def chat_fn(message, history, persisted_attachments=None):
    # incoming history is a list of dicts with 'role' and 'content' keys
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
logger.info(f"Received message: {message}")
logger.info(f"History: {history}")
if len(history) > 0:
summarized_history = await summarize_chat_history(
history,
turns_cutoff=CHAT_HISTORY_TURNS_CUTOFF,
word_cutoff=CHAT_HISTORY_WORD_CUTOFF,
)
else:
summarized_history = ""
incoming_attachments = message.get("files", []) if isinstance(message, dict) else []
persisted_attachments = persisted_attachments or []
# If no new attachments are provided, keep using the previously persisted ones.
attachments = incoming_attachments or persisted_attachments
    latest_attachment = attachments[-1] if attachments else None
logger.info(f"Summarized chat history:\n{summarized_history}")
logger.info(f"With attachments: {attachments} (incoming: {incoming_attachments})")
logger.info(f"Latest attachment: {latest_attachment}")
logger.info(f"Persisted attachments: {persisted_attachments}")
    # if attachments are present, message is a dict with 'text' and 'files' keys
message = message.get("text", "") if isinstance(message, dict) else message
# overwrite messages with the text content only
message = message.strip()
rewritten_message, is_rewritten_good = await qn_rewriter(
message, summarized_history
)
if is_rewritten_good:
logger.info(f"Rewritten question: {rewritten_message}")
else:
logger.info(f"Using original question: {message}")
rewritten_message = None
    # Prepend the chat summary so the agent sees the conversation context
message = f"""
CHAT SUMMARY SO FAR:
{summarized_history}
CURRENT QUESTION FROM USER:
{message}
"""
    if attachments:
        message += "Attached FILE:\n"
        # Only the most recent file is used, since a single pyproject.toml
        # is expected; copy it into the uploads directory under a
        # timestamped name.
        if latest_attachment:
            source_file = latest_attachment
        elif persisted_attachments:
            # fall back to the last persisted file if nothing new was uploaded
            source_file = persisted_attachments[-1]
        else:
            source_file = None
        if source_file is not None:
            file_name = f"{timestamp}_{Path(source_file).name}"
            logger.info(f"Copying uploaded file {source_file} to {uploads_dir}")
            shutil.copy(source_file, uploads_dir / file_name)
            message += f"\nFILE PATH: {uploads_dir / file_name}\n"
logger.info(f"Final message to agent:\n{message}")
# Run the package discovery agent to build context
context = agent.discover_package_info(
user_input=message, reframed_question=rewritten_message
)
    # context is a concise digest of the agent's tool outputs
logger.info(f"Built context of length {len(context)}")
logger.info(f"Context content:\n{context}")
# Run a document QA pass using the user's question
qa_answer = await run_document_qa(
question=message, context=context, rewritten_question=rewritten_message
)
logger.info(f"QA answer: {qa_answer}")
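    # Returning the attachments alongside the reply lets Gradio write them
    # back into files_state (via additional_outputs), persisting uploads
    # across turns.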
return {
"role": "assistant",
"content": qa_answer,
}, attachments
if __name__ == "__main__":
logger.info("Starting MCP client...")
try:
gh_mcp_params = StdioServerParameters(
            # run GitHub's MCP server in a container via podman
            # (stdio transport)
            command="podman",
args=[
"run",
"-i",
"--rm",
"-e",
"GITHUB_PERSONAL_ACCESS_TOKEN",
"-e",
"GITHUB_READ_ONLY",
"-e",
"GITHUB_TOOLSETS",
"ghcr.io/github/github-mcp-server",
],
env={
"GITHUB_PERSONAL_ACCESS_TOKEN": GITHUB_TOKEN,
"GITHUB_READ_ONLY": "1",
"GITHUB_TOOLSETS": GITHUB_TOOLSETS,
},
)
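        # (docker should work as a drop-in replacement for podman here; the
        # image and environment variables stay the same)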
pypi_mcp_params = dict(
# url="https://mcp-1st-birthday-pypi-mcp.hf.space/gradio_api/mcp/",
url="https://mcp-1st-birthday-uv-pypi-mcp.hf.space/gradio_api/mcp/",
transport="streamable-http",
)
upload_mcp_params = StdioServerParameters(
command="uvx",
args=[
"--from",
"gradio[mcp]",
"gradio",
"upload-mcp",
                # The base URL must be the Gradio root; upload-mcp appends
                # /gradio_api/upload itself (the docs misleadingly show
                # gradio_api/upload as the base).
"https://mcp-1st-birthday-uv-pypi-mcp.hf.space/",
uploads_dir.as_posix(),
],
)
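        # A single MCPClient aggregates the tools from all three servers
        # (PyPI, GitHub, file upload) into one toolset.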
        mcp_client = MCPClient(
server_parameters=[
pypi_mcp_params,
gh_mcp_params,
upload_mcp_params,
],
structured_output=True,
)
model = InferenceClientModel(
token=HF_TOKEN,
model_id=AGENT_MODEL,
)
# Gradio chat interface state to persist uploaded files
files_state = gr.State([])
        with mcp_client as toolset:
logger.info("MCP clients connected successfully")
agent = PackageDiscoveryAgent(
model=model,
tools=toolset,
)
            # chat_fn uses the module-level `agent` created above; the
            # multimodal textbox lets the user attach a local pyproject.toml
demo = gr.ChatInterface(
fn=chat_fn,
chatbot=gr.Chatbot(
height=600,
type="messages",
),
title="Package Upgrade Advisor",
type="messages",
# additional_inputs_accordion="Attach pyproject.toml file",
                textbox=gr.MultimodalTextbox(
                    label="pyproject.toml",
                    file_types=[".toml"],
                    file_count="single",
                    min_width=100,
                    sources=["upload"],
                ),
additional_inputs=[files_state],
additional_outputs=[files_state],
save_history=True,
examples=[
                ["Tell me about the 'requests' package. How do I use it with JSON?"],
[get_example_requirements_question()],
[get_example_pyproject_question()],
["Which version of 'pandas' is compatible with 'numpy' 2.0?"],
[
                        {
                            "text": (
                                "Can I upgrade my dependencies from the attached "
                                "pyproject.toml to work with python 3.14? Any "
                                "suggestions on potential issues I should be aware of?"
                            ),
"files": ["tests/test2.toml"],
}
],
],
stop_btn=True,
theme="compact",
)
demo.launch()
finally:
        logger.info("Cleaning up uploaded files")
        # remove contents of uploads_dir (the MCP clients are closed by the
        # context manager above)
for f in uploads_dir.iterdir():
try:
f.unlink()
except Exception:
logger.exception(f"Failed to delete uploaded file: {f}")
logger.info("Shutdown complete.")