Spaces:

lucasgagneten
/

OCR-NER-Facturas

Sleeping

OCR-NER-Facturas / app.py

Lucas Gagneten

Gemini client

f35ce5e about 2 months ago

11.7 kB

	import gradio as gr
	from PIL import Image
	import pandas as pd

	# ----------------------------------------------------------------------
	# 💡 1. IMPORTACIONES
	# ----------------------------------------------------------------------

	# ocr_processor: Funciones de OCR (Doctr) y guardado de imagen
	from ocr_processor import process_and_setup

	# label_editor: Funciones de setup, edición y persistencia (JSON/ZIP)
	from label_editor import (
	setup_label_components,
	update_ui,
	save_current_annotation_to_json,
	export_and_zip_dataset,
	update_dataframe_and_state,
	display_selected_row,
	)

	# image_loader: Define la UI para la carga de imagen y la API Key
	from image_loader import setup_image_components

	# bbox_adder: Funciones para añadir manualmente tokens no detectados por OCR
	from bbox_adder import add_new_bbox_mode, append_new_token

	# --- Función de Limpieza/Reset ---

	def clear_ui_and_reset_states(api_key_input, tb_new_token_text, btn_add_new_token, state_new_bbox):
	    """Build the values that return every state and widget to its start-up condition.

	    The four parameters are accepted but never read: every reset value is a
	    constant.

	    Returns:
	        A 14-item tuple, in this order:
	        0-3   state values: original image (None), token list ([]),
	              highlighted row index (-1), image filename (None);
	        4-7   updates for the API-key field, the image input, the image
	              display and the dataframe;
	        8-9   updates that empty and hide the token editor and tag selector;
	        10-11 updates that hide the new-token textbox and the add-token button;
	        12    manual-bbox state (None);
	        13    the status message.
	    """
	    print("Reiniciando la interfaz y los estados...")

	    # Plain values written back into the gr.State holders.
	    state_values = (None, [], -1, None)

	    # Upload side: show an empty key field and uploader, hide the preview,
	    # and empty the table.
	    loader_updates = (
	        gr.update(value="", visible=True),
	        gr.update(value=None, visible=True),
	        gr.update(value=None, visible=False),
	        gr.update(value=[]),
	    )

	    # Editing widgets: cleared and hidden ("O" is the tag put back in the selector).
	    editor_updates = (
	        gr.update(value="", visible=False),
	        gr.update(value="O", visible=False),
	    )

	    # Manual-token widgets: hidden, with the textbox emptied.
	    adder_updates = (
	        gr.update(value="", visible=False),
	        gr.update(visible=False),
	    )

	    return (
	        *state_values,
	        *loader_updates,
	        *editor_updates,
	        *adder_updates,
	        None,
	        "Sube una imagen para comenzar...",
	    )


	# --- FUNCIÓN AUXILIAR DE FLUJO: OCR y Gemini (MODIFICADA) ---

	def process_image(image, api_key: str):
	    """Run the OCR pipeline on an uploaded image and produce the values for the UI.

	    The work is delegated to ``process_and_setup(image, api_key)``; per the
	    original notes, the conditional Gemini query happens inside that helper.

	    Args:
	        image: The uploaded image, or None when nothing is loaded.
	        api_key: Key forwarded unchanged to ``process_and_setup``.

	    Returns:
	        An 8-item tuple: original image, token data, highlighted image,
	        dataframe rows (list of ``[token, ner_tag]`` lists), status text,
	        update for the image input, update for the image display, and the
	        image filename. On any failure the data slots are None / [] and the
	        updates show the image input and hide (and empty) the image display.
	    """
	    def failure(message):
	        # Same 8-slot layout as the success path, carrying only a message.
	        return (
	            None,
	            [],
	            None,
	            [],
	            message,
	            gr.update(visible=True),
	            gr.update(visible=False, value=None),
	            None,
	        )

	    if image is None:
	        return failure("Sube una imagen para comenzar...")

	    try:
	        outcome = process_and_setup(image, api_key)

	        # A None first element is treated as a failure of the OCR step.
	        if outcome[0] is None:
	            return failure("Error en el procesamiento del OCR. Verifica logs.")

	        image_orig, tokens_data, highlighted_image, df_data, status, image_filename = outcome

	        # The dataframe wants a list of rows; df_data is read as a dict of columns.
	        if df_data and isinstance(df_data, dict):
	            df_rows = [
	                [token, tag]
	                for token, tag in zip(df_data['token'], df_data['ner_tag'])
	            ]
	        else:
	            df_rows = []

	        show_display = gr.update(visible=True)
	        hide_input = gr.update(visible=False)
	        return (
	            image_orig,
	            tokens_data,
	            highlighted_image,
	            df_rows,
	            status,
	            hide_input,
	            show_display,
	            image_filename,
	        )

	    except Exception as e:
	        detail = str(e)
	        print(f"Error en process_image: {detail}")
	        return failure(f"Error: {detail}")

	def capture_highlight_index(evt: gr.SelectData):
	    """Return the 0-based row index carried by a dataframe selection event.

	    Args:
	        evt: Selection event supplied by Gradio. ``evt.index`` is read as a
	            sequence whose first item is the row.

	    Returns:
	        The selected row index, or -1 when the event carries no usable index.
	    """
	    if evt and evt.index is not None and evt.index[0] is not None:
	        return evt.index[0]
	    # Return the plain sentinel value. The output slot is already a gr.State,
	    # so returning a freshly built gr.State(-1) would hand Gradio a component
	    # object to store instead of the integer -1.
	    return -1

	# --- INTERFAZ GRADIO (GR.BLOCKS) ---

	# Build the Gradio Blocks application: page header, shared state holders,
	# the two-column layout, and every event connection between them.
	with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
	gr.Markdown(
	"""
	# 🧾 Anotador NER para Facturas (LayoutXLM)

	Instrucciones: 1. Sube una imagen (y opcionalmente la Clave API de Gemini para asistencia de etiquetado). La imagen se guarda automáticamente en `dataset/imagenes`.
	2. Edita los tokens o etiquetas. Los cambios se aplican automáticamente.
	3. Haz clic en 'Guardar Anotación Actual (JSON)' para confirmar los datos de la factura actual en `dataset/anotacion_factura.json`.
	4. Haz clic en 'Descargar Dataset Completo (.zip)' para obtener todas las imágenes y el JSON consolidado.
	"""
	)

	# --- 1. Shared state holders ---
	# Initial values match the reset values returned by clear_ui_and_reset_states.
	image_orig_state = gr.State(None)
	tokens_data_state = gr.State([])
	highlight_index_state = gr.State(-1)
	image_filename_state = gr.State(None)
	STATE_NEW_BBOX = gr.State(value=None) # State for the manually added bounding box

	with gr.Row():
	with gr.Column(scale=1):
	# Left column: upload and display.
	# setup_image_components returns: api_key_input, image_input_file, image_output_display
	api_key_input, image_input_file, image_output_display = setup_image_components()

	# Original intent: show the input components explicitly.
	# NOTE(review): a bare name is a no-op expression statement and does not
	# place a component here; presumably setup_image_components() already
	# renders them when it creates them — confirm.
	api_key_input
	image_input_file

	status_output = gr.Markdown("Sube una imagen para comenzar...")
	btn_clear = gr.Button("🗑️ Quitar Imagen / Nuevo Documento", visible=True)

	with gr.Column(scale=2):
	# Right column: label editing.
	gr.Markdown("### 2. Edición de Etiquetas NER")

	# setup_label_components also returns the manual-addition components
	# (tb_new_token_text, btn_add_new_token). temp_state_dummy is not used
	# anywhere else in this block.
	(
	df_label_input, tb_token_editor, dd_tag_selector,
	btn_save_annotation, btn_export, file_output,
	tb_new_token_text, btn_add_new_token, temp_state_dummy
	) = setup_label_components()

	# Dataframe
	# NOTE(review): this and the bare names below are no-op statements; the
	# rows/columns that wrap them only contain these components if something
	# else renders them inside — confirm against label_editor.
	df_label_input

	# Container for the editors (token and tag)
	with gr.Row(visible=True) as editor_row:
	with gr.Column(scale=2):
	tb_token_editor
	# Text field for the manual bounding box (meant to be shown conditionally)
	tb_new_token_text
	with gr.Column(scale=1):
	dd_tag_selector

	# Container for the Save / Download / Add buttons
	with gr.Row(visible=True):
	btn_save_annotation
	btn_export
	btn_add_new_token

	file_output


	# --- EVENT CONNECTIONS ---

	# CONNECTION 1: run process_image whenever the image input changes.
	# The 8 outputs line up with process_image's 8-item return tuple.
	# NOTE(review): image_output_display is listed twice — slot 3 receives the
	# highlighted image and slot 7 the visibility update. Confirm the installed
	# Gradio version accepts a repeated output component.
	image_input_file.change(
	fn=process_image,
	inputs=[image_input_file, api_key_input], # api_key_input is passed as process_image's api_key
	outputs=[
	image_orig_state, tokens_data_state, image_output_display, df_label_input, status_output,
	image_input_file, image_output_display, image_filename_state
	],
	api_name=False
	)

	# CONNECTION 2: row selection.
	# Chain: store the selected row index -> display_selected_row fills the two
	# editors (and writes the index back) -> update_ui writes tokens_data_state
	# and image_output_display.
	df_label_input.select(
	fn=capture_highlight_index,
	inputs=None,
	outputs=[highlight_index_state],
	queue=False
	).then(
	fn=display_selected_row,
	inputs=[tokens_data_state, highlight_index_state],
	outputs=[tb_token_editor, dd_tag_selector, highlight_index_state],
	).then(
	fn=update_ui,
	inputs=[image_orig_state, tokens_data_state, df_label_input, highlight_index_state],
	outputs=[tokens_data_state, image_output_display],
	api_name=False
	)

	# CONNECTION 3: editing a tag or a token (updates state, then the UI).
	# The lambda receives (tokens, dataframe, index, new tag) in input order and
	# forwards them to update_dataframe_and_state with mode 'tag'.
	dd_tag_selector.change(
	fn=lambda t, d, i, new_tag_val: update_dataframe_and_state(t, d, new_tag_val, None, i, 'tag'),
	inputs=[tokens_data_state, df_label_input, highlight_index_state, dd_tag_selector],
	outputs=[tokens_data_state, df_label_input],
	).then(
	fn=update_ui,
	inputs=[image_orig_state, tokens_data_state, df_label_input, highlight_index_state],
	outputs=[tokens_data_state, image_output_display],
	api_name=False
	)

	# Token text edits use the same two-step chain, registered for both the
	# blur and the submit events of the token editor, with mode 'token'.
	token_update_events = [tb_token_editor.blur, tb_token_editor.submit]
	for event in token_update_events:
	event(
	fn=lambda t, d, i, new_token_val: update_dataframe_and_state(t, d, None, new_token_val, i, 'token'),
	inputs=[tokens_data_state, df_label_input, highlight_index_state, tb_token_editor],
	outputs=[tokens_data_state, df_label_input],
	).then(
	fn=update_ui,
	inputs=[image_orig_state, tokens_data_state, df_label_input, highlight_index_state],
	outputs=[tokens_data_state, image_output_display],
	api_name=False
	)

	# CONNECTION 4: save and export.
	# Both handlers take the same three state inputs and write to file_output
	# and status_output.
	btn_save_annotation.click(
	fn=save_current_annotation_to_json,
	inputs=[image_orig_state, tokens_data_state, image_filename_state],
	outputs=[file_output, status_output],
	api_name=False
	)

	btn_export.click(
	fn=export_and_zip_dataset,
	inputs=[image_orig_state, tokens_data_state, image_filename_state],
	outputs=[file_output, status_output],
	api_name=False
	)

	# CONNECTION 5: clear and restart.
	# The 14 outputs follow the order of the tuple returned by
	# clear_ui_and_reset_states.
	btn_clear.click(
	fn=clear_ui_and_reset_states,
	inputs=[api_key_input, tb_new_token_text, btn_add_new_token, STATE_NEW_BBOX], # api_key_input is included in the inputs
	outputs=[
	image_orig_state, tokens_data_state, highlight_index_state,
	image_filename_state, api_key_input, image_input_file, image_output_display, # api_key_input is reset here as well
	df_label_input, tb_token_editor, dd_tag_selector,
	tb_new_token_text, btn_add_new_token, STATE_NEW_BBOX,
	status_output
	],
	api_name=False
	)


	# Script entry point: start the Gradio server; a launch failure is printed
	# and then re-raised so the process still exits with the original error.
	if __name__ == "__main__":
	    try:
	        app.launch()
	    except Exception as launch_error:
	        print(f"Error crítico durante la ejecución de la aplicación: {launch_error}")
	        raise