Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from PIL import Image | |
| import pandas as pd | |
| # ---------------------------------------------------------------------- | |
| # 💡 1. IMPORTACIONES | |
| # ---------------------------------------------------------------------- | |
| # ocr_processor: Funciones de OCR (Doctr) y guardado de imagen | |
| from ocr_processor import process_and_setup | |
| # label_editor: Funciones de setup, edición y persistencia (JSON/ZIP) | |
| from label_editor import ( | |
| setup_label_components, | |
| update_ui, | |
| save_current_annotation_to_json, | |
| export_and_zip_dataset, | |
| update_dataframe_and_state, | |
| display_selected_row, | |
| ) | |
| # image_loader: Define la UI para la carga de imagen y la API Key | |
| from image_loader import setup_image_components | |
| # bbox_adder: Funciones para añadir manualmente tokens no detectados por OCR | |
| from bbox_adder import add_new_bbox_mode, append_new_token | |
| # --- Función de Limpieza/Reset --- | |
def clear_ui_and_reset_states(api_key_input, tb_new_token_text, btn_add_new_token, state_new_bbox):
    """Produce the values that put the interface and its states back to their initial condition.

    The four parameters are accepted because the reset button passes them as
    inputs, but their values are never read: every output is a fixed constant.

    Returns:
        A 14-item tuple, in this order:
        0. image_orig_state       -> None
        1. tokens_data_state      -> []
        2. highlight_index_state  -> -1
        3. image_filename_state   -> None
        4. api_key_input          -> emptied, visible
        5. image_input_file       -> cleared, visible
        6. image_output_display   -> cleared, hidden
        7. df_label_input         -> emptied
        8. tb_token_editor        -> emptied, hidden
        9. dd_tag_selector        -> "O", hidden
        10. tb_new_token_text     -> emptied, hidden
        11. btn_add_new_token     -> hidden
        12. state_new_bbox        -> None
        13. status_output         -> initial prompt text
    """
    print("Reiniciando la interfaz y los estados...")

    # Plain values for the gr.State holders (items 0-3).
    state_resets = (None, [], -1, None)

    # Upload/visualisation side: show the inputs again, hide the preview (items 4-7).
    loader_resets = (
        gr.update(value="", visible=True),
        gr.update(value=None, visible=True),
        gr.update(value=None, visible=False),
        gr.update(value=[]),
    )

    # Row editors are hidden until a row is selected again (items 8-9).
    editor_resets = (
        gr.update(value="", visible=False),
        gr.update(value="O", visible=False),
    )

    # Manual bounding-box widgets and their pending state (items 10-12).
    manual_bbox_resets = (
        gr.update(value="", visible=False),
        gr.update(visible=False),
        None,
    )

    return (
        *state_resets,
        *loader_resets,
        *editor_resets,
        *manual_bbox_resets,
        "Sube una imagen para comenzar...",
    )
| # --- FUNCIÓN AUXILIAR DE FLUJO: OCR y Gemini (MODIFICADA) --- | |
def _process_image_failure(message):
    """Return the eight outputs used when no image could be processed.

    States and table are emptied, *message* goes to the status area, the
    upload widget stays visible and the preview is cleared and hidden.
    """
    return (
        None,
        [],
        None,
        [],
        message,
        gr.update(visible=True),
        gr.update(visible=False, value=None),
        None,
    )


def process_image(image, api_key: str):
    """Run ``process_and_setup`` on the uploaded image and adapt its result for the UI.

    Args:
        image: The uploaded image, or None when the input was cleared.
        api_key: Forwarded unchanged to ``process_and_setup`` together with the image.

    Returns:
        An 8-item tuple: original image, token data, highlighted image,
        table rows as ``[token, ner_tag]`` lists, status text, an update for
        the upload widget, an update for the preview widget, and the image
        file name. On any failure the tuple from ``_process_image_failure``
        is returned instead.
    """
    if image is None:
        return _process_image_failure("Sube una imagen para comenzar...")

    try:
        result = process_and_setup(image, api_key)

        # A leading None is treated as "processing failed".
        if result[0] is None:
            return _process_image_failure("Error en el procesamiento del OCR. Verifica logs.")

        image_orig, tokens_data, highlighted_image, df_data, status, image_filename = result

        # The table component is fed a list of [token, tag] rows.
        if df_data and isinstance(df_data, dict):
            df_rows = [[token, tag] for token, tag in zip(df_data['token'], df_data['ner_tag'])]
        else:
            df_rows = []

        hide_upload = gr.update(visible=False)
        show_preview = gr.update(visible=True)
        return (
            image_orig,
            tokens_data,
            highlighted_image,
            df_rows,
            status,
            hide_upload,
            show_preview,
            image_filename,
        )
    except Exception as e:
        # UI boundary: report the problem in the status area instead of crashing the handler.
        print(f"Error en process_image: {str(e)}")
        return _process_image_failure(f"Error: {str(e)}")
def capture_highlight_index(evt: gr.SelectData):
    """Return the 0-based row index of the current selection, or -1 when there is none.

    Args:
        evt: Selection event supplied by Gradio. Its ``index`` is read either
            as a sequence whose first item is the row, or as a bare int.

    Returns:
        The selected row as a plain value, or -1 if nothing usable was
        selected. A plain -1 is returned (not a ``gr.State`` object) because
        the return value itself is what gets stored in the output state.
    """
    if evt is None or evt.index is None:
        return -1

    selected = evt.index
    if isinstance(selected, (list, tuple)):
        # Two-dimensional selection: (row, column); an empty sequence means no row.
        row = selected[0] if selected else None
    else:
        # One-dimensional selection: the index is already the row.
        row = selected

    return -1 if row is None else row
| # --- INTERFAZ GRADIO (GR.BLOCKS) --- | |
with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
    gr.Markdown(
        """
# 🧾 Anotador NER para Facturas (LayoutXLM)
**Instrucciones:** 1. **Sube** una imagen (y opcionalmente la Clave API de Gemini para asistencia de etiquetado). La imagen se guarda automáticamente en `dataset/imagenes`.
2. **Edita** los tokens o etiquetas. Los cambios se aplican automáticamente.
3. Haz clic en **'Guardar Anotación Actual (JSON)'** para confirmar los datos de la factura actual en `dataset/anotacion_factura.json`.
4. Haz clic en **'Descargar Dataset Completo (.zip)'** para obtener todas las imágenes y el JSON consolidado.
"""
    )

    # --- 1. Shared state holders ---
    image_orig_state = gr.State(None)
    tokens_data_state = gr.State([])
    highlight_index_state = gr.State(-1)  # -1 means "no row selected"
    image_filename_state = gr.State(None)
    STATE_NEW_BBOX = gr.State(value=None)  # pending manually added bounding box

    # --- 2. Layout ---
    # A bare variable name is a no-op expression and does not place a
    # component, so the former "display" statements and the Row/Column
    # containers that only wrapped them were removed.
    # NOTE(review): the components are presumably rendered where the setup_*
    # helpers construct them - confirm in image_loader / label_editor.
    with gr.Row():
        with gr.Column(scale=1):
            # Left column: upload and visualisation.
            api_key_input, image_input_file, image_output_display = setup_image_components()
            status_output = gr.Markdown("Sube una imagen para comenzar...")
            btn_clear = gr.Button("🗑️ Quitar Imagen / Nuevo Documento", visible=True)
        with gr.Column(scale=2):
            # Right column: label editing.
            gr.Markdown("### 2. Edición de Etiquetas NER")
            (
                df_label_input, tb_token_editor, dd_tag_selector,
                btn_save_annotation, btn_export, file_output,
                tb_new_token_text, btn_add_new_token, temp_state_dummy
            ) = setup_label_components()

    # --- 3. Event wiring ---

    def _then_update_ui(dependency):
        """Append the update_ui step shared by the row-selection and edit chains."""
        # Fresh lists on every call so no event shares a list object with another.
        return dependency.then(
            fn=update_ui,
            inputs=[image_orig_state, tokens_data_state, df_label_input, highlight_index_state],
            outputs=[tokens_data_state, image_output_display],
            api_name=False
        )

    # CONNECTION 1: process the image when the upload changes.
    # NOTE(review): image_output_display is listed twice - slot 2 receives the
    # highlighted image and slot 6 the visibility update returned by
    # process_image. Merging both into one update would require changing
    # process_image's return shape at the same time.
    image_input_file.change(
        fn=process_image,
        inputs=[image_input_file, api_key_input],
        outputs=[
            image_orig_state, tokens_data_state, image_output_display, df_label_input, status_output,
            image_input_file, image_output_display, image_filename_state
        ],
        api_name=False
    )

    # CONNECTION 2: row selection -> remember index -> fill editors -> update_ui.
    _then_update_ui(
        df_label_input.select(
            fn=capture_highlight_index,
            inputs=None,
            outputs=[highlight_index_state],
            queue=False
        ).then(
            fn=display_selected_row,
            inputs=[tokens_data_state, highlight_index_state],
            outputs=[tb_token_editor, dd_tag_selector, highlight_index_state],
        )
    )

    # CONNECTION 3: editing the tag or the token text updates state and table, then update_ui.
    _then_update_ui(
        dd_tag_selector.change(
            fn=lambda t, d, i, new_tag_val: update_dataframe_and_state(t, d, new_tag_val, None, i, 'tag'),
            inputs=[tokens_data_state, df_label_input, highlight_index_state, dd_tag_selector],
            outputs=[tokens_data_state, df_label_input],
        )
    )

    # The token text is committed both on losing focus and on submit.
    token_update_events = [tb_token_editor.blur, tb_token_editor.submit]
    for event in token_update_events:
        _then_update_ui(
            event(
                fn=lambda t, d, i, new_token_val: update_dataframe_and_state(t, d, None, new_token_val, i, 'token'),
                inputs=[tokens_data_state, df_label_input, highlight_index_state, tb_token_editor],
                outputs=[tokens_data_state, df_label_input],
            )
        )

    # CONNECTION 4: save the current annotation / export the dataset.
    btn_save_annotation.click(
        fn=save_current_annotation_to_json,
        inputs=[image_orig_state, tokens_data_state, image_filename_state],
        outputs=[file_output, status_output],
        api_name=False
    )
    btn_export.click(
        fn=export_and_zip_dataset,
        inputs=[image_orig_state, tokens_data_state, image_filename_state],
        outputs=[file_output, status_output],
        api_name=False
    )

    # CONNECTION 5: clear everything and start over. The output order must
    # match the 14-item tuple returned by clear_ui_and_reset_states.
    btn_clear.click(
        fn=clear_ui_and_reset_states,
        inputs=[api_key_input, tb_new_token_text, btn_add_new_token, STATE_NEW_BBOX],
        outputs=[
            image_orig_state, tokens_data_state, highlight_index_state,
            image_filename_state, api_key_input, image_input_file, image_output_display,
            df_label_input, tb_token_editor, dd_tag_selector,
            tb_new_token_text, btn_add_new_token, STATE_NEW_BBOX,
            status_output
        ],
        api_name=False
    )

    # NOTE(review): btn_add_new_token has no click handler in this file, and the
    # imported add_new_bbox_mode / append_new_token are not wired to any event
    # here - confirm whether the manual-bbox flow is connected elsewhere.
if __name__ == "__main__":
    # Start the app; announce any launch/runtime failure on stdout and let it propagate.
    try:
        app.launch()
    except Exception as launch_error:
        print(f"Error crítico durante la ejecución de la aplicación: {str(launch_error)}")
        raise