Spaces:

lucasgagneten
/

OCR-NER-Facturas

Sleeping

File size: 11,717 Bytes

b26ec1d
a3dc003
 
 
 
f35ce5e
a3dc003
 
 
 
 
f35ce5e
cd12bfc
 
 
a3dc003
 
cd12bfc
 
 
b26ec1d
f35ce5e
 
b26ec1d
f35ce5e
 
b26ec1d
f35ce5e
a3dc003
f35ce5e
 
2c6746f
 
fe821af
f35ce5e
 
 
cd12bfc
2c6746f
fe821af
f35ce5e
2c6746f
 
 
fe821af
 
 
 
 
f35ce5e
a3dc003
 
 
 
2c6746f
 
 
f35ce5e
 
 
 
 
 
 
 
 
 
 
 
 
 
2c6746f
 
 
f35ce5e
fe821af
f35ce5e
 
 
 
 
 
fe821af
f35ce5e
a3dc003
cd12bfc
fe821af
f35ce5e
 
 
fe821af
 
f35ce5e
fe821af
 
 
 
 
 
 
 
 
 
f35ce5e
fe821af
 
 
 
 
 
 
 
 
 
 
 
 
f35ce5e
fe821af
f35ce5e
fe821af
 
 
 
 
 
 
 
a3dc003
b26ec1d
 
cd12bfc
b26ec1d
f35ce5e
cd12bfc
 
 
b26ec1d
 
 
a3dc003
fe821af
cd12bfc
 
a3dc003
f35ce5e
b26ec1d
 
 
fe821af
f35ce5e
 
 
 
 
 
2c6746f
 
 
b26ec1d
 
 
fe821af
b26ec1d
f35ce5e
a3dc003
 
 
f35ce5e
a3dc003
b26ec1d
cd12bfc
 
 
 
fe821af
 
cd12bfc
f35ce5e
a3dc003
fe821af
cd12bfc
 
f35ce5e
cd12bfc
a3dc003
 
 
cd12bfc
 
fe821af
 
 
2c6746f
f35ce5e
2c6746f
 
f35ce5e
2c6746f
fe821af
cd12bfc
2c6746f
 
b26ec1d
2c6746f
f35ce5e
b26ec1d
2c6746f
 
b26ec1d
2c6746f
b26ec1d
fe821af
 
 
 
b26ec1d
 
2c6746f
 
b26ec1d
2c6746f
f35ce5e
fe821af
 
 
 
 
b26ec1d
 
2c6746f
 
b26ec1d
fe821af
a3dc003
fe821af
 
 
 
 
 
 
 
 
 
 
a3dc003
f35ce5e
cd12bfc
 
 
 
 
 
 
b26ec1d
cd12bfc
fe821af
2c6746f
 
b26ec1d
 
f35ce5e
2c6746f
 
f35ce5e
2c6746f
a3dc003
f35ce5e
a3dc003
f35ce5e
2c6746f
 
 
 
 
 
b26ec1d
ced724a
53ab502
ced724a

import gradio as gr
from PIL import Image
import pandas as pd

# ----------------------------------------------------------------------
# 💡 1. IMPORTACIONES
# ----------------------------------------------------------------------

# ocr_processor: Funciones de OCR (Doctr) y guardado de imagen
from ocr_processor import process_and_setup 

# label_editor: Funciones de setup, edición y persistencia (JSON/ZIP)
from label_editor import (
    setup_label_components, 
    update_ui, 
    save_current_annotation_to_json, 
    export_and_zip_dataset, 
    update_dataframe_and_state, 
    display_selected_row, 
)

# image_loader: Define la UI para la carga de imagen y la API Key
from image_loader import setup_image_components 

# bbox_adder: Funciones para añadir manualmente tokens no detectados por OCR
from bbox_adder import add_new_bbox_mode, append_new_token 

# --- Función de Limpieza/Reset ---

def clear_ui_and_reset_states(api_key_input, tb_new_token_text, btn_add_new_token, state_new_bbox):
    """Produce the values that put the whole UI back into its start-up state.

    The four parameters are the current values of the components wired as
    inputs of the "clear" button; none of them is read, because the reset
    result does not depend on what the UI currently holds.

    Returns:
        A 14-element tuple whose order must match the ``outputs`` list of the
        ``btn_clear.click`` event: four state values, nine component updates
        (the new-bbox state reset sits among them at slot 12) and finally the
        status message.
    """
    print("Reiniciando la interfaz y los estados...")

    return (
        # --- Gradio state values ---
        None,                                   # 0. image_orig_state
        [],                                     # 1. tokens_data_state
        -1,                                     # 2. highlight_index_state
        None,                                   # 3. image_filename_state
        # --- Loader / viewer components ---
        gr.update(value="", visible=True),      # 4. api_key_input (emptied, shown)
        gr.update(value=None, visible=True),    # 5. image_input_file (emptied, shown)
        gr.update(value=None, visible=False),   # 6. image_output_display (hidden)
        gr.update(value=[]),                    # 7. df_label_input (no rows)
        # --- Row editors (hidden until a row is selected again) ---
        gr.update(value="", visible=False),     # 8. tb_token_editor
        gr.update(value="O", visible=False),    # 9. dd_tag_selector, back to tag "O"
        # --- Manual token-addition widgets (hidden and cleared) ---
        gr.update(value="", visible=False),     # 10. tb_new_token_text
        gr.update(visible=False),               # 11. btn_add_new_token
        None,                                   # 12. state_new_bbox
        # --- Status line ---
        "Sube una imagen para comenzar...",     # 13. status_output
    )


# --- FUNCIÓN AUXILIAR DE FLUJO: OCR y Gemini (MODIFICADA) ---

def _failed_outputs(message):
    """Build the 8-slot output tuple used whenever no OCR result can be shown.

    Keeping this in one place guarantees that every failure path returns the
    same number and order of values as the success path of ``process_image``.

    Args:
        message: Text to place in the status slot.

    Returns:
        Tuple of 8 values: cleared states and table, the status message, the
        image uploader made visible again and the image viewer hidden/emptied.
    """
    return (
        None,                                   # image_orig_state
        [],                                     # tokens_data_state
        None,                                   # image_output_display (value slot)
        [],                                     # df_label_input
        message,                                # status_output
        gr.update(visible=True),                # image_input_file stays available
        gr.update(visible=False, value=None),   # image_output_display hidden
        None,                                   # image_filename_state
    )


def process_image(image, api_key: str):
    """Run ``process_and_setup`` on an uploaded image and shape its result for the UI.

    According to the comments that accompany the import, ``process_and_setup``
    performs the OCR, the optional Gemini-assisted labelling (when an API key
    is given) and saves the image; this function only adapts its result.

    Args:
        image: Value of the image upload component; ``None`` when it is empty.
        api_key: Value of the API-key field, forwarded untouched.

    Returns:
        A tuple of 8 values matching the ``outputs`` of the upload event:
        ``(image_orig, tokens_data, highlighted_image, df_rows, status,
        uploader_update, viewer_update, image_filename)``. On any failure the
        tuple produced by ``_failed_outputs`` is returned instead; exceptions
        are caught and reported through the status slot, never raised.
    """
    if image is None:
        return _failed_outputs("Sube una imagen para comenzar...")

    try:
        # Expected result layout:
        # (image_orig, tokens_data, highlighted_image, df_data, status, image_filename)
        result = process_and_setup(image, api_key)

        # A missing result, or one without an original image, is treated as an
        # OCR failure. Checking ``result is None`` first avoids a confusing
        # "'NoneType' object is not subscriptable" status message.
        if result is None or result[0] is None:
            return _failed_outputs("Error en el procesamiento del OCR. Verifica logs.")

        image_orig, tokens_data, highlighted_image, df_data, status, image_filename = result

        # The Gradio DataFrame is fed a list of [token, ner_tag] rows.
        # The type check comes first so truthiness is only evaluated on a dict
        # (some containers raise when used in a boolean context).
        df_rows = []
        if isinstance(df_data, dict) and df_data:
            df_rows = [
                [token, tag]
                for token, tag in zip(df_data['token'], df_data['ner_tag'])
            ]

        return (
            image_orig,
            tokens_data,
            highlighted_image,
            df_rows,
            status,
            gr.update(visible=False),  # hide the uploader
            gr.update(visible=True),   # show the annotated image viewer
            image_filename,            # file name reported by process_and_setup
        )

    except Exception as e:
        # UI boundary: log the problem and surface it in the status line
        # instead of letting the event handler crash.
        print(f"Error en process_image: {str(e)}")
        return _failed_outputs(f"Error: {str(e)}")

def capture_highlight_index(evt: gr.SelectData):
    """Return the 0-based row index carried by a DataFrame selection event.

    Args:
        evt: Selection event injected by Gradio. Its ``index`` is read either
            as a sequence whose first item is the row, or as a bare row number.

    Returns:
        The selected row index, or ``-1`` when the event carries no usable
        index. A plain value is returned (not a ``gr.State`` component) so the
        state output stores an integer that later handlers can use directly.
    """
    selected = getattr(evt, "index", None)

    # Sequence form: keep only the first item; an empty sequence means
    # "nothing selected".
    if isinstance(selected, (list, tuple)):
        selected = selected[0] if selected else None

    return -1 if selected is None else selected

# --- INTERFAZ GRADIO (GR.BLOCKS) ---

with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
    # Builds the whole annotation UI (layout first, event wiring afterwards)
    # and binds it to `app`, which is launched at the bottom of the file.
    gr.Markdown(
        """
        # 🧾 Anotador NER para Facturas (LayoutXLM)
        
        **Instrucciones:** 1. **Sube** una imagen (y opcionalmente la Clave API de Gemini para asistencia de etiquetado). La imagen se guarda automáticamente en `dataset/imagenes`.
        2. **Edita** los tokens o etiquetas. Los cambios se aplican automáticamente.
        3. Haz clic en **'Guardar Anotación Actual (JSON)'** para confirmar los datos de la factura actual en `dataset/anotacion_factura.json`.
        4. Haz clic en **'Descargar Dataset Completo (.zip)'** para obtener todas las imágenes y el JSON consolidado.
        """
    )

    # --- 1. Shared state definitions ---
    # Filled by process_image (first output): the image returned as `image_orig`.
    image_orig_state = gr.State(None)
    # Filled by process_image with `tokens_data`; rewritten by the edit handlers.
    tokens_data_state = gr.State([])
    # Row index of the currently selected DataFrame row; -1 means no selection.
    highlight_index_state = gr.State(-1)
    # File name returned by process_image as `image_filename`.
    image_filename_state = gr.State(None)
    # State reserved for a manually drawn bounding box.
    # NOTE(review): in this file it is only reset by the clear button; no
    # handler writes a box into it — confirm where it is meant to be filled.
    STATE_NEW_BBOX = gr.State(value=None)

    with gr.Row():
        with gr.Column(scale=1):
            # Left column: image upload and display.
            # setup_image_components returns: api_key_input, image_input_file, image_output_display
            api_key_input, image_input_file, image_output_display = setup_image_components()

            # NOTE(review): the two bare names below are expression statements
            # with no effect; they do not call `.render()`. Whether the
            # components appear here presumably depends on how
            # setup_image_components creates them — confirm.
            api_key_input
            image_input_file

            status_output = gr.Markdown("Sube una imagen para comenzar...")
            btn_clear = gr.Button("🗑️ Quitar Imagen / Nuevo Documento", visible=True)

        with gr.Column(scale=2):
            # Right column: NER label editing.
            gr.Markdown("### 2. Edición de Etiquetas NER")

            # setup_label_components also returns the manual-addition widgets.
            # `temp_state_dummy` is not used anywhere else in this file.
            (
                df_label_input, tb_token_editor, dd_tag_selector,
                btn_save_annotation, btn_export, file_output,
                tb_new_token_text, btn_add_new_token, temp_state_dummy
            ) = setup_label_components()

            # NOTE(review): as above, this bare name (and the ones inside the
            # rows below) is a no-op expression, not a render call — confirm
            # the intended placement of these components.
            df_label_input

            # Row intended to hold the editors (token text and tag selector).
            with gr.Row(visible=True) as editor_row:
                with gr.Column(scale=2):
                    tb_token_editor
                    # Text field for the manual bounding box (meant to be shown conditionally).
                    tb_new_token_text
                with gr.Column(scale=1):
                    dd_tag_selector

            # Row intended to hold the Save / Export / Add buttons.
            with gr.Row(visible=True):
                btn_save_annotation
                btn_export
                btn_add_new_token

            file_output


    # --- EVENT WIRING ---

    # CONNECTION 1: run process_image whenever the uploaded image changes.
    # The 8 outputs must stay aligned with the 8-tuple process_image returns.
    # NOTE(review): image_output_display is listed twice (3rd and 7th slot):
    # once for the highlighted image value and once for the visibility update.
    image_input_file.change(
        fn=process_image,
        inputs=[image_input_file, api_key_input], # the API key is forwarded to process_image
        outputs=[
            image_orig_state, tokens_data_state, image_output_display, df_label_input, status_output,
            image_input_file, image_output_display, image_filename_state
        ],
        api_name=False
    )

    # CONNECTION 2: row selection in the DataFrame.
    # Step 1 stores the selected row index, step 2 loads that row into the
    # editors via display_selected_row, step 3 calls update_ui to refresh the
    # token state and the displayed image.
    df_label_input.select(
        fn=capture_highlight_index,
        inputs=None,
        outputs=[highlight_index_state],
        queue=False
    ).then(
        fn=display_selected_row,
        inputs=[tokens_data_state, highlight_index_state],
        outputs=[tb_token_editor, dd_tag_selector, highlight_index_state],
    ).then(
        fn=update_ui,
        inputs=[image_orig_state, tokens_data_state, df_label_input, highlight_index_state],
        outputs=[tokens_data_state, image_output_display],
        api_name=False
    )

    # CONNECTION 3: editing the tag or the token text (updates state, table and image).
    # Tag change: the new tag is passed as the 3rd argument, the token slot is None.
    dd_tag_selector.change(
        fn=lambda t, d, i, new_tag_val: update_dataframe_and_state(t, d, new_tag_val, None, i, 'tag'),
        inputs=[tokens_data_state, df_label_input, highlight_index_state, dd_tag_selector],
        outputs=[tokens_data_state, df_label_input],
    ).then(
        fn=update_ui,
        inputs=[image_orig_state, tokens_data_state, df_label_input, highlight_index_state],
        outputs=[tokens_data_state, image_output_display],
        api_name=False
    )

    # Token text change: the same handler chain is attached to both the blur
    # and the submit events of the token editor. The lambda does not reference
    # the loop variable, so both registrations behave identically.
    token_update_events = [tb_token_editor.blur, tb_token_editor.submit]
    for event in token_update_events:
        event(
            fn=lambda t, d, i, new_token_val: update_dataframe_and_state(t, d, None, new_token_val, i, 'token'),
            inputs=[tokens_data_state, df_label_input, highlight_index_state, tb_token_editor],
            outputs=[tokens_data_state, df_label_input],
        ).then(
            fn=update_ui,
            inputs=[image_orig_state, tokens_data_state, df_label_input, highlight_index_state],
            outputs=[tokens_data_state, image_output_display],
            api_name=False
        )

    # CONNECTION 4: save the current annotation and export the dataset.
    # Both handlers receive the same three states and write to the file
    # output and the status line.
    btn_save_annotation.click(
        fn=save_current_annotation_to_json,
        inputs=[image_orig_state, tokens_data_state, image_filename_state],
        outputs=[file_output, status_output],
        api_name=False
    )

    btn_export.click(
        fn=export_and_zip_dataset,
        inputs=[image_orig_state, tokens_data_state, image_filename_state],
        outputs=[file_output, status_output],
        api_name=False
    )

    # CONNECTION 5: clear everything and start over.
    # The 14 outputs must stay aligned with the 14-tuple returned by
    # clear_ui_and_reset_states.
    btn_clear.click(
        fn=clear_ui_and_reset_states,
        inputs=[api_key_input, tb_new_token_text, btn_add_new_token, STATE_NEW_BBOX], # handler ignores these values
        outputs=[
            image_orig_state, tokens_data_state, highlight_index_state,
            image_filename_state, api_key_input, image_input_file, image_output_display, # api_key_input is cleared too
            df_label_input, tb_token_editor, dd_tag_selector,
            tb_new_token_text, btn_add_new_token, STATE_NEW_BBOX,
            status_output
        ],
        api_name=False
    )


if __name__ == "__main__":
    # Script entry point: start the Gradio app. Any failure is printed and
    # then re-raised so the process still exits with the original traceback.
    try:
        app.launch()
    except Exception as launch_error:
        failure_message = f"Error crítico durante la ejecución de la aplicación: {launch_error!s}"
        print(failure_message)
        raise