Spaces:

lucasgagneten
/

OCR-NER-Facturas

Sleeping

App Files Files Community

Lucas Gagneten committed on Oct 25

Commit

f35ce5e

1 Parent(s): a3dc003

Gemini client

Browse files

Files changed (7) hide show

app.py +64 -117
gemini_ner_client.py +127 -0
image_loader.py +9 -2
label_editor.py +96 -50
ocr_processor.py +12 -5
requirements.txt +3 -0
setup.sh +0 -5

app.py CHANGED Viewed

@@ -3,12 +3,13 @@ from PIL import Image
 import pandas as pd
 # ----------------------------------------------------------------------
-# 💡 1. IMPORTACIONES ACTUALIZADAS
 # ----------------------------------------------------------------------
 # ocr_processor: Funciones de OCR (Doctr) y guardado de imagen
 from ocr_processor import process_and_setup
 from label_editor import (
     setup_label_components,
     update_ui,
@@ -16,26 +17,28 @@ from label_editor import (
     export_and_zip_dataset,
     update_dataframe_and_state,
     display_selected_row,
-    # ALL_NER_TAGS ya no es necesario si solo se usa en label_editor.py
 )
-# 💡 Importar las funciones de adición de BBox
-from bbox_adder import add_new_bbox_mode, append_new_token
-# --- Función de Limpieza (Mantener) ---
-def clear_ui_and_reset_states(tb_new_token_text, btn_add_new_token, state_new_bbox):
-    """Limpia los componentes de la interfaz y resetea los estados."""
     print("Reiniciando la interfaz y los estados...")
     # Valores de reseteo para los estados de Gradio
-    reset_image_orig_state = None
-    reset_tokens_data_state = []
-    reset_highlight_index_state = -1
     reset_image_filename_state = None
     # Actualizaciones para los componentes de la interfaz
     image_input_update = gr.update(value=None, visible=True)
     image_output_update = gr.update(value=None, visible=False)
     df_update = gr.update(value=[])
@@ -44,7 +47,7 @@ def clear_ui_and_reset_states(tb_new_token_text, btn_add_new_token, state_new_bb
     tb_update = gr.update(value="", visible=False)
     dd_update = gr.update(value="O", visible=False)
-    # 💡 Componentes de Adición (ocultar y resetear)
     tb_new_token_update = gr.update(value="", visible=False)
     btn_add_token_update = gr.update(visible=False)
     reset_new_bbox_state = None
@@ -52,34 +55,42 @@ def clear_ui_and_reset_states(tb_new_token_text, btn_add_new_token, state_new_bb
     status_update = "Sube una imagen para comenzar..."
     return (
-        reset_image_orig_state,         # 0. image_orig_state
-        reset_tokens_data_state,        # 1. tokens_data_state
-        reset_highlight_index_state,    # 2. highlight_index_state
-        reset_image_filename_state,     # 3. image_filename_state
-        image_input_update,             # 4. image_input_file
-        image_output_update,            # 5. image_output_display
-        df_update,                      # 6. df_label_input
-        tb_update,                      # 7. tb_token_editor
-        dd_update,                      # 8. dd_tag_selector
-        tb_new_token_update,            # 9. tb_new_token_text 💡
-        btn_add_token_update,           # 10. btn_add_new_token 💡
-        reset_new_bbox_state,           # 11. state_new_bbox 💡
-        status_update                   # 12. status_output
     )
-# --- FUNCIONES AUXILIARES DE FLUJO ---
-def process_image(image):
-    """Ejecuta el OCR y el preprocesamiento inicial, guardando la imagen."""
     if image is None:
         return None, [], None, [], "Sube una imagen para comenzar...", gr.update(visible=True), gr.update(visible=False, value=None), None
     try:
-        # process_and_setup retorna: image_orig, tokens_data, highlighted_image, df_data, status, image_filename
-        result = process_and_setup(image)
         if result[0] is None:
             return None, [], None, [], "Error en el procesamiento del OCR. Verifica logs.", gr.update(visible=True), gr.update(visible=False, value=None), None
         image_orig, tokens_data, highlighted_image, df_data, status, image_filename = result
@@ -90,6 +101,7 @@ def process_image(image):
             for t, n in zip(df_data['token'], df_data['ner_tag']):
                 df_rows.append([t, n])
         return (
             image_orig,
             tokens_data,
@@ -103,36 +115,15 @@ def process_image(image):
     except Exception as e:
         print(f"Error en process_image: {str(e)}")
-        # Nota: Los 8 outputs deben coincidir con la CONEXIÓN 1
         return None, [], None, [], f"Error: {str(e)}", gr.update(visible=True), gr.update(visible=False, value=None), None
 def capture_highlight_index(evt: gr.SelectData):
     """Return the 0-based row index selected in the DataFrame.
     Args:
         evt: Gradio selection event; the first element of ``evt.index`` is
             used as the row index.
     Returns:
         The selected row index, or ``-1`` when the event is missing or
         carries no usable row index.
     """
     if evt and evt.index is not None and evt.index[0] is not None:
         return evt.index[0]
     # Return the plain integer sentinel. Returning a gr.State component here
     # would hand a component object, not -1, to whatever consumes the index.
     return -1
-def setup_image_components():
-    """Define los componentes de carga y visualización de imagen."""
-    image_input_file = gr.Image(
-                type="pil",
-                label="1. Cargar Imagen de Factura",
-                sources=["upload"],
-                height=300,
-                interactive=True,
-                visible=True
-            )
-    image_output_display = gr.Image(
-        type="pil",
-        label="Factura con Bounding Box Resaltado (Haga clic y arrastre para añadir BBox)",
-        interactive=True,
-        height=800,
-        visible=False
-    )
-    return image_input_file, image_output_display
 # --- INTERFAZ GRADIO (GR.BLOCKS) ---
 with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
@@ -140,7 +131,7 @@ with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
         """
         # 🧾 Anotador NER para Facturas (LayoutXLM)
-        **Instrucciones:** 1. **Sube** una imagen. La imagen se guarda automáticamente en `dataset/imagenes`.
         2. **Edita** los tokens o etiquetas. Los cambios se aplican automáticamente.
         3. Haz clic en **'Guardar Anotación Actual (JSON)'** para confirmar los datos de la factura actual en `dataset/anotacion_factura.json`.
         4. Haz clic en **'Descargar Dataset Completo (.zip)'** para obtener todas las imágenes y el JSON consolidado.
@@ -152,14 +143,17 @@ with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
     tokens_data_state = gr.State([])
     highlight_index_state = gr.State(-1)
     image_filename_state = gr.State(None)
-    # 💡 Estado para el BBox Manual
-    STATE_NEW_BBOX = gr.State(value=None)
     with gr.Row():
         with gr.Column(scale=1):
             # Columna Izquierda: Carga y Visualización
-            image_input_file, image_output_display = setup_image_components()
             status_output = gr.Markdown("Sube una imagen para comenzar...")
             btn_clear = gr.Button("🗑️ Quitar Imagen / Nuevo Documento", visible=True)
@@ -168,11 +162,11 @@ with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
             # Columna Derecha: Edición de Etiquetas
             gr.Markdown("### 2. Edición de Etiquetas NER")
-            # 💡 DESESTRUCTURACIÓN ACTUALIZADA: Capturar los 3 nuevos componentes
             (
                 df_label_input, tb_token_editor, dd_tag_selector,
                 btn_save_annotation, btn_export, file_output,
-                tb_new_token_text, btn_add_new_token, temp_state_dummy # state_new_bbox es STATE_NEW_BBOX
             ) = setup_label_components()
             # Dataframe
@@ -182,16 +176,15 @@ with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
             with gr.Row(visible=True) as editor_row:
                 with gr.Column(scale=2):
                     tb_token_editor
-                    # 💡 NUEVO COMPONENTE: Campo de texto para el BBox manual
                     tb_new_token_text
                 with gr.Column(scale=1):
                     dd_tag_selector
-            # Contenedor para los botones de Guardar/Descargar
             with gr.Row(visible=True):
                 btn_save_annotation
                 btn_export
-                # 💡 NUEVO BOTÓN: Agregar token manualmente
                 btn_add_new_token
             file_output
@@ -199,10 +192,10 @@ with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
     # --- CONEXIONES DE EVENTOS ---
-    # CONEXIÓN 1: EJECUTAR OCR
     image_input_file.change(
         fn=process_image,
-        inputs=[image_input_file],
         outputs=[
             image_orig_state, tokens_data_state, image_output_display, df_label_input, status_output,
             image_input_file, image_output_display, image_filename_state
@@ -210,7 +203,7 @@ with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
         api_name=False
     )
-    # CONEXIÓN 2: Selección de FILA (Mantener)
     df_label_input.select(
         fn=capture_highlight_index,
         inputs=None,
@@ -227,8 +220,7 @@ with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
         api_name=False
     )
-    # CONEXIÓN 3: Edición de Tag o Token (Mantener)
-    # ... (Lógica de dd_tag_selector.change y tb_token_editor.blur/submit se mantiene) ...
     dd_tag_selector.change(
         fn=lambda t, d, i, new_tag_val: update_dataframe_and_state(t, d, new_tag_val, None, i, 'tag'),
         inputs=[tokens_data_state, df_label_input, highlight_index_state, dd_tag_selector],
@@ -252,52 +244,8 @@ with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
             outputs=[tokens_data_state, image_output_display],
             api_name=False
         )
-    # ----------------------------------------------------------------------
-    # 💡 CONEXIONES DE ADICIÓN MANUAL DE BBOX
-    # ----------------------------------------------------------------------
-    # 1. Capturar la selección en la imagen (Arrastrar el ratón sobre image_output_display)
-    image_output_display.select(
-        fn=add_new_bbox_mode,
-        inputs=[tokens_data_state, image_orig_state],
-        outputs=[
-            tb_token_editor,          # 1. Oculta editor existente (si estaba visible)
-            dd_tag_selector,          # 2. Muestra selector de tag (si estaba oculto)
-            tb_new_token_text,        # 3. Muestra campo de texto para nuevo token 👈 ESTO LOS HACE VISIBLES
-            btn_add_new_token,        # 4. Muestra botón de adición 👈 ESTO LOS HACE VISIBLES
-            STATE_NEW_BBOX            # 5. Guarda coordenadas
-        ]
-    )
-    # 2. Conectar el botón para agregar el nuevo token
-    btn_add_new_token.click(
-        fn=append_new_token,
-        inputs=[
-            tokens_data_state,
-            image_orig_state,
-            STATE_NEW_BBOX,
-            tb_new_token_text,
-            dd_tag_selector
-        ],
-        outputs=[
-            tokens_data_state,        # 1. Actualiza el estado de tokens
-            df_label_input,           # 2. Actualiza la tabla UI
-            tb_token_editor,          # 3. Oculta tb_token_editor
-            tb_new_token_text,        # 4. Oculta y limpia tb_new_token_text
-            btn_add_new_token,        # 5. Oculta btn_add_new_token
-            STATE_NEW_BBOX            # 6. Limpia estado BBox
-        ]
-    ).then(
-        # Sincronizar la imagen y el estado después de agregar el token
-        fn=update_ui,
-        inputs=[image_orig_state, tokens_data_state, df_label_input, highlight_index_state],
-        outputs=[tokens_data_state, image_output_display]
-    )
-    # ----------------------------------------------------------------------
-    # CONEXIÓN 4: Guardar y Exportar (Mantener)
     btn_save_annotation.click(
         fn=save_current_annotation_to_json,
         inputs=[image_orig_state, tokens_data_state, image_filename_state],
@@ -305,7 +253,6 @@ with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
         api_name=False
     )
-    # CONEXIÓN 4: Exportar y Comprimir (ZIP)
     btn_export.click(
         fn=export_and_zip_dataset,
         inputs=[image_orig_state, tokens_data_state, image_filename_state],
@@ -313,15 +260,15 @@ with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
         api_name=False
     )
-    # CONEXIÓN 5: Limpiar y Reiniciar
     btn_clear.click(
         fn=clear_ui_and_reset_states,
-        inputs=[tb_new_token_text, btn_add_new_token, STATE_NEW_BBOX], # 💡 Inputs de limpieza
         outputs=[
             image_orig_state, tokens_data_state, highlight_index_state,
-            image_filename_state, image_input_file, image_output_display,
             df_label_input, tb_token_editor, dd_tag_selector,
-            tb_new_token_text, btn_add_new_token, STATE_NEW_BBOX, # 💡 Outputs de limpieza
             status_output
         ],
         api_name=False

 import pandas as pd
 # ----------------------------------------------------------------------
+# 💡 1. IMPORTACIONES
 # ----------------------------------------------------------------------
 # ocr_processor: Funciones de OCR (Doctr) y guardado de imagen
 from ocr_processor import process_and_setup
+# label_editor: Funciones de setup, edición y persistencia (JSON/ZIP)
 from label_editor import (
     setup_label_components,
     update_ui,
     export_and_zip_dataset,
     update_dataframe_and_state,
     display_selected_row,
 )
+# image_loader: Define la UI para la carga de imagen y la API Key
+from image_loader import setup_image_components
+# bbox_adder: Funciones para añadir manualmente tokens no detectados por OCR
+from bbox_adder import add_new_bbox_mode, append_new_token
+# --- Función de Limpieza/Reset ---
+def clear_ui_and_reset_states(api_key_input, tb_new_token_text, btn_add_new_token, state_new_bbox):
+    """Limpia los componentes de la interfaz y resetea los estados a su valor inicial."""
     print("Reiniciando la interfaz y los estados...")
     # Valores de reseteo para los estados de Gradio
+    reset_image_orig_state = None
+    reset_tokens_data_state = []
+    reset_highlight_index_state = -1
     reset_image_filename_state = None
     # Actualizaciones para los componentes de la interfaz
+    api_key_update = gr.update(value="", visible=True)
     image_input_update = gr.update(value=None, visible=True)
     image_output_update = gr.update(value=None, visible=False)
     df_update = gr.update(value=[])
     tb_update = gr.update(value="", visible=False)
     dd_update = gr.update(value="O", visible=False)
+    # Componentes de Adición (ocultar y resetear)
     tb_new_token_update = gr.update(value="", visible=False)
     btn_add_token_update = gr.update(visible=False)
     reset_new_bbox_state = None
     status_update = "Sube una imagen para comenzar..."
     return (
+        reset_image_orig_state,          # 0. image_orig_state
+        reset_tokens_data_state,         # 1. tokens_data_state
+        reset_highlight_index_state,     # 2. highlight_index_state
+        reset_image_filename_state,      # 3. image_filename_state
+        api_key_update,                  # 4. api_key_input 💡
+        image_input_update,              # 5. image_input_file
+        image_output_update,             # 6. image_output_display
+        df_update,                       # 7. df_label_input
+        tb_update,                       # 8. tb_token_editor
+        dd_update,                       # 9. dd_tag_selector
+        tb_new_token_update,             # 10. tb_new_token_text
+        btn_add_token_update,            # 11. btn_add_new_token
+        reset_new_bbox_state,            # 12. state_new_bbox
+        status_update                    # 13. status_output
     )
+# --- FUNCIÓN AUXILIAR DE FLUJO: OCR y Gemini (MODIFICADA) ---
+def process_image(image, api_key: str):
+    """
+    Ejecuta el OCR, la inferencia de Gemini (si hay API Key) y el preprocesamiento
+    inicial, guardando la imagen.
+    """
+    # 💡 La función ahora requiere api_key
     if image is None:
+        # Nota: 8 outputs para la CONEXIÓN 1
         return None, [], None, [], "Sube una imagen para comenzar...", gr.update(visible=True), gr.update(visible=False, value=None), None
     try:
+        # process_and_setup requiere image y api_key (la lógica de consulta condicional está dentro de ocr_processor)
+        # Retorna: image_orig, tokens_data, highlighted_image, df_data, status, image_filename
+        result = process_and_setup(image, api_key)
         if result[0] is None:
+            # Nota: 8 outputs para la CONEXIÓN 1
             return None, [], None, [], "Error en el procesamiento del OCR. Verifica logs.", gr.update(visible=True), gr.update(visible=False, value=None), None
         image_orig, tokens_data, highlighted_image, df_data, status, image_filename = result
             for t, n in zip(df_data['token'], df_data['ner_tag']):
                 df_rows.append([t, n])
+        # Nota: 8 outputs para la CONEXIÓN 1
         return (
             image_orig,
             tokens_data,
     except Exception as e:
         print(f"Error en process_image: {str(e)}")
+        # Nota: 8 outputs para la CONEXIÓN 1
         return None, [], None, [], f"Error: {str(e)}", gr.update(visible=True), gr.update(visible=False, value=None), None
 def capture_highlight_index(evt: gr.SelectData):
     """Return the 0-based row index selected in the DataFrame.
     Args:
         evt: Gradio selection event; the first element of ``evt.index`` is
             used as the row index.
     Returns:
         The selected row index, or ``-1`` when the event is missing or
         carries no usable row index.
     """
     if evt and evt.index is not None and evt.index[0] is not None:
         return evt.index[0]
     # Return the plain integer sentinel. Returning a gr.State component here
     # would hand a component object, not -1, to whatever consumes the index.
     return -1
 # --- INTERFAZ GRADIO (GR.BLOCKS) ---
 with gr.Blocks(title="Anotador NER de Facturas (Doctr/LayoutXLM)") as app:
         """
         # 🧾 Anotador NER para Facturas (LayoutXLM)
+        **Instrucciones:** 1. **Sube** una imagen (y opcionalmente la Clave API de Gemini para asistencia de etiquetado). La imagen se guarda automáticamente en `dataset/imagenes`.
         2. **Edita** los tokens o etiquetas. Los cambios se aplican automáticamente.
         3. Haz clic en **'Guardar Anotación Actual (JSON)'** para confirmar los datos de la factura actual en `dataset/anotacion_factura.json`.
         4. Haz clic en **'Descargar Dataset Completo (.zip)'** para obtener todas las imágenes y el JSON consolidado.
     tokens_data_state = gr.State([])
     highlight_index_state = gr.State(-1)
     image_filename_state = gr.State(None)
+    STATE_NEW_BBOX = gr.State(value=None) # Estado para BBox Manual
     with gr.Row():
         with gr.Column(scale=1):
             # Columna Izquierda: Carga y Visualización
+            # 💡 setup_image_components retorna: api_key_input, image_input_file, image_output_display
+            api_key_input, image_input_file, image_output_display = setup_image_components()
+            # Se muestran explícitamente los componentes de entrada
+            api_key_input
+            image_input_file
             status_output = gr.Markdown("Sube una imagen para comenzar...")
             btn_clear = gr.Button("🗑️ Quitar Imagen / Nuevo Documento", visible=True)
             # Columna Derecha: Edición de Etiquetas
             gr.Markdown("### 2. Edición de Etiquetas NER")
+            # 💡 setup_label_components ya retorna los componentes de adición manual
             (
                 df_label_input, tb_token_editor, dd_tag_selector,
                 btn_save_annotation, btn_export, file_output,
+                tb_new_token_text, btn_add_new_token, temp_state_dummy
             ) = setup_label_components()
             # Dataframe
             with gr.Row(visible=True) as editor_row:
                 with gr.Column(scale=2):
                     tb_token_editor
+                    # Campo de texto para el BBox manual (se muestra condicionalmente)
                     tb_new_token_text
                 with gr.Column(scale=1):
                     dd_tag_selector
+            # Contenedor para los botones de Guardar/Descargar/Agregar
             with gr.Row(visible=True):
                 btn_save_annotation
                 btn_export
                 btn_add_new_token
             file_output
     # --- CONEXIONES DE EVENTOS ---
+    # CONEXIÓN 1: EJECUTAR OCR y Gemini (MODIFICADA)
     image_input_file.change(
         fn=process_image,
+        inputs=[image_input_file, api_key_input], # 💡 AÑADIR api_key_input
         outputs=[
             image_orig_state, tokens_data_state, image_output_display, df_label_input, status_output,
             image_input_file, image_output_display, image_filename_state
         api_name=False
     )
+    # CONEXIÓN 2: Selección de FILA
     df_label_input.select(
         fn=capture_highlight_index,
         inputs=None,
         api_name=False
     )
+    # CONEXIÓN 3: Edición de Tag o Token (Actualiza estado y UI)
     dd_tag_selector.change(
         fn=lambda t, d, i, new_tag_val: update_dataframe_and_state(t, d, new_tag_val, None, i, 'tag'),
         inputs=[tokens_data_state, df_label_input, highlight_index_state, dd_tag_selector],
             outputs=[tokens_data_state, image_output_display],
             api_name=False
         )
+    # CONEXIÓN 4: Guardar y Exportar
     btn_save_annotation.click(
         fn=save_current_annotation_to_json,
         inputs=[image_orig_state, tokens_data_state, image_filename_state],
         api_name=False
     )
     btn_export.click(
         fn=export_and_zip_dataset,
         inputs=[image_orig_state, tokens_data_state, image_filename_state],
         api_name=False
     )
+    # CONEXIÓN 5: Limpiar y Reiniciar (MODIFICADA)
     btn_clear.click(
         fn=clear_ui_and_reset_states,
+        inputs=[api_key_input, tb_new_token_text, btn_add_new_token, STATE_NEW_BBOX], # 💡 AÑADIR api_key_input
         outputs=[
             image_orig_state, tokens_data_state, highlight_index_state,
+            image_filename_state, api_key_input, image_input_file, image_output_display, # 💡 AÑADIR api_key_input
             df_label_input, tb_token_editor, dd_tag_selector,
+            tb_new_token_text, btn_add_new_token, STATE_NEW_BBOX,
             status_output
         ],
         api_name=False

gemini_ner_client.py ADDED Viewed

	@@ -0,0 +1,127 @@

+# gemini_ner_client.py
+from google import genai
+from google.genai import types
+import json
+from error_handler import ErrorHandler
+from ner_tags import BASE_TAGS, ALL_NER_TAGS
+# --- Función para generar las instrucciones del prompt ---
+def _generate_prompt_instructions(tokens_data: list):
+    """
+    Genera el texto de entrada para el modelo Gemini, incluyendo los tokens y
+    sus coordenadas normalizadas.
+    """
+    token_entries = []
+    for i, item in enumerate(tokens_data):
+        # Formato: [ID] token (x_min, y_min, x_max, y_max)
+        bbox_str = ', '.join(map(str, item['bbox_norm']))
+        token_entries.append(f"[{i}] {item['token']} ({bbox_str})")
+    # Listar las entidades base que el modelo debe identificar
+    entity_list = ', '.join(BASE_TAGS)
+    # 💡 RESTRINGIR LAS ETIQUETAS EXPLÍCITAMENTE
+    valid_tags_str = ", ".join(ALL_NER_TAGS)
+    system_prompt = (
+        "Eres un experto en reconocimiento de entidades nombradas (NER) para facturas. "
+        "Tu tarea es asignar etiquetas BIO a una lista de tokens de OCR. "
+        "El formato de salida DEBE ser un único objeto JSON con una clave 'annotations', "
+        "cuyo valor es una lista de diccionarios, donde cada diccionario tiene las claves 'id' y 'ner_tag'."
+        f"Las ÚNICAS etiquetas VÁLIDAS que puedes usar son: {valid_tags_str}. " # 💡 RESTRICCIÓN CLAVE EN EL PROMPT
+        "Si un token no es una entidad relevante, DEBES usar la etiqueta 'O'."
+        "El ID de cada token en el JSON debe ser el índice numérico que aparece entre corchetes ([ID])."
+    )
+    user_prompt = (
+        "Identifica y etiqueta las entidades BIO para los siguientes tokens. "
+        "NO cambies el ID. Incluye *todos* los IDs en la respuesta JSON. "
+        "Lista de Tokens: \n" + '\n'.join(token_entries)
+    )
+    return system_prompt, user_prompt
+# --- Función Principal de Inferencia con Gemini ---
+def get_gemini_ner_tags(api_key: str, tokens_data: list) -> list:
+    """
+    Consulta la API de Gemini para obtener etiquetas NER para los tokens de OCR.
+    Retorna la lista de tokens_data actualizada con las nuevas etiquetas.
+    """
+    # 💡 CONDICIÓN CLAVE: Solo se ejecuta si hay api_key
+    if not api_key:
+        print("Saltando NER asistido: No se proporcionó Clave API de Gemini.")
+        return tokens_data
+    # Solo proceder si hay tokens
+    if not tokens_data:
+        return tokens_data
+    try:
+        print("Iniciando NER asistido por Gemini...")
+        # 1. Inicializar el cliente
+        client = genai.Client(api_key=api_key)
+        # Usamos un modelo que soporta respuesta JSON
+        model_name = 'gemini-2.5-flash'
+        system_prompt, user_prompt = _generate_prompt_instructions(tokens_data)
+        # 2. Configurar el formato de respuesta JSON forzado
+        config = types.GenerateContentConfig(
+            system_instruction=system_prompt,
+            response_mime_type="application/json",
+            response_schema=types.Schema(
+                type=types.Type.OBJECT,
+                properties={
+                    "annotations": types.Schema(
+                        type=types.Type.ARRAY,
+                        description="Lista de anotaciones de tokens.",
+                        items=types.Schema(
+                            type=types.Type.OBJECT,
+                            properties={
+                                "id": types.Schema(type=types.Type.INTEGER, description="El índice del token original."),
+                                "ner_tag": types.Schema(type=types.Type.STRING, description="La etiqueta BIO asignada."),
+                            },
+                            required=["id", "ner_tag"]
+                        )
+                    )
+                },
+                required=["annotations"]
+            )
+        )
+        # 3. Llamar a la API
+        response = client.models.generate_content(
+            model=model_name,
+            contents=user_prompt,
+            config=config,
+        )
+        # 4. Procesar la respuesta
+        response_json = json.loads(response.text)
+        new_annotations = response_json.get('annotations', [])
+        # 5. Aplicar las nuevas etiquetas
+        updated_tokens_data = tokens_data.copy()
+        for ann in new_annotations:
+            token_id = ann.get('id')
+            ner_tag = ann.get('ner_tag')
+            if (token_id is not None and ner_tag is not None and
+                0 <= token_id < len(updated_tokens_data)):
+                # 💡 VALIDACIÓN DEL TAG GENERADO: Si el tag no existe, usa 'O' por defecto
+                if ner_tag not in ALL_NER_TAGS:
+                    print(f"Advertencia: Tag '{ner_tag}' no válido generado por Gemini. Usando 'O'.")
+                    ner_tag = 'O'
+                updated_tokens_data[token_id]['ner_tag'] = ner_tag
+        return updated_tokens_data
+    except Exception as e:
+        # Muestra el error de Gradio y retorna los datos originales
+        ErrorHandler.handle_ocr_error(f"Fallo en la inferencia de Gemini: {e}")
+        return tokens_data

image_loader.py CHANGED Viewed

@@ -4,6 +4,14 @@ import gradio as gr
 def setup_image_components():
     """Define y retorna los componentes de imagen de Gradio."""
     image_input = gr.Image(
         type="pil",
         label="1. Cargar Imagen de Factura",
@@ -20,5 +28,4 @@ def setup_image_components():
         visible=False # Ajustado para reflejar el estado inicial de app.py
     )
-    # Solo retornamos los dos componentes de imagen que definiste en app.py
-    return image_input, highlighted_image_output

 def setup_image_components():
     """Define y retorna los componentes de imagen de Gradio."""
+    api_key_input = gr.Textbox(
+        label="Clave API de Gemini (Opcional)",
+        type="password",
+        value="",
+        interactive=True,
+        visible=False
+    )
     image_input = gr.Image(
         type="pil",
         label="1. Cargar Imagen de Factura",
         visible=False # Ajustado para reflejar el estado inicial de app.py
     )
+    return api_key_input, image_input, highlighted_image_output

label_editor.py CHANGED Viewed

@@ -3,9 +3,10 @@ import json
 import pandas as pd
 import os
 import zipfile
-from error_handler import ErrorHandler # Asume que tienes este módulo
-from ner_tags import ALL_NER_TAGS # Asume que tienes este módulo
-from ocr_processor import draw_boxes # Importación forzada para evitar errores de referencia
 # --- Configuración de Directorios ---
 DATASET_BASE_DIR = "dataset"
@@ -13,12 +14,57 @@ JSON_FILENAME = "anotacion_factura.json"
 TEMP_ZIP_FILENAME = "dataset.zip"
 # --- Funciones de Configuración y UI ---
 def setup_label_components():
     """
-    Configura y retorna los componentes de edición de etiquetas, incluyendo
-    el nuevo botón 'Guardar Anotación'.
     """
     # 1. Dataframe NO INTERACTIVO (Solo para visualización y selección de fila)
@@ -27,7 +73,7 @@ def setup_label_components():
         col_count=(2, "fixed"),
         datatype=["str", "str"],
         label="Tabla de Tokens y Etiquetas (Haga clic en la FILA para seleccionar y editar abajo)",
-        interactive=False, # Deshabilitar la edición directa
         wrap=True,
         value=[]
     )
@@ -40,7 +86,8 @@ def setup_label_components():
     )
     dd_tag_selector = gr.Dropdown(
-        choices=ALL_NER_TAGS,
         label="Etiqueta NER Seleccionada",
         value="O",
         interactive=True,
@@ -48,8 +95,6 @@ def setup_label_components():
     )
     # 3. Botones y Salida
-    # Botón para guardar solo la factura actual en el JSON
     tb_new_token_text = gr.Textbox(
         label="Nuevo Texto a Agregar",
         interactive=True,
@@ -64,53 +109,57 @@ def setup_label_components():
     )
     btn_save_annotation = gr.Button("3. Guardar Anotación Actual (JSON)", variant="primary")
-    # Botón de Descargar ZIP (ahora es el paso 4)
     btn_export = gr.Button("4. Descargar Dataset Completo (.zip)", variant="secondary")
     file_output = gr.File(label="Archivo ZIP del Dataset (Imágenes + Anotaciones)")
     # 5. Nuevo Estado Temporal
     state_new_bbox = gr.State(value=None)
-    # Retornar el nuevo componente
     return (
         df_label_input, tb_token_editor, dd_tag_selector,
         btn_save_annotation, btn_export, file_output,
         tb_new_token_text, btn_add_new_token, state_new_bbox
     )
-# --- FUNCIÓN: Obtener la fila seleccionada y mostrar editores ---
 def display_selected_row(tokens_data: list, highlight_index: int):
     """
     Muestra el token y la etiqueta de la fila seleccionada en los editores externos.
     """
     if highlight_index >= 0 and highlight_index < len(tokens_data):
         token = tokens_data[highlight_index]['token']
         ner_tag = tokens_data[highlight_index]['ner_tag']
-        # Muestra los componentes
-        visible_update = gr.update(visible=True)
         return (
-            gr.update(value=token, visible=True),        # tb_token_editor
-            gr.update(value=ner_tag, visible=True),      # dd_tag_selector
             highlight_index
         )
     # Si no hay selección válida, oculta los componentes
-    hidden_update = gr.update(visible=False)
-    return gr.update(value="", visible=False), hidden_update, -1
-# --- FUNCIÓN: Actualizar el Dataframe y el estado de los tokens ---
 def update_dataframe_and_state(tokens_data: list, df_data_current, new_tag: str, new_token: str, row_index: int, update_type: str):
     """
     Función unificada para actualizar la lista de tokens (estado) y el Dataframe (UI).
     """
-    # Manejar el caso de entrada como Pandas DataFrame (por seguridad)
     if isinstance(df_data_current, pd.DataFrame):
         df_list = df_data_current.values.tolist()
     else:
@@ -119,37 +168,44 @@ def update_dataframe_and_state(tokens_data: list, df_data_current, new_tag: str,
     if row_index < 0 or row_index >= len(df_list):
         return tokens_data, df_list
-    if update_type == 'tag':
         df_list[row_index][1] = new_tag
         tokens_data[row_index]['ner_tag'] = new_tag
-    elif update_type == 'token':
         df_list[row_index][0] = new_token
         tokens_data[row_index]['token'] = new_token
     return tokens_data, df_list
-# --- Función de Sincronización de UI/Estados ---
 def update_ui(image_orig, tokens_data: list, df_labels: list, highlight_index: int):
     """Actualiza la imagen resaltada basándose en el estado interno de los tokens."""
-    # Generar la imagen resaltada.
     highlighted_image = draw_boxes(image_orig, tokens_data, highlight_index)
-    # Devolver el estado interno (que ya está actualizado) y la imagen
     return tokens_data, highlighted_image
-# --- FUNCIÓN: Guardar Anotación Actual (JSON) ---
 def save_current_annotation_to_json(image_orig, tokens_data: list, image_filename: str):
     """
-    Guarda la anotación del documento actual en el archivo JSON, sobrescribe si existe.
-    Retorna mensajes de estado a Gradio.
     """
     if not tokens_data or not image_filename:
         gr.Warning("Error: No hay tokens o la imagen no fue procesada.")
-        # Retorna el path (vacío) y el mensaje de estado (que se mostrará en status_output)
         return None, "Guardado fallido: No hay datos de imagen o tokens."
     # 1. Asegurarse de que la carpeta 'dataset' exista
@@ -160,10 +216,12 @@ def save_current_annotation_to_json(image_orig, tokens_data: list, image_filenam
     W, H = image_orig.size
     new_annotations = []
     for item in tokens_data:
         new_annotations.append({
             'token': item['token'],
             'bbox_normalized': [int(b) for b in item['bbox_norm']],
-            'ner_tag': item['ner_tag']
         })
     new_document_entry = {
@@ -183,20 +241,17 @@ def save_current_annotation_to_json(image_orig, tokens_data: list, image_filenam
                 if isinstance(data, list):
                     existing_document_list = data
     except Exception:
-        # Si falla la lectura, comenzar con una lista vacía
         pass
     # 4. Consolidar: Agregar o Sobrescribir el documento actual
     is_new = True
     for i, doc in enumerate(existing_document_list):
         if doc.get('image', {}).get('name') == image_filename:
-            # Documento ya existe (lo editamos), lo sobrescribimos con la versión editada
             existing_document_list[i] = new_document_entry
             is_new = False
             break
     if is_new:
-        # Es un documento nuevo, lo añadimos al final
         existing_document_list.append(new_document_entry)
     # 5. Escribir la lista completa
@@ -216,19 +271,19 @@ def save_current_annotation_to_json(image_orig, tokens_data: list, image_filenam
         return None, f"Error en guardado: {error_msg}"
-# --- FUNCIÓN PRINCIPAL DE EXPORTACIÓN: ZIP ---
 def export_and_zip_dataset(image_orig, tokens_data: list, image_filename: str):
     """
-    1. Llama a save_current_annotation_to_json para asegurar que el último documento esté guardado.
-    2. Comprime toda la carpeta 'dataset/' en un archivo ZIP.
     """
-    # Paso 1: Asegurar que la anotación actual se guarde (para incluir los últimos cambios)
-    # Utilizamos None para evitar que los mensajes de save_current_annotation_to_json sobrescriban el status_output antes del ZIP
     save_current_annotation_to_json(image_orig, tokens_data, image_filename)
-    # Paso 2: Obtener el total de documentos para el mensaje (si el guardado fue exitoso)
     json_path = os.path.join(DATASET_BASE_DIR, JSON_FILENAME)
     total_docs = 0
     if os.path.exists(json_path):
@@ -238,33 +293,24 @@ def export_and_zip_dataset(image_orig, tokens_data: list, image_filename: str):
                 if isinstance(data, list):
                     total_docs = len(data)
         except Exception:
-            pass # Si el archivo es inválido, total_docs = 0
     if total_docs == 0:
         gr.Warning("Error: No hay datos guardados para generar el ZIP.")
         return None, "Error: No se puede generar el ZIP. El archivo JSON está vacío o no existe."
     # Paso 3: Crear el archivo ZIP
-    # Esto garantiza que el archivo se crea temporalmente y se limpia después.
     zip_path = "/tmp/dataset.zip"
     try:
-        # Asegurarse de eliminar cualquier ZIP previo que pudiera estar en /tmp
         if os.path.exists(zip_path):
             os.remove(zip_path)
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
-            # Recorrer todos los archivos y carpetas dentro de DATASET_BASE_DIR
             for root, dirs, files in os.walk(DATASET_BASE_DIR):
                 for file in files:
                     file_path = os.path.join(root, file)
-                    # La ruta que aparecerá dentro del ZIP (relativa a la carpeta 'dataset')
                     arcname = os.path.relpath(file_path, DATASET_BASE_DIR)
-                    # ❌ NO EXCLUIMOS NADA (el ZIP ya no se crea dentro del propio dataset)
-                    # Eliminamos el if file != TEMP_ZIP_FILENAME:
                     zipf.write(file_path, arcname)
         gr.Info(f"✅ Dataset listo para descargar. Contiene {total_docs} documentos.")

 import pandas as pd
 import os
 import zipfile
+# Asume que estos módulos existen en tu proyecto
+from error_handler import ErrorHandler
+from ner_tags import ALL_NER_TAGS
+from ocr_processor import draw_boxes
 # --- Directory configuration ---
 # Base folder that holds the dataset; the export code joins it with the JSON
 # file name and walks it recursively to build the ZIP archive.
 DATASET_BASE_DIR = "dataset"
 # NOTE(review): the visible export code writes to a hard-coded
 # "/tmp/dataset.zip" path and does not reference this name; confirm whether
 # this constant is still used anywhere.
 TEMP_ZIP_FILENAME = "dataset.zip"
+# ----------------------------------------------------------------------
+# 💡 FUNCIÓN AUXILIAR: Filtrado de Tags Utilizados
+# ----------------------------------------------------------------------
+def _get_available_tags(tokens_data: list, current_index: int) -> list:
+    """
+    Determina qué etiquetas 'B-' ya están en uso por otros tokens.
+    Retorna la lista de ALL_NER_TAGS filtrada, excluyendo las opciones 'B-'
+    que ya aparecen en tokens_data para otros índices (principio de no repetición).
+    """
+    used_b_tags = set()
+    # 1. Identificar qué etiquetas 'B-' ya están en uso en el documento
+    for i, item in enumerate(tokens_data):
+        # Excluimos el token actual para permitir la re-edición del tag existente
+        if i == current_index:
+            continue
+        tag = item['ner_tag']
+        # Si el tag es un tag 'B-', lo añadimos al conjunto de tags usados
+        if tag.startswith('B-'):
+            used_b_tags.add(tag)
+    # 2. Filtrar ALL_NER_TAGS
+    filtered_tags = []
+    current_tag = tokens_data[current_index]['ner_tag'] if 0 <= current_index < len(tokens_data) else None
+    for tag in ALL_NER_TAGS:
+        # La etiqueta 'O' y las etiquetas 'I-' siempre están disponibles.
+        if tag == 'O' or tag.startswith('I-'):
+            filtered_tags.append(tag)
+        # Una etiqueta 'B-' solo se añade si NO está en el conjunto de tags ya usados
+        elif tag.startswith('B-') and tag not in used_b_tags:
+            filtered_tags.append(tag)
+    # 3. Asegurar que el tag actual se pueda seleccionar (si ya estaba aplicado y fue filtrado)
+    if current_tag and current_tag.startswith('B-') and current_tag not in filtered_tags:
+         filtered_tags.append(current_tag)
+         filtered_tags.sort() # Opcional: Reordenar si se añade al final
+    return filtered_tags
+# ----------------------------------------------------------------------
 # --- Funciones de Configuración y UI ---
+# ----------------------------------------------------------------------
 def setup_label_components():
     """
+    Configura y retorna los componentes de edición de etiquetas.
     """
     # 1. Dataframe NO INTERACTIVO (Solo para visualización y selección de fila)
         col_count=(2, "fixed"),
         datatype=["str", "str"],
         label="Tabla de Tokens y Etiquetas (Haga clic en la FILA para seleccionar y editar abajo)",
+        interactive=False,
         wrap=True,
         value=[]
     )
     )
     dd_tag_selector = gr.Dropdown(
+        # Inicialmente usa la lista completa; las opciones se actualizarán dinámicamente
+        choices=ALL_NER_TAGS,
         label="Etiqueta NER Seleccionada",
         value="O",
         interactive=True,
     )
     # 3. Botones y Salida
     tb_new_token_text = gr.Textbox(
         label="Nuevo Texto a Agregar",
         interactive=True,
     )
     btn_save_annotation = gr.Button("3. Guardar Anotación Actual (JSON)", variant="primary")
     btn_export = gr.Button("4. Descargar Dataset Completo (.zip)", variant="secondary")
     file_output = gr.File(label="Archivo ZIP del Dataset (Imágenes + Anotaciones)")
     # 5. Nuevo Estado Temporal
     state_new_bbox = gr.State(value=None)
     return (
         df_label_input, tb_token_editor, dd_tag_selector,
         btn_save_annotation, btn_export, file_output,
         tb_new_token_text, btn_add_new_token, state_new_bbox
     )
+# ----------------------------------------------------------------------
+# --- FUNCIÓN: Obtener la fila seleccionada y mostrar editores (MODIFICADA) ---
+# ----------------------------------------------------------------------
 def display_selected_row(tokens_data: list, highlight_index: int):
     """
     Load the selected row's token and NER tag into the external editors.

     When the index points at a valid row, the textbox shows the token, and the
     dropdown shows the row's tag with its choices narrowed to the tags returned
     by _get_available_tags. Otherwise both editors are hidden and the dropdown
     choices are reset to the full tag list.

     Returns a 3-tuple: (textbox update, dropdown update, highlight index), the
     index being -1 when nothing valid is selected.
     """
     if not (0 <= highlight_index < len(tokens_data)):
         # No valid selection: hide the editors and restore every tag option.
         textbox_reset = gr.update(value="", visible=False)
         dropdown_reset = gr.update(value="O", choices=ALL_NER_TAGS, visible=False)
         return textbox_reset, dropdown_reset, -1

     selected_row = tokens_data[highlight_index]
     token_text = selected_row['token']
     current_tag = selected_row['ner_tag']

     # Offer only the tags still allowed for this row.
     tag_choices = _get_available_tags(tokens_data, highlight_index)

     textbox_update = gr.update(value=token_text, visible=True)
     dropdown_update = gr.update(value=current_tag, choices=tag_choices, visible=True)
     return textbox_update, dropdown_update, highlight_index
+# ----------------------------------------------------------------------
+# --- FUNCIÓN: Actualizar el Dataframe y el estado de los tokens (VALIDACIÓN) ---
+# ----------------------------------------------------------------------
 def update_dataframe_and_state(tokens_data: list, df_data_current, new_tag: str, new_token: str, row_index: int, update_type: str):
     """
     Función unificada para actualizar la lista de tokens (estado) y el Dataframe (UI).
+    Incluye validación para forzar la etiqueta a 'O' si no es válida.
     """
+    # Asegurar que se trabaja con una lista de listas
     if isinstance(df_data_current, pd.DataFrame):
         df_list = df_data_current.values.tolist()
     else:
     if row_index < 0 or row_index >= len(df_list):
         return tokens_data, df_list
+    if update_type == 'tag' and new_tag is not None:
+        # 💡 VALIDACIÓN CRÍTICA: Forzar el tag a 'O' si no está en la lista maestra
+        if new_tag not in ALL_NER_TAGS:
+            print(f"ADVERTENCIA: Tag no válido detectado ('{new_tag}') en índice {row_index}. Forzando a 'O'.")
+            new_tag = 'O'
         df_list[row_index][1] = new_tag
         tokens_data[row_index]['ner_tag'] = new_tag
+    elif update_type == 'token' and new_token is not None:
         df_list[row_index][0] = new_token
         tokens_data[row_index]['token'] = new_token
     return tokens_data, df_list
+# ----------------------------------------------------------------------
+# --- Función de Sincronización de UI/Estados (Sin Cambios) ---
+# ----------------------------------------------------------------------
 def update_ui(image_orig, tokens_data: list, df_labels: list, highlight_index: int):
     """
     Redraw the image with the token boxes for the current selection.

     Returns the token list unchanged together with the image produced by
     draw_boxes for the given highlight index. df_labels is accepted but not
     used here.
     """
     return tokens_data, draw_boxes(image_orig, tokens_data, highlight_index)
+# ----------------------------------------------------------------------
+# --- FUNCIÓN: Guardar Anotación Actual (JSON) (Mínima Modificación) ---
+# ----------------------------------------------------------------------
 def save_current_annotation_to_json(image_orig, tokens_data: list, image_filename: str):
     """
+    Guarda la anotación del documento actual en el archivo JSON.
     """
     if not tokens_data or not image_filename:
         gr.Warning("Error: No hay tokens o la imagen no fue procesada.")
         return None, "Guardado fallido: No hay datos de imagen o tokens."
     # 1. Asegurarse de que la carpeta 'dataset' exista
     W, H = image_orig.size
     new_annotations = []
     for item in tokens_data:
+        # Asegurar que el tag que se guarda es válido
+        tag_to_save = item['ner_tag'] if item['ner_tag'] in ALL_NER_TAGS else 'O'
         new_annotations.append({
             'token': item['token'],
             'bbox_normalized': [int(b) for b in item['bbox_norm']],
+            'ner_tag': tag_to_save
         })
     new_document_entry = {
                 if isinstance(data, list):
                     existing_document_list = data
     except Exception:
         pass
     # 4. Consolidar: Agregar o Sobrescribir el documento actual
     is_new = True
     for i, doc in enumerate(existing_document_list):
         if doc.get('image', {}).get('name') == image_filename:
             existing_document_list[i] = new_document_entry
             is_new = False
             break
     if is_new:
         existing_document_list.append(new_document_entry)
     # 5. Escribir la lista completa
         return None, f"Error en guardado: {error_msg}"
+# ----------------------------------------------------------------------
+# --- FUNCIÓN PRINCIPAL DE EXPORTACIÓN: ZIP (Sin Cambios) ---
+# ----------------------------------------------------------------------
 def export_and_zip_dataset(image_orig, tokens_data: list, image_filename: str):
     """
+    Asegura que la anotación actual se guarde y luego comprime toda la carpeta 'dataset/' en un archivo ZIP.
     """
+    # Paso 1: Asegurar que la anotación actual se guarde
     save_current_annotation_to_json(image_orig, tokens_data, image_filename)
+    # Paso 2: Obtener el total de documentos para el mensaje
     json_path = os.path.join(DATASET_BASE_DIR, JSON_FILENAME)
     total_docs = 0
     if os.path.exists(json_path):
                 if isinstance(data, list):
                     total_docs = len(data)
         except Exception:
+            pass
     if total_docs == 0:
         gr.Warning("Error: No hay datos guardados para generar el ZIP.")
         return None, "Error: No se puede generar el ZIP. El archivo JSON está vacío o no existe."
     # Paso 3: Crear el archivo ZIP
     zip_path = "/tmp/dataset.zip"
     try:
         if os.path.exists(zip_path):
             os.remove(zip_path)
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
             for root, dirs, files in os.walk(DATASET_BASE_DIR):
                 for file in files:
                     file_path = os.path.join(root, file)
                     arcname = os.path.relpath(file_path, DATASET_BASE_DIR)
                     zipf.write(file_path, arcname)
         gr.Info(f"✅ Dataset listo para descargar. Contiene {total_docs} documentos.")

ocr_processor.py CHANGED Viewed

@@ -3,6 +3,7 @@ import uuid
 from PIL import Image, ImageDraw
 import numpy as np
 import cv2
 # Módulos de Doctr
 from doctr.models import ocr_predictor
@@ -191,9 +192,9 @@ def save_image_to_dataset(image: Image.Image) -> str:
 # --- Función de Flujo Principal ---
-def process_and_setup(image_file):
     """
-    Función inicial: OCR con Doctr (incluyendo preprocesamiento),
     configuración del estado y guardar la imagen.
     """
     if image_file is None:
@@ -201,13 +202,19 @@ def process_and_setup(image_file):
         return None, [], None, empty_df, None, None
     # 💡 Llama a la función de OCR basada en Doctr
-    # image_orig es la imagen SIN preprocesar
     image_orig, tokens_data, _ = get_ocr_data_doctr(image_file)
     if image_orig is None:
         empty_df = {'token': [], 'ner_tag': []}
         return None, [], None, empty_df, "Error fatal al procesar el OCR con Doctr. Revise el log.", None
     # --- Guardar la Imagen Original ---
     image_filename = save_image_to_dataset(image_orig)
@@ -225,7 +232,7 @@ def process_and_setup(image_file):
     # La imagen inicial no tiene resaltado
     highlighted_image = image_orig.copy()
-    msg = f"OCR de Doctr completado. Tokens detectados: {len(tokens_data)}"
     print(msg)
     return image_orig, tokens_data, highlighted_image, df_data, msg, image_filename

 from PIL import Image, ImageDraw
 import numpy as np
 import cv2
+from gemini_ner_client import get_gemini_ner_tags
 # Módulos de Doctr
 from doctr.models import ocr_predictor
 # --- Función de Flujo Principal ---
+def process_and_setup(image_file, api_key: str): # 💡 ACEPTA LA LLAVE API
     """
+    Función inicial: OCR con Doctr, **NER asistido por Gemini**,
     configuración del estado y guardar la imagen.
     """
     if image_file is None:
         return None, [], None, empty_df, None, None
     # 💡 Llama a la función de OCR basada en Doctr
     image_orig, tokens_data, _ = get_ocr_data_doctr(image_file)
     if image_orig is None:
         empty_df = {'token': [], 'ner_tag': []}
         return None, [], None, empty_df, "Error fatal al procesar el OCR con Doctr. Revise el log.", None
+    # 💡 PASO NUEVO: NER Asistido por Gemini (CONDICIONAL)
+    msg_ner_assist = ""
+    if api_key:
+        # Llama a la función que SÓLO se ejecuta si api_key no es None/vacío
+        tokens_data = get_gemini_ner_tags(api_key, tokens_data)
+        msg_ner_assist = " (NER asistido)"
     # --- Guardar la Imagen Original ---
     image_filename = save_image_to_dataset(image_orig)
     # La imagen inicial no tiene resaltado
     highlighted_image = image_orig.copy()
+    msg = f"OCR de Doctr completado. Tokens detectados: {len(tokens_data)}.{msg_ner_assist}"
     print(msg)
     return image_orig, tokens_data, highlighted_image, df_data, msg, image_filename

requirements.txt CHANGED Viewed

@@ -10,6 +10,9 @@ python-doctr==0.7.0
 # Necesario para manejar arrays de imagen (np.array) y funcionalidades de CV
 opencv-python
 # --- Utilidades ---
 pillow==10.1.0     # Manipulación de imágenes (PIL)
 pandas             # Manejo de datos (DataFrames de OCR/Tokens)

 # Necesario para manejar arrays de imagen (np.array) y funcionalidades de CV
 opencv-python
+# --- Requerimientos de IA Generativa (Gemini) ---
+google-genai       # Librería oficial para interactuar con la API de Gemini
 # --- Utilidades ---
 pillow==10.1.0     # Manipulación de imágenes (PIL)
 pandas             # Manejo de datos (DataFrames de OCR/Tokens)

setup.sh DELETED Viewed

@@ -1,5 +0,0 @@
-#!/bin/bash
-# Instalar Tesseract OCR y el paquete de idioma español
-apt-get update
-apt-get install -y tesseract-ocr
-apt-get install -y tesseract-ocr-spa