Spaces:
Runtime error
Runtime error
| from gradio_huggingfacehub_search import HuggingfaceHubSearch | |
| from huggingface_hub import HfApi | |
| import pandas as pd | |
| import gradio as gr | |
| import duckdb | |
| import requests | |
# Root URL of the Hugging Face datasets-server REST API; endpoint paths such
# as ``/parquet`` are appended to it.
BASE_DATASETS_SERVER_URL = "https://datasets-server.huggingface.co"

# Hub API client. Nothing in the visible part of this file uses it yet.
hf_api = HfApi()

# DuckDB connection shared by every call to ``get_dataset_ddl``. Called with
# no arguments, so no database file is named here.
conn = duckdb.connect()
def get_dataset_ddl(dataset_id: str) -> str:
    """Build a ``CREATE TABLE`` statement describing a Hub dataset's columns.

    Asks the datasets server for the dataset's Parquet files, exposes the
    first one as the DuckDB view ``dataset_view`` on the module-level
    connection ``conn``, and renders that view's column names and types as
    DDL text.

    Args:
        dataset_id: Hub dataset identifier, e.g. ``"user/dataset"``.

    Returns:
        The DDL text, with one ``name type`` pair per column.

    Raises:
        requests.HTTPError: If the datasets server answers with an error
            status.
        requests.Timeout: If the datasets server does not answer in time.
        ValueError: If the response lists no Parquet file, or the first
            entry carries no URL.
    """
    view_name = "dataset_view"

    # Passing the id through ``params`` lets requests percent-encode it, so
    # an id containing ``&`` or ``#`` cannot alter the query string.
    response = requests.get(
        f"{BASE_DATASETS_SERVER_URL}/parquet",
        params={"dataset": dataset_id},
        timeout=30,  # without a timeout a stalled server blocks the UI forever
    )
    response.raise_for_status()

    # Report a missing/empty file list as a ValueError, like the missing-URL
    # case below, instead of letting ``[0]`` raise a bare IndexError.
    parquet_files = response.json().get("parquet_files") or []
    if not parquet_files:
        raise ValueError(f"No parquet files found for dataset '{dataset_id}'.")

    first_parquet_url = parquet_files[0].get("url")
    if not first_parquet_url:
        raise ValueError("No valid URL found for the first parquet file.")

    # The URL is embedded as a SQL string literal; doubling single quotes
    # keeps a quote inside the URL from terminating the literal early.
    escaped_url = first_parquet_url.replace("'", "''")
    conn.execute(
        f"CREATE OR REPLACE VIEW {view_name} AS "
        f"SELECT * FROM read_parquet('{escaped_url}');"
    )

    # Each table_info row is read positionally: index 1 is the column name,
    # index 2 is its type.
    table_info = conn.execute(f"PRAGMA table_info('{view_name}');").fetchall()
    column_data_types = ",\n\t".join(f"{row[1]} {row[2]}" for row in table_info)

    return f"\nCREATE TABLE {view_name} (\n\t{column_data_types}\n);\n"
# Gradio UI: pick a Hub dataset, press the button, and show the DDL text
# produced by ``get_dataset_ddl`` for that dataset.
with gr.Blocks() as demo:
    # NOTE(review): the two trailing glyphs look like mis-decoded emoji whose
    # original code points cannot be recovered from this file; restore them
    # from the upstream source if available.
    gr.Markdown("# Query your HF Datasets with Natural Language ππ")

    dataset_name = HuggingfaceHubSearch(
        label="Hub Dataset ID",
        placeholder="Find your favorite dataset...",
        search_type="dataset",
        value="jamescalam/world-cities-geo",
    )

    # The question box and the results table are laid out but not connected
    # to any event in the code visible here.
    query_input = gr.Textbox("", label="Ask anything...")
    btn = gr.Button("Ask 🪄")
    df = gr.DataFrame(datatype="markdown")

    # Receives the DDL string returned by the click handler.
    ddl = gr.Text("")

    btn.click(
        get_dataset_ddl,
        inputs=[dataset_name],
        outputs=[ddl],
    )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()