# %%
import urllib.parse
from datetime import datetime
import pandas as pd
import time
import requests

today_rev = datetime.now().strftime("%Y%m%d")

# url = 'https://api.os.uk/search/places/v1/uprn?%s'
# params = urllib.parse.urlencode({'uprn':<UPRN>,'dataset':'LPI', 'key':os.environ["ADDRESSBASE_API_KEY"]})

# Places API
# Technical guide: https://osdatahub.os.uk/docs/places/technicalSpecification
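
# Sketch only: the commented-out lines above outline a single-UPRN lookup against the
# /uprn endpoint. The helper below is an illustrative, untested sketch of that call
# (uprn_api_query is not used by the rest of this script); it assumes the same LPI
# dataset and returns the raw 'results' list from the JSON response, or an empty list
# if the request fails.
def uprn_api_query(uprn, api_key):
    uprn_url = 'https://api.os.uk/search/places/v1/uprn?%s'
    uprn_params = urllib.parse.urlencode({'uprn': uprn,
                                          'dataset': 'LPI',
                                          'key': api_key})
    uprn_response = requests.get(uprn_url % uprn_params)
    if uprn_response.status_code == 200:
        return uprn_response.json().get('results', [])
    return []
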
def places_api_query(query, api_key, query_type):

    def make_api_call(url):
        max_retries = 3
        retries = 0

        while retries < max_retries:
            try:
                response = requests.get(url)

                if response.status_code == 200:
                    # If successful, return the response
                    return response
                elif response.status_code == 429:
                    # If rate limited, wait for 3 seconds before retrying
                    print("Rate limited. Retrying in 3 seconds...")
                    time.sleep(3)
                    retries += 1
                else:
                    # For other errors, return the response
                    return response
            except Exception as e:
                print("Error:", str(e))
                retries += 1

        # If maximum retries reached, return None
        return None
    if api_key:
        overall_tic = time.perf_counter()

        #filter_code_lsc = "LOGICAL_STATUS_CODE:1"
        filter_code_lpi_lsc = "LPI_LOGICAL_STATUS_CODE:1"

        concat_results = []

        if query_type == "Address":
            url = 'https://api.os.uk/search/places/v1/find?%s'
            params = urllib.parse.urlencode({'query': query,
                                             'dataset': 'LPI',
                                             'key': api_key,
                                             'maxresults': 20,
                                             'minmatch': 0.70,  # This includes partial matches
                                             'matchprecision': 2,
                                             'fq': filter_code_lpi_lsc,
                                             'lr': 'EN'})

            response = None  # Ensure response is defined even if the call below raises
            try:
                request_text = url % params
                #print(request_text)
                response = make_api_call(request_text)
            except Exception as e:
                print(str(e))

            if response is not None:
                if response.status_code == 200:
                    # Process the response
                    print("Successful response")
                    #print("Successful response:", response.json())
                else:
                    print("Error:", response.status_code)
            else:
                print("Maximum retries reached. Error occurred.")
                return pd.DataFrame()  # Return blank dataframe

            # Load JSON response
            response_data = response.json()

            # Extract 'results' part
            try:
                results = response_data['results']
                concat_results.extend(results)
            except Exception as e:
                print(str(e))
                return pd.DataFrame()  # Return blank dataframe
        # If querying a postcode, use the postcode endpoint with pagination
        elif query_type == "Postcode":
            max_results_requested = 100
            remaining_calls = 1
            totalresults = max_results_requested
            call_number = 1

            while remaining_calls > 0 and call_number <= 10:
                offset = (call_number - 1) * max_results_requested
                #print("Remaining to query:", remaining_calls)

                url = 'https://api.os.uk/search/places/v1/postcode?%s'
                params = urllib.parse.urlencode({'postcode': query,
                                                 'dataset': 'LPI',
                                                 'key': api_key,
                                                 'maxresults': max_results_requested,
                                                 'offset': offset,
                                                 #'fq': filter_code_lsc,
                                                 'fq': filter_code_lpi_lsc,
                                                 'lr': 'EN'})

                response = None  # Ensure response is defined even if the call below raises
                try:
                    request_text = url % params
                    #print(request_text)
                    response = make_api_call(request_text)
                except Exception as e:
                    print(str(e))

                if response is not None:
                    if response.status_code == 200:
                        totalresults = response.json()['header']['totalresults']
                        print("Successful response")
                        print("Total results:", totalresults)

                        remaining_calls = totalresults - (max_results_requested * call_number)
                        call_number += 1

                        # Concatenate results together
                        try:
                            results = response.json()['results']
                            concat_results.extend(results)
                        except Exception as e:
                            print("Result concat failed with error: ", str(e))
                            concat_results.append({"invalid_request": True, "POSTCODE_LOCATOR": query})
                    else:
                        print("Error:", response.status_code, "For postcode: ", query, " With query: ", request_text)
                        concat_results.append({"invalid_request": True, "POSTCODE_LOCATOR": query})
                        return pd.DataFrame(data={"invalid_request": [True], "POSTCODE_LOCATOR": [query]}, index=[0])  # Return single-row error dataframe
                else:
                    print("Maximum retries reached. Error occurred.")
                    return pd.DataFrame()  # Return blank dataframe
    else:
        print("No API key provided.")
        return pd.DataFrame()  # Return blank dataframe
    # Convert 'results' to DataFrame
    # Check if the 'LPI' sub-branch exists in the JSON response
    #print(concat_results)
    if concat_results and 'LPI' in concat_results[-1]:
        #print("LPI in result columns")
        df = pd.json_normalize(concat_results)
        df.rename(columns=lambda x: x.replace('LPI.', ''), inplace=True)
    else:
        # Normalize the entire JSON data if the 'LPI' sub-branch doesn't exist
        df = pd.json_normalize(concat_results)

    # Ensure df is a DataFrame, even if it has a single row
    if isinstance(df, pd.Series):
        print("This is a series!")
        df = df.to_frame().T  # Convert the Series to a DataFrame with a single row

    overall_toc = time.perf_counter()
    time_out = f"The API call took {overall_toc - overall_tic:0.1f} seconds"
    print(time_out)

    return df
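
# %%
# Example usage sketch (assumptions: the key is supplied via an ADDRESSBASE_API_KEY
# environment variable, and the address and postcode below are placeholder values for
# illustration). With no key set, places_api_query simply returns an empty DataFrame.
if __name__ == "__main__":
    import os
    example_key = os.environ.get("ADDRESSBASE_API_KEY", "")
    address_df = places_api_query("10 Downing Street, London", example_key, query_type="Address")
    postcode_df = places_api_query("SW1A 2AA", example_key, query_type="Postcode")
    print(address_df.shape, postcode_df.shape)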