From 43903591349e6c2b8899b54c161b608ddf1a5ea4 Mon Sep 17 00:00:00 2001
From: Quildra
Date: Sun, 29 Sep 2024 18:56:52 +0100
Subject: [PATCH] - Start to add better handling for pokemon with unknown games and locations.

---
 Utilities/DetermineOriginGame.py | 201 ++++++++++++++++++++++++++++++-
 1 file changed, 200 insertions(+), 1 deletion(-)

diff --git a/Utilities/DetermineOriginGame.py b/Utilities/DetermineOriginGame.py
index 65adc67..9e2de8f 100644
--- a/Utilities/DetermineOriginGame.py
+++ b/Utilities/DetermineOriginGame.py
@@ -5,6 +5,7 @@ import json
 import os
 import re
 import sqlite3
+from bs4 import BeautifulSoup
 
 # Initialize the database connection
 conn = sqlite3.connect('pokemon_cache.db')
@@ -203,6 +204,31 @@ def get_pokemon_data(pokemon_name, form, cache):
         return data
     return None
 
+def get_pokemon_data_bulbapedia(pokemon_name, cache):
+    """Return the raw Bulbapedia HTML for a Pokémon, or None on failure.
+
+    The page text is looked up in `cache` first; a freshly fetched page is
+    handed to update_cache under the same key.
+    """
+    cache_key = f"pokemon_{pokemon_name}_bulbapedia"
+    if cache_key in cache:
+        return cache[cache_key]
+
+    url = f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pokémon)"
+    print(f"Fetching Pokémon data for {pokemon_name}: {url}")
+    try:
+        # A timeout keeps one stalled request from hanging the whole run.
+        response = requests.get(url, timeout=30)
+    except requests.RequestException as e:
+        print(f"Error accessing the page for {pokemon_name}: {e}")
+        return None
+    if response.status_code != 200:
+        return None
+
+    data = response.text
+    update_cache(cache_key, data)
+    return data
+
 def get_pokemon_encounter_data(pokemon_name, form, cache):
     cache_key = f"pokemon_encounter_{pokemon_name}_{form}" if form else f"pokemon_encounter_{pokemon_name}"
     if cache_key in cache:
@@ -371,6 +397,178 @@ def add_encounter_locations(pokemon_list, cache):
         print(f"Added encounter locations for {pokemon['name']} (#{pokemon['number']}) in {pokemon['earliest_game']}")
     return pokemon_list
 
+def get_marriland_page(pokemon_name, cache):
+    """Return the Marriland Pokédex HTML for a Pokémon, or None on failure."""
+    url_name = pokemon_name.lower().replace(' ', '-').replace('(', '').replace(')', '')
+    cache_key = f"marriland_{url_name}"
+    if cache_key in cache:
+        return cache[cache_key]
+
+    url = f"https://marriland.com/pokedex/{url_name}/"
+
+    try:
+        # A timeout keeps one stalled request from hanging the whole run.
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()  # Raise an exception for bad status codes
+        data = response.text
+        update_cache(cache_key, data)
+        return data
+    except requests.RequestException as e:
+        print(f"Error accessing the page for {pokemon_name}: {e}")
+        return None
+
+def is_event_pokemon(pokemon_name, cache):
+    """Report whether Marriland lists a Pokémon as event-only.
+
+    Returns True when the page has no location tables and its special
+    section carries the event-only notice, False when the page could not
+    be loaded or the notice is absent, and None when the page has no
+    'locations' section to inspect.
+    """
+    page_data = get_marriland_page(pokemon_name, cache)
+    if not page_data:
+        return False
+
+    soup = BeautifulSoup(page_data, 'html.parser')
+
+    # Find the "Where to Find" section
+    location_section = soup.find('div', id='locations')
+
+    if not location_section:
+        print(f"Could not find 'Where to Find' section for {pokemon_name}")
+        return None
+
+    special_section = soup.find('div', class_='location-special')
+    location_tables = soup.find_all('table', class_='location-table')
+
+    event_only = "Only available from events or promotions.".lower()
+    if not location_tables and special_section and event_only in special_section.get_text(strip=True).lower():
+        return True
+
+    return False
+
+def _direct_rows(table):
+    """Return the <tr> rows that sit directly under the table's own <tbody>."""
+    tbody = table.find('tbody', recursive=False)
+    if tbody is None:
+        # Without a <tbody> there is nothing to walk; skip instead of crashing.
+        return []
+    return tbody.find_all('tr', recursive=False)
+
+def _first_cell_table(row):
+    """Return the table nested directly in the row's first <td>, or None."""
+    cell = row.find('td', recursive=False)
+    if cell is None:
+        return None
+    return cell.find('table', recursive=False)
+
+def get_locations_from_bulbapedia(pokemon_name, cache):
+    """Scrape the 'Game locations' table from a Pokémon's Bulbapedia page.
+
+    Returns a dict mapping each game heading to location text, in page
+    order, or None when the page or the table cannot be found.
+    """
+    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
+    if not page_data:
+        return None
+
+    soup = BeautifulSoup(page_data, 'html.parser')
+
+    locations_section = soup.find('span', id='Game_locations')
+    if not locations_section:
+        return None
+
+    locations_table = locations_section.find_next('table', class_='roundy')
+    if not locations_table:
+        return None
+
+    game_locations = {}
+
+    # The table is a bit of a mess, with several levels of nested tables:
+    # - each row holds a nested table with all the games in a generation;
+    # - inside that is another nested table whose purpose is unclear;
+    # - inside that is a nested table with the games, either the release
+    #   pair or a single game spanning two columns;
+    # - next to the games is another nested table with the locations.
+    for generation_row in _direct_rows(locations_table):
+        generation_table = _first_cell_table(generation_row)
+        if generation_table is None:
+            continue
+        for nested_row in _direct_rows(generation_table):
+            if 'Generation' in nested_row.get_text(strip=True):
+                continue
+            games_table = _first_cell_table(nested_row)
+            if games_table is None:
+                continue
+            for games_row in _direct_rows(games_table):
+                locations_container = _first_cell_table(games_row)
+                if locations_container is None:
+                    continue
+                locations_tbody = locations_container.find('tbody', recursive=False)
+                if locations_tbody is None:
+                    continue
+                locations = locations_tbody.find_all('td')
+                for game in games_row.find_all('th'):
+                    game_name = game.get_text(strip=True)
+                    for location in locations:
+                        # NOTE: each assignment replaces the previous one, so only
+                        # the last location cell is kept per game.
+                        game_locations[game_name] = location.get_text()
+                        print(f'{game_name}: {location.get_text()}')
+
+    return game_locations
+
+def check_alternative_sources(pokemon, cache):
+    """Look up a fallback origin for a Pokémon with unknown encounter data.
+
+    Returns a (game, method) tuple; both are "Unknown" when no source helps.
+    """
+    species_data = get_species_data(pokemon['base_name'], cache)
+
+    if species_data:
+        # Check if it's a mythical Pokémon
+        if species_data.get('is_mythical', False):
+            return "Event", "Event"
+
+        # Check if it's a legendary Pokémon
+        if species_data.get('is_legendary', False):
+            return pokemon['earliest_game'], "Legendary"
+
+    event_status = is_event_pokemon(pokemon['name'], cache)
+    if event_status:
+        return "Event", "Event"
+
+    bulb_locations = get_locations_from_bulbapedia(pokemon['base_name'], cache)
+    if bulb_locations:
+        # bulb_locations is a dict keyed by game name, so [0] would raise
+        # KeyError; take the first game listed on the page instead.
+        return next(iter(bulb_locations)), "Bulbapedia"
+
+    # Check generation introduced
+    #generation = species_data.get('generation', {}).get('name', '')
+    #if generation:
+    #    gen_number = int(generation.split('-')[1])
+    #    for game in all_games:
+    #        if game != "Unknown" and get_generation(game) == gen_number:
+    #            return game, "First appearance"
+
+    return "Unknown", "Unknown"
+
+def handle_unknown_encounters(pokemon_list, cache):
+    """Fill in game and method for entries still marked "Unknown".
+
+    Mutates the dicts in pokemon_list in place and returns the same list.
+    """
+    for pokemon in pokemon_list:
+        if pokemon['earliest_game'] == "Unknown" or pokemon['obtain_method'] == "Unknown":
+            new_game, new_method = check_alternative_sources(pokemon, cache)
+            if new_game != "Unknown":
+                pokemon['earliest_game'] = new_game
+                pokemon['obtain_method'] = new_method
+                pokemon['encounter_locations'] = 'N/A'
+            print(f"Checked alternative sources for {pokemon['name']} (#{pokemon['number']}): {pokemon['earliest_game']} ({pokemon['obtain_method']})")
+    return pokemon_list
+
 # Update the main function
 if __name__ == "__main__":
     get_cached_data()
@@ -379,7 +577,8 @@ if __name__ == "__main__":
     pokemon_list_with_games = determine_earliest_games(pokemon_list, cache)
     pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache)
     pokemon_list_with_locations = add_encounter_locations(pokemon_list_adjusted, cache)
-    save_to_csv(pokemon_list_with_locations)
+    pokemon_list_final = handle_unknown_encounters(pokemon_list_with_locations, cache)
+    save_to_csv(pokemon_list_final)
 
     save_cached_data()  # Save any remaining new entries
     conn.close()  # Close the database connection