From 3e44dc9eb506c8d4135ab273da4dbeb939f56f38 Mon Sep 17 00:00:00 2001
From: Dan
Date: Tue, 1 Oct 2024 14:55:25 +0100
Subject: [PATCH] - Update the parser to include encounter data from bulbapedia

---
Reviewer notes (this section is ignored when the patch is applied):
 * Line breaks and Python indentation were restored from a whitespace-collapsed
   copy. Context-line indentation and the pairing of "else:" with "if form:"
   in get_evolution_data_from_bulbapedia are reconstructed from syntax; verify
   against the target file before applying.
 * Amended relative to the original commit: docstrings added; empty-table
   guard in parse_evolution_chain; missing-next_stage guard in get_base_form;
   adjust_for_evolution tests the Pokemon's own baby flag instead of
   evolution_chain[0]; commented-out and unreachable code dropped;
   extract_is_baby reuses extract_stage_form.
 * Hunk headers and the diffstat were recomputed for the amended content; the
   post-image blob hash on the "index" line was not regenerated.
 * limit=200 in the __main__ block looks like a debugging cap; confirm it is
   intended before merging.

 Utilities/DetermineOriginGame.py | 211 +++++++++++++++++++++++++++----
 1 file changed, 182 insertions(+), 29 deletions(-)

diff --git a/Utilities/DetermineOriginGame.py b/Utilities/DetermineOriginGame.py
index e67bb9c..5d3ccd7 100644
--- a/Utilities/DetermineOriginGame.py
+++ b/Utilities/DetermineOriginGame.py
@@ -7,6 +7,7 @@ import re
 import sqlite3
 from bs4 import BeautifulSoup, Tag, NavigableString
 import copy
+from typing import List, Optional
 
 # Initialize the database connection
 conn = sqlite3.connect('pokemon_cache.db')
@@ -64,6 +65,120 @@ def update_cache(key, value):
     save_cached_data()
     time.sleep(1)
 
+class EvolutionStage:
+    """One Pokémon in an evolution chain parsed from a Bulbapedia table.
+
+    Stages are linked through ``next_stage``. ``parse_evolution_chain`` fills
+    ``method`` with the text of the cell preceding the Pokémon and appends
+    branch stages to ``branches``.
+    """
+
+    def __init__(self, pokemon: str, method: Optional[str] = None, stage: Optional[str] = None, form: Optional[str] = None):
+        self.pokemon = pokemon
+        self.method = method
+        self.next_stage: Optional[EvolutionStage] = None
+        self.branches: List[EvolutionStage] = []
+        self.stage = stage
+        # Baby status is read off the stage label text.
+        self.is_baby = self.stage is not None and 'Baby' in self.stage
+        self.form = form
+
+    def __str__(self):
+        return f"{self.pokemon} {self.form if self.form else ''} ({self.method if self.method else 'Base'})"
+
+def parse_evolution_chain(table: Tag, form: Optional[str] = None) -> List[EvolutionStage]:
+    """Parse an evolution table into a list of linked EvolutionStage objects.
+
+    The first TR holds the main chain; later TRs hold branching chains. A TD
+    that contains a nested table describes a Pokémon, and a TD without one
+    holds the method that triggers the next evolution.
+
+    Returns the main-chain stages in table order, or [] when the table has
+    no direct tbody or the tbody has no rows.
+    """
+    main_chain = []
+    current_stage = None
+    pending_method = None
+
+    tbody = table.find('tbody', recursive=False)
+    # An empty tbody must not reach rows[0] below (IndexError).
+    rows = tbody.find_all('tr', recursive=False) if tbody else []
+    if not rows:
+        return []
+
+    main_row = rows[0]
+    branch_rows = rows[1:]
+
+    # Parse main evolution chain
+    for td in main_row.find_all('td', recursive=False):
+        if td.find('table'):
+            # This TD contains Pokemon information
+            pokemon_name = extract_pokemon_name(td)
+            stage = extract_stage_form(td)
+            new_stage = EvolutionStage(pokemon_name, pending_method, stage, form)
+            pending_method = None
+            if current_stage:
+                current_stage.next_stage = new_stage
+            current_stage = new_stage
+            main_chain.append(current_stage)
+        else:
+            # This TD contains evolution method for the next Pokemon
+            pending_method = extract_evolution_method(td)
+
+    # Parse branching evolutions
+    for row in branch_rows:
+        branch_stage = None
+        branch_method = None
+        for td in row.find_all('td', recursive=False):
+            if td.find('table'):
+                pokemon_name = extract_pokemon_name(td)
+                stage = extract_stage_form(td)
+                new_stage = EvolutionStage(pokemon_name, branch_method, stage, form)
+                branch_method = None
+                if branch_stage:
+                    branch_stage.next_stage = new_stage
+                branch_stage = new_stage
+                # Find which main chain Pokemon this branches from
+                # NOTE(review): this only attaches when the branch TD has a
+                # rowspan and names a Pokemon already in the main chain;
+                # confirm that is the intended parent lookup.
+                for main_stage in main_chain:
+                    if td.get('rowspan') and main_stage.pokemon == pokemon_name:
+                        main_stage.branches.append(branch_stage)
+                        break
+            else:
+                branch_method = extract_evolution_method(td)
+
+    return main_chain
+
+def extract_pokemon_name(td: Tag) -> str:
+    """Return the Pokémon name from the table nested in ``td``.
+
+    Prefers an ``a.selflink`` anchor and falls back to the first anchor with
+    a title. Raises AttributeError when no nested table or anchor exists.
+    """
+    name_tag = td.find('table').find('a', class_='selflink')
+    if name_tag:
+        return name_tag.get_text(strip=True)
+    name_tag = td.find('table').find('a', title=True)
+    return name_tag.get_text(strip=True)
+
+def extract_evolution_method(td: Tag) -> str:
+    """Return the stripped text of ``td`` as the evolution method."""
+    return td.get_text(strip=True)
+
+def extract_stage_form(td: Tag) -> Optional[str]:
+    """Return the text of the first <small> tag in ``td``'s nested table, or None."""
+    stage_tag = td.find('table').find('small')
+    if stage_tag:
+        return stage_tag.get_text(strip=True)
+    return None
+
+def extract_is_baby(td: Tag) -> bool:
+    """Return True when the stage label of ``td`` contains 'Baby'."""
+    stage = extract_stage_form(td)
+    return stage is not None and 'Baby' in stage
+
 def read_pokemon_list(filename, limit=50):
     pokemon_list = []
     with open(filename, 'r', newline='', encoding='utf-8') as csvfile:
@@ -273,6 +388,44 @@ def parse_form_information(html_content):
         return main_form, breed
     return None, None
 
+
+def get_evolution_data_from_bulbapedia(pokemon_name, form, cache):
+    """Fetch and parse the evolution chain for a Pokémon from Bulbapedia.
+
+    With a form, the table following the first h4 heading that mentions the
+    form (before the next h3) is used; otherwise the first table after the
+    'Evolution_data' heading.
+
+    Returns the list from parse_evolution_chain, or None when the page, the
+    'Evolution_data' section or a matching table is missing.
+    """
+    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
+    if not page_data:
+        return None
+
+    soup = BeautifulSoup(page_data, 'html.parser')
+
+    evolution_section = soup.find('span', id='Evolution_data')
+    if not evolution_section:
+        return None
+
+    evolution_table = None
+    if form:
+        # Match headings on the form name without the word "Form".
+        form = form.replace('Form', '').replace('form', '').strip()
+        for tag in evolution_section.parent.find_next_siblings():
+            if tag.name == 'h4' and form in tag.get_text(strip=True):
+                evolution_table = tag.find_next('table')
+                break
+            if tag.name == 'h3':
+                break
+    else:
+        evolution_table = evolution_section.parent.find_next('table')
+    if not evolution_table:
+        return None
+
+    return parse_evolution_chain(evolution_table, form)
+
 def get_locations_from_bulbapedia(pokemon_name, form, cache):
     page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
 
@@ -474,22 +627,21 @@ def get_evolution_chain(pokemon_name, cache):
         return evolution_data
     return None
 
-def get_base_form(evolution_chain, cache):
-    if not evolution_chain or 'chain' not in evolution_chain:
+def get_base_form(evolution_chain: List[EvolutionStage]) -> Optional[str]:
+    """Return the base Pokémon name for a parsed evolution chain.
+
+    Walks the chain in order and returns the first stage labelled "Unevolved"
+    or the successor of a baby stage, whichever is met first; otherwise None.
+    """
+    if not evolution_chain:
         return None
 
-    current = evolution_chain['chain']
-    while current:
-        species_name = current['species']['name']
-        species_data = get_species_data(species_name, cache)
-
-        if species_data and not species_data.get('is_baby', False):
-            return species_name
-
-        if not current['evolves_to']:
-            return species_name
-
-        current = current['evolves_to'][0]
+    for stage in evolution_chain:
+        if stage.stage == "Unevolved":
+            return stage.pokemon
+        # A baby without a following stage has nothing to fall back to.
+        if stage.is_baby and stage.next_stage:
+            return stage.next_stage.pokemon
 
     return None
 
@@ -497,20 +649,21 @@ def adjust_for_evolution(pokemon_list, cache):
     pokemon_dict = {f"{pokemon['base_name']}_{pokemon['form']}".lower(): pokemon for pokemon in pokemon_list}
 
     for pokemon in pokemon_list:
-        species_data = get_species_data(pokemon['base_name'], cache)
-        evolution_chain = get_evolution_chain(pokemon['base_name'], cache)
-        base_form = get_base_form(evolution_chain, cache)
-
-        # Check if the Pokémon is a baby
-        if species_data and species_data.get('is_baby', False):
-            pokemon['obtain_method'] = 'Breed'
-        elif base_form:
-            base_key = f"{base_form}_{pokemon['form']}".lower()
-            if base_key in pokemon_dict:
-                base_pokemon = pokemon_dict[base_key]
-                if all_games.index(base_pokemon['earliest_game']) <= all_games.index(pokemon['earliest_game']) and base_pokemon['number'] != pokemon['number']:
-                    pokemon['earliest_game'] = base_pokemon['earliest_game']
-                    pokemon['obtain_method'] = 'Evolve'
+        evolution_chain = get_evolution_data_from_bulbapedia(pokemon['base_name'], pokemon['form'], cache)
+        if evolution_chain:
+            # Only a baby itself is bred: testing evolution_chain[0] alone
+            # would tag every later stage of a baby-led chain as 'Breed'.
+            own_name = pokemon['base_name'].lower()
+            if any(s.is_baby and s.pokemon.lower() == own_name for s in evolution_chain):
+                pokemon['obtain_method'] = 'Breed'
+            else:
+                base_form = get_base_form(evolution_chain)
+                base_key = f"{base_form}_{pokemon['form']}".lower()
+                if base_key in pokemon_dict:
+                    base_pokemon = pokemon_dict[base_key]
+                    if all_games.index(base_pokemon['earliest_game']) <= all_games.index(pokemon['earliest_game']) and base_pokemon['number'] != pokemon['number']:
+                        pokemon['earliest_game'] = base_pokemon['earliest_game']
+                        pokemon['obtain_method'] = 'Evolve'
 
         print(f"Adjusted {pokemon['name']} (#{pokemon['number']}): {pokemon['earliest_game']} ({pokemon['obtain_method']})")
 
@@ -651,7 +804,7 @@ def handle_unknown_encounters(pokemon_list, cache):
 
 if __name__ == "__main__":
     get_cached_data()
-    pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000)
+    pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=200)
     pokemon_list_with_games = determine_earliest_games(pokemon_list, cache)
     pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache)
     pokemon_list_with_locations = add_encounter_locations(pokemon_list_adjusted, cache)