Browse Source

- Update the parser to include encounter data from bulbapedia

master
Dan 1 year ago
parent
commit
3e44dc9eb5
  1. 189
      Utilities/DetermineOriginGame.py

189
Utilities/DetermineOriginGame.py

@ -7,6 +7,7 @@ import re
import sqlite3
from bs4 import BeautifulSoup, Tag, NavigableString
import copy
from typing import List, Optional
# Initialize the database connection
conn = sqlite3.connect('pokemon_cache.db')
@ -64,6 +65,95 @@ def update_cache(key, value):
save_cached_data()
time.sleep(1)
class EvolutionStage:
    """One Pokémon in an evolution chain, with links to what follows it."""

    def __init__(self, pokemon: str, method: Optional[str] = None, stage: Optional[str] = None, form: Optional[str] = None):
        """Create a stage.

        Args:
            pokemon: Name of the Pokémon at this stage.
            method: Text describing how this stage is reached, or ``None``.
            stage: Stage label text, or ``None``. A label containing
                ``'Baby'`` marks the stage as a baby.
            form: Form label, or ``None``.
        """
        self.pokemon = pokemon
        self.method = method
        # Following stage in the same chain; starts unset and is linked later.
        self.next_stage: Optional["EvolutionStage"] = None
        # Alternative evolutions that branch off from this stage.
        self.branches: List["EvolutionStage"] = []
        self.stage = stage
        # Computed once from the label given here; it is not refreshed if
        # ``stage`` is reassigned afterwards.
        self.is_baby = self.stage is not None and 'Baby' in self.stage
        self.form = form

    def __str__(self):
        """Return ``'<pokemon> [<form> ](<method>)'``, using 'Base' when no method is set."""
        # Only emit the form (and its separating space) when there is one, so
        # a form-less stage reads "Pichu (Base)" rather than "Pichu  (Base)".
        form_part = f"{self.form} " if self.form else ""
        return f"{self.pokemon} {form_part}({self.method if self.method else 'Base'})"
def parse_evolution_chain(table: Tag, form: Optional[str] = None) -> List[EvolutionStage]:
    """Parse an evolution table into linked ``EvolutionStage`` objects.

    The first ``tr`` of the table body is read as the main chain and every
    later ``tr`` as a branch. Within a row, a ``td`` that contains a nested
    table describes a Pokémon; a ``td`` without one holds the method text that
    is applied to the next Pokémon found in that row.

    Args:
        table: Table element whose direct ``tbody`` child holds the rows.
        form: Form label stored on every stage that is created.

    Returns:
        The main-chain stages in document order. An empty list is returned
        when the table has no direct ``tbody`` or the ``tbody`` has no rows.
    """
    main_chain = []
    current_stage = None
    pending_method = None

    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if not rows:
        # Without this guard rows[0] raises IndexError on an empty body.
        return []

    main_row = rows[0]
    branch_rows = rows[1:]

    # Parse main evolution chain
    for td in main_row.find_all('td', recursive=False):
        if td.find('table'):
            # This TD contains Pokemon information
            pokemon_name = extract_pokemon_name(td)
            stage = extract_stage_form(td)
            new_stage = EvolutionStage(pokemon_name, pending_method, stage, form)
            pending_method = None
            if current_stage:
                current_stage.next_stage = new_stage
            current_stage = new_stage
            main_chain.append(current_stage)
        else:
            # This TD contains evolution method for the next Pokemon
            pending_method = extract_evolution_method(td)

    # Parse branching evolutions
    for row in branch_rows:
        branch_stage = None
        branch_method = None
        for td in row.find_all('td', recursive=False):
            if td.find('table'):
                pokemon_name = extract_pokemon_name(td)
                stage = extract_stage_form(td)
                new_stage = EvolutionStage(pokemon_name, branch_method, stage, form)
                branch_method = None
                if branch_stage:
                    branch_stage.next_stage = new_stage
                branch_stage = new_stage

                # Find which main chain Pokemon this branches from.
                # NOTE(review): a branch is attached only when the cell has a
                # rowspan AND a main-chain stage has the same name as the
                # branch Pokémon itself - confirm this is the intended match.
                if td.get('rowspan'):
                    for main_stage in main_chain:
                        if main_stage.pokemon == pokemon_name:
                            main_stage.branches.append(branch_stage)
                            break
            else:
                branch_method = extract_evolution_method(td)

    return main_chain
def extract_pokemon_name(td: Tag) -> str:
    """Return the Pokémon name shown in the table nested inside ``td``.

    The text of the first ``a`` with class ``selflink`` is preferred; when
    there is none, the text of the first ``a`` carrying a ``title`` attribute
    is used instead.
    """
    info_table = td.find('table')
    link = info_table.find('a', class_='selflink') or info_table.find('a', title=True)
    return link.get_text(strip=True)
def extract_evolution_method(td: Tag) -> str:
    """Return the whitespace-stripped text of ``td`` as the evolution method."""
    method_text = td.get_text(strip=True)
    return method_text
def extract_stage_form(td: Tag) -> Optional[str]:
    """Return the text of the first ``small`` tag in ``td``'s nested table.

    Returns ``None`` when the nested table has no ``small`` tag.
    """
    label = td.find('table').find('small')
    return label.get_text(strip=True) if label else None
def extract_is_baby(td: Tag) -> bool:
    """Report whether the ``small`` label in ``td``'s nested table contains 'Baby'.

    Returns ``False`` when the nested table has no ``small`` tag.
    """
    label = td.find('table').find('small')
    if not label:
        return False
    return 'Baby' in label.get_text(strip=True)
def read_pokemon_list(filename, limit=50):
pokemon_list = []
with open(filename, 'r', newline='', encoding='utf-8') as csvfile:
@ -274,6 +364,45 @@ def parse_form_information(html_content):
return None, None
def get_evolution_data_from_bulbapedia(pokemon_name, form, cache):
    """Fetch a Pokémon's page and parse its evolution table.

    Args:
        pokemon_name: Name passed to ``get_pokemon_data_bulbapedia``.
        form: Optional form label. When given, 'Form'/'form' is removed from
            it and the table following the first ``h4`` whose text contains
            the remainder is used; the search stops at the next ``h3``.
        cache: Cache object passed through to ``get_pokemon_data_bulbapedia``.

    Returns:
        The result of ``parse_evolution_chain`` for the selected table, or
        ``None`` when the page, the ``Evolution_data`` span, or a suitable
        table cannot be found.
    """
    html = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not html:
        return None

    document = BeautifulSoup(html, 'html.parser')
    heading_span = document.find('span', id='Evolution_data')
    if not heading_span:
        return None

    heading = heading_span.parent
    if not form:
        # No form requested: take the first table after the section heading.
        chain_table = heading.find_next('table')
    else:
        form = form.replace('Form', '').replace('form', '').strip()
        chain_table = None
        for sibling in heading.find_next_siblings():
            if sibling.name == 'h4' and form in sibling.get_text(strip=True):
                chain_table = sibling.find_next('table')
                break
            if sibling.name == 'h3':
                # Reached the next h3 without a matching h4; stop searching.
                break

    if not chain_table:
        return None
    return parse_evolution_chain(chain_table, form)
# The evolution table layout is a little odd:
# - the first TR contains the full main evolution chain
# - the other TRs contain branching evolution chains
# - any TDs in the first TR with a rowspan are part of the main evolution chain
# - any other TDs are part of the branching evolution chains
# - a table inside a TD holds information about the Pokémon at that evolution stage
# - a TD without a table describes how to trigger the next evolution
def get_locations_from_bulbapedia(pokemon_name, form, cache):
page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
if not page_data:
@ -474,22 +603,30 @@ def get_evolution_chain(pokemon_name, cache):
return evolution_data
return None
def get_base_form(evolution_chain: "List[EvolutionStage]", cache=None):
    """Return the name of the base (non-baby) Pokémon of an evolution chain.

    Stages are scanned in order. The first stage labelled "Unevolved" is
    returned; if a baby stage is met first, the Pokémon it evolves into is
    returned instead.

    Args:
        evolution_chain: Stages to scan. Each must expose ``stage``,
            ``is_baby``, ``pokemon`` and ``next_stage``.
        cache: Unused. Accepted only so the older two-argument call form
            ``get_base_form(chain, cache)`` keeps working.

    Returns:
        The base Pokémon's name, or ``None`` when the chain is empty or no
        stage qualifies.
    """
    if not evolution_chain:
        return None

    for stage in evolution_chain:
        if stage.stage == "Unevolved":
            return stage.pokemon
        # A baby with nothing linked after it has no base form to report;
        # skip it instead of dereferencing a missing next stage.
        if stage.is_baby and stage.next_stage is not None:
            return stage.next_stage.pokemon

    return None
@ -497,20 +634,32 @@ def adjust_for_evolution(pokemon_list, cache):
pokemon_dict = {f"{pokemon['base_name']}_{pokemon['form']}".lower(): pokemon for pokemon in pokemon_list}
for pokemon in pokemon_list:
species_data = get_species_data(pokemon['base_name'], cache)
evolution_chain = get_evolution_chain(pokemon['base_name'], cache)
base_form = get_base_form(evolution_chain, cache)
# Check if the Pokémon is a baby
if species_data and species_data.get('is_baby', False):
evolution_chain = get_evolution_data_from_bulbapedia(pokemon['base_name'], pokemon['form'], cache)
if evolution_chain:
if evolution_chain[0].is_baby:
pokemon['obtain_method'] = 'Breed'
elif base_form:
else:
base_form = get_base_form(evolution_chain)
base_key = f"{base_form}_{pokemon['form']}".lower()
if base_key in pokemon_dict:
base_pokemon = pokemon_dict[base_key]
if all_games.index(base_pokemon['earliest_game']) <= all_games.index(pokemon['earliest_game']) and base_pokemon['number'] != pokemon['number']:
pokemon['earliest_game'] = base_pokemon['earliest_game']
pokemon['obtain_method'] = 'Evolve'
#species_data = get_species_data(pokemon['base_name'], cache)
#evolution_chain = get_evolution_chain(pokemon['base_name'], cache)
#base_form = get_base_form(evolution_chain, cache)
# Check if the Pokémon is a baby
#if species_data and species_data.get('is_baby', False):
# pokemon['obtain_method'] = 'Breed'
#elif base_form:
# base_key = f"{base_form}_{pokemon['form']}".lower()
# if base_key in pokemon_dict:
# base_pokemon = pokemon_dict[base_key]
# if all_games.index(base_pokemon['earliest_game']) <= all_games.index(pokemon['earliest_game']) and base_pokemon['number'] != pokemon['number']:
# pokemon['earliest_game'] = base_pokemon['earliest_game']
# pokemon['obtain_method'] = 'Evolve'
print(f"Adjusted {pokemon['name']} (#{pokemon['number']}): {pokemon['earliest_game']} ({pokemon['obtain_method']})")
@ -651,7 +800,7 @@ def handle_unknown_encounters(pokemon_list, cache):
if __name__ == "__main__":
get_cached_data()
pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000)
pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=200)
pokemon_list_with_games = determine_earliest_games(pokemon_list, cache)
pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache)
pokemon_list_with_locations = add_encounter_locations(pokemon_list_adjusted, cache)

Loading…
Cancel
Save