|
|
@ -7,6 +7,7 @@ import re |
|
|
import sqlite3 |
|
|
import sqlite3 |
|
|
from bs4 import BeautifulSoup, Tag, NavigableString |
|
|
from bs4 import BeautifulSoup, Tag, NavigableString |
|
|
import copy |
|
|
import copy |
|
|
|
|
|
from typing import List, Optional |
|
|
|
|
|
|
|
|
# Initialize the database connection |
|
|
# Initialize the database connection |
|
|
conn = sqlite3.connect('pokemon_cache.db') |
|
|
conn = sqlite3.connect('pokemon_cache.db') |
|
|
@ -64,6 +65,95 @@ def update_cache(key, value): |
|
|
save_cached_data() |
|
|
save_cached_data() |
|
|
time.sleep(1) |
|
|
time.sleep(1) |
|
|
|
|
|
|
|
|
|
|
|
class EvolutionStage:
    """One Pokémon within an evolution chain.

    Stages are linked through ``next_stage`` (the following stage on the same
    row of the chain) and ``branches`` (alternative stages attached to this
    one).
    """

    def __init__(self, pokemon: str, method: Optional[str] = None, stage: Optional[str] = None, form: Optional[str] = None):
        """Create an unlinked stage.

        Args:
            pokemon: Name of the Pokémon at this stage.
            method: How this stage is reached from the previous one;
                ``None`` for the first stage of a chain.
            stage: Stage label text (e.g. a value containing ``'Baby'``);
                ``None`` when no label is available.
            form: Form name this chain was parsed for, if any.
        """
        self.pokemon = pokemon
        self.method = method
        # Filled in by the parser once the following stage is known.
        self.next_stage: Optional["EvolutionStage"] = None
        # Alternative evolutions attached to this stage.
        self.branches: List["EvolutionStage"] = []
        self.stage = stage
        # A stage counts as a baby when its label mentions 'Baby'.
        self.is_baby = self.stage is not None and 'Baby' in self.stage
        self.form = form

    def __str__(self):
        """Return ``"<pokemon> [<form>] (<method or 'Base'>)"``."""
        # Only insert the form (and its separating space) when a form is set,
        # so form-less stages do not render with a double space.
        label = f"{self.pokemon} {self.form}" if self.form else self.pokemon
        method = self.method if self.method else 'Base'
        return f"{label} ({method})"
|
|
|
|
|
|
|
|
|
|
|
def parse_evolution_chain(table: Tag, form: Optional[str] = None) -> List[EvolutionStage]:
    """Parse an evolution table into a list of linked ``EvolutionStage`` objects.

    The first ``<tr>`` of the table's direct ``<tbody>`` is treated as the main
    chain; every further ``<tr>`` is treated as a branching chain. Within a
    row, a ``<td>`` that contains a nested ``<table>`` describes a Pokémon and
    a ``<td>`` without one describes the method leading to the next Pokémon.

    Args:
        table: The evolution ``<table>`` element.
        form: Form name stored on every stage that is created.

    Returns:
        The main-chain stages in document order. Branch stages are reachable
        through ``EvolutionStage.branches``. An empty list is returned when
        the table has no direct ``<tbody>`` or the ``<tbody>`` has no rows.
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if not rows:
        # Without this guard rows[0] below would raise IndexError.
        return []

    main_row = rows[0]
    branch_rows = rows[1:]

    main_chain: List[EvolutionStage] = []
    current_stage = None
    pending_method = None

    # Parse main evolution chain
    for td in main_row.find_all('td', recursive=False):
        if td.find('table'):
            # This TD contains Pokemon information
            pokemon_name = extract_pokemon_name(td)
            stage = extract_stage_form(td)
            new_stage = EvolutionStage(pokemon_name, pending_method, stage, form)
            pending_method = None
            if current_stage:
                current_stage.next_stage = new_stage
            current_stage = new_stage
            main_chain.append(current_stage)
        else:
            # This TD contains evolution method for the next Pokemon
            pending_method = extract_evolution_method(td)

    # Parse branching evolutions
    for row in branch_rows:
        branch_stage = None
        branch_method = None
        for td in row.find_all('td', recursive=False):
            if td.find('table'):
                pokemon_name = extract_pokemon_name(td)
                stage = extract_stage_form(td)
                new_stage = EvolutionStage(pokemon_name, branch_method, stage, form)
                branch_method = None
                if branch_stage:
                    branch_stage.next_stage = new_stage
                branch_stage = new_stage

                # Find which main chain Pokemon this branches from.
                # NOTE(review): the match compares the branch Pokémon's own
                # name with the main-chain names and requires a rowspan on
                # this TD — confirm this is the intended attachment rule.
                for main_stage in main_chain:
                    if td.get('rowspan') and main_stage.pokemon == pokemon_name:
                        main_stage.branches.append(branch_stage)
                        break
            else:
                branch_method = extract_evolution_method(td)

    return main_chain
|
|
|
|
|
|
|
|
|
|
|
def extract_pokemon_name(td: Tag) -> str:
    """Return the Pokémon name shown in the nested table of ``td``.

    An ``<a class="selflink">`` is preferred; otherwise the first ``<a>``
    carrying a ``title`` attribute is used. The link text is returned with
    surrounding whitespace stripped.

    Raises:
        AttributeError: If ``td`` has no nested table or neither kind of link
            is found (the lookup result is then ``None``).
    """
    info_table = td.find('table')
    link = info_table.find('a', class_='selflink')
    if not link:
        # No self-link present: fall back to any titled link.
        link = info_table.find('a', title=True)
    return link.get_text(strip=True)
|
|
|
|
|
|
|
|
|
|
|
def extract_evolution_method(td: Tag) -> str:
    """Return the whitespace-stripped text of ``td`` as the evolution method."""
    method_text = td.get_text(strip=True)
    return method_text
|
|
|
|
|
|
|
|
|
|
|
def extract_stage_form(td: Tag) -> Optional[str]:
    """Return the stage label from the nested table of ``td``.

    The label is the stripped text of the first ``<small>`` element inside
    the nested table, or ``None`` when there is no such element.
    """
    label = td.find('table').find('small')
    if not label:
        return None
    return label.get_text(strip=True)
|
|
|
|
|
|
|
|
|
|
|
def extract_is_baby(td: Tag) -> bool:
    """Tell whether the stage label in ``td`` marks a baby Pokémon.

    Returns ``True`` when the first ``<small>`` element of the nested table
    exists and its stripped text contains ``'Baby'``; ``False`` otherwise.
    """
    label = td.find('table').find('small')
    if not label:
        return False
    label_text = label.get_text(strip=True)
    return 'Baby' in label_text
|
|
|
|
|
|
|
|
def read_pokemon_list(filename, limit=50): |
|
|
def read_pokemon_list(filename, limit=50): |
|
|
pokemon_list = [] |
|
|
pokemon_list = [] |
|
|
with open(filename, 'r', newline='', encoding='utf-8') as csvfile: |
|
|
with open(filename, 'r', newline='', encoding='utf-8') as csvfile: |
|
|
@ -273,6 +363,45 @@ def parse_form_information(html_content): |
|
|
return main_form, breed |
|
|
return main_form, breed |
|
|
|
|
|
|
|
|
return None, None |
|
|
return None, None |
|
|
|
|
|
|
|
|
|
|
|
def get_evolution_data_from_bulbapedia(pokemon_name, form, cache):
    """Locate and parse the evolution table on a Pokémon's Bulbapedia page.

    Args:
        pokemon_name: Name passed to ``get_pokemon_data_bulbapedia``.
        form: Optional form name. When given, ``'Form'``/``'form'`` is removed
            from it and the table following the first ``<h4>`` whose text
            contains the remainder is used; the search stops at the next
            ``<h3>``. When falsy, the first table after the evolution heading
            is used.
        cache: Cache object forwarded to ``get_pokemon_data_bulbapedia``.

    Returns:
        The result of ``parse_evolution_chain`` for the selected table, or
        ``None`` when the page, the ``Evolution_data`` anchor, or a matching
        table cannot be found.
    """
    html = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not html:
        return None

    soup = BeautifulSoup(html, 'html.parser')
    anchor = soup.find('span', id='Evolution_data')
    if not anchor:
        return None

    heading = anchor.parent
    table = None
    if not form:
        table = heading.find_next('table')
    else:
        form = form.replace('Form', '').replace('form', '').strip()
        for sibling in heading.find_next_siblings():
            if sibling.name == 'h4' and form in sibling.get_text(strip=True):
                table = sibling.find_next('table')
                break
            if sibling.name == 'h3':
                # Reached the next section without finding the form heading.
                break

    if not table:
        return None

    return parse_evolution_chain(table, form)
|
|
|
|
|
|
|
|
|
|
|
# This is going to be a little odd. |
|
|
|
|
|
# the first TR contains a full evolution chain |
|
|
|
|
|
# other TRs contain branching evolution chains |
|
|
|
|
|
# any TDs in the first TR with a rowspan are part of the main evolution chain |
|
|
|
|
|
# any other TDs are part of the branching evolution chains |
|
|
|
|
|
# a table in a TD is information about the current Pokémon in that evolution stage |
|
|
|
|
|
# a TD without a table is information on how to trigger the next evolution |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_locations_from_bulbapedia(pokemon_name, form, cache): |
|
|
def get_locations_from_bulbapedia(pokemon_name, form, cache): |
|
|
page_data = get_pokemon_data_bulbapedia(pokemon_name, cache) |
|
|
page_data = get_pokemon_data_bulbapedia(pokemon_name, cache) |
|
|
@ -474,22 +603,30 @@ def get_evolution_chain(pokemon_name, cache): |
|
|
return evolution_data |
|
|
return evolution_data |
|
|
return None |
|
|
return None |
|
|
|
|
|
|
|
|
def get_base_form(evolution_chain: List["EvolutionStage"]):
    """Return the name of the base (non-baby) Pokémon of an evolution chain.

    Stages are scanned in order. The first stage whose ``stage`` label is
    exactly ``"Unevolved"`` yields its own name; a baby stage yields the name
    of the stage that follows it.

    Args:
        evolution_chain: Stages as produced by the evolution-chain parser;
            may be ``None`` or empty.

    Returns:
        The base Pokémon's name, or ``None`` when the chain is empty or no
        stage qualifies.
    """
    if not evolution_chain:
        return None

    for stage in evolution_chain:
        if stage.stage == "Unevolved":
            return stage.pokemon
        # A baby with no recorded successor cannot name a base form; keep
        # scanning instead of dereferencing a missing next_stage.
        if stage.is_baby and stage.next_stage is not None:
            return stage.next_stage.pokemon

    return None
|
|
|
|
|
|
|
|
@ -497,20 +634,32 @@ def adjust_for_evolution(pokemon_list, cache): |
|
|
pokemon_dict = {f"{pokemon['base_name']}_{pokemon['form']}".lower(): pokemon for pokemon in pokemon_list} |
|
|
pokemon_dict = {f"{pokemon['base_name']}_{pokemon['form']}".lower(): pokemon for pokemon in pokemon_list} |
|
|
|
|
|
|
|
|
for pokemon in pokemon_list: |
|
|
for pokemon in pokemon_list: |
|
|
species_data = get_species_data(pokemon['base_name'], cache) |
|
|
evolution_chain = get_evolution_data_from_bulbapedia(pokemon['base_name'], pokemon['form'], cache) |
|
|
evolution_chain = get_evolution_chain(pokemon['base_name'], cache) |
|
|
if evolution_chain: |
|
|
base_form = get_base_form(evolution_chain, cache) |
|
|
if evolution_chain[0].is_baby: |
|
|
|
|
|
pokemon['obtain_method'] = 'Breed' |
|
|
|
|
|
else: |
|
|
|
|
|
base_form = get_base_form(evolution_chain) |
|
|
|
|
|
base_key = f"{base_form}_{pokemon['form']}".lower() |
|
|
|
|
|
if base_key in pokemon_dict: |
|
|
|
|
|
base_pokemon = pokemon_dict[base_key] |
|
|
|
|
|
if all_games.index(base_pokemon['earliest_game']) <= all_games.index(pokemon['earliest_game']) and base_pokemon['number'] != pokemon['number']: |
|
|
|
|
|
pokemon['earliest_game'] = base_pokemon['earliest_game'] |
|
|
|
|
|
pokemon['obtain_method'] = 'Evolve' |
|
|
|
|
|
#species_data = get_species_data(pokemon['base_name'], cache) |
|
|
|
|
|
#evolution_chain = get_evolution_chain(pokemon['base_name'], cache) |
|
|
|
|
|
#base_form = get_base_form(evolution_chain, cache) |
|
|
|
|
|
|
|
|
# Check if the Pokémon is a baby |
|
|
# Check if the Pokémon is a baby |
|
|
if species_data and species_data.get('is_baby', False): |
|
|
#if species_data and species_data.get('is_baby', False): |
|
|
pokemon['obtain_method'] = 'Breed' |
|
|
# pokemon['obtain_method'] = 'Breed' |
|
|
elif base_form: |
|
|
#elif base_form: |
|
|
base_key = f"{base_form}_{pokemon['form']}".lower() |
|
|
# base_key = f"{base_form}_{pokemon['form']}".lower() |
|
|
if base_key in pokemon_dict: |
|
|
# if base_key in pokemon_dict: |
|
|
base_pokemon = pokemon_dict[base_key] |
|
|
# base_pokemon = pokemon_dict[base_key] |
|
|
if all_games.index(base_pokemon['earliest_game']) <= all_games.index(pokemon['earliest_game']) and base_pokemon['number'] != pokemon['number']: |
|
|
# if all_games.index(base_pokemon['earliest_game']) <= all_games.index(pokemon['earliest_game']) and base_pokemon['number'] != pokemon['number']: |
|
|
pokemon['earliest_game'] = base_pokemon['earliest_game'] |
|
|
# pokemon['earliest_game'] = base_pokemon['earliest_game'] |
|
|
pokemon['obtain_method'] = 'Evolve' |
|
|
# pokemon['obtain_method'] = 'Evolve' |
|
|
|
|
|
|
|
|
print(f"Adjusted {pokemon['name']} (#{pokemon['number']}): {pokemon['earliest_game']} ({pokemon['obtain_method']})") |
|
|
print(f"Adjusted {pokemon['name']} (#{pokemon['number']}): {pokemon['earliest_game']} ({pokemon['obtain_method']})") |
|
|
|
|
|
|
|
|
@ -651,7 +800,7 @@ def handle_unknown_encounters(pokemon_list, cache): |
|
|
if __name__ == "__main__": |
|
|
if __name__ == "__main__": |
|
|
get_cached_data() |
|
|
get_cached_data() |
|
|
|
|
|
|
|
|
pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000) |
|
|
pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=200) |
|
|
pokemon_list_with_games = determine_earliest_games(pokemon_list, cache) |
|
|
pokemon_list_with_games = determine_earliest_games(pokemon_list, cache) |
|
|
pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache) |
|
|
pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache) |
|
|
pokemon_list_with_locations = add_encounter_locations(pokemon_list_adjusted, cache) |
|
|
pokemon_list_with_locations = add_encounter_locations(pokemon_list_adjusted, cache) |
|
|
|