from typing import Optional

from PyQt6.QtCore import QObject, pyqtSignal, QRunnable
from bs4 import BeautifulSoup, Tag

from cache import cache
from db import db
from utility.functions import get_form_name, get_display_name


class GatherEvolutionsWorkerSignals(QObject):
    """Signals emitted by :class:`GatherEvolutions`."""

    # Emitted with the list of gathered evolution chains after a successful run.
    finished = pyqtSignal(list)


class GatherEvolutions(QRunnable):
    """Background task that scrapes evolution chains for every known form.

    For each Pokémon form returned by ``db.get_list_of_pokemon_forms()`` the
    matching Bulbapedia page is fetched through ``cache.fetch_url`` and the
    table following the ``Evolution_data`` heading is parsed into a list of
    linked "stage" dictionaries (see :meth:`_make_stage`).
    """

    def __init__(self):
        super().__init__()
        self.signals = GatherEvolutionsWorkerSignals()
        self.base_url = "https://bulbapedia.bulbagarden.net/wiki/"

    def run(self):
        """Gather all evolution data and emit it through ``signals.finished``.

        Any exception is caught and printed; in that case ``finished`` is not
        emitted.
        """
        try:
            gathered_data = self.gather_evolution_data()
            self.signals.finished.emit(gathered_data)
        except Exception as e:
            # Broad catch on purpose: this is the top-level boundary of the
            # runnable, so failures are reported here instead of propagating.
            print(f"Error gathering Pokémon evolution data: {e}")

    def gather_evolution_data(self):
        """Return one parsed evolution chain per Pokémon form that has one.

        Forms whose page cannot be fetched, has no ``Evolution_data`` section,
        or has no evolution table are skipped.
        """
        evolutions = []
        for pokemon_form in db.get_list_of_pokemon_forms():
            print(f"Processing {get_display_name(pokemon_form)}'s evolutions")
            pokemon_name = pokemon_form["name"]

            url = f"{self.base_url}{pokemon_name}_(Pokémon)"
            page_data = cache.fetch_url(url)
            if not page_data:
                continue

            soup = BeautifulSoup(page_data, 'html.parser')
            evolution_section = soup.find('span', id='Evolution_data')
            if not evolution_section:
                continue

            form = get_form_name(pokemon_form, not pokemon_form["gender_relevant"])
            evolution_table = self._find_evolution_table(evolution_section, form)
            if not evolution_table:
                continue

            # Eevee's table has its own layout and needs a dedicated parser.
            if pokemon_name == "Eevee":
                evolution_chain = self.parse_eevee_evolution_chain(evolution_table, pokemon_form)
            else:
                evolution_chain = self.parse_evolution_chain(evolution_table, pokemon_form)
            evolutions.append(evolution_chain)
        return evolutions

    @staticmethod
    def _find_evolution_table(evolution_section, form):
        """Locate the evolution table for ``form`` below the section heading.

        Defaults to the first table after the heading. When ``form`` is
        truthy, the heading's following siblings are scanned for an ``h4``
        whose text contains the form name (with "Form"/"form" removed); if one
        is found its next table is returned instead. Scanning stops at the
        first ``h3``.
        """
        heading = evolution_section.parent
        evolution_table = heading.find_next('table')
        if not form:
            return evolution_table

        form_without_form = form.replace('Form', '').replace('form', '').strip()
        for tag in heading.find_next_siblings():
            if tag.name == 'h4' and form_without_form in tag.get_text(strip=True):
                return tag.find_next('table')
            if tag.name == 'h3':
                break
        return evolution_table

    @staticmethod
    def _make_stage(pokemon_name, method, stage, form, pfic):
        """Build one unlinked stage dictionary of an evolution chain."""
        return {
            "pokemon": pokemon_name,
            "method": method,
            "stage": stage,
            "form": form,
            "next_stage": None,
            "previous_stage": None,
            "branches": [],
            "pfic": pfic,
        }

    def parse_evolution_chain(self, table, pokemon_form, force_refresh=False):
        """Parse a regular evolution table into a list of linked stages.

        The result is stored in ``cache`` under ``evo_<pfic>``; an existing
        entry is returned as-is unless ``force_refresh`` is true, in which
        case it is purged first.

        Args:
            table: The evolution ``<table>`` tag.
            pokemon_form: Mapping with at least a ``"pfic"`` key.
            force_refresh: Purge the cached entry before parsing.

        Returns:
            The main chain as a list of stage dictionaries (first table row),
            with branch stages from later rows attached through ``"branches"``.
            An empty list when the table has no direct ``tbody`` or no rows.
        """
        cache_record_name = f"evo_{pokemon_form['pfic']}"
        if force_refresh:
            cache.purge(cache_record_name)
        cached_entry = cache.get(cache_record_name)
        if cached_entry is not None:
            return cached_entry

        tbody = table.find('tbody', recursive=False)
        if not tbody:
            return []
        rows = tbody.find_all('tr', recursive=False)
        if not rows:
            return []
        main_row = rows[0]
        branch_rows = rows[1:]
        pfic = pokemon_form["pfic"]

        # Parse main evolution chain
        main_chain = []
        current_stage = None
        pending_method = None
        for td in main_row.find_all('td', recursive=False):
            if td.find('table'):
                # This TD contains Pokemon information
                pokemon_name = self.extract_pokemon_name(td)
                new_stage = self._make_stage(
                    pokemon_name,
                    pending_method,
                    self.extract_stage_form(td),
                    self.extract_evolution_form(td, pokemon_name),
                    pfic,
                )
                pending_method = None
                if current_stage:
                    current_stage["next_stage"] = new_stage
                    new_stage["previous_stage"] = current_stage  # Set the back link
                current_stage = new_stage
                main_chain.append(current_stage)
            else:
                # This TD contains evolution method for the next Pokemon
                pending_method = self.extract_evolution_method(td)

        # Parse branching evolutions
        for row in branch_rows:
            branch_stage = None
            branch_method = None
            for td in row.find_all('td', recursive=False):
                if not td.find('table'):
                    branch_method = self.extract_evolution_method(td)
                    continue

                pokemon_name = self.extract_pokemon_name(td)
                new_stage = self._make_stage(
                    pokemon_name,
                    branch_method,
                    self.extract_stage_form(td),
                    self.extract_evolution_form(td, pokemon_name),
                    pfic,
                )
                branch_method = None
                if branch_stage:
                    branch_stage["next_stage"] = new_stage
                    new_stage["previous_stage"] = branch_stage  # Set the back link
                branch_stage = new_stage

                # Find which main chain Pokemon this branches from.
                # NOTE(review): the match compares main-chain names with the
                # name parsed from this branch cell, and only for cells that
                # carry a rowspan - confirm this is the intended rule.
                if td.get('rowspan'):
                    for main_stage in main_chain:
                        # Stages are dicts, so the name must be read by key.
                        if main_stage["pokemon"] == pokemon_name:
                            main_stage["branches"].append(branch_stage)
                            # Set the back link to the main chain
                            branch_stage["previous_stage"] = main_stage
                            break

        # NOTE(review): the stages reference each other in both directions;
        # confirm that cache.set can store such a cyclic structure.
        cache.set(cache_record_name, main_chain)
        return main_chain

    def extract_pokemon_name(self, td: Tag) -> Optional[str]:
        """Return the stripped text of the cell's name link, or ``None``."""
        name_tag = self.find_name_tag(td)
        if name_tag:
            return name_tag.get_text(strip=True)
        return None

    def find_name_tag(self, td: Tag) -> Optional[Tag]:
        """Find the ``<a>`` tag naming the Pokémon inside the cell's sub-table.

        Prefers an ``a.selflink``; otherwise the first ``<a>`` that has a
        ``title`` attribute and whose class is not ``image``. Returns ``None``
        when the cell has no nested table or no matching link.
        """
        table = td.find('table')
        if table is None:
            return None
        name_tag = table.find('a', class_='selflink')
        if name_tag:
            return name_tag
        return table.find('a', title=True, class_=lambda x: x != 'image')

    def extract_stage_form(self, td: Tag) -> Optional[str]:
        """Return the text of the first ``<small>`` in the cell's sub-table.

        Returns ``None`` when the cell has no nested table or no ``<small>``.
        """
        table = td.find('table')
        if table is None:
            return None
        stage_tag = table.find('small')
        if stage_tag:
            return stage_tag.get_text(strip=True)
        return None

    def extract_evolution_form(self, td: Tag, name: str) -> Optional[str]:
        """Return the form label shown next to the Pokémon's name, if any.

        Looks at the ``<small>`` tags inside the parent of the name link and
        returns the text of the first one only when there is more than one.
        ``name`` is accepted for interface compatibility but is not used.
        """
        name_tag = self.find_name_tag(td)
        if name_tag:
            small_tags = name_tag.parent.find_all('small')
            if len(small_tags) > 1:
                return small_tags[0].get_text(strip=True)
        return None

    def extract_evolution_method(self, td: Tag) -> str:
        """Return the cell's stripped text, used as the evolution method."""
        return td.get_text(strip=True)

    def parse_eevee_evolution_chain(self, table, pokemon_form):
        """Parse Eevee's evolution table into a single stage with branches.

        Reads the table's direct rows by position: row 1 holds Eevee, row 2
        one method cell per evolution, row 3 one cell per evolution. Each
        evolution is paired with the method at the same index (``None`` when
        there are fewer method cells than evolutions).

        Returns:
            ``[eevee_stage]`` with the evolutions in ``eevee_stage["branches"]``,
            or an empty list when the table has no direct ``tbody`` or fewer
            than four rows.
        """
        tbody = table.find('tbody', recursive=False)
        if not tbody:
            return []
        rows = tbody.find_all('tr', recursive=False)
        if len(rows) < 4:
            # Not the expected layout; avoid an IndexError that would abort
            # the whole gathering run.
            return []
        eevee_row, method_row, eeveelutions_row = rows[1], rows[2], rows[3]
        pfic = pokemon_form["pfic"]

        eevee_td = eevee_row.find('td', recursive=False)
        pokemon_name, stage = self.parse_pokemon_subtable(eevee_td)
        eevee_stage = self._make_stage(pokemon_name, None, stage, None, pfic)

        methods = [
            self.extract_evolution_method(method_td)
            for method_td in method_row.find_all('td', recursive=False)
        ]

        eeveelutions = []
        for index, eeveelution in enumerate(eeveelutions_row.find_all('td', recursive=False)):
            pokemon_name, stage = self.parse_pokemon_subtable(eeveelution)
            method = methods[index] if index < len(methods) else None
            eeveelution_stage = self._make_stage(pokemon_name, method, stage, None, pfic)
            eeveelution_stage["previous_stage"] = eevee_stage  # Set the back link to Eevee
            eeveelutions.append(eeveelution_stage)

        # Set the branches directly, not as a nested list
        eevee_stage["branches"] = eeveelutions
        return [eevee_stage]

    def parse_pokemon_subtable(self, td):
        """Return ``(name, stage)`` for a Pokémon cell.

        Returns ``(None, None)`` when ``td`` is ``None`` or has no nested
        table.
        """
        if td is not None and td.find('table'):
            # This TD contains Pokemon information
            pokemon_name = self.extract_pokemon_name(td)
            stage = self.extract_stage_form(td)
            return pokemon_name, stage
        return None, None