import re
from typing import Optional

from PyQt6.QtCore import QObject, pyqtSignal, QRunnable
from bs4 import BeautifulSoup, Tag
from fuzzywuzzy import fuzz

from cache import cache
from db import db
from utility.functions import get_form_name, get_display_name, parse_pfic
from utility.data import non_evolution_forms


class GatherEvolutionsWorkerSignals(QObject):
    """Signals exposed by the GatherEvolutions worker."""

    # gather_evolution_data() returns a dict keyed by "from->to", so the
    # signal is declared with dict to match the payload run() emits.
    finished = pyqtSignal(dict)


class GatherEvolutions(QRunnable):
    """Runnable that scrapes Bulbapedia evolution tables into PFIC pairs."""

    def __init__(self):
        super().__init__()
        self.signals = GatherEvolutionsWorkerSignals()
        self.base_url = "https://bulbapedia.bulbagarden.net/wiki/"

    def run(self):
        """Gather the evolution data and emit it through ``signals.finished``.

        Any exception is reported on stdout; in that case nothing is emitted.
        """
        try:
            gathered_data = self.gather_evolution_data()
            self.signals.finished.emit(gathered_data)
        except Exception as e:
            print(f"Error gathering Pokémon evolution data: {e}")

    def gather_evolution_data(self, force_refresh = True):
        """Build the evolution mapping for every form returned by the database.

        For each form the Pokémon's Bulbapedia page is fetched through the
        cache, the evolution table is located and parsed into a tree, and
        every parent/child pair of that tree is stored via
        ``traverse_and_store``.

        Args:
            force_refresh: when true, the ``chain_<name>_<form>`` cache record
                of each processed form is purged before processing.

        Returns:
            dict mapping ``"<from_pfic>-><to_pfic>"`` to a dict with the keys
            ``from_pfic``, ``to_pfic`` and ``method``.
        """
        # NOTE (carried over from the original source comment): Bulbapedia
        # does not detail Flabébé's chain per colour form; the colour stays
        # constant through the line (Red->Red->Red, Yellow->Yellow->Yellow).
        evolutions = {}
        for pokemon_form in db.get_list_of_pokemon_forms():
            print(f"Processing {get_display_name(pokemon_form)}'s evolutions")
            pokemon_name = pokemon_form["name"]
            form = get_form_name(pokemon_form)

            db_form_name = pokemon_form["form_name"]
            if db_form_name and any(s in db_form_name for s in non_evolution_forms):
                continue

            cache_record_name = f"chain_{pokemon_name}_{form}"
            if force_refresh:
                cache.purge(cache_record_name)

            gender = self._gender_from_form(pokemon_name, form)

            page_data = cache.fetch_url(f"{self.base_url}{pokemon_name}_(Pokémon)")
            if not page_data:
                continue

            soup = BeautifulSoup(page_data, 'html.parser')
            evolution_table = self._find_evolution_table(soup, form)
            if not evolution_table:
                continue

            if pokemon_name == "Eevee":
                evolution_tree = self.parse_eevee_evolution_chain(evolution_table, pokemon_form)
            else:
                evolution_tree = self.parse_evolution_chain(evolution_table, pokemon_form)

            if evolution_tree:
                self.traverse_and_store(evolution_tree, evolutions, gender)

            # The legacy flat-chain loop never produced entries, so this
            # record has always been written as an empty list; kept so the
            # cache contents stay the same as before.
            cache.set(cache_record_name, [])

        return evolutions

    @staticmethod
    def _gender_from_form(pokemon_name, form):
        """Return the form text when it names a gender, otherwise None."""
        if not form:
            return None
        candidate = form
        if pokemon_name in candidate:
            candidate = candidate.replace(pokemon_name, "").strip()
        # "male" is a substring of "female", so one test covers both.
        if candidate and "male" in candidate.lower():
            return candidate
        return None

    @staticmethod
    def _find_evolution_table(soup, form):
        """Locate the evolution table for ``form`` on a parsed page.

        Defaults to the first table after the ``Evolution_data`` heading. When
        a form is given, the ``h4`` headings that follow (up to the next
        ``h3``) are searched for the form text and the table after the first
        match is used instead.
        """
        evolution_section = soup.find('span', id='Evolution_data')
        if not evolution_section:
            return None

        heading = evolution_section.parent
        evolution_table = heading.find_next('table')
        if not form:
            return evolution_table

        form_without_form = form.replace('Form', '').replace('form', '').strip()
        if not form_without_form:
            # An empty needle would match every heading.
            return evolution_table

        for tag in heading.find_next_siblings():
            if tag.name == 'h4' and form_without_form in tag.get_text(strip=True):
                return tag.find_next('table')
            if tag.name == 'h3':
                break
        return evolution_table

    def traverse_and_store(self, node, evolutions, gender):
        """Walk an evolution tree and record every resolvable parent/child pair.

        A node whose PFIC cannot be resolved stops the walk of its subtree.
        """
        from_pfic = self.get_pokemon_form_by_name(node["pokemon"], node["form"], gender=gender)
        if not from_pfic:
            return

        for next_stage in node["evolves_to"]:
            to_pfic = self.get_pokemon_form_by_name(next_stage["pokemon"], next_stage["form"], gender=gender)
            if not to_pfic:
                # The recursive call would resolve the same stage and bail out.
                continue
            evolutions[f"{from_pfic}->{to_pfic}"] = {
                "from_pfic": from_pfic,
                "to_pfic": to_pfic,
                "method": next_stage["method"],
            }
            self.traverse_and_store(next_stage, evolutions, gender)

    def _create_stage(self, td, with_form=True):
        """Build a tree node (pokemon, form, method, evolves_to) from a cell."""
        pokemon_name = self.extract_pokemon_name(td)
        evolution_form = self.extract_evolution_form(td, pokemon_name) if with_form else None
        return {
            "pokemon": pokemon_name,
            "form": evolution_form,
            "method": None,
            "evolves_to": []
        }

    def parse_evolution_chain(self, table, pokemon_form, force_refresh = False):
        """Parse an evolution table into a tree of stage dicts.

        The first row is read as the main chain: cells containing a nested
        table are stages, the other cells carry the method for the stage that
        follows. Remaining rows are read the same way as branches; a branch
        cell with a ``rowspan`` attribute is also attached to the first stage
        already in the tree that has the same Pokémon name.

        Args:
            table: the evolution ``<table>`` tag.
            pokemon_form: form record; its ``pfic`` names the cache record.
            force_refresh: purge the cached tree before parsing.

        Returns:
            The root stage dict (also stored in the cache), or None when the
            table has no ``tbody``, no rows, or no stage in its first row.
        """
        cache_record_name = f"evo_{pokemon_form['pfic']}"
        if force_refresh:
            cache.purge(cache_record_name)

        cached_entry = cache.get(cache_record_name)
        if cached_entry is not None:
            return cached_entry

        tbody = table.find('tbody', recursive=False)
        if not tbody:
            return None

        rows = tbody.find_all('tr', recursive=False)
        if not rows:
            return None
        main_row, branch_rows = rows[0], rows[1:]

        # Parse main evolution chain
        pending_method = None
        root = None
        current_stage = None
        for td in main_row.find_all('td', recursive=False):
            if not td.find('table'):
                pending_method = self.extract_evolution_method(td)
                continue
            new_stage = self._create_stage(td)
            new_stage["method"] = pending_method
            pending_method = None
            if root is None:
                root = new_stage
            if current_stage is not None:
                current_stage["evolves_to"].append(new_stage)
            current_stage = new_stage

        if root is None:
            # Without a main chain there is nothing to hang branches on.
            return None

        # Parse branching evolutions
        for row in branch_rows:
            branch_method = None
            branch_stage = None
            for td in row.find_all('td', recursive=False):
                if not td.find('table'):
                    branch_method = self.extract_evolution_method(td)
                    continue
                new_stage = self._create_stage(td)
                new_stage["method"] = branch_method
                branch_method = None
                if branch_stage is not None:
                    branch_stage["evolves_to"].append(new_stage)
                branch_stage = new_stage

                if not td.get('rowspan'):
                    continue
                # Find which existing stage this branches from. Skipping
                # new_stage itself avoids a self-reference, which would make
                # the recursive tree walks loop forever.
                for main_stage in self.find_stages(root):
                    if main_stage is not new_stage and main_stage["pokemon"] == new_stage["pokemon"]:
                        main_stage["evolves_to"].append(new_stage)
                        break

        cache.set(cache_record_name, root)
        return root

    def find_stages(self, node):
        """Return ``node`` and all of its descendants in depth-first order."""
        stages = [node]
        for stage in node["evolves_to"]:
            stages.extend(self.find_stages(stage))
        return stages

    def extract_pokemon_name(self, td: Tag) -> Optional[str]:
        """Return the text of the cell's name link, or None if there is none."""
        name_tag = self.find_name_tag(td)
        if name_tag:
            return name_tag.get_text(strip=True)
        return None

    def find_name_tag(self, td: Tag) -> Optional[Tag]:
        """Find the anchor naming the Pokémon inside the cell's nested table.

        Prefers an ``a.selflink``; otherwise the first titled anchor whose
        class is not ``image``. Returns None when the cell has no nested table
        or no such anchor.
        """
        table = td.find('table') if td is not None else None
        if table is None:
            return None
        name_tag = table.find('a', class_='selflink')
        if name_tag:
            return name_tag
        return table.find('a', title=True, class_=lambda x: x != 'image')

    def extract_stage_form(self, td: Tag) -> Optional[str]:
        """Return the text of the first ``<small>`` in the cell's nested table."""
        table = td.find('table') if td is not None else None
        if table is None:
            return None
        stage_tag = table.find('small')
        if stage_tag:
            return stage_tag.get_text(strip=True)
        return None

    def extract_evolution_form(self, td: Tag, name: str) -> Optional[str]:
        """Return the form label shown next to the Pokémon name, if any.

        The label is the first ``<small>`` under the name link's parent, and
        is only reported when that parent holds more than one ``<small>``.
        ``name`` is accepted for interface compatibility and is not used.
        """
        name_tag = self.find_name_tag(td)
        if name_tag:
            small_tags = name_tag.parent.find_all('small')
            if len(small_tags) > 1:
                return small_tags[0].get_text(strip=True)
        return None

    def extract_evolution_method(self, td: Tag) -> str:
        """Return the stripped text of a method cell."""
        return td.get_text(strip=True)

    def parse_eevee_evolution_chain(self, table, pokemon_form):
        """Parse Eevee's evolution table into a one-level tree.

        Reads row 1 as Eevee, row 2 as the evolution methods and row 3 as the
        evolutions, pairing methods with evolutions by cell position.

        Returns:
            The Eevee stage dict with its evolutions in ``evolves_to``, or an
            empty list when the table does not have that layout.
        """
        tbody = table.find('tbody', recursive=False)
        if not tbody:
            return []

        rows = tbody.find_all('tr', recursive=False)
        if len(rows) < 4:
            return []
        eevee_row, method_row, eeveelutions_row = rows[1], rows[2], rows[3]

        eevee_td = eevee_row.find('td', recursive=False)
        if eevee_td is None:
            return []
        eevee_stage = self._create_stage(eevee_td, with_form=False)

        methods = [
            self.extract_evolution_method(method_td)
            for method_td in method_row.find_all('td', recursive=False)
        ]

        eeveelutions = []
        for index, eeveelution_td in enumerate(eeveelutions_row.find_all('td', recursive=False)):
            eeveelution_stage = self._create_stage(eeveelution_td, with_form=False)
            # The methods were collected but never attached, leaving every
            # Eevee evolution with method None.
            if index < len(methods):
                eeveelution_stage["method"] = methods[index]
            eeveelutions.append(eeveelution_stage)

        eevee_stage["evolves_to"] = eeveelutions
        return eevee_stage

    def parse_pokemon_subtable(self, td):
        """Return ``(name, stage text)`` for a cell with a nested table.

        Returns ``(None, None)`` when the cell has no nested table.
        """
        if td.find('table'):
            return self.extract_pokemon_name(td), self.extract_stage_form(td)
        return None, None

    def get_pokemon_form_by_name(self, name: str, form: Optional[str] = None, threshold: int = 80, gender: Optional[str] = None):
        """Resolve a Pokémon name (and optional form/gender) to a PFIC.

        Lookup order:
          1. With neither form nor gender: a single record, or a first record
             without a form name, is returned directly; otherwise the lookup
             is retried with the form "Male" and an exact-match threshold.
          2. With a gender: the gender is tried as the form with an
             exact-match threshold and its result is returned if found.
          3. The form is fuzzy-matched against every record's form name.
          4. If a form was given but nothing matched and several records
             exist, the first record is returned.

        Records are ordered by ``parse_pfic`` of their PFIC before matching.

        Returns:
            The matching PFIC, or None when nothing can be resolved.
        """
        fields = ["pfic", "name", "form_name"]
        results = db.get_pokemon_details_by_name(name, fields)
        if not results:
            return None

        # sorted() leaves the list handed back by the database untouched.
        results = sorted(results, key=lambda x: parse_pfic(x["pfic"]))

        if form is None and gender is None:
            if len(results) > 1 and results[0]["form_name"] is not None:
                return self.get_pokemon_form_by_name(name, "Male", threshold=100, gender=gender)
            return results[0]["pfic"]

        if gender:
            gendered_form = self.get_pokemon_form_by_name(name, gender, threshold=100)
            if gendered_form:
                return gendered_form

        stripped_form = self.strip_pokemon_name(name, form)
        for entry in results:
            stripped_db_form = self.strip_pokemon_name(entry["name"], entry["form_name"])
            if self.fuzzy_match_form(stripped_form, stripped_db_form, threshold):
                return entry["pfic"]

        # Sometimes we get a form for a Pokémon that doesn't really have one.
        if len(results) > 1 and form is not None:
            return results[0]["pfic"]

        return None

    def strip_pokemon_name(self, pokemon_name: str, form_name: Optional[str]) -> Optional[str]:
        """Normalise a form name for comparison.

        Removes the word "Form" and any occurrence of the Pokémon name
        (case-insensitive), then collapses runs of whitespace. Falsy input is
        returned unchanged.
        """
        if not form_name:
            return form_name
        form_name = form_name.replace("Form", "").strip()
        form_name = re.sub(f'{re.escape(pokemon_name)}\\s*', '', form_name, flags=re.IGNORECASE).strip()
        # The earlier replace(" ", " ") was a no-op; removing words above can
        # leave doubled spaces, so collapse whitespace explicitly.
        return " ".join(form_name.split())

    def fuzzy_match_form(self, form1: Optional[str], form2: Optional[str], threshold: int = 80) -> bool:
        """Tell whether two form names match.

        If either value is None they match only when both are None; otherwise
        the case-insensitive ``fuzz.ratio`` must reach ``threshold``.
        """
        if form1 is None or form2 is None:
            return form1 == form2
        return fuzz.ratio(form1.lower(), form2.lower()) >= threshold