diff --git a/ui/main_window_controller.py b/ui/main_window_controller.py
index bb09c90..e20ce15 100644
--- a/ui/main_window_controller.py
+++ b/ui/main_window_controller.py
@@ -5,6 +5,7 @@ import os
 
 from ui.workers.gather_home_storage_status_worker import GatherHomeStorageStatus
 from ui.workers.gather_pokemon_forms_worker import GatherPokemonFormsWorker
+from ui.workers.gather_evolutions_worker import GatherEvolutions
 
 from utility.functions import get_display_name
 from db import db
@@ -110,16 +111,21 @@ class MainWindowController:
         worker = GatherHomeStorageStatus()
         worker.signals.finished.connect(self.on_home_status_gathered)
         self.thread_pool.start(worker)
-        pass
 
     def on_home_status_gathered(self, data):
         print("Works Done!")
         for pfic in data:
             db.update_home_status(pfic, True)
-        pass
 
     def gather_evolution_info(self):
-        pass
+        """Start a GatherEvolutions worker on the thread pool."""
+        worker = GatherEvolutions()
+        worker.signals.finished.connect(self.on_evolutions_gathered)
+        self.thread_pool.start(worker)
+
+    def on_evolutions_gathered(self, data):
+        """Handle the worker's finished signal; the result is not stored yet."""
+        print("Works Done!")
 
     def reinitialize_database(self):
         pass
diff --git a/ui/workers/gather_evolutions_worker.py b/ui/workers/gather_evolutions_worker.py
index 6953380..3bc5844 100644
--- a/ui/workers/gather_evolutions_worker.py
+++ b/ui/workers/gather_evolutions_worker.py
@@ -1,16 +1,19 @@
+from typing import Optional
 from PyQt6.QtCore import QObject, pyqtSignal, QRunnable
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, Tag
 from cache import cache
 from db import db
 
+from utility.functions import get_form_name, get_display_name
+
 class GatherEvolutionsWorkerSignals(QObject):
     finished = pyqtSignal(list)
 
-class GatherHEvolutions(QRunnable):
+class GatherEvolutions(QRunnable):
     def __init__(self):
         super().__init__()
         self.signals = GatherEvolutionsWorkerSignals()
-        self.base_url = "https://www.serebii.net/pokemonhome/"
+        self.base_url = "https://bulbapedia.bulbagarden.net/wiki/"
 
     def run(self):
         try:
@@ -24,6 +27,245 @@ class GatherHEvolutions(QRunnable):
 
         evolutions = []
         for pokemon_form in all_pokemon_forms:
-            pass
+            print(f"Processing {get_display_name(pokemon_form)}'s evolutions")
+            pokemon_name = pokemon_form["name"]
+            # Build the page URL from the configured wiki root rather than
+            # repeating the literal that __init__ already stores.
+            url = f"{self.base_url}{pokemon_name}_(Pokémon)"
+            page_data = cache.fetch_url(url)
+            if not page_data:
+                continue
+            soup = BeautifulSoup(page_data, 'html.parser')
+            evolution_section = soup.find('span', id='Evolution_data')
+            if not evolution_section:
+                continue
+            # Default to the first table after the Evolution_data heading.
+            evolution_table = evolution_section.parent.find_next('table')
+            form = get_form_name(pokemon_form, not pokemon_form["gender_relevant"])
+            if form:
+                # Prefer the table following an h4 whose text mentions the
+                # form; stop looking once the next h3 is reached.
+                form_without_form = form.replace('Form', '').replace('form', '').strip()
+                for tag in evolution_section.parent.find_next_siblings():
+                    if tag.name == 'h4' and form_without_form in tag.get_text(strip=True):
+                        evolution_table = tag.find_next('table')
+                        break
+                    if tag.name == 'h3':
+                        break
+            if not evolution_table:
+                continue
+
+            # Eevee is routed to its own dedicated parser.
+            if pokemon_name == "Eevee":
+                evolution_chain = self.parse_eevee_evolution_chain(evolution_table, pokemon_form)
+            else:
+                evolution_chain = self.parse_evolution_chain(evolution_table, pokemon_form)
+            evolutions.append(evolution_chain)
+
+        return evolutions
+
+    def _make_stage(self, pokemon_name, method, stage, form, pokemon_form):
+        """Build one evolution-stage record with no links or branches yet."""
+        return {
+            "pokemon": pokemon_name,
+            "method": method,
+            "stage": stage,
+            "form": form,
+            "next_stage": None,
+            "previous_stage": None,
+            "branches": [],
+            "pfic": pokemon_form["pfic"],
+        }
+
+    def parse_evolution_chain(self, table, pokemon_form, force_refresh = False):
+        """Parse an evolution table into a list of linked stage dicts.
+
+        Args:
+            table: The evolution table tag to read.
+            pokemon_form: Form record; its "pfic" keys the cache entry and is
+                copied onto every stage.
+            force_refresh: Purge the cached entry before parsing.
+
+        Returns:
+            The stages parsed from the table's first row, in order; stages
+            from later rows are linked to each other and, when matched to a
+            first-row stage, added to its "branches". A cached entry is
+            returned as-is; [] means no direct tbody or no rows.
+        """
+        cache_record_name = f"evo_{pokemon_form['pfic']}"
+        if force_refresh:
+            cache.purge(cache_record_name)
+
+        cached_entry = cache.get(cache_record_name)
+        if cached_entry is not None:
+            return cached_entry
+
+        tbody = table.find('tbody', recursive=False)
+        if not tbody:
+            return []
+
+        rows = tbody.find_all('tr', recursive=False)
+        if not rows:
+            # An empty body would otherwise raise IndexError on rows[0].
+            return []
+        main_row = rows[0]
+        branch_rows = rows[1:]
+
+        main_chain = []
+        current_stage = None
+        pending_method = None
+
+        # Parse main evolution chain
+        for td in main_row.find_all('td', recursive=False):
+            if td.find('table'):
+                # This TD contains Pokemon information
+                pokemon_name = self.extract_pokemon_name(td)
+                stage = self.extract_stage_form(td)
+                evolution_form = self.extract_evolution_form(td, pokemon_name)
+                new_stage = self._make_stage(
+                    pokemon_name, pending_method, stage, evolution_form, pokemon_form
+                )
+                pending_method = None
+                if current_stage:
+                    current_stage["next_stage"] = new_stage
+                    new_stage["previous_stage"] = current_stage  # Set the back link
+                current_stage = new_stage
+                main_chain.append(current_stage)
+            else:
+                # This TD contains evolution method for the next Pokemon
+                pending_method = self.extract_evolution_method(td)
+
+        # Parse branching evolutions
+        for row in branch_rows:
+            branch_stage = None
+            branch_method = None
+            for td in row.find_all('td', recursive=False):
+                if td.find('table'):
+                    pokemon_name = self.extract_pokemon_name(td)
+                    stage = self.extract_stage_form(td)
+                    evolution_form = self.extract_evolution_form(td, pokemon_name)
+                    new_stage = self._make_stage(
+                        pokemon_name, branch_method, stage, evolution_form, pokemon_form
+                    )
+                    branch_method = None
+                    if branch_stage:
+                        branch_stage["next_stage"] = new_stage
+                        new_stage["previous_stage"] = branch_stage  # Set the back link
+                    branch_stage = new_stage
+                    # Find which main chain Pokemon this branches from
+                    for main_stage in main_chain:
+                        # Stages are dicts, so the name is read by key;
+                        # attribute access raised AttributeError here.
+                        if td.get('rowspan') and main_stage["pokemon"] == pokemon_name:
+                            main_stage["branches"].append(branch_stage)
+                            branch_stage["previous_stage"] = main_stage  # Set the back link to the main chain
+                            break
+                else:
+                    branch_method = self.extract_evolution_method(td)
+
+        cache.set(cache_record_name, main_chain)
+        return main_chain
+
+    def extract_pokemon_name(self, td: Tag) -> Optional[str]:
+        """Return the text of the cell's name link, or None if there is none."""
+        name_tag = self.find_name_tag(td)
+        if name_tag:
+            return name_tag.get_text(strip=True)
+        return None
+
+    def find_name_tag(self, td: Tag) -> Optional[Tag]:
+        """Locate the link naming the Pokémon inside the cell's nested table.
+
+        A link with class "selflink" wins; otherwise the first link that has
+        a title and whose class is not "image" is used. Returns None when
+        the cell has no nested table or no such link.
+        """
+        table = td.find('table')
+        if table is None:
+            # A cell without a nested table would raise AttributeError below.
+            return None
+        name_tag = table.find('a', class_='selflink')
+        if name_tag:
+            return name_tag
+        return table.find('a', title=True, class_=lambda x: x != 'image')
+
+    def extract_stage_form(self, td: Tag) -> Optional[str]:
+        """Return the text of the first small tag in the cell's nested table."""
+        table = td.find('table')
+        stage_tag = table.find('small') if table is not None else None
+        if stage_tag:
+            return stage_tag.get_text(strip=True)
+        return None
+
+    def extract_evolution_form(self, td: Tag, name: str) -> Optional[str]:
+        """Return the text stored as a stage's "form", if the cell has one.
+
+        Looks at the name link's parent and returns the text of its first
+        small tag only when that parent holds more than one. ``name`` is
+        unused; it is kept so existing callers keep working.
+        """
+        name_tag = self.find_name_tag(td)
+        if name_tag:
+            small_tags = name_tag.parent.find_all('small')
+            if len(small_tags) > 1:
+                return small_tags[0].get_text(strip=True)
+        return None
+
+    def extract_evolution_method(self, td: Tag) -> str:
+        """Return the cell's whitespace-stripped text as the evolution method."""
+        return td.get_text(strip=True)
+
+    def parse_eevee_evolution_chain(self, table, pokemon_form):
+        """Parse Eevee's evolution table into a one-element chain.
+
+        Reads the second, third and fourth rows of the table body as the
+        Eevee cell, the evolution methods and the evolved Pokémon, pairing
+        methods with evolutions by position.
+
+        Returns:
+            [eevee_stage] with every evolution in its "branches", or [] when
+            the table lacks a direct tbody, those rows, or the Eevee cell.
+        """
+        tbody = table.find('tbody', recursive=False)
+        if not tbody:
+            return []
+
+        rows = tbody.find_all('tr', recursive=False)
+        if len(rows) < 4:
+            # rows[1]..rows[3] are required; bail out instead of IndexError.
+            return []
+        eevee_row, method_row, eeveelutions_row = rows[1:4]
+
+        eevee_td = eevee_row.find('td', recursive=False)
+        if eevee_td is None:
+            return []
+        pokemon_name, stage = self.parse_pokemon_subtable(eevee_td)
+        eevee_stage = self._make_stage(pokemon_name, None, stage, None, pokemon_form)
+
+        methods = [
+            self.extract_evolution_method(cell)
+            for cell in method_row.find_all('td', recursive=False)
+        ]
+
+        eeveelutions = []
+        for index, cell in enumerate(eeveelutions_row.find_all('td', recursive=False)):
+            pokemon_name, stage = self.parse_pokemon_subtable(cell)
+            # A row with fewer method cells than evolutions leaves the
+            # method unset rather than raising IndexError.
+            method = methods[index] if index < len(methods) else None
+            eeveelution_stage = self._make_stage(pokemon_name, method, stage, None, pokemon_form)
+            eeveelution_stage["previous_stage"] = eevee_stage  # Set the back link to Eevee
+            eeveelutions.append(eeveelution_stage)
+
+        eevee_stage["branches"] = eeveelutions  # Set the branches directly, not as a nested list
 
-        return evolutions
\ No newline at end of file
+        return [eevee_stage]
+
+    def parse_pokemon_subtable(self, td):
+        """Return (name, stage) for a cell holding a nested table, else (None, None)."""
+        if td.find('table'):
+            # This TD contains Pokemon information
+            pokemon_name = self.extract_pokemon_name(td)
+            stage = self.extract_stage_form(td)
+            return pokemon_name, stage
+        return None, None
diff --git a/utility/functions.py b/utility/functions.py
index 4d6dfe2..b54d010 100644
--- a/utility/functions.py
+++ b/utility/functions.py
@@ -79,10 +79,22 @@ def get_objects_by_number(array, target_number):
 
 def get_display_name(pokemon, strip_gender = False):
+    """Return "NNNN - Name", with " (form)" appended when a form name applies."""
     display_name = f"{pokemon["national_dex"]:04d} - {pokemon["name"]}"
+    form = get_form_name(pokemon, strip_gender)
+    if form:
+        display_name += f" ({form})"
+    return display_name
+
+def get_form_name(pokemon, strip_gender = False):
+    """Return the Pokémon's form name, or None when it is empty.
+
+    With ``strip_gender`` set, "Female" and "Male" are removed from the form
+    name first, so a purely gendered form yields None.
+    """
     if pokemon["form_name"]:
         form = pokemon["form_name"]
         if strip_gender:
             form = form.replace("Female", "").replace("Male", "").strip()
         if form != "":
-            display_name += f" ({form})"
-    return display_name
\ No newline at end of file
+            return form
+    return None