import re

from bs4 import BeautifulSoup
from PyQt6.QtCore import QObject, pyqtSignal, QRunnable

from cache import cache
from utility.functions import (
    get_generation_from_national_dex,
    sanitise_pokemon_name_for_url,
    remove_accents,
    compare_pokemon_forms,
    find_game_generation,
    format_pokemon_id,
)
from utility.pokemon_word_ninja import PokemonWordNinja

# Intro sentence of a pokedex page, in the format
# "{pokemon name} is a {type}(/{2nd_type}) type Pokémon introduced in Generation {generation number}."
_DEX_INTRO_PATTERN = re.compile(
    r'^(.+?) is a (\w+)(?:/(\w+))? type Pokémon introduced in Generation (\d+)\.$'
)

# Sentences that tie a named form to the game it first appeared in.
# Every pattern captures exactly three groups: (form adjective, first game, optional second game).
_FORM_ORIGIN_PATTERNS = (
    re.compile(r'a(?:n)? (\w+) Form(?:,)? introduced in (?:the )?([\w\s:]+)(?:\/([\w\s:]+))?', re.IGNORECASE),
    re.compile(r'a(?:n)? (\w+) form(?:,)? available in the latest update to ([\w\s:]+)(?:& ([\w\s:]+))?', re.IGNORECASE),
    re.compile(r'has (?:\w+) new (\w+) Form(?:s)?(?:,)? available in (?:the )?([\w\s:]+)(?:& ([\w\s:]+))?', re.IGNORECASE),
    re.compile(r'a(?:n)? (\w+) form(?:,)? introduced in the Crown Tundra Expansion Pass to ([\w\s:]+)(?:& ([\w\s:]+))?', re.IGNORECASE),
)


class GatherPokemonFormsWorkerSignals(QObject):
    """Signals emitted by GatherPokemonFormsWorker."""

    # Carries the list of form records produced by the worker.
    finished = pyqtSignal(list)


class GatherPokemonFormsWorker(QRunnable):
    """Runnable that scrapes pokemondb.net for every Pokémon form and its sprite."""

    def __init__(self):
        super().__init__()
        self.signals = GatherPokemonFormsWorkerSignals()
        self.splitter = PokemonWordNinja()

    def run(self):
        """Gather all forms and emit them through ``signals.finished``.

        Any exception is reported with ``print`` and no signal is emitted.
        """
        try:
            gathered_data = self.gather_forms_data()
            # gather_forms_data returns None when the index page cannot be
            # fetched; the signal is declared as carrying a list, so emit an
            # empty list rather than None.
            self.signals.finished.emit(gathered_data if gathered_data is not None else [])
        except Exception as e:
            print(f"Error gathering Pokémon forms: {e}")

    def gather_forms_data(self):
        """Collect the form records for every Pokémon on the sprites index page.

        Returns:
            A list of form dicts (see ``process_pokemon_entry``), or ``None``
            when the index page could not be fetched.
        """
        # Get the sprites page from pokemondb.
        # This gives us every pokemon in its default form.
        url = "https://pokemondb.net/sprites"
        page_data = cache.fetch_url(url)
        if not page_data:
            return None

        soup = BeautifulSoup(page_data, 'html.parser')
        pokemon = soup.find_all('a', class_='infocard')

        # Loop through each card for the pokemon so we can extract out more information.
        # The 1-based position of the card is used as the national dex number.
        pokemon_forms = []
        for national_dex_number, mon in enumerate(pokemon, start=1):
            new_forms = self.process_pokemon_entry(national_dex_number, mon)
            if new_forms:
                pokemon_forms.extend(new_forms)

        return pokemon_forms

    def get_pokemon_sprites_page_data(self, pokemon_name: str):
        """Fetch the sprites page for ``pokemon_name`` (URL-ready name) through the cache."""
        url = f"https://pokemondb.net/sprites/{pokemon_name}"
        return cache.fetch_url(url)

    def get_pokemon_dex_page(self, pokemon_name: str):
        """Fetch the pokedex page for ``pokemon_name`` (URL-ready name) through the cache."""
        url = f"https://pokemondb.net/pokedex/{pokemon_name}"
        return cache.fetch_url(url)

    def extract_form_name(self, soup):
        """Build a form name from the ``<small>`` tags inside ``soup``.

        The texts of all ``<small>`` tags are joined with single spaces,
        stripped, and passed through ``self.splitter.split``.

        Returns:
            The split form name, or the sentinel string ``"None"`` when
            ``soup`` contains no ``<small>`` tag.
        """
        smalls = soup.find_all('small')
        if not smalls:
            return "None"

        form_name = " ".join(small.get_text(strip=True) for small in smalls).strip()
        return self.splitter.split(form_name)

    def process_pokemon_entry(self, national_dex_number, pokemon_soup, force_refresh=False):
        """Build the form records for one Pokémon card from the sprites index.

        Args:
            national_dex_number: National dex number of the Pokémon.
            pokemon_soup: The infocard element; its text is the Pokémon name.
            force_refresh: When true, purge the cached result for this
                Pokémon before looking it up.

        Returns:
            The cached result when one exists, ``None`` when the sprites page
            cannot be fetched, otherwise a list of dicts with the keys
            ``pfic``, ``name``, ``form_name``, ``sprite_url``,
            ``national_dex``, ``generation`` and ``gender_relevant``. The
            freshly built list is stored in the cache before returning.
        """
        found_forms = []
        default_generation = get_generation_from_national_dex(national_dex_number)
        pokemon_name = pokemon_soup.get_text(strip=True)
        self.splitter.add_custom_word(pokemon_name)
        print(f"Processing {pokemon_name}")

        url_name = sanitise_pokemon_name_for_url(pokemon_name)

        if force_refresh:
            cache.purge(url_name)

        cached_entry = cache.get(url_name)
        if cached_entry is not None:
            return cached_entry

        sprites_page_data = self.get_pokemon_sprites_page_data(url_name)
        if not sprites_page_data:
            return None

        sprites_soup = BeautifulSoup(sprites_page_data, 'html.parser')

        # Only the "Home" rows of the table following the "Generation 8" heading are used.
        home_rows = []
        generation_8_heading = sprites_soup.find('h2', string='Generation 8')
        if generation_8_heading:
            generation_8_table = generation_8_heading.find_next('table')
            if generation_8_table:
                home_rows = [
                    row for row in generation_8_table.select('tbody > tr')
                    if "Home" in row.get_text(strip=True)
                ]

        # Parsed pokedex page, loaded on first use and shared by every sprite
        # of this entry. Stays None (and is retried) while the fetch fails.
        dex_soup = None

        for row in home_rows:
            sprites = row.find_all('span', class_='sprites-table-card')
            if not sprites:
                continue

            form_index = 0
            for sprite in sprites:
                sprite_img = sprite.find('img')
                sprite_url = "missing"
                if sprite_img:
                    # An <img> without a src attribute yields None; keep the placeholder.
                    sprite_url = sprite_img.get('src') or "missing"

                if "shiny" in sprite_url:
                    continue

                form_name = self.extract_form_name(sprite)

                record_male_form = False
                record_female_form = False
                record_genderless_form = False
                gender_relevant = False

                if form_name != "None":
                    form_index += 1

                # Male/Female sprites share the form index of the form they
                # belong to, so the increment above is undone for them.
                gender = 0
                if form_name.startswith("Male"):
                    form_index -= 1
                    gender = 1
                    gender_relevant = True
                elif form_name.startswith("Female"):
                    form_index -= 1
                    gender = 2
                    gender_relevant = True

                if dex_soup is None:
                    dex_page_data = self.get_pokemon_dex_page(url_name)
                    if dex_page_data:
                        dex_soup = BeautifulSoup(dex_page_data, 'html.parser')

                # Start every form from the dex-number generation so a value
                # resolved for a previous form never carries over.
                generation = default_generation
                if dex_soup is not None:
                    generation = self._resolve_generation(
                        dex_soup, pokemon_name, form_name, default_generation
                    )
                    if not gender_relevant:
                        # see if we can find gender info on the page to see if it has male and female forms anyway.
                        (
                            record_male_form,
                            record_female_form,
                            record_genderless_form,
                        ) = self._read_gender_flags(dex_soup)

                # Hard-coded exceptions.
                if pokemon_name == "Basculin" and form_name.lower() == "white-striped form":
                    generation = 8
                elif pokemon_name == "Burmy":
                    gender_relevant = True

                if pokemon_name == "Ursaluna" and form_name.lower() == "blood moon":
                    generation = 9

                if gender_relevant or record_genderless_form:
                    found_forms.append(self._build_form_record(
                        national_dex_number, generation, form_index, gender,
                        pokemon_name, form_name if form_name != "None" else None,
                        sprite_url, gender_relevant,
                    ))
                else:
                    # One record per gender the dex page lists, all sharing the same sprite.
                    gendered_variants = (
                        (record_male_form, 1, "Male"),
                        (record_female_form, 2, "Female"),
                    )
                    for wanted, gender_code, label in gendered_variants:
                        if not wanted:
                            continue
                        if form_name == "None":
                            gendered_form = label
                        else:
                            gendered_form = label + " " + form_name
                        found_forms.append(self._build_form_record(
                            national_dex_number, generation, form_index, gender_code,
                            pokemon_name, gendered_form, sprite_url, gender_relevant,
                        ))

        cache.set(url_name, found_forms)
        return found_forms

    def _resolve_generation(self, dex_soup, pokemon_name, form_name, default_generation):
        """Work out the generation of one form from the parsed pokedex page.

        Returns ``default_generation`` unless the page's intro sentence, or a
        sentence about the specific form, provides a different value.
        """
        generation = default_generation

        # Find a heading that has the pokemon name in it
        dex_header = dex_soup.find('h1', string=pokemon_name)
        if not dex_header:
            return generation

        # The next <p> tag contains the generation number, in the format
        # "{pokemon name} is a {type}(/{2nd_type}) type Pokémon introduced in Generation {generation number}."
        generation_tag = dex_header.find_next('p')
        if generation_tag is None:
            return generation

        intro_match = _DEX_INTRO_PATTERN.match(generation_tag.get_text())
        if intro_match:
            generation = int(intro_match.group(4))

        if form_name == "None":
            return generation

        # The paragraph after the intro is searched for a sentence about this form.
        next_tag = generation_tag.find_next('p')
        if not next_tag:
            return generation

        extra_text = remove_accents(next_tag.get_text())
        test_form = (
            form_name.replace(pokemon_name, "")
            .replace("Male", "")
            .replace("Female", "")
            .strip()
        )
        if pokemon_name == "Tauros" and form_name in ("Aqua Breed", "Blaze Breed", "Combat Breed"):
            test_form = "Paldean"

        form_generation = self._find_form_generation(test_form, extra_text)
        if form_generation:
            generation = form_generation
        return generation

    @staticmethod
    def _find_form_generation(test_form, extra_text):
        """Return the generation of the first game whose sentence matches ``test_form``, else None."""
        for origin_pattern in _FORM_ORIGIN_PATTERNS:
            for regional, first_game, _second_game in origin_pattern.findall(extra_text):
                if not compare_pokemon_forms(test_form, regional):
                    continue
                target_game = first_game.replace("Pokemon", "").strip()
                found = find_game_generation(target_game)
                if found:
                    return found
        return None

    @staticmethod
    def _read_gender_flags(dex_soup):
        """Read the "Gender" row of the pokedex page.

        Returns:
            A ``(male, female, genderless)`` tuple of booleans. All three are
            false when the page has no "Gender" header or no cell after it.
        """
        gender_header = dex_soup.find('th', string="Gender")
        if not gender_header:
            # NOTE(review): the whitespace-collapsed original does not show
            # whether a page without a Gender row should count as genderless;
            # it is treated as "unknown" here (no record flags set) — confirm.
            return False, False, False

        gender_cell = gender_header.find_next('td')
        if gender_cell is None:
            return False, False, False

        male = False
        female = False
        skip_next = False
        for token in gender_cell.get_text().replace(",", "").split():
            if skip_next:
                skip_next = False
                continue
            lowered = token.lower()
            if lowered.startswith("0%"):
                # A 0% share means the gender named by the next token never occurs.
                skip_next = True
            elif lowered == "male":
                male = True
            elif lowered == "female":
                female = True

        return male, female, not (male or female)

    @staticmethod
    def _build_form_record(national_dex_number, generation, form_index, gender,
                           pokemon_name, form_name, sprite_url, gender_relevant):
        """Assemble the dict that describes a single Pokémon form."""
        return {
            "pfic": format_pokemon_id(national_dex_number, generation, form_index, gender),
            "name": pokemon_name,
            "form_name": form_name,
            "sprite_url": sprite_url,
            "national_dex": national_dex_number,
            "generation": generation,
            "gender_relevant": gender_relevant,
        }