You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

170 lines
8.0 KiB

from PyQt6.QtCore import QObject, pyqtSignal, QRunnable
from bs4 import BeautifulSoup
import re
from cache import cache
from utility.functions import get_generation_from_national_dex, sanitise_pokemon_name_for_url, remove_accents, compare_pokemon_forms, find_game_generation, format_pokemon_id
class GatherPokemonFormsWorkerSignals(QObject):
    """Signal container for the forms-gathering worker.

    The signal lives on its own QObject subclass; the worker creates one
    instance of this class and emits through it.
    """

    # Emitted with the list produced by the worker's gathering step.
    finished = pyqtSignal(list)
class GatherPokemonFormsWorker(QRunnable):
    """Runnable that scrapes pokemondb.net and collects every Pokémon form.

    The work happens in :meth:`run`; the result is delivered through
    ``self.signals.finished`` as a list of form dictionaries (see
    :meth:`process_pokemon_entry` for the dictionary layout).
    """

    # Patterns tried, in order, against the (accent-stripped) paragraph that
    # follows the introductory sentence on a Pokédex page.  Every pattern has
    # exactly three capture groups: (form descriptor, first game, optional
    # second game).  Compiled once here instead of on every entry.
    _FORM_PATTERNS = (
        re.compile(r'a(?:n)? (\w+) Form(?:,)? introduced in (?:the )?([\w\s:]+)(?:\/([\w\s:]+))?', re.IGNORECASE),
        re.compile(r'a(?:n)? (\w+) form(?:,)? available in the latest update to ([\w\s:]+)(?:& ([\w\s:]+))?', re.IGNORECASE),
        re.compile(r'has (?:\w+) new (\w+) Form(?:s)?(?:,)? available in (?:the )?([\w\s:]+)(?:& ([\w\s:]+))?', re.IGNORECASE),
        re.compile(r'a(?:n)? (\w+) form(?:,)? introduced in the Crown Tundra Expansion Pass to ([\w\s:]+)(?:& ([\w\s:]+))?', re.IGNORECASE),
    )

    # Introductory sentence of a Pokédex page:
    # "{name} is a {type}(/{2nd type}) type Pokémon introduced in Generation {n}."
    _DEX_INTRO_PATTERN = re.compile(
        r'^(.+?) is a (\w+)(?:/(\w+))? type Pokémon introduced in Generation (\d+)\.$'
    )

    # Tauros form names that are looked up under the descriptor "Paldean".
    _PALDEAN_TAUROS_BREEDS = ("Aqua Breed", "Blaze Breed", "Combat Breed")

    def __init__(self):
        super().__init__()
        self.signals = GatherPokemonFormsWorkerSignals()

    def run(self):
        """Gather all forms and emit them through ``signals.finished``.

        Any exception raised while gathering is printed and swallowed; in
        that case ``finished`` is not emitted.
        """
        try:
            gathered_data = self.gather_forms_data()
            # gather_forms_data() returns None when the index page cannot be
            # fetched.  The signal is declared as pyqtSignal(list), so emit an
            # empty list instead of None to keep the emit from failing.
            self.signals.finished.emit(gathered_data or [])
        except Exception as e:
            print(f"Error gathering Pokémon forms: {e}")

    def gather_forms_data(self):
        """Collect the forms of every Pokémon listed on the sprites index.

        Returns:
            A flat list of form dictionaries, or ``None`` when the index
            page could not be fetched.
        """
        # The sprites index lists every Pokémon in its default form.
        url = "https://pokemondb.net/sprites"
        page_data = cache.fetch_url(url)
        if not page_data:
            return None

        soup = BeautifulSoup(page_data, 'html.parser')
        pokemon = soup.find_all('a', class_='infocard')

        # The 1-based position of a card on the page is used as the
        # national dex number of that Pokémon.
        pokemon_forms = []
        for national_dex_number, mon in enumerate(pokemon, start=1):
            new_forms = self.process_pokemon_entry(national_dex_number, mon)
            if new_forms:
                pokemon_forms.extend(new_forms)
        return pokemon_forms

    def get_pokemon_sprites_page_data(self, pokemon_name: str):
        """Fetch the pokemondb sprites page for ``pokemon_name`` (URL form)."""
        url = f"https://pokemondb.net/sprites/{pokemon_name}"
        return cache.fetch_url(url)

    def get_pokemon_dex_page(self, pokemon_name: str):
        """Fetch the pokemondb Pokédex page for ``pokemon_name`` (URL form)."""
        url = f"https://pokemondb.net/pokedex/{pokemon_name}"
        return cache.fetch_url(url)

    def extract_form_name(self, soup):
        """Build a form name from the ``<small>`` tags inside ``soup``.

        Returns:
            The stripped texts of all ``<small>`` tags joined by single
            spaces, or the literal string ``"None"`` when there are no
            ``<small>`` tags at all.
        """
        smalls = soup.find_all('small')
        if not smalls:
            return "None"
        return " ".join(small.get_text(strip=True) for small in smalls).strip()

    def process_pokemon_entry(self, national_dex_number, pokemon_soup, force_refresh=True):
        """Collect every non-shiny form of one Pokémon.

        Args:
            national_dex_number: National dex number of the Pokémon.
            pokemon_soup: The index-page element whose text is the
                Pokémon's name.
            force_refresh: When true, the cached result for this Pokémon is
                purged before the cache is consulted.

        Returns:
            ``None`` when the sprites page could not be fetched; otherwise
            a list of dictionaries with the keys ``pfic``, ``name``,
            ``form_name``, ``sprite_url``, ``national_dex`` and
            ``generation``.  The list is also stored in the cache under the
            URL form of the Pokémon's name.
        """
        found_forms = []
        default_generation = get_generation_from_national_dex(national_dex_number)
        pokemon_name = pokemon_soup.get_text(strip=True)
        print(f"Processing {pokemon_name}")

        url_name = sanitise_pokemon_name_for_url(pokemon_name)

        if force_refresh:
            cache.purge(url_name)

        cached_entry = cache.get(url_name)
        if cached_entry is not None:
            return cached_entry

        sprites_page_data = self.get_pokemon_sprites_page_data(url_name)
        if not sprites_page_data:
            return None

        sprites_soup = BeautifulSoup(sprites_page_data, 'html.parser')

        # The dex page depends only on the Pokémon, not on the sprite, so it
        # is read at most once, and only if a usable sprite actually turns up.
        dex_intro = None

        for row in self._generation_8_home_rows(sprites_soup):
            form_index = 0
            for sprite in row.find_all('span', class_='sprites-table-card'):
                sprite_img = sprite.find('img')
                # An <img> without a src attribute yields None; treat it
                # the same as a missing <img>.
                sprite_url = (sprite_img.get('src') if sprite_img else None) or "missing"
                if "shiny" in sprite_url:
                    continue

                form_name = self.extract_form_name(sprite)

                if form_name.startswith("Male"):
                    gender = 1
                elif form_name.startswith("Female"):
                    gender = 2
                else:
                    gender = 0
                # Only named, non-gendered sprites advance the form index;
                # unnamed and Male/Female sprites keep the current index.
                if form_name != "None" and gender == 0:
                    form_index += 1

                if dex_intro is None:
                    dex_intro = self._read_dex_intro(url_name, pokemon_name)
                base_generation, forms_text = dex_intro

                # Start every sprite from the Pokémon's own generation so a
                # form-specific generation never carries over to the next
                # sprite.
                generation = default_generation if base_generation is None else base_generation
                if form_name != "None" and forms_text is not None:
                    form_generation = self._find_form_generation(pokemon_name, form_name, forms_text)
                    if form_generation:
                        generation = form_generation

                found_forms.append({
                    "pfic": format_pokemon_id(national_dex_number, generation, form_index, gender),
                    "name": pokemon_name,
                    "form_name": form_name if form_name != "None" else None,
                    "sprite_url": sprite_url,
                    "national_dex": national_dex_number,
                    "generation": generation,
                })

        cache.set(url_name, found_forms)
        return found_forms

    def _generation_8_home_rows(self, sprites_soup):
        """Return the "Home" rows of the table under the 'Generation 8' heading.

        An empty list is returned when the heading or its table is absent.
        """
        heading = sprites_soup.find('h2', string='Generation 8')
        table = heading.find_next('table') if heading else None
        if not table:
            return []
        return [
            row for row in table.select('tbody > tr')
            if "Home" in row.get_text(strip=True)
        ]

    def _read_dex_intro(self, url_name, pokemon_name):
        """Read the generation and the forms paragraph from the dex page.

        Returns:
            A ``(base_generation, forms_text)`` tuple.  ``base_generation``
            is the generation parsed from the introductory sentence, or
            ``None`` when it could not be determined.  ``forms_text`` is the
            accent-stripped text of the paragraph after the introductory
            one, or ``None`` when it is unavailable.
        """
        dex_page_data = self.get_pokemon_dex_page(url_name)
        if not dex_page_data:
            return None, None

        dex_soup = BeautifulSoup(dex_page_data, 'html.parser')
        # Find the heading that carries the Pokémon's name.
        dex_header = dex_soup.find('h1', string=pokemon_name)
        if not dex_header:
            return None, None

        # The next <p> holds the introductory sentence with the generation.
        generation_tag = dex_header.find_next('p')
        if generation_tag is None:
            return None, None

        base_generation = None
        match = self._DEX_INTRO_PATTERN.match(generation_tag.get_text())
        if match:
            base_generation = int(match.group(4))

        forms_text = None
        next_tag = generation_tag.find_next('p')
        if next_tag:
            forms_text = remove_accents(next_tag.get_text())
        return base_generation, forms_text

    def _find_form_generation(self, pokemon_name, form_name, forms_text):
        """Look up the generation in which ``form_name`` was introduced.

        Returns the first truthy result of ``find_game_generation`` for a
        pattern match whose descriptor compares equal to the form, or
        ``None`` when nothing matches.
        """
        test_form = (
            form_name.replace(pokemon_name, "")
            .replace("Male", "")
            .replace("Female", "")
            .strip()
        )
        if pokemon_name == "Tauros" and form_name in self._PALDEAN_TAUROS_BREEDS:
            test_form = "Paldean"

        for pattern in self._FORM_PATTERNS:
            for regional, game1, _game2 in pattern.findall(forms_text):
                if not compare_pokemon_forms(test_form, regional):
                    continue
                target_game = game1.replace("Pokemon", "").strip()
                result = find_game_generation(target_game)
                if result:
                    return result
        return None