You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
170 lines
8.0 KiB
170 lines
8.0 KiB
from PyQt6.QtCore import QObject, pyqtSignal, QRunnable
|
|
from bs4 import BeautifulSoup
|
|
import re
|
|
|
|
from cache import cache
|
|
from utility.functions import get_generation_from_national_dex, sanitise_pokemon_name_for_url, remove_accents, compare_pokemon_forms, find_game_generation, format_pokemon_id
|
|
|
|
class GatherPokemonFormsWorkerSignals(QObject):
    """Signals exposed by ``GatherPokemonFormsWorker``.

    The worker stores an instance of this class on ``self.signals`` and
    emits through it (presumably because ``QRunnable`` is not itself a
    ``QObject`` and so cannot declare signals directly).
    """

    # Emitted by the worker's run() with the gathered form data as its
    # single ``list`` argument.
    finished = pyqtSignal(list)
|
|
|
|
class GatherPokemonFormsWorker(QRunnable):
    """Runnable that scrapes pokemondb.net for every Pokémon form.

    ``run`` walks the sprites index page, visits each Pokémon's own sprite
    page and Pokédex page, and emits ``signals.finished`` with a flat list
    of form records.  Each record is a dict with the keys ``pfic``,
    ``name``, ``form_name``, ``sprite_url``, ``national_dex`` and
    ``generation``.
    """

    # Sentences on a Pokédex page that say in which game a named form first
    # appeared.  Every pattern captures (form adjective, first game,
    # optional second game), so ``findall`` always yields 3-tuples.
    _FORM_INTRODUCTION_PATTERNS = (
        re.compile(r'a(?:n)? (\w+) Form(?:,)? introduced in (?:the )?([\w\s:]+)(?:\/([\w\s:]+))?', re.IGNORECASE),
        re.compile(r'a(?:n)? (\w+) form(?:,)? available in the latest update to ([\w\s:]+)(?:& ([\w\s:]+))?', re.IGNORECASE),
        re.compile(r'has (?:\w+) new (\w+) Form(?:s)?(?:,)? available in (?:the )?([\w\s:]+)(?:& ([\w\s:]+))?', re.IGNORECASE),
        re.compile(r'a(?:n)? (\w+) form(?:,)? introduced in the Crown Tundra Expansion Pass to ([\w\s:]+)(?:& ([\w\s:]+))?', re.IGNORECASE),
    )

    # Intro sentence of a Pokédex page:
    # "{name} is a {type}(/{2nd type}) type Pokémon introduced in Generation {n}."
    _DEX_INTRO_PATTERN = re.compile(
        r'^(.+?) is a (\w+)(?:/(\w+))? type Pokémon introduced in Generation (\d+)\.$'
    )

    # Form names of Tauros that the Pokédex text refers to as "Paldean".
    _TAUROS_BREEDS = ("Aqua Breed", "Blaze Breed", "Combat Breed")

    def __init__(self):
        """Create the worker together with the object carrying its signals."""
        super().__init__()
        self.signals = GatherPokemonFormsWorkerSignals()

    def run(self):
        """Gather all form data and emit it through ``signals.finished``.

        Any exception is reported on stdout and swallowed; in that case
        nothing is emitted.
        """
        try:
            gathered_data = self.gather_forms_data()
            # gather_forms_data() returns None when the index page could not
            # be fetched, but the signal is declared with a ``list`` payload,
            # so hand listeners an empty list instead of None.
            self.signals.finished.emit(gathered_data or [])
        except Exception as e:
            print(f"Error gathering Pokémon forms: {e}")

    def gather_forms_data(self):
        """Collect the form records of every Pokémon on the sprites index.

        Returns:
            A list of form dicts (see ``process_pokemon_entry``), or ``None``
            when the index page could not be fetched.
        """
        # The sprites index lists every Pokémon once, in its default form.
        url = "https://pokemondb.net/sprites"
        page_data = cache.fetch_url(url)
        if not page_data:
            return None

        soup = BeautifulSoup(page_data, 'html.parser')
        cards = soup.find_all('a', class_='infocard')

        # The 1-based position of a card on the index page is passed on as
        # the national dex number of that Pokémon.
        pokemon_forms = []
        for national_dex_number, card in enumerate(cards, start=1):
            new_forms = self.process_pokemon_entry(national_dex_number, card)
            if new_forms:
                pokemon_forms.extend(new_forms)
        return pokemon_forms

    def get_pokemon_sprites_page_data(self, pokemon_name: str):
        """Fetch the sprites page for ``pokemon_name`` (URL-safe name) via the cache."""
        url = f"https://pokemondb.net/sprites/{pokemon_name}"
        return cache.fetch_url(url)

    def get_pokemon_dex_page(self, pokemon_name: str):
        """Fetch the Pokédex page for ``pokemon_name`` (URL-safe name) via the cache."""
        url = f"https://pokemondb.net/pokedex/{pokemon_name}"
        return cache.fetch_url(url)

    def extract_form_name(self, soup):
        """Return the form name written in the ``<small>`` tags under ``soup``.

        The texts of all ``<small>`` descendants are joined with single
        spaces.  When there is no ``<small>`` tag at all, the literal string
        ``"None"`` is returned (callers compare against that sentinel).
        """
        smalls = soup.find_all('small')
        if not smalls:
            return "None"
        return " ".join(small.get_text(strip=True) for small in smalls).strip()

    def process_pokemon_entry(self, national_dex_number, pokemon_soup, force_refresh = True):
        """Build the list of form records for a single Pokémon.

        Args:
            national_dex_number: National dex number of the Pokémon.
            pokemon_soup: Tag whose text is the Pokémon's display name.
            force_refresh: When true, purge the cached result for this
                Pokémon before looking it up, so the entry is rebuilt.

        Returns:
            A list of dicts with the keys ``pfic``, ``name``, ``form_name``,
            ``sprite_url``, ``national_dex`` and ``generation``; the cached
            value when one exists; or ``None`` when the sprites page could
            not be fetched.
        """
        fallback_generation = get_generation_from_national_dex(national_dex_number)
        pokemon_name = pokemon_soup.get_text(strip=True)
        print(f"Processing {pokemon_name}")

        url_name = sanitise_pokemon_name_for_url(pokemon_name)

        if force_refresh:
            cache.purge(url_name)

        cached_entry = cache.get(url_name)
        if cached_entry is not None:
            return cached_entry

        sprites_page_data = self.get_pokemon_sprites_page_data(url_name)
        if not sprites_page_data:
            return None

        found_forms = []
        # (base generation, form notes) from the Pokédex page.  Loaded on
        # first use only, so the page is fetched and parsed at most once per
        # Pokémon instead of once per sprite.
        dex_context = None

        sprites_soup = BeautifulSoup(sprites_page_data, 'html.parser')
        for row in self._generation_8_home_rows(sprites_soup):
            form_index = 0
            for sprite in row.find_all('span', class_='sprites-table-card'):
                sprite_img = sprite.find('img')
                # An <img> without a src attribute yields None; treat it like
                # a missing image rather than failing on the substring test.
                sprite_url = (sprite_img.get('src') if sprite_img else None) or "missing"
                if "shiny" in sprite_url:
                    continue

                form_name = self.extract_form_name(sprite)
                has_form_name = form_name != "None"

                # Male/Female variants only set the gender; every other named
                # form advances the form index.
                gender = 0
                if form_name.startswith("Male"):
                    gender = 1
                elif form_name.startswith("Female"):
                    gender = 2
                elif has_form_name:
                    form_index += 1

                if dex_context is None:
                    dex_context = self._load_dex_context(url_name, pokemon_name, fallback_generation)
                # Start every sprite from the base generation so an override
                # found for one form never carries over to the next sprite.
                generation, form_notes = dex_context

                if has_form_name and form_notes:
                    override = self._find_form_generation(
                        self._form_search_key(pokemon_name, form_name), form_notes
                    )
                    if override:
                        generation = override

                found_forms.append({
                    "pfic": format_pokemon_id(national_dex_number, generation, form_index, gender),
                    "name": pokemon_name,
                    "form_name": form_name if has_form_name else None,
                    "sprite_url": sprite_url,
                    "national_dex": national_dex_number,
                    "generation": generation,
                })

        cache.set(url_name, found_forms)
        return found_forms

    @staticmethod
    def _generation_8_home_rows(sprites_soup):
        """Return the rows mentioning "Home" in the table after the "Generation 8" heading."""
        heading = sprites_soup.find('h2', string='Generation 8')
        table = heading.find_next('table') if heading else None
        if not table:
            return []
        return [
            row for row in table.select('tbody > tr')
            if "Home" in row.get_text(strip=True)
        ]

    def _load_dex_context(self, url_name, pokemon_name, fallback_generation):
        """Read the base generation and the form notes from the Pokédex page.

        Returns:
            ``(generation, form_notes)``.  ``generation`` is the number from
            the intro sentence, or ``fallback_generation`` when the page,
            heading or sentence cannot be found.  ``form_notes`` is the
            accent-stripped text of the paragraph after the intro, or
            ``None`` when it is unavailable.
        """
        dex_page_data = self.get_pokemon_dex_page(url_name)
        if not dex_page_data:
            return fallback_generation, None

        dex_soup = BeautifulSoup(dex_page_data, 'html.parser')
        # Locate the heading that carries the Pokémon's name.
        dex_header = dex_soup.find('h1', string=pokemon_name)
        if not dex_header:
            return fallback_generation, None

        # The first <p> after the heading holds the intro sentence.
        generation_tag = dex_header.find_next('p')
        if generation_tag is None:
            return fallback_generation, None

        generation = fallback_generation
        intro = self._DEX_INTRO_PATTERN.match(generation_tag.get_text())
        if intro:
            generation = int(intro.group(4))

        next_tag = generation_tag.find_next('p')
        form_notes = remove_accents(next_tag.get_text()) if next_tag is not None else None
        return generation, form_notes

    def _form_search_key(self, pokemon_name, form_name):
        """Reduce a sprite's form name to the adjective looked up in the notes."""
        if pokemon_name == "Tauros" and form_name in self._TAUROS_BREEDS:
            return "Paldean"
        return (
            form_name.replace(pokemon_name, "")
            .replace("Male", "")
            .replace("Female", "")
            .strip()
        )

    def _find_form_generation(self, test_form, form_notes):
        """Return the generation of the game that introduced ``test_form``.

        Patterns are tried in order; the first mention whose form matches
        ``test_form`` and whose first game resolves to a generation wins.
        Returns ``None`` when nothing resolves.
        """
        for pattern in self._FORM_INTRODUCTION_PATTERNS:
            for regional, first_game, _second_game in pattern.findall(form_notes):
                if not compare_pokemon_forms(test_form, regional):
                    continue
                target_game = first_game.replace("Pokemon", "").strip()
                result = find_game_generation(target_game)
                if result:
                    return result
        return None
|