from PyQt6 . QtCore import QObject , pyqtSignal , QRunnable
from bs4 import BeautifulSoup
import re
from cache import cache
from utility . functions import get_generation_from_national_dex , sanitise_pokemon_name_for_url , remove_accents , compare_pokemon_forms , find_game_generation , format_pokemon_id
from utility . pokemon_word_ninja import PokemonWordNinja
class GatherPokemonFormsWorkerSignals(QObject):
    """Signal container for GatherPokemonFormsWorker.

    The signals are declared on a separate QObject subclass; the worker
    creates one instance of this class and emits through it.
    """

    # Emitted by the worker with the list of gathered form records.
    finished = pyqtSignal(list)
class GatherPokemonFormsWorker(QRunnable):
    """Runnable that scrapes pokemondb.net and emits one record per Pokémon form.

    The sprites index page lists every Pokémon; for each one the per-Pokémon
    sprites page and Pokédex page are read to work out form names, the
    generation a form belongs to, and which genders should be recorded.
    The combined list is emitted through ``self.signals.finished``.
    """

    # Label returned by extract_form_name when a sprite card has no form text.
    NO_FORM = "None"

    # Sentences on the Pokédex page that tie a regional form to the game it
    # arrived in. Every pattern captures (regional name, game 1, game 2).
    _FORM_GENERATION_PATTERNS = (
        re.compile(r'a(?:n)? (\w+) Form(?:,)? introduced in (?:the )?([\w\s:]+)(?:\/([\w\s:]+))?', re.IGNORECASE),
        re.compile(r'a(?:n)? (\w+) form(?:,)? available in the latest update to ([\w\s:]+)(?:& ([\w\s:]+))?', re.IGNORECASE),
        re.compile(r'has (?:\w+) new (\w+) Form(?:s)?(?:,)? available in (?:the )?([\w\s:]+)(?:& ([\w\s:]+))?', re.IGNORECASE),
        re.compile(r'a(?:n)? (\w+) form(?:,)? introduced in the Crown Tundra Expansion Pass to ([\w\s:]+)(?:& ([\w\s:]+))?', re.IGNORECASE),
    )

    # "{name} is a {type}(/{2nd type}) type Pokémon introduced in Generation {n}."
    _DEX_INTRO_PATTERN = re.compile(
        r'^(.+?) is a (\w+)(?:/(\w+))? type Pokémon introduced in Generation (\d+)\.$'
    )

    def __init__(self):
        super().__init__()
        self.signals = GatherPokemonFormsWorkerSignals()
        self.splitter = PokemonWordNinja()

    def run(self):
        """Gather all forms and emit them on ``signals.finished``.

        Any exception raised while gathering is printed and swallowed; in that
        case nothing is emitted.
        """
        try:
            gathered_data = self.gather_forms_data()
            # The signal is declared as pyqtSignal(list), so always hand it a
            # list: a failed index fetch is reported as an empty list, not None.
            self.signals.finished.emit(gathered_data if gathered_data is not None else [])
        except Exception as e:
            print(f"Error gathering Pokémon forms: {e}")

    def gather_forms_data(self):
        """Return the form records for every Pokémon on the sprites index page.

        Returns:
            A list of form dicts (see process_pokemon_entry), or None when the
            index page could not be fetched.
        """
        # Get the sprites page from pokemondb.
        # This gives us every pokemon in its default form.
        url = "https://pokemondb.net/sprites"
        page_data = cache.fetch_url(url)
        if not page_data:
            return None

        soup = BeautifulSoup(page_data, 'html.parser')
        pokemon_forms = []
        # The position of a card on the page is used as its national dex number.
        for dex_number, card in enumerate(soup.find_all('a', class_='infocard'), start=1):
            new_forms = self.process_pokemon_entry(dex_number, card)
            if new_forms:
                pokemon_forms.extend(new_forms)
        return pokemon_forms

    def get_pokemon_sprites_page_data(self, pokemon_name: str):
        """Fetch the pokemondb sprites page for the given URL-ready name."""
        url = f"https://pokemondb.net/sprites/{pokemon_name}"
        return cache.fetch_url(url)

    def get_pokemon_dex_page(self, pokemon_name: str):
        """Fetch the pokemondb Pokédex page for the given URL-ready name."""
        url = f"https://pokemondb.net/pokedex/{pokemon_name}"
        return cache.fetch_url(url)

    def extract_form_name(self, soup):
        """Return the form label held in the ``<small>`` tags of a sprite card.

        The texts of all ``<small>`` tags are joined with spaces and passed
        through ``self.splitter.split``. When the card has no ``<small>`` tag
        the string "None" is returned.
        """
        smalls = soup.find_all('small')
        if not smalls:
            return self.NO_FORM
        form_name = " ".join(small.get_text(strip=True) for small in smalls).strip()
        return self.splitter.split(form_name)

    def process_pokemon_entry(self, national_dex_number, pokemon_soup, force_refresh=True):
        """Build the form records for one Pokémon card from the index page.

        Args:
            national_dex_number: Dex number assigned to this Pokémon.
            pokemon_soup: The card element; its text is the Pokémon name.
            force_refresh: When true (the default) the cached entry for this
                Pokémon is purged before the cache is consulted.

        Returns:
            The cached entry if one exists, None when the sprites page could
            not be fetched, otherwise a list of dicts with the keys ``pfic``,
            ``name``, ``form_name``, ``sprite_url``, ``national_dex``,
            ``generation`` and ``gender_relevant``. The new list is also
            stored in the cache under the URL-ready name.
        """
        fallback_generation = get_generation_from_national_dex(national_dex_number)
        pokemon_name = pokemon_soup.get_text(strip=True)
        self.splitter.add_custom_word(pokemon_name)
        print(f"Processing {pokemon_name}")
        url_name = sanitise_pokemon_name_for_url(pokemon_name)

        if force_refresh:
            cache.purge(url_name)
        cached_entry = cache.get(url_name)
        if cached_entry is not None:
            return cached_entry

        sprites_page_data = self.get_pokemon_sprites_page_data(url_name)
        if not sprites_page_data:
            return None

        found_forms = []
        sprites_soup = BeautifulSoup(sprites_page_data, 'html.parser')
        # The dex page is the same for every sprite of this Pokémon, so it is
        # loaded at most once, and only if a usable sprite is actually found.
        dex = None

        for row in self._home_sprite_rows(sprites_soup):
            sprites = row.find_all('span', class_='sprites-table-card')
            if not sprites:
                continue

            form_index = 0
            for sprite in sprites:
                sprite_url = self._sprite_url(sprite)
                if "shiny" in sprite_url:
                    continue

                form_name = self.extract_form_name(sprite)
                has_form_label = form_name != self.NO_FORM

                if form_name.startswith("Male"):
                    gender = 1
                elif form_name.startswith("Female"):
                    gender = 2
                else:
                    gender = 0
                gender_relevant = gender != 0
                # Male/Female variants of a form share its index; only other
                # named forms advance the counter.
                if has_form_label and not gender_relevant:
                    form_index += 1

                if dex is None:
                    dex = self._load_dex_details(url_name, pokemon_name, fallback_generation)

                # Start every form from the Pokémon's own generation so that a
                # regional override found for one form cannot leak into the next.
                generation = dex["generation"]
                if has_form_label and dex["forms_text"] is not None:
                    generation = self._form_generation(
                        pokemon_name, form_name, dex["forms_text"], generation
                    )

                # (gender code, form label) pairs to record for this sprite.
                if gender_relevant:
                    variants = [(gender, form_name)]
                elif dex["genderless"]:
                    variants = [(0, form_name if has_form_label else None)]
                else:
                    variants = []
                    if dex["has_male"]:
                        variants.append((1, "Male " + form_name if has_form_label else "Male"))
                    if dex["has_female"]:
                        variants.append((2, "Female " + form_name if has_form_label else "Female"))

                for gender_code, form_label in variants:
                    found_forms.append({
                        "pfic": format_pokemon_id(national_dex_number, generation, form_index, gender_code),
                        "name": pokemon_name,
                        "form_name": form_label,
                        "sprite_url": sprite_url,
                        "national_dex": national_dex_number,
                        "generation": generation,
                        "gender_relevant": gender_relevant,
                    })

        cache.set(url_name, found_forms)
        return found_forms

    def _home_sprite_rows(self, sprites_soup):
        """Return the rows of the "Generation 8" sprite table whose text contains "Home"."""
        heading = sprites_soup.find('h2', string='Generation 8')
        table = heading.find_next('table') if heading is not None else None
        if table is None:
            return []
        return [row for row in table.select('tbody > tr') if "Home" in row.get_text(strip=True)]

    def _sprite_url(self, sprite):
        """Return the ``src`` of the card's image, or "missing" when there is none."""
        sprite_img = sprite.find('img')
        if sprite_img is None:
            return "missing"
        # An <img> without a src attribute yields None; treat it as missing
        # so later substring checks on the URL cannot fail.
        return sprite_img.get('src') or "missing"

    def _load_dex_details(self, url_name, pokemon_name, fallback_generation):
        """Read the per-Pokémon facts needed from the Pokédex page.

        Returns a dict with:
            generation: generation from the intro sentence, or
                ``fallback_generation`` when it cannot be read.
            forms_text: accent-stripped text of the paragraph following the
                intro sentence, or None when it is unavailable.
            has_male / has_female / genderless: flags derived from the
                "Gender" table row; all False when the page or row is missing.
        """
        details = {
            "generation": fallback_generation,
            "forms_text": None,
            "has_male": False,
            "has_female": False,
            "genderless": False,
        }

        dex_page_data = self.get_pokemon_dex_page(url_name)
        if not dex_page_data:
            return details
        dex_soup = BeautifulSoup(dex_page_data, 'html.parser')

        # Find a heading that has the pokemon name in it; the next <p> tag
        # holds the intro sentence with the generation number.
        dex_header = dex_soup.find('h1', string=pokemon_name)
        intro_tag = dex_header.find_next('p') if dex_header is not None else None
        if intro_tag is not None:
            intro_match = self._DEX_INTRO_PATTERN.match(intro_tag.get_text())
            if intro_match:
                details["generation"] = int(intro_match.group(4))
            forms_tag = intro_tag.find_next('p')
            if forms_tag is not None:
                details["forms_text"] = remove_accents(forms_tag.get_text())

        # See if we can find gender info on the page to decide whether male
        # and female records are needed even without gendered sprites.
        gender_header = dex_soup.find('th', string="Gender")
        if gender_header is not None:
            gender_cell = gender_header.find_next('td')
            tokens = gender_cell.get_text().replace(",", "").split() if gender_cell is not None else []
            skip_next = False
            for token in tokens:
                token = token.lower()
                if skip_next:
                    skip_next = False
                    continue
                if token.startswith("0%"):
                    # A 0% share means the gender named next does not occur.
                    skip_next = True
                    continue
                if token == "male":
                    details["has_male"] = True
                elif token == "female":
                    details["has_female"] = True
            details["genderless"] = not (details["has_male"] or details["has_female"])

        return details

    def _form_generation(self, pokemon_name, form_name, forms_text, default_generation):
        """Return the generation of a named form, or ``default_generation`` if no sentence matches."""
        test_form = form_name.replace(pokemon_name, "").replace("Male", "").replace("Female", "").strip()
        if pokemon_name == "Tauros" and form_name in ("Aqua Breed", "Blaze Breed", "Combat Breed"):
            # The Tauros breeds are matched against the "Paldean" form sentence.
            test_form = "Paldean"

        for form_pattern in self._FORM_GENERATION_PATTERNS:
            for regional, game1, _game2 in form_pattern.findall(forms_text):
                if not compare_pokemon_forms(test_form, regional):
                    continue
                target_game = game1.replace("Pokemon", "").strip()
                result = find_game_generation(target_game)
                if result:
                    return result
        return default_generation