import requests
from bs4 import BeautifulSoup
from typing import Dict , List , Optional
from dataclasses import dataclass , asdict
import os
import sqlite3
import sys
import logging
import re
import unicodedata
sys . path . append ( os . path . dirname ( os . path . dirname ( os . path . abspath ( __file__ ) ) ) )
from DataGatherers . cache_manager import CacheManager
logger = logging . getLogger ( ' ui_feedback ' )
@dataclass
class PokemonForm:
    """Record describing a single form of a Pokémon.

    Instances are built by the scraper and by ``PokemonDatabase.add_pokemon``
    and are persisted through ``insert_pokemon_form``.
    """

    # PFIC identifier; built with format_pokemon_id as "dex-region-form-gender".
    id: str
    # Species name.
    name: str
    # Form label, or None when the sprite carries no label.
    form_name: Optional[str]
    # Sprite image URL.
    sprite_url: str
    # National Pokédex number of the species.
    national_dex: int
    # Generation number stored alongside the form.
    generation: int
def create_pokemon_db(db_path='pokemon_forms.db'):
    """Open the forms database and make sure the ``pokemon_forms`` table exists.

    Args:
        db_path: SQLite database location. Defaults to ``pokemon_forms.db`` in
            the current working directory; pass ``":memory:"`` for a
            throw-away database.

    Returns:
        An open ``sqlite3.Connection``. The caller is responsible for closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table cannot be
            created. The connection is closed before the error propagates.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS pokemon_forms (
                PFIC TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                form_name TEXT,
                national_dex INTEGER NOT NULL,
                generation INTEGER NOT NULL
            )
        ''')
        conn.commit()
    except sqlite3.Error:
        # Do not leak the handle when schema creation fails.
        conn.close()
        raise
    return conn
def create_pokemon_storage_db(db_path='pokemon_forms.db'):
    """Open the forms database and make sure the ``pokemon_storage`` table exists.

    The table holds one row per PFIC with a flag saying whether that form can
    be stored in HOME; its PFIC column references ``pokemon_forms(PFIC)``.

    Args:
        db_path: SQLite database location. Defaults to ``pokemon_forms.db`` in
            the current working directory; pass ``":memory:"`` for a
            throw-away database.

    Returns:
        An open ``sqlite3.Connection``. The caller is responsible for closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table cannot be
            created. The connection is closed before the error propagates.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS pokemon_storage (
                PFIC TEXT PRIMARY KEY,
                storable_in_home BOOLEAN NOT NULL,
                FOREIGN KEY (PFIC) REFERENCES pokemon_forms (PFIC)
            )
        ''')
        conn.commit()
    except sqlite3.Error:
        # Do not leak the handle when schema creation fails.
        conn.close()
        raise
    return conn
def initialize_db():
    """Create the ``pokemon_forms`` and ``pokemon_storage`` tables if missing.

    Each factory returns an open connection that is only needed for its
    table-creation side effect, so it is closed immediately instead of being
    left for the garbage collector.
    """
    for open_connection in (create_pokemon_db, create_pokemon_storage_db):
        open_connection().close()
def insert_pokemon_form(conn, pokemon_form):
    """Insert or replace the ``pokemon_forms`` row for *pokemon_form* and commit.

    Args:
        conn: Open SQLite connection whose database has a ``pokemon_forms`` table.
        pokemon_form: Object exposing ``id``, ``name``, ``form_name``,
            ``national_dex`` and ``generation`` attributes.
    """
    row = (
        pokemon_form.id,
        pokemon_form.name,
        pokemon_form.form_name,
        pokemon_form.national_dex,
        pokemon_form.generation,
    )
    statement = '''
        INSERT OR REPLACE INTO pokemon_forms
        (PFIC, name, form_name, national_dex, generation)
        VALUES (?, ?, ?, ?, ?)
    '''
    db_cursor = conn.cursor()
    db_cursor.execute(statement, row)
    conn.commit()
def insert_pokemon_storage(conn, pfic: str, storable_in_home: bool):
    """Insert or replace the ``pokemon_storage`` row for *pfic* and commit.

    Args:
        conn: Open SQLite connection whose database has a ``pokemon_storage`` table.
        pfic: PFIC identifier of the form.
        storable_in_home: Flag written to the ``storable_in_home`` column.
    """
    statement = '''
        INSERT OR REPLACE INTO pokemon_storage
        (PFIC, storable_in_home)
        VALUES (?, ?)
    '''
    db_cursor = conn.cursor()
    db_cursor.execute(statement, (pfic, storable_in_home))
    conn.commit()
class PokemonDatabase:
    """In-memory collection of Pokémon forms grouped by National Dex number."""

    def __init__(self):
        # Keys are the integer National Dex numbers passed to add_pokemon.
        self.pokemon: Dict[int, List["PokemonForm"]] = {}

    def add_pokemon(self, national_dex: int, name: str, region_code: int, form_index: int, gender_code: int, form_name: Optional[str], sprite_url: str):
        """Build a ``PokemonForm`` from the given parts and store it.

        The form's ID is produced by ``format_pokemon_id`` and ``region_code``
        is also recorded as the form's ``generation``.
        """
        pokemon_id = format_pokemon_id(national_dex, region_code, form_index, gender_code)
        pokemon_form = PokemonForm(
            id=pokemon_id,
            name=name,
            form_name=form_name,
            sprite_url=sprite_url,
            national_dex=national_dex,
            generation=region_code,
        )
        self.pokemon.setdefault(national_dex, []).append(pokemon_form)

    def get_pokemon(self, national_dex: Optional[int] = None, region_code: Optional[int] = None,
                    form_index: Optional[int] = None, gender_code: Optional[int] = None) -> List["PokemonForm"]:
        """Return every stored form whose ID matches all the given filters.

        A filter left as ``None`` matches anything. Filters are compared with
        the integer value of the corresponding dash-separated ID segment
        (dex, region, form, gender).
        """
        if national_dex is None:
            candidates = [form for forms in self.pokemon.values() for form in forms]
        else:
            # Forms are bucketed by dex number, so only one bucket can match.
            candidates = self.pokemon.get(national_dex, [])

        wanted = (national_dex, region_code, form_index, gender_code)
        results = []
        for form in candidates:
            parts = form.id.split('-')
            if all(want is None or int(part) == want for want, part in zip(wanted, parts)):
                results.append(form)
        return results

    def get_pokemon_by_id(self, pokemon_id: str) -> Optional["PokemonForm"]:
        """Return the form whose ID equals *pokemon_id*, or ``None``.

        An ID whose first segment is not an integer cannot belong to any
        stored form, so it yields ``None`` rather than raising.
        """
        try:
            national_dex = int(pokemon_id.split('-')[0])
        except ValueError:
            return None
        for form in self.pokemon.get(national_dex, []):
            if form.id == pokemon_id:
                return form
        return None
def format_pokemon_id(national_dex: int, region_code: int, form_index: int, gender_code: int) -> str:
    """Build the dash-separated form identifier.

    The dex number is zero-padded to four digits, the region code to two and
    the form index to three; the gender code is appended unpadded.
    """
    segments = (
        format(national_dex, "04d"),
        format(region_code, "02d"),
        format(form_index, "03d"),
        format(gender_code, ""),
    )
    return "-".join(segments)
def get_pokemon_sprites_page(cache: CacheManager):
    """Fetch the pokemondb.net sprites index page through *cache*.

    Returns whatever ``cache.fetch_url`` returns for that URL.
    """
    return cache.fetch_url("https://pokemondb.net/sprites")
def get_pokemon_sprites_page_data(cache: CacheManager, pokemon_name: str):
    """Fetch the pokemondb.net sprites page for one Pokémon through *cache*.

    *pokemon_name* is placed in the URL path as-is, so it must already be in
    URL form. Returns whatever ``cache.fetch_url`` returns for that URL.
    """
    sprites_url = "https://pokemondb.net/sprites/" + f"{pokemon_name}"
    return cache.fetch_url(sprites_url)
def get_pokemon_dex_page(cache: CacheManager, pokemon_name: str):
    """Fetch the pokemondb.net Pokédex page for one Pokémon through *cache*.

    *pokemon_name* is placed in the URL path as-is, so it must already be in
    URL form. Returns whatever ``cache.fetch_url`` returns for that URL.
    """
    dex_url = "https://pokemondb.net/pokedex/" + f"{pokemon_name}"
    return cache.fetch_url(dex_url)
def remove_accents(input_str):
    """Return *input_str* NFKD-normalised with all combining marks removed.

    Decomposition splits an accented letter into its base letter plus
    combining marks; dropping the marks leaves the unaccented text.
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    kept_chars = (ch for ch in decomposed if unicodedata.combining(ch) == 0)
    return "".join(kept_chars)
def _normalize_form_name(form_name):
    """Reduce a form name to a canonical lower-case spelling for comparison."""
    normalized = (
        form_name.lower()
        .replace("forme", "")
        .replace("form", "")
        .replace("é", "e")
        .strip()
    )
    # Map the alternate spellings handled by this comparison to one spelling.
    return (
        normalized
        .replace("deputante", "debutante")
        .replace("p'au", "pa'u")
        .replace("blood moon", "bloodmoon")
    )


def compare_forms(a, b):
    """Tell whether two form names refer to the same form.

    Names match when they are equal, or equal after lower-casing, removing
    the words "forme"/"form", replacing "é" with "e", trimming whitespace and
    unifying a few alternate spellings.

    Args:
        a: First form name, or ``None``.
        b: Second form name, or ``None``.

    Returns:
        ``True`` when the names match; ``False`` otherwise, including when
        either argument is ``None``.
    """
    if a is None or b is None:
        return False
    if a == b:
        return True
    return _normalize_form_name(a) == _normalize_form_name(b)
def download_image(url, filename, timeout=30):
    """Download *url* and write the response body to *filename*.

    Args:
        url: Address of the image to fetch.
        filename: Path of the file to write; opened in binary mode.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        ``True`` if the server answered with status 200 and the file was
        written, ``False`` otherwise (nothing is written in that case).

    Raises:
        requests.RequestException: On connection errors, timeouts or an
            invalid URL.
        OSError: If the file cannot be written.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # Make the failure visible instead of silently skipping the file.
        logger.warning("Could not download %s: HTTP %s", url, response.status_code)
        return False
    with open(filename, 'wb') as f:
        f.write(response.content)
    return True
# Fallback National Dex ranges: (generation, first dex number, last dex number).
_GENERATION_DEX_RANGES = (
    (1, 1, 151),
    (2, 152, 251),
    (3, 252, 386),
    (4, 387, 493),
    (5, 494, 649),
    (6, 650, 721),
    (7, 722, 809),
    (8, 810, 905),
    (9, 906, 1025),
)

# Placeholder stored as sprite_url when a sprite card has no usable <img src>.
_MISSING_SPRITE_URL = "missing"

# First paragraph of a dex page: "<name> is a <type>[/<type>] type Pokémon
# introduced in Generation <n>."
_BASE_GENERATION_RE = re.compile(
    r'^(.+?) is a (\w+)(?:/(\w+))? type Pokémon introduced in Generation (\d+)\.$'
)

# Sentences in the second paragraph naming the game a form first appeared in.
# Each pattern captures (form adjective, first game, optional second game).
_FORM_GENERATION_RES = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'a(?:n)? (\w+) Form(?:,)? introduced in (?:the )?([\w\s:]+)(?:\/([\w\s:]+))?',
        r'a(?:n)? (\w+) form(?:,)? available in the latest update to ([\w\s:]+)(?:& ([\w\s:]+))?',
        r'has (?:\w+) new (\w+) Form(?:s)?(?:,)? available in (?:the )?([\w\s:]+)(?:& ([\w\s:]+))?',
        r'a(?:n)? (\w+) form(?:,)? introduced in the Crown Tundra Expansion Pass to ([\w\s:]+)(?:& ([\w\s:]+))?',
    )
)

# Tauros breeds whose dex text describes them as the "Paldean" form.
_TAUROS_PALDEAN_BREEDS = ("Aqua Breed", "Blaze Breed", "Combat Breed")

# Form-name keywords marking forms that are flagged as not storable in HOME.
_NOT_STORABLE_KEYWORDS = ('mega', 'gigantamax')


def _generation_for_dex(national_dex):
    """Return the generation whose dex range contains *national_dex* (1 if none)."""
    for generation, first, last in _GENERATION_DEX_RANGES:
        if first <= national_dex <= last:
            return generation
    return 1


def _home_sprite_rows(sprites_soup):
    """Return the sprite cards of each 'Home' row in the 'Generation 8' table."""
    header = sprites_soup.find('h2', string='Generation 8')
    if header is None:
        return []
    table = header.find_next('table')
    if table is None:
        return []
    tbody = table.find('tbody')
    if tbody is None:
        return []

    home_rows = []
    for row in tbody.find_all('tr'):
        if 'Home' not in row.get_text(strip=True):
            continue
        cards = row.find_all('span', class_='sprites-table-card')
        if cards:
            home_rows.append(cards)
    return home_rows


def _sprite_url(sprite):
    """Return the card's image URL, or the placeholder when it has none."""
    sprite_img = sprite.find('img')
    if sprite_img is None:
        return _MISSING_SPRITE_URL
    # An <img> without a src attribute yields None; treat it as missing too.
    return sprite_img.get('src') or _MISSING_SPRITE_URL


def _sprite_form_name(sprite):
    """Return the card's <small> labels joined by spaces, or None if it has none."""
    smalls = sprite.find_all('small')
    if not smalls:
        return None
    return " ".join(small.get_text(strip=True) for small in smalls).strip()


def _lookup_game_generation(conn, game_name):
    """Return the (generation,) row for a game name or alternate name, or None."""
    # NOTE(review): relies on `games` and `alternate_game_names` tables that
    # are not created in this file; presumably another step populates them.
    cursor = conn.cursor()
    cursor.execute('''
        SELECT g.generation
        FROM games g
        LEFT JOIN alternate_game_names agn ON g.id = agn.game_id
        WHERE g.name = ? OR agn.alternate_name = ?
        LIMIT 1
    ''', (target_game := game_name, target_game))
    return cursor.fetchone()


def _resolve_generation(cache, conn, pokemon_name, form_name, generation):
    """Refine *generation* for one form using the Pokémon's dex page.

    The base generation is read from the first paragraph after the page
    heading. For a named form, the following paragraph is searched for a
    sentence naming the game the form was introduced in, and that game's
    generation is looked up in the database. Whenever a step yields nothing,
    the best value found so far is returned.
    """
    dex_url_name = pokemon_name.replace("'", "").replace(".", "-").replace(" ", "")
    dex_page_data = get_pokemon_dex_page(cache, dex_url_name)
    if not dex_page_data:
        return generation

    dex_soup = BeautifulSoup(dex_page_data, 'html.parser')
    # Find the heading that carries the Pokémon's name.
    dex_header = dex_soup.find('h1', string=pokemon_name)
    if dex_header is None:
        return generation
    generation_tag = dex_header.find_next('p')
    if generation_tag is None:
        return generation

    match = _BASE_GENERATION_RE.match(generation_tag.get_text())
    if match:
        generation = int(match.group(4))

    if form_name is None:
        return generation
    next_tag = generation_tag.find_next('p')
    if next_tag is None:
        return generation

    extra_text = remove_accents(next_tag.get_text())
    test_form = form_name.replace(pokemon_name, "").replace("Male", "").replace("Female", "").strip()
    if pokemon_name == "Tauros" and form_name in _TAUROS_PALDEAN_BREEDS:
        test_form = "Paldean"

    for pattern in _FORM_GENERATION_RES:
        for regional, game1, _game2 in pattern.findall(extra_text):
            if not compare_forms(test_form, regional):
                continue
            row = _lookup_game_generation(conn, game1.replace("Pokemon", "").strip())
            if row:
                return row[0]
    return generation


def _download_missing_images(db):
    """Download each form's sprite into ``images-new`` unless already present."""
    os.makedirs('images-new', exist_ok=True)
    for forms in db.pokemon.values():
        for form in forms:
            filename = f"images-new/{form.id}.png"
            if os.path.exists(filename):
                logger.info(f"Image for {form.id} already exists, skipping download")
                continue
            if form.sprite_url == _MISSING_SPRITE_URL:
                # The placeholder is not a URL; requesting it would raise.
                logger.warning(f"No sprite URL for {form.id}, skipping download")
                continue
            download_image(form.sprite_url, filename)
            # download_image writes nothing on a non-200 response.
            if os.path.exists(filename):
                logger.info(f"Downloaded image for {form.id}")
            else:
                logger.warning(f"Could not download image for {form.id}")


def retrieve_all_pokemon_forms(cache: CacheManager, progress_callback=None):
    """Scrape every Pokémon form from pokemondb.net into the database and disk.

    For each entry on the sprites index page, the Pokémon's sprites page is
    fetched and the 'Home' rows of its 'Generation 8' table are read. Every
    non-shiny sprite card becomes one form, which is written to the
    ``pokemon_forms`` and ``pokemon_storage`` tables. Afterwards each form's
    sprite is downloaded to ``images-new/<id>.png`` unless that file exists.

    Args:
        cache: Used for every page fetch (via its ``fetch_url`` method).
        progress_callback: Optional callable receiving one
            ``"Processing <name>"`` string per Pokémon.

    Returns:
        ``None``. The run stops early, also returning ``None``, when the
        index page or any Pokémon's sprites page cannot be fetched; forms
        inserted before that point remain committed.
    """
    db = PokemonDatabase()
    pokemon_db_conn = create_pokemon_db()
    try:
        # Only the table-creation side effect is needed; release the handle.
        create_pokemon_storage_db().close()

        page_data = get_pokemon_sprites_page(cache)
        if not page_data:
            return None

        soup = BeautifulSoup(page_data, 'html.parser')
        pokemon = soup.find_all('a', class_='infocard')

        # The position on the index page is the National Dex number. Using
        # enumerate keeps the number right even when a Pokémon is skipped.
        for national_dex_index, mon in enumerate(pokemon, 1):
            generation = _generation_for_dex(national_dex_index)

            pokemon_name = mon.get_text(strip=True)
            logger.info(pokemon_name)
            if progress_callback:
                progress_callback(f"Processing {pokemon_name}")

            pokemon_url_name = (
                pokemon_name
                .replace("♀", "-f")
                .replace("♂", "-m")
                .replace("'", "")
                .replace(".", "")
                .replace('é', 'e')
                .replace(':', '')
                .replace(" ", "-")
            )

            sprites_page_data = get_pokemon_sprites_page_data(cache, pokemon_url_name)
            if not sprites_page_data:
                return None
            sprites_soup = BeautifulSoup(sprites_page_data, 'html.parser')

            for sprites in _home_sprite_rows(sprites_soup):
                form = 0
                for sprite in sprites:
                    sprite_url = _sprite_url(sprite)
                    if "shiny" in sprite_url:
                        continue

                    form_name = _sprite_form_name(sprite)
                    logger.info(f'{sprite_url}, {form_name}')

                    # Gender variants keep the current form index; any other
                    # labelled sprite starts a new one.
                    gender = 0
                    if form_name is not None:
                        if form_name.startswith("Male"):
                            gender = 1
                        elif form_name.startswith("Female"):
                            gender = 2
                        else:
                            form += 1

                    generation = _resolve_generation(
                        cache, pokemon_db_conn, pokemon_name, form_name, generation
                    )

                    pokemon_form = PokemonForm(
                        id=format_pokemon_id(national_dex_index, generation, form, gender),
                        name=pokemon_name,
                        form_name=form_name,
                        sprite_url=sprite_url,
                        national_dex=national_dex_index,
                        generation=generation,
                    )
                    db.add_pokemon(
                        national_dex_index,
                        pokemon_name,
                        generation,
                        form,
                        gender,
                        form_name,
                        sprite_url,
                    )
                    insert_pokemon_form(pokemon_db_conn, pokemon_form)

                    lowered = form_name.lower() if form_name else ""
                    storable_in_home = not any(keyword in lowered for keyword in _NOT_STORABLE_KEYWORDS)
                    insert_pokemon_storage(pokemon_db_conn, pokemon_form.id, storable_in_home)

        logger.info(f"Total Pokémon forms: {sum(len(forms) for forms in db.pokemon.values())}")
        logger.info(f"Pokémon with multiple forms: {sum(1 for forms in db.pokemon.values() if len(forms) > 1)}")

        _download_missing_images(db)
    finally:
        # Close the connection on every exit path, including early returns.
        pokemon_db_conn.close()
if __name__ == "__main__":
    # Script entry point: scrape all forms using a fresh cache.
    cache = CacheManager()
    try:
        retrieve_all_pokemon_forms(cache)
    finally:
        # Close the cache even if the scrape raises.
        cache.close()