You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

347 lines
14 KiB

import requests
from bs4 import BeautifulSoup
from typing import Dict, List, Optional
from dataclasses import dataclass, asdict
import os
import sqlite3
import sys
import logging
import re
import unicodedata
# Append the parent of this file's directory to sys.path; presumably this is
# what lets the DataGatherers package import below resolve when the file is
# run directly.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from DataGatherers.cache_manager import CacheManager
# Module-wide logger, registered under the name 'ui_feedback'.
logger = logging.getLogger('ui_feedback')
@dataclass
class PokemonForm:
    """One scraped Pokémon form, identified by its PFIC string."""

    # PFIC identifier string for this form.
    id: str
    # Species name.
    name: str
    # Label of this particular form; None when the form has no label.
    form_name: Optional[str]
    # Address of the sprite image for this form.
    sprite_url: str
    # National Pokédex number of the species.
    national_dex: int
    # Generation number recorded for this form.
    generation: int
def create_pokemon_db(db_path='pokemon_forms.db'):
    """Open the forms database and ensure the ``pokemon_forms`` table exists.

    Args:
        db_path: Location handed to ``sqlite3.connect``. Defaults to
            ``'pokemon_forms.db'``, the path that was previously hard-coded,
            so existing zero-argument callers behave as before.

    Returns:
        The open ``sqlite3.Connection``. The caller owns it and must close it.

    Raises:
        sqlite3.Error: If the table cannot be created. The connection is
            closed before the error propagates so it does not leak.
    """
    create_table_sql = (
        "\n"
        "CREATE TABLE IF NOT EXISTS pokemon_forms (\n"
        "PFIC TEXT PRIMARY KEY,\n"
        "name TEXT NOT NULL,\n"
        "form_name TEXT,\n"
        "national_dex INTEGER NOT NULL,\n"
        "generation INTEGER NOT NULL\n"
        ")\n"
    )
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(create_table_sql)
        conn.commit()
    except sqlite3.Error:
        conn.close()
        raise
    return conn
def create_pokemon_storage_db(db_path='pokemon_forms.db'):
    """Open the forms database and ensure the ``pokemon_storage`` table exists.

    The table holds one row per PFIC with a ``storable_in_home`` flag and
    declares a foreign key onto ``pokemon_forms (PFIC)``.

    Args:
        db_path: Location handed to ``sqlite3.connect``. Defaults to
            ``'pokemon_forms.db'``, the path that was previously hard-coded,
            so existing zero-argument callers behave as before.

    Returns:
        The open ``sqlite3.Connection``. The caller owns it and must close it.

    Raises:
        sqlite3.Error: If the table cannot be created. The connection is
            closed before the error propagates so it does not leak.
    """
    create_table_sql = (
        "\n"
        "CREATE TABLE IF NOT EXISTS pokemon_storage (\n"
        "PFIC TEXT PRIMARY KEY,\n"
        "storable_in_home BOOLEAN NOT NULL,\n"
        "FOREIGN KEY (PFIC) REFERENCES pokemon_forms (PFIC)\n"
        ")\n"
    )
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(create_table_sql)
        conn.commit()
    except sqlite3.Error:
        conn.close()
        raise
    return conn
def initialize_db():
    """Create the ``pokemon_forms`` and ``pokemon_storage`` tables.

    Each creator returns an open connection; nothing here needs it
    afterwards, so it is closed immediately instead of being left dangling.
    """
    for create_table in (create_pokemon_db, create_pokemon_storage_db):
        create_table().close()
def insert_pokemon_form(conn, pokemon_form):
    """Insert or replace one row in ``pokemon_forms`` and commit.

    Args:
        conn: Open database connection.
        pokemon_form: Object exposing ``id``, ``name``, ``form_name``,
            ``national_dex`` and ``generation``; ``id`` is written to the
            ``PFIC`` column.
    """
    statement = (
        "\n"
        "INSERT OR REPLACE INTO pokemon_forms\n"
        "(PFIC, name, form_name, national_dex, generation)\n"
        "VALUES (?, ?, ?, ?, ?)\n"
    )
    row_values = (
        pokemon_form.id,
        pokemon_form.name,
        pokemon_form.form_name,
        pokemon_form.national_dex,
        pokemon_form.generation,
    )
    db_cursor = conn.cursor()
    db_cursor.execute(statement, row_values)
    conn.commit()
def insert_pokemon_storage(conn, pfic: str, storable_in_home: bool):
    """Insert or replace the ``pokemon_storage`` row for *pfic* and commit.

    Args:
        conn: Open database connection.
        pfic: PFIC identifier of the form.
        storable_in_home: Flag written to the ``storable_in_home`` column.
    """
    statement = (
        "\n"
        "INSERT OR REPLACE INTO pokemon_storage\n"
        "(PFIC, storable_in_home)\n"
        "VALUES (?, ?)\n"
    )
    row_values = (pfic, storable_in_home)
    db_cursor = conn.cursor()
    db_cursor.execute(statement, row_values)
    conn.commit()
class PokemonDatabase:
    """In-memory collection of Pokémon forms grouped by national dex number."""

    def __init__(self):
        # Keys are the integer national dex numbers passed to add_pokemon;
        # each value lists that species' forms in insertion order.
        self.pokemon: Dict[int, List["PokemonForm"]] = {}

    def add_pokemon(
        self,
        national_dex: int,
        name: str,
        region_code: int,
        form_index: int,
        gender_code: int,
        form_name: Optional[str],
        sprite_url: str,
    ):
        """Build a form record and append it under its national dex number.

        The record's ID is produced by ``format_pokemon_id`` from the four
        numeric codes, and ``region_code`` is also stored as the record's
        ``generation``.
        """
        pokemon_id = format_pokemon_id(national_dex, region_code, form_index, gender_code)
        pokemon_form = PokemonForm(
            id=pokemon_id,
            name=name,
            form_name=form_name,
            sprite_url=sprite_url,
            national_dex=national_dex,
            generation=region_code,
        )
        self.pokemon.setdefault(national_dex, []).append(pokemon_form)

    def get_pokemon(
        self,
        national_dex: Optional[int] = None,
        region_code: Optional[int] = None,
        form_index: Optional[int] = None,
        gender_code: Optional[int] = None,
    ) -> List["PokemonForm"]:
        """Return every stored form whose ID matches all the given filters.

        A filter left as ``None`` matches anything. Filters are compared with
        the dash-separated numeric segments of each form's ID, in the order
        dex / region / form / gender.
        """
        if national_dex is None:
            buckets = self.pokemon.values()
        else:
            # Forms are stored under their dex number, so only that bucket
            # can contain matches; no need to scan the whole collection.
            buckets = [self.pokemon.get(national_dex, [])]

        results = []
        for dex_forms in buckets:
            for form in dex_forms:
                parts = form.id.split('-')
                if (
                    (national_dex is None or int(parts[0]) == national_dex)
                    and (region_code is None or int(parts[1]) == region_code)
                    and (form_index is None or int(parts[2]) == form_index)
                    and (gender_code is None or int(parts[3]) == gender_code)
                ):
                    results.append(form)
        return results

    def get_pokemon_by_id(self, pokemon_id: str) -> Optional["PokemonForm"]:
        """Return the form whose ID equals *pokemon_id*, or ``None``.

        ``None`` is also returned when the leading segment of *pokemon_id*
        is not a number, since no stored form can carry such an ID.
        """
        try:
            national_dex = int(pokemon_id.split('-')[0])
        except ValueError:
            return None
        for form in self.pokemon.get(national_dex, []):
            if form.id == pokemon_id:
                return form
        return None
def format_pokemon_id(national_dex: int, region_code: int, form_index: int, gender_code: int) -> str:
    """Build the dash-separated ID string for one Pokémon form.

    The dex number is zero-padded to four digits, the region code to two and
    the form index to three; the gender code is appended without padding.
    """
    segments = (
        format(national_dex, "04d"),
        format(region_code, "02d"),
        format(form_index, "03d"),
        format(gender_code, ""),
    )
    return "-".join(segments)
def get_pokemon_sprites_page(cache: CacheManager):
    """Request the pokemondb.net sprite index page through *cache*.

    Returns whatever ``cache.fetch_url`` returns for that address.
    """
    return cache.fetch_url("https://pokemondb.net/sprites")
def get_pokemon_sprites_page_data(cache: CacheManager, pokemon_name: str):
    """Request one Pokémon's pokemondb.net sprite page through *cache*.

    *pokemon_name* is placed in the URL path as given. Returns whatever
    ``cache.fetch_url`` returns for that address.
    """
    sprites_url = "https://pokemondb.net/sprites/{}".format(pokemon_name)
    return cache.fetch_url(sprites_url)
def get_pokemon_dex_page(cache: CacheManager, pokemon_name: str):
    """Request one Pokémon's pokemondb.net Pokédex page through *cache*.

    *pokemon_name* is placed in the URL path as given. Returns whatever
    ``cache.fetch_url`` returns for that address.
    """
    dex_url = "https://pokemondb.net/pokedex/{}".format(pokemon_name)
    return cache.fetch_url(dex_url)
def remove_accents(input_str):
    """Return *input_str* NFKD-decomposed with all combining marks dropped."""
    decomposed = unicodedata.normalize('NFKD', input_str)
    base_characters = (ch for ch in decomposed if unicodedata.combining(ch) == 0)
    return "".join(base_characters)
def _normalize_form_label(label):
    """Reduce a form label to the canonical spelling used for comparison."""
    # "forme" must be removed before "form", otherwise a stray "e" is left.
    text = label.lower().replace("forme", "").replace("form", "").replace("é", "e").strip()
    # Fold known spelling variants onto a single spelling.
    return text.replace("deputante", "debutante").replace("p'au", "pa'u").replace("blood moon", "bloodmoon")


def compare_forms(a, b):
    """Report whether two form labels refer to the same form.

    Labels match when they are equal as given, or equal after lowercasing,
    dropping the words "forme"/"form", replacing "é" with "e", trimming
    whitespace and folding a few known spelling variants.

    Returns:
        ``False`` if either label is ``None``; otherwise whether they match.
    """
    if a is None or b is None:
        return False
    if a == b:
        return True
    return _normalize_form_label(a) == _normalize_form_label(b)
def download_image(url, filename, timeout=30):
    """Download *url* and write the response body to *filename*.

    The file is only written when the server answers with status 200; any
    other status leaves the filesystem untouched.

    Args:
        url: Address of the image.
        filename: Destination path, opened in binary write mode.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without one, a stalled server would block the caller forever.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` on connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        with open(filename, 'wb') as f:
            f.write(response.content)
# Inclusive national dex range covered by each generation.
_GENERATION_DEX_RANGES = {
    1: {"min": 1, "max": 151},
    2: {"min": 152, "max": 251},
    3: {"min": 252, "max": 386},
    4: {"min": 387, "max": 493},
    5: {"min": 494, "max": 649},
    6: {"min": 650, "max": 721},
    7: {"min": 722, "max": 809},
    8: {"min": 810, "max": 905},
    9: {"min": 906, "max": 1025},
}

# Intro sentence of a Pokédex page; group 4 is the generation number.
_DEX_INTRO_PATTERN = re.compile(
    r'^(.+?) is a (\w+)(?:/(\w+))? type Pokémon introduced in Generation (\d+)\.$'
)

# Sentences describing where a form first appeared. Every pattern yields
# (form adjective, first game, optional second game); tried in this order.
_FORM_ORIGIN_PATTERNS = tuple(
    re.compile(source, re.IGNORECASE)
    for source in (
        r'a(?:n)? (\w+) Form(?:,)? introduced in (?:the )?([\w\s:]+)(?:\/([\w\s:]+))?',
        r'a(?:n)? (\w+) form(?:,)? available in the latest update to ([\w\s:]+)(?:& ([\w\s:]+))?',
        r'has (?:\w+) new (\w+) Form(?:s)?(?:,)? available in (?:the )?([\w\s:]+)(?:& ([\w\s:]+))?',
        r'a(?:n)? (\w+) form(?:,)? introduced in the Crown Tundra Expansion Pass to ([\w\s:]+)(?:& ([\w\s:]+))?',
    )
)

# NOTE(review): the games / alternate_game_names tables are not created in
# this module - presumably they are populated elsewhere; confirm.
_GAME_GENERATION_QUERY = (
    "\n"
    "SELECT g.generation\n"
    "FROM games g\n"
    "LEFT JOIN alternate_game_names agn ON g.id = agn.game_id\n"
    "WHERE g.name = ? OR agn.alternate_name = ?\n"
    "LIMIT 1\n"
)


def _generation_for_dex_number(national_dex):
    """Return the generation whose dex range contains *national_dex* (1 if none)."""
    for gen, bounds in _GENERATION_DEX_RANGES.items():
        if bounds["min"] <= national_dex <= bounds["max"]:
            return gen
    return 1


def _sprite_form_label(sprite):
    """Join the text of a sprite card's <small> tags; None when it has none."""
    smalls = sprite.find_all('small')
    if not smalls:
        return None
    return " ".join(small.get_text(strip=True) for small in smalls).strip()


def _lookup_form_generation(conn, extra_text, test_form):
    """Find the generation of the game in which *test_form* is said to appear.

    Returns None when no sentence in *extra_text* mentions a matching form
    or the mentioned game is not found in the database.
    """
    for pattern in _FORM_ORIGIN_PATTERNS:
        for regional, game1, _game2 in pattern.findall(extra_text):
            if not compare_forms(test_form, regional):
                continue
            # extra_text already had accents removed, so the franchise name
            # appears as plain "Pokemon" here.
            target_game = game1.replace("Pokemon", "").strip()
            row = conn.execute(_GAME_GENERATION_QUERY, (target_game, target_game)).fetchone()
            if row:
                return row[0]
    return None


def _resolve_generation(cache, conn, pokemon_name, form_name, generation):
    """Refine *generation* for one form using the species' Pokédex page.

    Returns *generation* unchanged whenever the page, its heading or its
    intro paragraph is unavailable.
    """
    dex_page_data = get_pokemon_dex_page(
        cache, pokemon_name.replace("'", "").replace(".", "-").replace(" ", "")
    )
    if not dex_page_data:
        return generation
    dex_soup = BeautifulSoup(dex_page_data, 'html.parser')
    # Find the heading that carries exactly the Pokémon name.
    dex_header = dex_soup.find('h1', string=pokemon_name)
    if not dex_header:
        return generation
    # The next <p> is expected to read "{name} is a {type}(/{type}) type
    # Pokémon introduced in Generation {n}."
    generation_tag = dex_header.find_next('p')
    if generation_tag is None:
        return generation
    match = _DEX_INTRO_PATTERN.match(generation_tag.get_text())
    if match:
        generation = int(match.group(4))

    if form_name is None:
        return generation
    next_tag = generation_tag.find_next('p')
    if not next_tag:
        return generation
    extra_text = remove_accents(next_tag.get_text())
    test_form = form_name.replace(pokemon_name, "").replace("Male", "").replace("Female", "").strip()
    # The Tauros breeds are looked up under the adjective "Paldean".
    if pokemon_name == "Tauros" and form_name in ("Aqua Breed", "Blaze Breed", "Combat Breed"):
        test_form = "Paldean"
    form_generation = _lookup_form_generation(conn, extra_text, test_form)
    return generation if form_generation is None else form_generation


def retrieve_all_pokemon_forms(cache: CacheManager, progress_callback=None):
    """Scrape every Pokémon form from pokemondb.net and persist it.

    Walks the sprite index page and, for each listed Pokémon, reads the rows
    mentioning "Home" in the "Generation 8" table of its sprite page. Every
    non-shiny sprite card becomes a ``PokemonForm`` that is added to an
    in-memory ``PokemonDatabase`` and written to the ``pokemon_forms`` and
    ``pokemon_storage`` tables. Forms whose label contains "mega" or
    "gigantamax" are stored as not storable in Home. Finally each sprite is
    downloaded to ``images-new/<id>.png`` unless that file already exists.

    Args:
        cache: Passed to the page-fetching helpers, which call its
            ``fetch_url`` method.
        progress_callback: Optional callable invoked once per Pokémon with
            the string ``"Processing <name>"``.

    Returns:
        None. The function also returns early, having stored whatever was
        processed so far, when the index page or any Pokémon's sprite page
        cannot be fetched.
    """
    db = PokemonDatabase()
    pokemon_db_conn = create_pokemon_db()
    try:
        # This creator opens a connection of its own; only the table is
        # needed here, so release that connection straight away.
        create_pokemon_storage_db().close()

        page_data = get_pokemon_sprites_page(cache)
        if not page_data:
            return None
        soup = BeautifulSoup(page_data, 'html.parser')
        infocards = soup.find_all('a', class_='infocard')

        # The dex number is the card's 1-based position in the index.
        # enumerate keeps it correct even when a Pokémon is skipped below.
        for national_dex_index, mon in enumerate(infocards, start=1):
            generation = _generation_for_dex_number(national_dex_index)

            pokemon_name = mon.get_text(strip=True)
            logger.info(pokemon_name)
            if progress_callback:
                progress_callback(f"Processing {pokemon_name}")

            # Build the URL slug: gender symbols become -f / -m suffixes,
            # punctuation is dropped and spaces become dashes.
            pokemon_url_name = (
                pokemon_name.replace("♀", "-f")
                .replace("♂", "-m")
                .replace("'", "")
                .replace(".", "")
                .replace('é', 'e')
                .replace(':', '')
            )
            pokemon_url_name = pokemon_url_name.replace(" ", "-")

            sprites_page_data = get_pokemon_sprites_page_data(cache, pokemon_url_name)
            if not sprites_page_data:
                return None
            sprites_soup = BeautifulSoup(sprites_page_data, 'html.parser')

            generation_8_header = sprites_soup.find('h2', string='Generation 8')
            if not generation_8_header:
                continue
            generation_8_table = generation_8_header.find_next('table')
            if not generation_8_table:
                continue
            generation_8_tbody = generation_8_table.find('tbody')
            if not generation_8_tbody:
                continue

            for row in generation_8_tbody.find_all('tr'):
                if 'Home' not in row.get_text(strip=True):
                    continue
                sprites = row.find_all('span', class_='sprites-table-card')
                if not sprites:
                    continue

                form = 0
                for sprite in sprites:
                    sprite_url = "missing"
                    sprite_img = sprite.find('img')
                    if sprite_img:
                        # An <img> without src keeps the "missing" marker.
                        sprite_url = sprite_img.get('src') or "missing"
                        if "shiny" in sprite_url:
                            continue

                    form_name = _sprite_form_label(sprite)
                    logger.info(f'{sprite_url}, {form_name}')

                    # Labelled sprites advance the form index, except that
                    # Male/Female labels are gender variants of the current
                    # form rather than new forms.
                    gender = 0
                    if form_name is not None:
                        form += 1
                        if form_name.startswith("Male"):
                            form -= 1
                            gender = 1
                        elif form_name.startswith("Female"):
                            form -= 1
                            gender = 2

                    generation = _resolve_generation(
                        cache, pokemon_db_conn, pokemon_name, form_name, generation
                    )

                    pokemon_form = PokemonForm(
                        id=format_pokemon_id(national_dex_index, generation, form, gender),
                        name=pokemon_name,
                        form_name=form_name,
                        sprite_url=sprite_url,
                        national_dex=national_dex_index,
                        generation=generation,
                    )
                    db.add_pokemon(
                        national_dex_index,
                        pokemon_name,
                        generation,
                        form,
                        gender,
                        form_name,
                        sprite_url,
                    )
                    insert_pokemon_form(pokemon_db_conn, pokemon_form)

                    storable_in_home = True
                    if form_name:
                        lowered_label = form_name.lower()
                        storable_in_home = not any(
                            keyword in lowered_label for keyword in ('mega', 'gigantamax')
                        )
                    insert_pokemon_storage(pokemon_db_conn, pokemon_form.id, storable_in_home)

        logger.info(f"Total Pokémon forms: {sum(len(forms) for forms in db.pokemon.values())}")
        logger.info(f"Pokémon with multiple forms: {sum(1 for forms in db.pokemon.values() if len(forms) > 1)}")

        os.makedirs('images-new', exist_ok=True)
        for forms in db.pokemon.values():
            for form in forms:
                filename = f"images-new/{form.id}.png"
                if os.path.exists(filename):
                    logger.info(f"Image for {form.id} already exists, skipping download")
                elif form.sprite_url == "missing":
                    # The placeholder is not a URL; requesting it would raise.
                    logger.info(f"No sprite URL for {form.id}, skipping download")
                else:
                    download_image(form.sprite_url, filename)
                    logger.info(f"Downloaded image for {form.id}")
    finally:
        # Release the connection on every exit path, including the early
        # returns above and any exception.
        pokemon_db_conn.close()
if __name__ == "__main__":
    # Script entry point: run the full scrape with a fresh cache manager.
    cache = CacheManager()
    try:
        retrieve_all_pokemon_forms(cache)
    finally:
        # Close the cache even if the scrape raises part-way through.
        cache.close()