You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
347 lines
14 KiB
347 lines
14 KiB
import requests
|
|
from bs4 import BeautifulSoup
|
|
from typing import Dict, List, Optional
|
|
from dataclasses import dataclass, asdict
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
import logging
|
|
import re
|
|
import unicodedata
|
|
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from DataGatherers.cache_manager import CacheManager
|
|
|
|
logger = logging.getLogger('ui_feedback')
|
|
|
|
@dataclass
class PokemonForm:
    """One concrete form of a Pokémon as collected by the scraper.

    Field order is part of the constructor signature and must not change.
    """

    # Form identifier (the "PFIC"); stored in the PFIC column of pokemon_forms.
    id: str
    # Species name as scraped from the sprite index page.
    name: str
    # Label of the form, or None when the sprite carries no form label.
    form_name: Optional[str]
    # Address of the sprite image for this form.
    sprite_url: str
    # National Dex number of the species.
    national_dex: int
    # Generation number associated with this form.
    generation: int
|
|
|
|
def create_pokemon_db(db_path='pokemon_forms.db'):
    """Open the forms database and make sure the ``pokemon_forms`` table exists.

    Args:
        db_path: SQLite database to open. Defaults to ``'pokemon_forms.db'``
            (the path that was previously hard-coded); pass ``':memory:'``
            for a throw-away database.

    Returns:
        An open ``sqlite3.Connection``. The caller owns it and must close it.

    Raises:
        sqlite3.Error: if the schema cannot be created. The connection is
            closed before the error propagates so it does not leak.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS pokemon_forms (
                PFIC TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                form_name TEXT,
                national_dex INTEGER NOT NULL,
                generation INTEGER NOT NULL
            )
        ''')
        conn.commit()
    except sqlite3.Error:
        conn.close()
        raise
    return conn
|
|
|
|
def create_pokemon_storage_db(db_path='pokemon_forms.db'):
    """Open the forms database and make sure the ``pokemon_storage`` table exists.

    The table maps a PFIC to a ``storable_in_home`` flag and declares a
    foreign key to ``pokemon_forms (PFIC)``.

    Args:
        db_path: SQLite database to open. Defaults to ``'pokemon_forms.db'``
            (the path that was previously hard-coded).

    Returns:
        An open ``sqlite3.Connection``. The caller owns it and must close it.

    Raises:
        sqlite3.Error: if the schema cannot be created. The connection is
            closed before the error propagates so it does not leak.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS pokemon_storage (
                PFIC TEXT PRIMARY KEY,
                storable_in_home BOOLEAN NOT NULL,
                FOREIGN KEY (PFIC) REFERENCES pokemon_forms (PFIC)
            )
        ''')
        conn.commit()
    except sqlite3.Error:
        conn.close()
        raise
    return conn
|
|
|
|
def initialize_db():
    """Create the ``pokemon_forms`` and ``pokemon_storage`` tables if missing.

    Each creator returns an open connection; it is closed straight away
    because this function only needs the schema side effect (previously
    both connections were dropped without being closed).
    """
    for create_schema in (create_pokemon_db, create_pokemon_storage_db):
        create_schema().close()
|
|
|
|
def insert_pokemon_form(conn, pokemon_form):
    """Insert or replace one row of ``pokemon_forms`` and commit.

    Args:
        conn: open SQLite connection holding the ``pokemon_forms`` table.
        pokemon_form: object exposing ``id``, ``name``, ``form_name``,
            ``national_dex`` and ``generation``; ``id`` goes into the PFIC
            column.
    """
    statement = '''
        INSERT OR REPLACE INTO pokemon_forms
        (PFIC, name, form_name, national_dex, generation)
        VALUES (?, ?, ?, ?, ?)
    '''
    row = (
        pokemon_form.id,
        pokemon_form.name,
        pokemon_form.form_name,
        pokemon_form.national_dex,
        pokemon_form.generation,
    )
    conn.cursor().execute(statement, row)
    conn.commit()
|
|
|
|
def insert_pokemon_storage(conn, pfic: str, storable_in_home: bool):
    """Insert or replace the ``pokemon_storage`` row for *pfic* and commit.

    Args:
        conn: open SQLite connection holding the ``pokemon_storage`` table.
        pfic: form identifier used as the primary key.
        storable_in_home: flag written to the ``storable_in_home`` column.
    """
    statement = '''
        INSERT OR REPLACE INTO pokemon_storage
        (PFIC, storable_in_home)
        VALUES (?, ?)
    '''
    conn.cursor().execute(statement, (pfic, storable_in_home))
    conn.commit()
|
|
|
|
class PokemonDatabase:
    """In-memory collection of Pokémon forms grouped by National Dex number."""

    def __init__(self):
        # Keys are the integer National Dex numbers passed to add_pokemon
        # (the previous Dict[str, ...] annotation did not match actual use).
        self.pokemon: Dict[int, List["PokemonForm"]] = {}

    def add_pokemon(self, national_dex: int, name: str, region_code: int, form_index: int, gender_code: int, form_name: Optional[str], sprite_url: str):
        """Build a PokemonForm from the given parts and file it under *national_dex*.

        The form id is produced by ``format_pokemon_id`` and *region_code* is
        also stored as the form's ``generation``.
        """
        pokemon_id = format_pokemon_id(national_dex, region_code, form_index, gender_code)
        pokemon_form = PokemonForm(
            id=pokemon_id,
            name=name,
            form_name=form_name,
            sprite_url=sprite_url,
            national_dex=national_dex,
            generation=region_code,
        )
        self.pokemon.setdefault(national_dex, []).append(pokemon_form)

    def get_pokemon(self, national_dex: Optional[int] = None, region_code: Optional[int] = None,
                    form_index: Optional[int] = None, gender_code: Optional[int] = None) -> List["PokemonForm"]:
        """Return every stored form whose id matches all supplied filters.

        Each filter is compared with the corresponding dash-separated part of
        the form id (dex, region, form index, gender); a filter left as
        ``None`` matches anything.
        """
        # (position within the split id, wanted value)
        criteria = (
            (0, national_dex),
            (1, region_code),
            (2, form_index),
            (3, gender_code),
        )
        results = []
        for dex_forms in self.pokemon.values():
            for form in dex_forms:
                parts = form.id.split('-')
                if all(wanted is None or int(parts[pos]) == wanted for pos, wanted in criteria):
                    results.append(form)
        return results

    def get_pokemon_by_id(self, pokemon_id: str) -> Optional["PokemonForm"]:
        """Return the form whose id equals *pokemon_id*, or ``None``.

        An id whose leading part is not an integer cannot belong to any
        stored form, so it yields ``None`` instead of raising ``ValueError``.
        """
        try:
            national_dex = int(pokemon_id.split('-')[0])
        except ValueError:
            return None
        for form in self.pokemon.get(national_dex, []):
            if form.id == pokemon_id:
                return form
        return None
|
|
|
|
def format_pokemon_id(national_dex: int, region_code: int, form_index: int, gender_code: int) -> str:
    """Compose a form id as ``DDDD-RR-FFF-G``.

    The dex number is zero-padded to four digits, the region code to two and
    the form index to three; the gender code is appended unpadded.
    """
    template = "{:04d}-{:02d}-{:03d}-{}"
    return template.format(national_dex, region_code, form_index, gender_code)
|
|
|
|
def get_pokemon_sprites_page(cache: CacheManager):
    """Fetch the PokémonDB sprite index page through *cache*.

    Returns whatever ``cache.fetch_url`` returns for that address.
    """
    return cache.fetch_url("https://pokemondb.net/sprites")
|
|
|
|
def get_pokemon_sprites_page_data(cache: CacheManager, pokemon_name: str):
    """Fetch the PokémonDB sprite page of one Pokémon through *cache*.

    *pokemon_name* is appended to the URL as given, so it has to be the
    URL-ready form of the name already. Returns whatever
    ``cache.fetch_url`` returns.
    """
    return cache.fetch_url(f"https://pokemondb.net/sprites/{pokemon_name}")
|
|
|
|
def get_pokemon_dex_page(cache: CacheManager, pokemon_name: str):
    """Fetch the PokémonDB Pokédex page of one Pokémon through *cache*.

    *pokemon_name* is appended to the URL as given, so it has to be the
    URL-ready form of the name already. Returns whatever
    ``cache.fetch_url`` returns.
    """
    return cache.fetch_url(f"https://pokemondb.net/pokedex/{pokemon_name}")
|
|
|
|
def remove_accents(input_str):
    """Return *input_str* with combining marks (accents) stripped.

    The text is NFKD-normalised first so that accented letters split into a
    base character followed by combining marks, which are then dropped.
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    kept = (ch for ch in decomposed if not unicodedata.combining(ch))
    return "".join(kept)
|
|
|
|
def _normalize_form_name(label):
    """Reduce a form label to the canonical text used by compare_forms."""
    text = label.lower().replace("forme", "").replace("form", "").replace("é", "e").strip()
    # Map spelling variants onto a single spelling so they compare equal.
    return text.replace("deputante", "debutante").replace("p'au", "pa'u").replace("blood moon", "bloodmoon")


def compare_forms(a, b):
    """Return True when two form labels refer to the same form.

    Labels match when they are equal, or equal after lower-casing, removing
    the words "forme"/"form", replacing "é" with "e", trimming whitespace and
    unifying a few known spelling variants. If either label is ``None`` the
    result is False.
    """
    if a is None or b is None:
        return False
    if a == b:
        return True
    return _normalize_form_name(a) == _normalize_form_name(b)
|
|
|
|
def download_image(url, filename, timeout=30):
    """Download *url* and write the response body to *filename*.

    The file is written only when the server answers with status 200; any
    other status is logged as a warning and no file is created.

    Args:
        url: address of the image.
        filename: destination path, opened in binary write mode.
        timeout: seconds passed to ``requests.get``. Without a timeout a
            stalled server would block the caller indefinitely.

    Raises:
        requests.RequestException: network-level failures (including a
            timeout) are not caught here.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        logger.warning("Could not download %s (HTTP %s)", url, response.status_code)
        return
    with open(filename, 'wb') as f:
        f.write(response.content)
|
|
|
|
# Inclusive National Dex range (first, last) of each generation. Used as the
# fallback generation when the Pokédex page does not yield one.
_GENERATION_DEX_RANGES = {
    1: (1, 151),
    2: (152, 251),
    3: (252, 386),
    4: (387, 493),
    5: (494, 649),
    6: (650, 721),
    7: (722, 809),
    8: (810, 905),
    9: (906, 1025),
}

# Intro sentence of a Pokédex page; group 4 is the generation number.
_DEX_INTRO_PATTERN = r'^(.+?) is a (\w+)(?:/(\w+))? type Pokémon introduced in Generation (\d+)\.$'

# Sentences describing when an alternate form appeared. Every pattern yields
# the tuple (form adjective, first game, optional second game).
_FORM_INTRO_PATTERNS = (
    r'a(?:n)? (\w+) Form(?:,)? introduced in (?:the )?([\w\s:]+)(?:\/([\w\s:]+))?',
    r'a(?:n)? (\w+) form(?:,)? available in the latest update to ([\w\s:]+)(?:& ([\w\s:]+))?',
    r'has (?:\w+) new (\w+) Form(?:s)?(?:,)? available in (?:the )?([\w\s:]+)(?:& ([\w\s:]+))?',
    r'a(?:n)? (\w+) form(?:,)? introduced in the Crown Tundra Expansion Pass to ([\w\s:]+)(?:& ([\w\s:]+))?',
)


def _generation_from_dex_range(national_dex):
    """Return the generation whose dex range contains *national_dex* (1 if none does)."""
    for generation, (first, last) in _GENERATION_DEX_RANGES.items():
        if first <= national_dex <= last:
            return generation
    return 1


def _sprite_form_name(sprite):
    """Join the text of a sprite card's <small> tags; "None" when it has none."""
    smalls = sprite.find_all('small')
    if not smalls:
        return "None"
    return " ".join(small.get_text(strip=True) for small in smalls).strip()


def _read_dex_page(cache, pokemon_name):
    """Return (generation, form_text) parsed from a Pokémon's Pokédex page.

    Either element is None when it cannot be obtained: the page is missing,
    the <h1> with the Pokémon name is not found, the intro sentence does not
    match, or there is no following paragraph.
    """
    # NOTE(review): this slug is built differently from the sprite-page slug
    # (spaces are removed rather than turned into dashes) — confirm that the
    # site resolves multi-word names with it.
    dex_page_data = get_pokemon_dex_page(cache, pokemon_name.replace("'", "").replace(".", "-").replace(" ", ""))
    if not dex_page_data:
        return None, None

    dex_soup = BeautifulSoup(dex_page_data, 'html.parser')
    # Find the heading that carries the Pokémon name.
    dex_header = dex_soup.find('h1', string=pokemon_name)
    if dex_header is None:
        return None, None

    # The next <p> holds the "... introduced in Generation N." sentence.
    generation_tag = dex_header.find_next('p')
    if generation_tag is None:
        return None, None

    intro = re.match(_DEX_INTRO_PATTERN, generation_tag.get_text())
    generation = int(intro.group(4)) if intro else None

    # The paragraph after that describes alternate forms, when present.
    next_tag = generation_tag.find_next('p')
    form_text = remove_accents(next_tag.get_text()) if next_tag is not None else None
    return generation, form_text


def _lookup_form_generation(conn, form_text, test_form):
    """Return the generation of the game that introduced *test_form*, or None.

    Scans *form_text* with each form-introduction pattern; for the first
    match whose form adjective equals *test_form* (per compare_forms) and
    whose game is known to the games / alternate_game_names tables, that
    game's generation is returned.
    """
    for form_pattern in _FORM_INTRO_PATTERNS:
        for regional, game1, _game2 in re.findall(form_pattern, form_text, re.IGNORECASE):
            if not compare_forms(test_form, regional):
                continue
            target_game = game1.replace("Pokemon", "").strip()
            cursor = conn.cursor()
            cursor.execute('''
                SELECT g.generation
                FROM games g
                LEFT JOIN alternate_game_names agn ON g.id = agn.game_id
                WHERE g.name = ? OR agn.alternate_name = ?
                LIMIT 1
            ''', (target_game, target_game))
            result = cursor.fetchone()
            if result:
                return result[0]
    return None


def retrieve_all_pokemon_forms(cache: CacheManager, progress_callback=None):
    """Scrape every Pokémon form from PokémonDB, store it and fetch its sprite.

    For each entry of the sprite index page, the "Home" rows of the
    "Generation 8" table on that Pokémon's sprite page are read. Every
    non-shiny sprite becomes a PokemonForm that is added to an in-memory
    PokemonDatabase and written to the ``pokemon_forms`` and
    ``pokemon_storage`` tables. Afterwards each form's sprite is downloaded
    to ``images-new/<form id>.png`` unless that file already exists.

    The generation of a form is, in increasing priority: the dex-range
    table, the generation stated on the Pokédex page, and — for labelled
    forms — the generation of the game named in the form description.

    Assumes the ``games`` and ``alternate_game_names`` tables already exist
    in ``pokemon_forms.db``; they are queried here but not created in the
    visible part of this module — TODO confirm.

    Args:
        cache: provides ``fetch_url`` for all page requests.
        progress_callback: optional callable receiving a status string per
            Pokémon.

    Returns:
        None. The function also stops early, without downloading images,
        when the index page or any sprite page cannot be fetched.
    """
    db = PokemonDatabase()
    pokemon_db_conn = create_pokemon_db()
    try:
        # Only the schema side effect is needed; do not leak the connection.
        create_pokemon_storage_db().close()

        page_data = get_pokemon_sprites_page(cache)
        if not page_data:
            return None

        soup = BeautifulSoup(page_data, 'html.parser')
        infocards = soup.find_all('a', class_='infocard')

        # enumerate keeps the dex number in step with the index page even
        # when a Pokémon is skipped by one of the `continue`s below.
        for national_dex_index, mon in enumerate(infocards, 1):
            pokemon_name = mon.get_text(strip=True)
            logger.info(pokemon_name)
            if progress_callback:
                progress_callback(f"Processing {pokemon_name}")

            pokemon_url_name = pokemon_name.replace("♀", "-f").replace("♂", "-m").replace("'", "").replace(".", "").replace('é', 'e').replace(':', '')
            pokemon_url_name = pokemon_url_name.replace(" ", "-")

            sprites_page_data = get_pokemon_sprites_page_data(cache, pokemon_url_name)
            if not sprites_page_data:
                return None
            sprites_soup = BeautifulSoup(sprites_page_data, 'html.parser')

            generation_8_header = sprites_soup.find('h2', string='Generation 8')
            if not generation_8_header:
                continue
            generation_8_table = generation_8_header.find_next('table')
            if not generation_8_table:
                continue
            generation_8_tbody = generation_8_table.find('tbody')
            if not generation_8_tbody:
                continue

            range_generation = _generation_from_dex_range(national_dex_index)
            # The Pokédex page is identical for every sprite of this
            # Pokémon: read it lazily, at most once.
            dex_info = None

            for row in generation_8_tbody.find_all('tr'):
                if 'Home' not in row.get_text(strip=True):
                    continue

                form = 0
                for sprite in row.find_all('span', class_='sprites-table-card'):
                    sprite_img = sprite.find('img')
                    # An <img> without src must not turn the URL into None.
                    sprite_url = (sprite_img.get('src') if sprite_img is not None else None) or "missing"
                    if "shiny" in sprite_url:
                        continue

                    form_name = _sprite_form_name(sprite)
                    logger.info(f'{sprite_url}, {form_name}')

                    if form_name != "None":
                        form += 1
                    gender = 0
                    # Gender variants share the form index of the base form.
                    if form_name.startswith("Male"):
                        form -= 1
                        gender = 1
                    elif form_name.startswith("Female"):
                        form -= 1
                        gender = 2

                    if dex_info is None:
                        dex_info = _read_dex_page(cache, pokemon_name)
                    dex_generation, form_text = dex_info

                    # Start every sprite from the species generation so a
                    # previous sprite's regional override cannot leak in.
                    generation = dex_generation if dex_generation is not None else range_generation

                    if form_name != "None" and form_text is not None:
                        test_form = form_name.replace(pokemon_name, "").replace("Male", "").replace("Female", "").strip()
                        if pokemon_name == "Tauros" and form_name in ("Aqua Breed", "Blaze Breed", "Combat Breed"):
                            test_form = "Paldean"
                        form_generation = _lookup_form_generation(pokemon_db_conn, form_text, test_form)
                        if form_generation is not None:
                            generation = form_generation

                    stored_form_name = form_name if form_name != "None" else None
                    pokemon_form = PokemonForm(
                        id=format_pokemon_id(national_dex_index, generation, form, gender),
                        name=pokemon_name,
                        form_name=stored_form_name,
                        sprite_url=sprite_url,
                        national_dex=national_dex_index,
                        generation=generation
                    )
                    db.add_pokemon(
                        national_dex_index,
                        pokemon_name,
                        generation,
                        form,
                        gender,
                        stored_form_name,
                        sprite_url
                    )
                    insert_pokemon_form(pokemon_db_conn, pokemon_form)

                    storable_in_home = not any(keyword in form_name.lower() for keyword in ['mega', 'gigantamax']) if form_name else True
                    insert_pokemon_storage(pokemon_db_conn, pokemon_form.id, storable_in_home)

        logger.info(f"Total Pokémon forms: {sum(len(forms) for forms in db.pokemon.values())}")
        logger.info(f"Pokémon with multiple forms: {sum(1 for forms in db.pokemon.values() if len(forms) > 1)}")

        os.makedirs('images-new', exist_ok=True)

        for forms in db.pokemon.values():
            for form in forms:
                filename = f"images-new/{form.id}.png"
                if os.path.exists(filename):
                    logger.info(f"Image for {form.id} already exists, skipping download")
                    continue
                if form.sprite_url == "missing":
                    # Placeholder set above when the card had no usable image.
                    logger.warning(f"No sprite URL for {form.id}, skipping download")
                    continue
                download_image(form.sprite_url, filename)
                # download_image writes the file only on HTTP 200.
                if os.path.exists(filename):
                    logger.info(f"Downloaded image for {form.id}")
                else:
                    logger.warning(f"Failed to download image for {form.id}")
    finally:
        # Runs on early returns and on errors too.
        pokemon_db_conn.close()
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the full scrape with a fresh cache and make
    # sure the cache is closed even if the scrape raises.
    cache = CacheManager()
    try:
        retrieve_all_pokemon_forms(cache)
    finally:
        cache.close()
|