You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

255 lines
9.0 KiB

import requests
from bs4 import BeautifulSoup
from typing import Dict, List, Optional
from dataclasses import dataclass, asdict
import os
import sqlite3
from DataGatherers.cache_manager import CacheManager
@dataclass
class PokemonForm:
    """A single form of a Pokémon together with its sprite location.

    Field order is part of the constructor signature and must not change.
    """

    id: str  # PFIC: the identifier string used as the primary key for this form
    name: str  # display name of the Pokémon
    form_name: Optional[str]  # label of the form, or None when there is no label
    sprite_url: str  # where the sprite image for this form can be fetched
    national_dex: int  # National Dex number
    generation: int  # generation number associated with this form
def create_pokemon_db(db_path: str = 'pokemon_forms.db') -> sqlite3.Connection:
    """Open the forms database and make sure the ``pokemon_forms`` table exists.

    Args:
        db_path: SQLite database to open. Defaults to the on-disk file the
            script has always used; pass ``":memory:"`` for a throwaway DB.

    Returns:
        The open connection. The caller owns it and must close it.
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS pokemon_forms (
            PFIC TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            form_name TEXT,
            national_dex INTEGER NOT NULL,
            generation INTEGER NOT NULL
        )
    ''')
    conn.commit()
    return conn
def create_pokemon_storage_db(db_path: str = 'pokemon_forms.db') -> sqlite3.Connection:
    """Open the forms database and make sure the ``pokemon_storage`` table exists.

    The table records, per PFIC, whether that form is storable in Home, and
    declares a foreign key to ``pokemon_forms (PFIC)``.

    Args:
        db_path: SQLite database to open. Defaults to the on-disk file the
            script has always used; pass ``":memory:"`` for a throwaway DB.

    Returns:
        The open connection. The caller owns it and must close it.
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS pokemon_storage (
            PFIC TEXT PRIMARY KEY,
            storable_in_home BOOLEAN NOT NULL,
            FOREIGN KEY (PFIC) REFERENCES pokemon_forms (PFIC)
        )
    ''')
    conn.commit()
    return conn
def initialize_db():
    """Create both tables if they are missing.

    Each creator returns an open connection; it is closed straight away so
    that initialisation does not leave connections dangling.
    """
    for create_table in (create_pokemon_db, create_pokemon_storage_db):
        create_table().close()
def insert_pokemon_form(conn, pokemon_form):
    """Insert (or overwrite) the ``pokemon_forms`` row for *pokemon_form* and commit.

    Args:
        conn: open SQLite connection that already has the ``pokemon_forms`` table.
        pokemon_form: object exposing ``id``, ``name``, ``form_name``,
            ``national_dex`` and ``generation`` attributes.
    """
    row = (
        pokemon_form.id,
        pokemon_form.name,
        pokemon_form.form_name,
        pokemon_form.national_dex,
        pokemon_form.generation,
    )
    statement = '''
        INSERT OR REPLACE INTO pokemon_forms
        (PFIC, name, form_name, national_dex, generation)
        VALUES (?, ?, ?, ?, ?)
    '''
    conn.cursor().execute(statement, row)
    conn.commit()
def insert_pokemon_storage(conn, pfic: str, storable_in_home: bool):
    """Insert (or overwrite) the ``pokemon_storage`` row for *pfic* and commit.

    Args:
        conn: open SQLite connection that already has the ``pokemon_storage`` table.
        pfic: identifier of the form the flag belongs to.
        storable_in_home: value written to the ``storable_in_home`` column.
    """
    statement = '''
        INSERT OR REPLACE INTO pokemon_storage
        (PFIC, storable_in_home)
        VALUES (?, ?)
    '''
    writer = conn.cursor()
    writer.execute(statement, (pfic, storable_in_home))
    conn.commit()
class PokemonDatabase:
    """In-memory collection of Pokémon forms, grouped by National Dex number."""

    def __init__(self):
        # Keyed by the integer National Dex number (not by the PFIC string);
        # each value is the list of forms added for that number.
        self.pokemon: Dict[int, List["PokemonForm"]] = {}

    def add_pokemon(self, national_dex: int, name: str, region_code: int, form_index: int, gender_code: int, form_name: Optional[str], sprite_url: str):
        """Build a form from the given parts and append it under *national_dex*.

        The form's id is produced by ``format_pokemon_id`` and ``region_code``
        is also stored as the form's ``generation``.
        """
        pokemon_id = format_pokemon_id(national_dex, region_code, form_index, gender_code)
        pokemon_form = PokemonForm(
            id=pokemon_id,
            name=name,
            form_name=form_name,
            sprite_url=sprite_url,
            national_dex=national_dex,
            generation=region_code,
        )
        self.pokemon.setdefault(national_dex, []).append(pokemon_form)

    def get_pokemon(self, national_dex: Optional[int] = None, region_code: Optional[int] = None,
                    form_index: Optional[int] = None, gender_code: Optional[int] = None) -> List["PokemonForm"]:
        """Return every stored form whose id matches all the filters supplied.

        A filter left as ``None`` matches anything. Filters are compared, in
        order, against the integer value of the dash-separated id segments.
        """
        if national_dex is None:
            buckets = self.pokemon.values()
        else:
            # Forms are bucketed by dex number, so only one bucket can match.
            buckets = [self.pokemon.get(national_dex, [])]

        wanted = (national_dex, region_code, form_index, gender_code)
        results = []
        for forms in buckets:
            for form in forms:
                segments = form.id.split('-')
                if all(want is None or int(segment) == want
                       for want, segment in zip(wanted, segments)):
                    results.append(form)
        return results

    def get_pokemon_by_id(self, pokemon_id: str) -> Optional["PokemonForm"]:
        """Return the form whose id equals *pokemon_id*, or ``None``.

        An id whose first segment is not an integer cannot belong to any
        stored form, so it yields ``None`` rather than raising.
        """
        try:
            national_dex = int(pokemon_id.split('-')[0])
        except ValueError:
            return None
        for form in self.pokemon.get(national_dex, []):
            if form.id == pokemon_id:
                return form
        return None
def format_pokemon_id(national_dex: int, region_code: int, form_index: int, gender_code: int) -> str:
    """Join the four parts into a dash-separated id string.

    The dex number is zero-padded to 4 digits, the region code to 2 and the
    form index to 3; the gender code is written without padding.
    """
    segments = (
        format(national_dex, "04d"),
        format(region_code, "02d"),
        format(form_index, "03d"),
        format(gender_code),
    )
    return "-".join(segments)
def get_pokemon_sprites_page(cache: CacheManager):
    """Fetch the pokemondb.net sprites index page through *cache*.

    Returns whatever ``cache.fetch_url`` returns for that URL.
    """
    return cache.fetch_url("https://pokemondb.net/sprites")
def get_pokemon_sprites_page_data(cache: CacheManager, pokemon_name: str):
    """Fetch the pokemondb.net sprites page for one Pokémon through *cache*.

    *pokemon_name* is appended to the sprites URL unchanged, so it must
    already be in the form the site expects. Returns whatever
    ``cache.fetch_url`` returns for that URL.
    """
    sprites_url = "https://pokemondb.net/sprites/{}".format(pokemon_name)
    return cache.fetch_url(sprites_url)
def download_image(url, filename, timeout=30):
    """Download *url* and write the response body to *filename*.

    Args:
        url: address of the image to fetch.
        filename: path the image bytes are written to.
        timeout: seconds to wait for the server before giving up; without a
            timeout a stalled connection would block the script forever.

    Returns:
        True if the file was written, False if the request failed or the
        server answered with anything other than HTTP 200.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # One unreachable or malformed URL should not abort a bulk download.
        print(f"Failed to download {url}: {exc}")
        return False

    if response.status_code != 200:
        return False

    with open(filename, 'wb') as f:
        f.write(response.content)
    return True
# Inclusive (first, last) National Dex numbers for each generation.
_GENERATION_DEX_RANGES = {
    1: (1, 151),
    2: (152, 251),
    3: (252, 386),
    4: (387, 493),
    5: (494, 649),
    6: (650, 721),
    7: (722, 809),
    8: (810, 905),
    9: (906, 1025),
}


def _generation_for_dex(national_dex: int) -> int:
    """Return the generation whose dex range contains *national_dex* (1 if none)."""
    for generation, (first, last) in _GENERATION_DEX_RANGES.items():
        if first <= national_dex <= last:
            return generation
    return 1


def _sprites_url_name(pokemon_name: str) -> str:
    """Turn a display name into the name used in the per-Pokémon sprites URL."""
    # The gender symbols are written as escapes so they survive re-encoding;
    # replacing an empty string instead would corrupt every name.
    url_name = pokemon_name.replace("\u2640", "-f").replace("\u2642", "-m")
    for dropped in ("'", ".", ":"):
        url_name = url_name.replace(dropped, "")
    return url_name.replace('é', 'e').replace(" ", "-")


def _generation_8_rows(sprites_soup):
    """Return the body rows of the table after the 'Generation 8' heading, or []."""
    header = sprites_soup.find('h2', string='Generation 8')
    table = header.find_next('table') if header else None
    tbody = table.find('tbody') if table else None
    return tbody.find_all('tr') if tbody else []


def thingy(cache: CacheManager):
    """Scrape Home sprites for every listed Pokémon, store them and download images.

    For each entry on the sprites index page, the Pokémon's own sprites page
    is fetched and every non-shiny sprite in a 'Home' row of its
    'Generation 8' table becomes a form. Each form is added to an in-memory
    ``PokemonDatabase``, written to the ``pokemon_forms`` and
    ``pokemon_storage`` tables, and finally its image is saved under
    ``images-new/`` unless that file already exists.

    The run stops early (returning ``None``) when the index page or any
    per-Pokémon page cannot be fetched. Always returns ``None``.
    """
    db = PokemonDatabase()
    pokemon_db_conn = create_pokemon_db()
    try:
        # Only needed for its table-creating side effect; don't keep it open.
        create_pokemon_storage_db().close()

        page_data = get_pokemon_sprites_page(cache)
        if not page_data:
            return None

        soup = BeautifulSoup(page_data, 'html.parser')
        infocards = soup.find_all('a', class_='infocard')

        # The position in the list is used as the dex number, so it must
        # advance for every entry, including entries skipped further down.
        for national_dex_index, mon in enumerate(infocards, start=1):
            generation = _generation_for_dex(national_dex_index)

            pokemon_name = mon.get_text(strip=True)
            print(pokemon_name)

            sprites_page_data = get_pokemon_sprites_page_data(cache, _sprites_url_name(pokemon_name))
            if not sprites_page_data:
                return None
            sprites_soup = BeautifulSoup(sprites_page_data, 'html.parser')

            for row in _generation_8_rows(sprites_soup):
                if 'Home' not in row.get_text(strip=True):
                    continue

                form = 0
                for sprite in row.find_all('span', class_='sprites-table-card'):
                    sprite_img = sprite.find('img')
                    # No <img>, or an <img> without src, means no usable URL.
                    sprite_url = (sprite_img.get('src') if sprite_img else None) or "missing"
                    if "shiny" in sprite_url:
                        continue

                    small = sprite.find('small')
                    form_name = small.get_text(strip=True) if small else None
                    print(sprite_url, form_name)

                    # Gender labels keep the current form index; any other
                    # label starts a new form.
                    gender = 0
                    if form_name == "Female":
                        gender = 1
                    elif form_name == "Male":
                        gender = 2
                    elif form_name is not None:
                        form += 1

                    pokemon_form = PokemonForm(
                        id=format_pokemon_id(national_dex_index, generation, form, gender),
                        name=pokemon_name,
                        form_name=form_name,
                        sprite_url=sprite_url,
                        national_dex=national_dex_index,
                        generation=generation,
                    )
                    db.add_pokemon(
                        national_dex_index,
                        pokemon_name,
                        generation,
                        form,
                        gender,
                        form_name,
                        sprite_url,
                    )
                    insert_pokemon_form(pokemon_db_conn, pokemon_form)

                    label = (form_name or "").lower()
                    storable_in_home = not any(keyword in label for keyword in ('mega', 'gigantamax'))
                    insert_pokemon_storage(pokemon_db_conn, pokemon_form.id, storable_in_home)

        print(f"Total Pokémon forms: {sum(len(forms) for forms in db.pokemon.values())}")
        print(f"Pokémon with multiple forms: {sum(1 for forms in db.pokemon.values() if len(forms) > 1)}")

        os.makedirs('images-new', exist_ok=True)

        for forms in db.pokemon.values():
            for form in forms:
                filename = f"images-new/{form.id}.png"
                if os.path.exists(filename):
                    print(f"Image for {form.id} already exists, skipping download")
                    continue
                if form.sprite_url == "missing":
                    print(f"No sprite URL for {form.id}, skipping download")
                    continue
                download_image(form.sprite_url, filename)
                # Report based on the file itself, not on the call having run.
                if os.path.exists(filename):
                    print(f"Downloaded image for {form.id}")
                else:
                    print(f"Failed to download image for {form.id}")
    finally:
        # Runs on the early returns and on errors too.
        pokemon_db_conn.close()
if __name__ == "__main__":
    cache = CacheManager()
    try:
        thingy(cache)
    finally:
        # Close the cache even when the scrape raises part-way through.
        cache.close()