from __future__ import annotations
import csv
import requests
import time
import json
import os
import re
import sqlite3
from bs4 import BeautifulSoup , Tag , NavigableString
import copy
from typing import List , Optional
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from collections import defaultdict
# Persistent cache: one SQLite file holding JSON-encoded values by string key.
_CREATE_CACHE_TABLE_SQL = '''
CREATE TABLE IF NOT EXISTS cache (
key TEXT PRIMARY KEY,
value TEXT
)
'''
conn = sqlite3.connect('pokemon_cache.db')
cursor = conn.cursor()
# Make sure the table exists before anything tries to read from it.
cursor.execute(_CREATE_CACHE_TABLE_SQL)
conn.commit()

# Every main-series game this script knows about, in chronological order.
# Within a generation the "special" releases come first, so a scan from the
# front of the list finds them before the paired releases.
all_games = [
    "Yellow", "Red", "Blue",
    "Crystal", "Gold", "Silver",
    "Emerald", "FireRed", "LeafGreen", "Ruby", "Sapphire",
    "Platinum", "HeartGold", "SoulSilver", "Diamond", "Pearl",
    "Black 2", "White 2", "Black", "White",
    "X", "Y", "Omega Ruby", "Alpha Sapphire",
    "Ultra Sun", "Ultra Moon", "Sun", "Moon",
    "Sword", "Shield", "Expansion Pass",
    "Brilliant Diamond", "Shining Pearl",
    "Legends: Arceus",
    "Scarlet",
    "Violet",
    "The Teal Mask",
    "The Hidden Treasure of Area Zero",
    "The Hidden Treasure of Area Zero (Scarlet)",
    "The Hidden Treasure of Area Zero (Violet)",
    "The Teal Mask (Scarlet)",
    "The Teal Mask (Violet)",
    "Unknown",
    "Pokémon Home",
    "Pokémon Go",
]

# Pokemon objects appended by read_pokemon_list().
big_pokemon_list = []
# In-memory copy of the SQLite cache table (key -> decoded JSON value).
cache = {}
# Entries added to `cache` that have not been written to SQLite yet.
new_entries_count = 0
def get_cached_data():
    """Load every row of the SQLite ``cache`` table into the in-memory ``cache`` dict.

    Stored values are JSON text; each one is decoded before being stored under
    its key. Existing in-memory entries with the same key are overwritten.
    """
    global cache
    cursor.execute("SELECT key, value FROM cache")
    stored_rows = cursor.fetchall()
    cache.update((stored_key, json.loads(raw_json)) for stored_key, raw_json in stored_rows)
# Keys added through update_cache() since the last flush. Tracking them lets
# save_cached_data() write only the new rows instead of re-serialising and
# rewriting the whole cache after every single fetch.
_unsaved_cache_keys = set()


def save_cached_data():
    """Persist pending cache entries to the SQLite ``cache`` table.

    Does nothing when ``new_entries_count`` is zero. Otherwise writes the keys
    recorded by ``update_cache()``; if the counter was raised without any key
    being recorded, falls back to writing every entry of ``cache``. Commits
    and resets the counter afterwards.
    """
    global new_entries_count
    if new_entries_count <= 0:
        return
    pending_keys = [key for key in _unsaved_cache_keys if key in cache]
    if not pending_keys:
        # Counter bumped by something other than update_cache(): rewrite all.
        pending_keys = list(cache)
    cursor.executemany(
        "INSERT OR REPLACE INTO cache (key, value) VALUES (?, ?)",
        [(key, json.dumps(cache[key])) for key in pending_keys],
    )
    conn.commit()
    _unsaved_cache_keys.clear()
    new_entries_count = 0


def update_cache(key, value):
    """Add ``value`` under ``key`` if the key is new, persist it, then pause.

    Keys that are already cached are left untouched. For a new key the entry
    is flushed to SQLite immediately and the function sleeps for one second.
    The callers in this file invoke it right after a network fetch, so the
    pause presumably serves as a crude rate limit.
    """
    global new_entries_count
    if key in cache:
        return
    cache[key] = value
    _unsaved_cache_keys.add(key)
    new_entries_count += 1
    save_cached_data()
    time.sleep(1)
# Name lookup used by find_pokemon(): read there as a mapping from lower-cased
# Pokémon name to a list of Pokemon objects. Starts out unset; presumably
# assigned the result of create_pokemon_index() elsewhere - verify at the caller.
pokemon_index = None
def create_pokemon_index(pokemon_list):
    """Group Pokémon by lower-cased name.

    Returns a ``defaultdict(list)`` whose keys are ``pokemon.name.lower()`` and
    whose values hold the matching objects in their original order.
    """
    by_name = defaultdict(list)
    for entry in pokemon_list:
        lookup_key = entry.name.lower()
        by_name[lookup_key].append(entry)
    return by_name
def _closest_form(candidates, form, threshold):
    """Return the candidate whose form is most similar to *form*.

    Candidates without a form are ignored. Similarity is ``fuzz.ratio`` on the
    lower-cased strings; ``None`` is returned when the best score is below
    *threshold*.
    """
    winner = None
    top_score = 0
    for candidate in candidates:
        if not candidate.form:
            continue
        score = fuzz.ratio(form.lower(), candidate.form.lower())
        if score > top_score:
            top_score = score
            winner = candidate
    if winner and top_score >= threshold:
        return winner
    return None


def find_pokemon(name, form=None, threshold=80):
    """Look up a Pokémon in ``pokemon_index`` by name and, optionally, form.

    An exact (case-insensitive) name hit is tried first, then the most similar
    indexed name, provided its ``fuzz.ratio`` score reaches *threshold*.
    Without a *form* the first entry stored under the name is returned; with
    one, the entry whose form is the closest fuzzy match at or above
    *threshold*. Returns ``None`` when nothing qualifies.
    """
    name = name.lower()

    if name in pokemon_index:
        exact_entries = pokemon_index[name]
        if not form:
            return exact_entries[0] if exact_entries else None
        exact_hit = _closest_form(exact_entries, form, threshold)
        if exact_hit:
            return exact_hit

    # No usable exact match: fall back to the most similar indexed name.
    closest_name = None
    closest_score = 0
    for known_name in pokemon_index:
        score = fuzz.ratio(name, known_name)
        if score > closest_score:
            closest_score = score
            closest_name = known_name

    if not (closest_name and closest_score >= threshold):
        return None
    similar_entries = pokemon_index[closest_name]
    if not form:
        return similar_entries[0]
    return _closest_form(similar_entries, form, threshold)
class Pokemon:
    """A Pokémon (name, number, optional form) and how it can first be obtained.

    ``encounter_information`` holds one object per game, each exposing
    ``game``, ``locations`` and ``method``. ``earliest_game`` is whichever of
    those comes first in ``all_games``, or ``None`` when none qualifies.
    """

    def __init__(self, name: str, number: int, form: Optional[str] = None):
        self.name = name
        self.number = number
        self.form = form
        self.stage: Optional[str] = None
        self.evolution_chain: Optional[List['EvolutionStage']] = []
        self.is_baby = False
        self.encounter_information: Optional[List['EncounterInformation']] = []
        self.earliest_game: Optional['EncounterInformation'] = None
        self.obtain_method: Optional[str] = None

    def get_earliest_game_and_method(self):
        """Return ``(game, method)`` describing the earliest way to obtain this Pokémon.

        Only the first stage of ``evolution_chain`` is consulted. A baby
        reports that stage's game with method ``"Breed"``; any other Pokémon
        that is not itself the first stage reports that stage's game with
        method ``"Evolve"``. Otherwise, or when the first stage has no resolved
        Pokémon or no earliest game, the Pokémon's own earliest game and
        method are used. Returns ``(None, None)`` when nothing is known.
        """
        if self.evolution_chain:
            reference = self.evolution_chain[0].pokemon_reference
            uses_reference = reference is not None and (self.is_baby or reference is not self)
            # A missing reference or earliest game used to raise AttributeError;
            # fall through to this Pokémon's own record instead.
            if uses_reference and reference.earliest_game:
                return reference.earliest_game.game, "Breed" if self.is_baby else "Evolve"
        if self.earliest_game:
            return self.earliest_game.game, self.earliest_game.method
        return None, None

    def __str__(self):
        form_part = f" {self.form}" if self.form else ""
        return f"{self.name}{form_part} (#{self.number})"

    def add_evolution_chain(self, evolution_chain: List['EvolutionStage']):
        """Replace the stored evolution chain."""
        self.evolution_chain = evolution_chain

    def add_stage(self, stage: str):
        """Store the stage label and flag the Pokémon as a baby if it contains 'Baby'."""
        self.stage = stage
        self.is_baby = self.stage is not None and 'Baby' in self.stage

    def update_encounter_information(self, exclude_events=True, exclude_home=True, exclude_go=True):
        """Recompute ``method`` on every encounter from its location texts.

        Locations mentioning a non-catchable source (trades, transfers, and
        optionally events, Pokémon HOME and Pokémon GO) are ignored. Each
        remaining location is classified as Starter, Gift, Evolve, Event or
        Catchable; the last classified location of an encounter wins. An
        encounter with no usable location ends up with ``method = None``.
        """
        if not self.encounter_information:
            return
        non_catchable_methods = ["trade", "global link", "poké transfer", "time capsule", "unobtainable"]
        if exclude_events:
            non_catchable_methods.append("event")
        if exclude_home:
            # The accented spelling is the one used in all_games; the
            # unaccented one alone never matched such text. Keep both.
            non_catchable_methods.extend(("pokémon home", "pokemon home"))
        if exclude_go:
            non_catchable_methods.append("pokémon go")

        for encounter in self.encounter_information:
            encounter.method = None
            for location in encounter.locations:
                text = location.lower()
                if any(term in text for term in non_catchable_methods):
                    continue
                if "first partner" in text:
                    encounter.method = "Starter"
                elif "received" in text:
                    encounter.method = "Gift"
                elif "evolve" in text:
                    encounter.method = "Evolve"
                elif "event" in text:
                    # Only reachable when exclude_events is False.
                    encounter.method = "Event"
                else:
                    encounter.method = "Catchable"

    def parse_encoutners_for_games(self):
        """Set ``earliest_game`` to the first game in ``all_games`` with a usable encounter.

        Leaves ``earliest_game`` untouched when no encounter has a method.
        (The misspelt name is kept because callers use it.)
        """
        game_methods = {}
        for encounter in self.encounter_information:
            if encounter.method:
                game_methods[encounter.game.lower()] = encounter
        for game in all_games:
            found = game_methods.get(game.lower())
            if found is not None:
                self.earliest_game = found
                return

    def determine_earliest_game(self):
        """Find the earliest game, relaxing one exclusion per pass until one is found.

        Passes, in order: everything excluded, events allowed, HOME allowed,
        GO allowed. Each pass relaxes only its own flag. ``earliest_game``
        is ``None`` if every pass fails or there is no encounter data.
        """
        if not self.encounter_information:
            self.earliest_game = None
            return
        relaxations = (
            {},
            {"exclude_events": False},
            {"exclude_home": False},
            {"exclude_go": False},
        )
        for relaxed in relaxations:
            self.update_encounter_information(**relaxed)
            self.parse_encoutners_for_games()
            if self.earliest_game is not None:
                return
        self.earliest_game = None
class EvolutionStage:
    """One link of an evolution chain.

    ``method`` is the text describing how this stage is reached (``None`` for
    the first stage). ``pokemon_reference`` is the result of ``find_pokemon``
    for this name and form, retried without the form if that finds nothing.
    """

    def __init__(self, pokemon: str, method: Optional[str] = None, stage: Optional[str] = None, form: Optional[str] = None):
        self.pokemon = pokemon
        self.method = method
        self.stage = stage
        self.next_stage: Optional[EvolutionStage] = None
        self.branches: List[EvolutionStage] = []
        self.is_baby = stage is not None and 'Baby' in stage

        # Prefer the form-specific entry; fall back to a form-less lookup.
        resolved = find_pokemon(pokemon, form)
        if resolved is None:
            resolved = find_pokemon(pokemon, None)
        self.pokemon_reference = resolved
        self.form = form

    def __str__(self):
        form_text = self.form if self.form else ''
        method_text = self.method if self.method else 'Base'
        return f"{self.pokemon} {form_text} ({method_text})"
class EncounterInformation:
    """The location texts listed for a Pokémon in a single game.

    ``method`` starts as ``"Unknown"``; ``Pokemon.update_encounter_information``
    later overwrites it with a classification or ``None``.
    """

    def __init__(self, game: str, locations: List[str]):
        self.game, self.locations = game, locations
        self.method = "Unknown"
def parse_evolution_chain(table: Tag, form: Optional[str] = None) -> List[EvolutionStage]:
    """Turn an evolution table into a list of linked ``EvolutionStage`` objects.

    The first ``<tr>`` of the table's own ``<tbody>`` is the main chain; later
    rows are branches. In every row a ``<td>`` containing a nested table
    describes a Pokémon, and a ``<td>`` without one holds the text describing
    how the next Pokémon is reached.

    Returns the main-chain stages in order, each linked to its successor via
    ``next_stage``. Returns ``[]`` when the table has no ``<tbody>`` or no rows.
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []
    rows = tbody.find_all('tr', recursive=False)
    if not rows:
        # Indexing rows[0] on an empty <tbody> used to raise IndexError.
        return []
    main_row, branch_rows = rows[0], rows[1:]

    # Main evolution chain.
    main_chain = []
    current_stage = None
    pending_method = None
    for td in main_row.find_all('td', recursive=False):
        if td.find('table'):
            pokemon_name = extract_pokemon_name(td)
            stage = extract_stage_form(td)
            new_stage = EvolutionStage(pokemon_name, pending_method, stage, form)
            pending_method = None
            if current_stage:
                current_stage.next_stage = new_stage
            current_stage = new_stage
            main_chain.append(current_stage)
        else:
            # Evolution method for the Pokémon in the next cell.
            pending_method = extract_evolution_method(td)

    # Branching evolutions.
    for row in branch_rows:
        branch_stage = None
        branch_method = None
        for td in row.find_all('td', recursive=False):
            if td.find('table'):
                pokemon_name = extract_pokemon_name(td)
                stage = extract_stage_form(td)
                new_stage = EvolutionStage(pokemon_name, branch_method, stage, form)
                branch_method = None
                if branch_stage:
                    branch_stage.next_stage = new_stage
                branch_stage = new_stage
                # NOTE(review): a branch is only attached when its cell has a
                # rowspan AND a main-chain stage has the same name as the
                # branch Pokémon itself - confirm this is the intended rule.
                if td.get('rowspan'):
                    for main_stage in main_chain:
                        if main_stage.pokemon == pokemon_name:
                            main_stage.branches.append(branch_stage)
                            break
            else:
                branch_method = extract_evolution_method(td)
    return main_chain
def extract_pokemon_name(td: Tag) -> str:
    """Return the Pokémon name shown in the table nested inside *td*.

    Uses the text of the ``<a class="selflink">`` when present; otherwise the
    first ``<a>`` that has a ``title`` and whose class is not ``image``.
    """
    inner_table = td.find('table')
    link = inner_table.find('a', class_='selflink')
    if not link:
        link = inner_table.find('a', title=True, class_=lambda x: x != 'image')
    return link.get_text(strip=True)
def extract_evolution_method(td: Tag) -> str:
    """Return the whitespace-stripped text of *td*, i.e. the evolution method."""
    method_text = td.get_text(strip=True)
    return method_text
def extract_stage_form(td: Tag) -> Optional[str]:
    """Return the text of the ``<small>`` tag in *td*'s nested table, or ``None`` if absent."""
    small = td.find('table').find('small')
    return small.get_text(strip=True) if small else None
def extract_is_baby(td: Tag) -> bool:
    """Return True when the ``<small>`` label in *td*'s nested table contains 'Baby'."""
    small = td.find('table').find('small')
    if not small:
        return False
    label = small.get_text(strip=True)
    return 'Baby' in label
def read_pokemon_list(filename, limit=50):
    """Read up to *limit* rows of the Pokémon CSV.

    Each row's ``name`` column is split into ``base_name`` and ``form``, the
    latter being a trailing parenthesised suffix with the parentheses removed,
    or ``None``. Both are added to the row dict. A ``Pokemon`` built from
    ``base_name``, the ``number`` column and ``form`` is appended to the
    module-level ``big_pokemon_list`` for every row read.

    Returns the list of augmented row dicts.
    """
    rows = []
    with open(filename, 'r', newline='', encoding='utf-8') as handle:
        for index, row in enumerate(csv.DictReader(handle)):
            if index >= limit:
                break
            full_name = row['name']
            # Base name, then an optional "(...)" suffix holding the form.
            parsed = re.match(r'(.*?)\s*(\(.*\))?$', full_name)
            if parsed:
                base_name, form = parsed.groups()
            else:
                base_name, form = full_name, None
            row['base_name'] = base_name.strip()
            row['form'] = form.strip('()') if form else None
            rows.append(row)
            big_pokemon_list.append(Pokemon(row['base_name'], row['number'], row['form']))
    return rows
# Substring rewrites applied, in this order, to a lower-cased form label.
_FORM_REPLACEMENTS = {
    'forme': '',
    'form': '',
    'alolan': 'alola',
    'galarian': 'galar',
    'hisuian': 'hisui',
    'paldean': 'paldea',
    'size': '',
    '10%': '10 power construct',
    'hoopa': '',
    'style': '',
    'core': '',
    'color': '',
    'blood moon': 'bloodmoon',
}

# Species whose form is dropped, so the bare species name is requested.
# 'wormadam' is deliberately absent: it used to be listed, which set its form
# to None right before the cloak handling called .replace() on it.
_SPECIES_WITHOUT_API_FORMS = frozenset({
    'burmy',
    'shellos',
    'gastrodon',
    'unown',
    'deerling',
    'sawsbuck',
    'vivillon',
    'flabébé',
    'floette',
    'florges',
    'furfrou',
    'sinistea',
    'polteageist',
    'alcremie',
    'poltchageist',
    'sinistcha',
})

# Form suffix used when the caller supplies no form.
_DEFAULT_API_FORMS = {
    'deoxys': 'normal',
    'wormadam': 'plant',
    'giratina': 'origin',
    'tornadus': 'incarnate',
    'shaymin': 'land',
    'basculin': 'red-striped',
    'darmanitan': 'standard',
    'thundurus': 'incarnate',
    'landorus': 'incarnate',
    'enamorus': 'incarnate',
    'keldeo': 'ordinary',
    'meloetta': 'aria',
    'meowstic': 'male',
    'aegislash': 'shield',
    'pumpkaboo': 'average',
    'gourgeist': 'average',
    'minior': 'red-meteor',
    'zygarde': '50 power construct',
    'oricorio': 'baile',
    'lycanroc': 'midday',
    'wishiwashi': 'solo',
    'mimikyu': 'disguised',
    'cramorant': 'gulping',
    'toxtricity': 'low-key',
    'eiscue': 'ice',
    'indeedee': 'male',
    'urshifu': 'single-strike',
    'morpeko': 'full belly',
    'oinkologne': 'male',
    'maushold': 'family of three',
    'squawkabilly': 'green plumage',
    'palafin': 'zero',
    'tatsugiri': 'curly',
    'dudunsparce': 'two segment',
    'basculegion': 'male',
}


def sanitize_name_and_form(name, form):
    """Build the identifier used in PokéAPI URLs for *name* and optional *form*.

    With a form: the label is lower-cased, Tauros breeds are special-cased,
    the ``_FORM_REPLACEMENTS`` rewrites are applied, and a few species get
    extra handling (form dropped, 'cloak' / 'rotom' removed, ' standard'
    appended for Darmanitan). Without a form: a default suffix from
    ``_DEFAULT_API_FORMS`` is used when the species has one.

    The result is ``name`` or ``name-form`` in lower case with spaces turned
    into hyphens and apostrophes, periods and colons removed ('é' becomes
    'e'). ``oinkologne-male`` is replaced by the numeric id ``'916'``.
    """
    species = name.lower()
    adjusted_form = None

    if form:
        adjusted_form = form.lower()
        if species == 'tauros':
            if adjusted_form == 'paldean form':
                adjusted_form = 'paldea combat breed'
            elif 'blaze' in adjusted_form:
                adjusted_form = 'paldea blaze breed'
            elif 'aqua' in adjusted_form:
                adjusted_form = 'paldea aqua breed'

        for old, new in _FORM_REPLACEMENTS.items():
            adjusted_form = adjusted_form.replace(old, new).strip()

        if species in _SPECIES_WITHOUT_API_FORMS:
            adjusted_form = None
        elif species == 'wormadam':
            adjusted_form = adjusted_form.replace('cloak', '').strip()
        elif species == 'rotom':
            adjusted_form = adjusted_form.replace('rotom', '').strip()
        elif species == 'darmanitan':
            adjusted_form = adjusted_form + ' standard'
    else:
        adjusted_form = _DEFAULT_API_FORMS.get(species)

    api_name = f"{species}-{adjusted_form}" if adjusted_form else species
    api_name = (
        api_name.replace(' ', '-')
        .replace("'", "")
        .replace(".", "")
        .replace('é', 'e')
        .replace(':', '')
    )

    # Special case kept from the original: this entry is requested by numeric id.
    if api_name == 'oinkologne-male':
        api_name = '916'
    return api_name
def get_pokemon_data(pokemon_name, form, cache):
    """Return the PokéAPI ``pokemon`` record for *pokemon_name* / *form*.

    A hit in *cache* is returned without any network access. Otherwise the
    record is fetched, stored through ``update_cache()`` and returned.
    Returns ``None`` when the API does not answer with HTTP 200.
    """
    cache_key = f"pokemon_{pokemon_name}_{form}" if form else f"pokemon_{pokemon_name}"
    if cache_key in cache:
        return cache[cache_key]

    api_name = sanitize_name_and_form(pokemon_name, form)
    url = f"https://pokeapi.co/api/v2/pokemon/{api_name}"
    print(f"Fetching Pokémon data for {pokemon_name}: {url}")
    # Without a timeout a stalled connection would block the whole run forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None
    data = response.json()
    update_cache(cache_key, data)
    return data
def get_pokemon_data_bulbapedia(pokemon_name, cache):
    """Return the HTML of the Bulbapedia article for *pokemon_name*.

    A hit in *cache* is returned without any network access. Otherwise the
    page is fetched, stored through ``update_cache()`` and returned as text.
    Returns ``None`` when the site does not answer with HTTP 200.
    """
    cache_key = f"pokemon_{pokemon_name}_bulbapedia"
    if cache_key in cache:
        return cache[cache_key]

    url = f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pokémon)"
    print(f"Fetching Pokémon data for {pokemon_name}: {url}")
    # Without a timeout a stalled connection would block the whole run forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None
    data = response.text
    update_cache(cache_key, data)
    return data
def get_pokemon_encounter_data(pokemon_name, form, cache):
    """Return the PokéAPI ``encounters`` list for *pokemon_name* / *form*.

    A hit in *cache* is returned without any network access. Otherwise the
    data is fetched, stored through ``update_cache()`` and returned.
    Returns ``None`` when the API does not answer with HTTP 200.
    """
    cache_key = f"pokemon_encounter_{pokemon_name}_{form}" if form else f"pokemon_encounter_{pokemon_name}"
    if cache_key in cache:
        return cache[cache_key]

    api_name = sanitize_name_and_form(pokemon_name, form)
    url = f"https://pokeapi.co/api/v2/pokemon/{api_name}/encounters"
    print(f"Fetching encounter data for {pokemon_name}: {url}")
    # Without a timeout a stalled connection would block the whole run forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None
    data = response.json()
    update_cache(cache_key, data)
    return data
def split_td_contents(td):
    """Split the children of *td* at ``<br>`` tags.

    Every run of consecutive non-``<br>`` children is copied into a fresh
    ``<div>``; the list of those divs is returned. Empty runs (leading,
    trailing or doubled ``<br>``) produce nothing.
    """
    def _wrap(nodes):
        # Copies are appended so the nodes stay attached to the original tree.
        wrapper = BeautifulSoup('', 'html.parser').new_tag('div')
        for node in nodes:
            wrapper.append(copy.copy(node))
        return wrapper

    groups = []
    pending = []
    for node in td.contents:
        is_line_break = isinstance(node, Tag) and node.name == 'br'
        if not is_line_break:
            pending.append(node)
        elif pending:
            groups.append(_wrap(pending))
            pending = []
    if pending:
        groups.append(_wrap(pending))
    return groups
def parse_form_information(html_content):
    """Extract ``(main_form, breed)`` from the first ``<small>`` tag in *html_content*.

    The tag text has surrounding parentheses stripped and is split at ``(``:
    the first piece is the main form, the second piece (with ``)`` stripped)
    is the breed, or ``None`` if there is no second piece. Returns
    ``(None, None)`` when there is no ``<small>`` tag.
    """
    small = BeautifulSoup(html_content, 'html.parser').find('small')
    if not small:
        return None, None
    label = small.get_text(strip=True).strip('()')
    pieces = label.split('(')
    breed = pieces[1].strip(')') if len(pieces) > 1 else None
    return pieces[0].strip(), breed
def get_evolution_data_from_bulbapedia(pokemon_name, form, cache):
    """Return the parsed evolution chain from a Pokémon's Bulbapedia page.

    The page's "Evolution_data" section is located first. Without a *form*
    the first table after the section heading is used. With a *form* the
    headings following the section are scanned for an ``<h4>`` containing the
    form name (minus the word "Form"), stopping at the next ``<h3>``, and the
    table after that heading is used.

    Returns ``None`` when the page, section or table is missing. Eevee's table
    is handed to ``parse_eevee_evolution_chain``, everything else to
    ``parse_evolution_chain``.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')
    evolution_section = soup.find('span', id='Evolution_data')
    if not evolution_section:
        return None

    evolution_table = None
    if form:
        form_without_form = form.replace('Form', '').replace('form', '').strip()
        for tag in evolution_section.parent.find_next_siblings():
            if tag.name == 'h4' and form_without_form in tag.get_text(strip=True):
                evolution_table = tag.find_next('table')
                break
            if tag.name == 'h3':
                # Reached the next top-level section without a match.
                break
    else:
        evolution_table = evolution_section.parent.find_next('table')
    if not evolution_table:
        return None

    if pokemon_name == "Eevee":
        return parse_eevee_evolution_chain(evolution_table)
    return parse_evolution_chain(evolution_table, form)
# Layout of an evolution table (slightly odd):
# - The first TR contains the full main evolution chain.
# - Every other TR contains a branching evolution chain.
# - Any TD in the first TR that has a rowspan is part of the main evolution chain.
# - Any other TD is part of a branching evolution chain.
# - A TD containing a table describes the Pokémon at that evolution stage.
# - A TD without a table describes how to trigger the next evolution.
def parse_pokemon_subtable(td):
    """Return ``(name, stage)`` for a cell holding a Pokémon sub-table.

    Cells without a nested table do not describe a Pokémon and yield
    ``(None, None)``.
    """
    if not td.find('table'):
        return None, None
    return extract_pokemon_name(td), extract_stage_form(td)
def parse_eevee_evolution_chain(table):
    """Parse Eevee's evolution table into a single stage with branches.

    The second row of the table's own ``<tbody>`` is read for Eevee, the
    third for the evolution methods (one per cell) and the fourth for the
    evolved forms; cell *i* of the fourth row is paired with method *i*.

    Returns a one-element list holding Eevee's ``EvolutionStage`` with every
    evolved form in ``branches``. Returns ``[]`` when the table does not have
    the expected rows or Eevee's own cell cannot be read.
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []
    rows = tbody.find_all('tr', recursive=False)
    if len(rows) < 4:
        # rows[1], rows[2] and rows[3] are all required below.
        return []
    eevee_row, method_row, eeveelutions_row = rows[1], rows[2], rows[3]

    eevee_td = eevee_row.find('td', recursive=False)
    if eevee_td is None:
        return []
    pokemon_name, stage = parse_pokemon_subtable(eevee_td)
    if pokemon_name is None:
        return []
    eevee_stage = EvolutionStage(pokemon_name, None, stage, None)

    methods = [extract_evolution_method(cell) for cell in method_row.find_all('td', recursive=False)]

    eeveelutions = []
    for index, cell in enumerate(eeveelutions_row.find_all('td', recursive=False)):
        pokemon_name, stage = parse_pokemon_subtable(cell)
        if pokemon_name is None:
            # Cell without a Pokémon sub-table; it still occupies a method slot.
            continue
        method = methods[index] if index < len(methods) else None
        eeveelutions.append(EvolutionStage(pokemon_name, method, stage, None))

    # branches is a flat list of stages, so extend it; append() nested the
    # whole list as a single element.
    eevee_stage.branches.extend(eeveelutions)
    return [eevee_stage]
def _direct_rows(table):
    """Return the ``<tr>`` tags directly under *table*'s own ``<tbody>`` ([] if none)."""
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []
    return tbody.find_all('tr', recursive=False)


def _collect_raw_game_locations(locations_table):
    """Map each known game title to the location groups listed next to it.

    The table is nested three deep: one row per generation, inside it one row
    per release group, inside that one row per game (or game pair) with the
    game names in ``<th>`` cells and a further table holding the locations.
    """
    raw_game_locations = {}
    for generation_row in _direct_rows(locations_table):
        generation_td = generation_row.find('td', recursive=False)
        if not generation_td:
            continue
        generation_table = generation_td.find('table', recursive=False)
        if not generation_table:
            continue
        for nested_row in _direct_rows(generation_table):
            # Header rows carry the generation label and no games.
            if 'Generation' in nested_row.get_text(strip=True):
                continue
            games_td = nested_row.find('td', recursive=False)
            if not games_td:
                continue
            games_table = games_td.find('table', recursive=False)
            if not games_table:
                continue
            for games_row in _direct_rows(games_table):
                titles = (th.get_text(strip=True) for th in games_row.find_all('th'))
                known_games = [title for title in titles if title in all_games]
                if not known_games:
                    continue
                locations_td = games_row.find('td', recursive=False)
                if not locations_td:
                    continue
                inner_table = locations_td.find('table', recursive=False)
                if not inner_table:
                    continue
                inner_tbody = inner_table.find('tbody', recursive=False)
                if not inner_tbody:
                    continue
                # Parsed once per row and shared by every game in that row.
                groups = []
                for cell in inner_tbody.find_all('td'):
                    groups.extend(split_td_contents(cell))
                if not groups:
                    continue
                for title in known_games:
                    raw_game_locations.setdefault(title, []).extend(groups)
    return raw_game_locations


def _collect_event_tables(soup):
    """Return the tables of the "In_events" section keyed by their ``<h5>`` heading.

    Scanning stops at the next ``<h4>`` or at the first sibling whose text
    contains "held". Tables seen before any ``<h5>`` are stored under ``""``.
    """
    event_tables = {}
    events_section = soup.find('span', id='In_events')
    if not events_section:
        return event_tables
    variant = ""
    for sibling in events_section.parent.find_next_siblings():
        if sibling.name == 'h4' or "held" in sibling.get_text(strip=True).lower():
            break
        if sibling.name == 'h5':
            variant = sibling.get_text(strip=True)
        if sibling.name == 'table':
            event_tables[variant] = sibling
    return event_tables


def _location_matches_form(raw_location, form):
    """Return True when the form label inside *raw_location* fuzzily matches *form*."""
    main_form, sub_form = parse_form_information(str(raw_location))
    if not main_form:
        return False
    if main_form == "All Forms":
        main_form = form
    wanted = form.lower()
    if fuzz.partial_ratio(wanted, main_form.lower()) >= 80:
        return True
    return bool(sub_form) and fuzz.partial_ratio(wanted, sub_form.lower()) >= 80


def get_locations_from_bulbapedia(pokemon_name, form, cache):
    """Return ``{game: [location text, ...]}`` scraped from a Pokémon's Bulbapedia page.

    Locations come from the "Game_locations" table. When *form* is given only
    location groups whose form label matches it are kept. Games found in a
    matching table of the "In_events" section additionally get an ``"Event"``
    entry.

    Returns ``None`` when the page, the section or its table cannot be found.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None
    soup = BeautifulSoup(page_data, 'html.parser')
    locations_section = soup.find('span', id='Game_locations')
    if not locations_section:
        return None
    locations_table = locations_section.find_next('table', class_='roundy')
    if not locations_table:
        return None

    raw_game_locations = _collect_raw_game_locations(locations_table)
    event_tables = _collect_event_tables(soup)

    game_locations = {}
    for raw_game, raw_locations in raw_game_locations.items():
        for raw_location in raw_locations:
            if form is not None and not _location_matches_form(raw_location, form):
                continue
            for location in raw_location.get_text().split(','):
                game_locations.setdefault(raw_game, []).append(location.strip())

    # Event distributions: a table applies when its heading is the Pokémon's
    # name (no form requested) or contains the requested form.
    for variant, event_table in event_tables.items():
        if not ((variant == pokemon_name and form is None) or (form and form in variant)):
            continue
        for game_row in event_table.find_all('tr'):
            entries = game_row.find_all('td')
            if len(entries) <= 1:
                continue
            link = entries[0].find('a')
            games_string = link.get('title') if link else None
            if not games_string:
                # No link or no title: nothing to match game names against.
                continue
            for game in all_games:
                if game in games_string:
                    game_locations.setdefault(game, []).append("Event")
    return game_locations
def get_earliest_game(encounter_data, pokemon_name, form):
    """Return ``(game, method)`` for the earliest game with a usable location.

    *encounter_data* maps game titles to lists of location texts. Locations
    mentioning a non-catchable source are ignored; the rest are classified as
    Starter, Gift, Evolve or Catchable. For each game the first method found
    is kept unless a later location is plainly Catchable. Games are then
    tried in ``all_games`` order.

    Returns ``("Unknown", "Unknown")`` when there is no data or no game
    qualifies. *pokemon_name* and *form* are accepted but not used.
    """
    if not encounter_data:
        return "Unknown", "Unknown"

    non_catchable_methods = ["trade", "event", "global link", "poké transfer", "time capsule", "unobtainable", "pokémon home"]
    game_methods = {}
    for game, locations in encounter_data.items():
        game_key = game.lower()
        for location in locations:
            text = location.lower()
            if any(term in text for term in non_catchable_methods):
                continue
            if "first partner" in text:
                method = "Starter"
            elif "received" in text:
                method = "Gift"
            elif "evolve" in text:
                method = "Evolve"
            else:
                method = "Catchable"
            # Keys are lower-cased, so membership must be tested on the
            # lower-cased title too; testing the raw title never matched and
            # let every later location overwrite the method.
            if game_key not in game_methods or method == "Catchable":
                game_methods[game_key] = method

    for game in all_games:
        if game.lower() in game_methods:
            return game, game_methods[game.lower()]
    return "Unknown", "Unknown"
def handle_unown(pokemon, encounter_data):
    """Give an Unown form its encounter information.

    Does nothing for other Pokémon, or when no form-less Unown can be found.
    The "!" and "?" forms with *encounter_data* get one entry per game in that
    data, plus the form-less Unown's entry for every game listed after both
    HeartGold and SoulSilver in ``all_games``. Every other case reuses the
    form-less Unown's encounter list as is.
    """
    if pokemon.name != "Unown":
        return
    base_unown = find_pokemon(pokemon.name, None)
    if not base_unown:
        return

    is_punctuation_form = pokemon.form in ("!", "?")
    if not (is_punctuation_form and encounter_data):
        pokemon.encounter_information = base_unown.encounter_information
        return

    # The ! and ? forms were added in HeartGold and SoulSilver.
    for game_name in encounter_data:
        pokemon.encounter_information.append(
            EncounterInformation(game_name, encounter_data[game_name])
        )

    seen_heartgold = False
    seen_soulsilver = False
    for game in all_games:
        if game == "HeartGold":
            seen_heartgold = True
            continue
        if game == "SoulSilver":
            seen_soulsilver = True
            continue
        if not (seen_heartgold and seen_soulsilver):
            continue
        # Copy over the first matching entry of the form-less Unown, if any.
        match = next((entry for entry in base_unown.encounter_information if entry.game == game), None)
        if match is not None:
            pokemon.encounter_information.append(match)
def handle_deoxys(pokemon, encounter_data):
    """Let a Deoxys form reuse the form-less Deoxys' encounter information.

    Does nothing for other Pokémon, when no form-less Deoxys can be found, or
    when *pokemon* has no form. *encounter_data* is accepted but not used.
    """
    if pokemon.name != "Deoxys":
        return
    base_deoxys = find_pokemon(pokemon.name, None)
    if base_deoxys and pokemon.form:
        pokemon.encounter_information = base_deoxys.encounter_information
# Pokémon whose forms reuse the form-less entry's encounter information
# (consumed by handle_form_shift).
list_of_shifting_form_pokemon = [
    "Deoxys", "Burmy", "Wormadam", "Rotom", "Shaymin", "Keldeo",
    "Furfrou", "Hoopa", "Pumpkaboo", "Gourgeist", "Zygarde", "Magearna",
    "Vivillon", "Minior", "Urshifu", "Oinkologne", "Basculegion", "Enamorus",
]
def handle_form_shift(pokemon, encounter_data):
    """Let a form of a listed Pokémon reuse the form-less entry's encounters.

    Applies only to names in ``list_of_shifting_form_pokemon`` that have a
    form, and only when the form-less entry can be found.
    *encounter_data* is accepted but not used.
    """
    if pokemon.name not in list_of_shifting_form_pokemon or pokemon.form is None:
        return
    base_entry = find_pokemon(pokemon.name, None)
    if base_entry:
        pokemon.encounter_information = base_entry.encounter_information
# Name groups consumed by get_bad_tea_form(); each group is named after the
# pair of form labels that function handles for it. (The misspelt variable
# names are kept because other code refers to them.)
phony_authentic = ["Sinistea", "Polteageist"]
countefieit_atrisan = ["Poltchageist"]
unremarkable_masterpiece = ["Sinistcha"]
bad_tea_pokemon = [*phony_authentic, *countefieit_atrisan, *unremarkable_masterpiece]
def get_bad_tea_form(pokemon):
    """Return the form label to search for on the page of a tea Pokémon.

    An entry without a form maps to the "Phony", "Counterfeit" or
    "Unremarkable" label of its group. "Authentic Form" and "Artisan Form"
    map to ``None``. A Sinistcha with any other form keeps that form.
    Everything else, including Pokémon outside ``bad_tea_pokemon``, yields
    ``None``.
    """
    species = pokemon.name
    if species not in bad_tea_pokemon:
        return
    form = pokemon.form
    has_no_form = form is None

    if species in phony_authentic:
        if has_no_form:
            return "Phony Form"
        if form == "Authentic Form":
            return None
    if species in countefieit_atrisan:
        if has_no_form:
            return "Counterfeit Form"
        if form == "Artisan Form":
            return None
    if species in unremarkable_masterpiece:
        return "Unremarkable Form" if has_no_form else form
def determine_earliest_games(pokemon_list, cache):
    """Fill in encounter information and the earliest game for every Pokémon.

    Works on the module-level ``big_pokemon_list``. For each entry the form
    used for the Bulbapedia lookup is adjusted for a few special cases, the
    locations are fetched and attached, the Unown and shifting-form fix-ups
    are applied, and ``determine_earliest_game()`` is run.

    Returns *pokemon_list* unchanged.
    """
    for pokemon in big_pokemon_list:
        print(f"Processing {pokemon}")

        # Some forms are labelled differently, or not at all, on the page.
        form_to_find = pokemon.form
        if pokemon.name == "Minior" and pokemon.form == "Orange Core":
            form_to_find = None
        if pokemon.name == "Squawkabilly" and pokemon.form:
            form_to_find = pokemon.form.replace("Plumage", "").strip()
        if pokemon.name == "Alcremie":
            form_to_find = None
        if pokemon.name in bad_tea_pokemon:
            form_to_find = get_bad_tea_form(pokemon)

        # The scraper returns None when the page or table is missing;
        # iterating that used to raise TypeError.
        encounter_data = get_locations_from_bulbapedia(pokemon.name, form_to_find, cache) or {}
        for game, locations in encounter_data.items():
            pokemon.encounter_information.append(EncounterInformation(game, locations))

        handle_unown(pokemon, encounter_data)
        handle_form_shift(pokemon, encounter_data)

        if pokemon.name == "Gimmighoul" and pokemon.form == "Roaming Form":
            pokemon.encounter_information.append(
                EncounterInformation("Pokémon Go", ["Pokémon Go"])
            )

        pokemon.determine_earliest_game()
        earliest = pokemon.earliest_game
        if earliest is not None:
            print(f"Processed {pokemon}: {earliest.game} ({earliest.method})")
        else:
            # No obtainable game found; previously an AttributeError.
            print(f"Processed {pokemon}: Unknown (Unknown)")
    return pokemon_list
def get_species_data(pokemon_name, cache):
    """Return the PokéAPI ``pokemon-species`` record for *pokemon_name*.

    A hit in *cache* is returned without any network access. Otherwise the
    record is fetched, stored through ``update_cache()`` and returned.
    Returns ``None`` when the API does not answer with HTTP 200.
    """
    cache_key = f"species_{pokemon_name}"
    if cache_key in cache:
        return cache[cache_key]

    api_name = sanitize_name_and_form(pokemon_name, None)
    url = f"https://pokeapi.co/api/v2/pokemon-species/{api_name}/"
    print(f"Fetching species data for {pokemon_name}: {url}")
    # Without a timeout a stalled connection would block the whole run forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None
    data = response.json()
    update_cache(cache_key, data)
    return data
def get_evolution_chain(pokemon_name, cache):
    """Return the PokéAPI evolution-chain record for *pokemon_name*.

    The chain URL is read from the species record. A hit in *cache* (keyed by
    that URL) is returned without fetching the chain. Returns ``None`` when
    the species record is unavailable or the chain request does not answer
    with HTTP 200.
    """
    species_data = get_species_data(pokemon_name, cache)
    if not species_data:
        return None

    chain_url = species_data['evolution_chain']['url']
    cache_key = f"evolution_{chain_url}"
    if cache_key in cache:
        return cache[cache_key]

    # Without a timeout a stalled connection would block the whole run forever.
    evolution_response = requests.get(chain_url, timeout=30)
    if evolution_response.status_code != 200:
        return None
    evolution_data = evolution_response.json()
    update_cache(cache_key, evolution_data)
    return evolution_data
def get_base_form(evolution_chain: List[EvolutionStage]):
    """Return the name of the non-baby base Pokémon of *evolution_chain*.

    Stages are scanned in order. The first one labelled ``"Unevolved"`` is
    the base. A baby stage points at the base through ``next_stage``; a baby
    with no successor is skipped.

    Returns ``None`` for an empty chain or when no stage qualifies.
    """
    if not evolution_chain:
        return None
    for stage in evolution_chain:
        if stage.stage == "Unevolved":
            return stage.pokemon
        # A baby without a next stage used to raise AttributeError here.
        if stage.is_baby and stage.next_stage is not None:
            return stage.next_stage.pokemon
    return None
def adjust_for_evolution(pokemon_list, cache):
    """Attach evolution data to every entry of ``big_pokemon_list``.

    For each entry the evolution chain is fetched with
    ``get_evolution_data_from_bulbapedia`` and handed to the entry's
    ``add_evolution_chain``; the entry's resulting earliest game and obtain
    method are then printed.

    Args:
        pokemon_list: Unused. The function works on the module-level
            ``big_pokemon_list``; the parameter is kept so existing callers
            keep working.
        cache: Cache mapping forwarded to the Bulbapedia lookup.

    Returns:
        Always an empty list.
    """
    for pokemon in big_pokemon_list:
        evolution_chain = get_evolution_data_from_bulbapedia(pokemon.name, pokemon.form, cache)
        pokemon.add_evolution_chain(evolution_chain)
        game, method = pokemon.get_earliest_game_and_method()
        print(f"Adjusted {pokemon}: {game} ({method})")
    return []
def save_to_csv(pokemon_list, filename='pokemon_earliest_games.csv'):
    """Write one CSV row per entry of ``big_pokemon_list``.

    Columns are ``number``, ``name`` (formatted as ``"<name> (<form>)"``),
    ``earliest_game``, ``obtain_method`` and ``encounter_locations``. The
    last column joins, with ``' | '``, the locations of every encounter
    whose game equals the entry's earliest game.

    Entries whose ``earliest_game`` is ``None`` are written with
    ``"Unknown"`` as game and method and no locations, so one unresolved
    entry does not abort the export half-way through the file.

    Args:
        pokemon_list: Unused. Rows come from the module-level
            ``big_pokemon_list``; the parameter is kept so existing callers
            keep working.
        filename: Destination path; the file is overwritten.
    """
    fieldnames = ['number', 'name', 'earliest_game', 'obtain_method', 'encounter_locations']
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for pokemon in big_pokemon_list:
            earliest = pokemon.earliest_game
            if earliest is None:
                # No earliest game was determined for this entry.
                game, method = 'Unknown', 'Unknown'
                encounter_locations = []
            else:
                game, method = earliest.game, earliest.method
                encounter_locations = [
                    location
                    for encounter in pokemon.encounter_information
                    if encounter.game == game
                    for location in encounter.locations
                ]
            writer.writerow({
                'number': pokemon.number,
                'name': f"{pokemon.name} ({pokemon.form})",
                'earliest_game': game,
                'obtain_method': method,
                'encounter_locations': ' | '.join(str(item) for item in encounter_locations),
            })
def parse_encounter_locations(encounter_data, game):
    """Summarise where a Pokémon can be encountered in one game.

    Each encounter is rendered as ``"<location> (<method>[, <condition>][, <time>])"``.
    Duplicates are dropped and first-seen order is kept.

    Args:
        encounter_data: Iterable of dicts, each with a
            ``['location_area']['name']`` and a ``'version_details'`` list
            whose items carry ``['version']['name']`` and
            ``'encounter_details'``. ``None`` or an empty iterable yields
            ``[]``.
        game: Game title, e.g. ``"Ruby"`` or ``"Omega Ruby"``. Matching is
            case-insensitive; spaces are also tried as hyphens because
            PokéAPI version names are hyphenated (``omega-ruby``,
            ``black-2``).

    Returns:
        List of formatted encounter strings for *game*.
    """
    game_key = game.lower()
    # Accept the lower-cased title as-is and in its hyphenated form, so
    # multi-word titles match hyphenated version names.
    accepted_versions = {game_key, game_key.replace(' ', '-')}

    locations = []
    seen = set()  # membership check for de-duplication
    for location_area in encounter_data or ():
        for version_detail in location_area['version_details']:
            if version_detail['version']['name'] not in accepted_versions:
                continue
            location_name = location_area['location_area']['name']
            for encounter_detail in version_detail['encounter_details']:
                qualifiers = [str(encounter_detail['method']['name'])]
                condition = encounter_detail.get('condition', 'Any')
                if condition != 'Any':
                    qualifiers.append(str(condition))
                # Named ``time_of_day`` so the imported ``time`` module is not shadowed.
                time_of_day = ', '.join(encounter_detail.get('time', ['Any']))
                if time_of_day != 'Any':
                    qualifiers.append(time_of_day)
                encounter_info = f"{location_name} ({', '.join(qualifiers)})"
                if encounter_info not in seen:
                    seen.add(encounter_info)
                    locations.append(encounter_info)
    return locations
def add_encounter_locations(pokemon_list, cache):
    """Fill in ``encounter_locations`` on every record of *pokemon_list*.

    Records whose ``obtain_method`` is ``'Catchable'`` get the locations
    parsed from ``get_pokemon_encounter_data`` for their earliest game,
    joined with ``' | '`` (or ``'Unknown'`` when none are found). All other
    records get ``'N/A'``. A progress line is printed per record.

    Args:
        pokemon_list: List of dict records; mutated in place.
        cache: Cache mapping forwarded to the encounter lookup.

    Returns:
        The same *pokemon_list* object.
    """
    for entry in pokemon_list:
        if entry['obtain_method'] != 'Catchable':
            entry['encounter_locations'] = 'N/A'
        else:
            raw_encounters = get_pokemon_encounter_data(entry['base_name'], entry['form'], cache)
            found = parse_encounter_locations(raw_encounters, entry['earliest_game'])
            if found:
                entry['encounter_locations'] = ' | '.join(found)
            else:
                entry['encounter_locations'] = 'Unknown'
        print(f"Added encounter locations for {entry['name']} (#{entry['number']}) in {entry['earliest_game']}")
    return pokemon_list
def get_marriland_page(pokemon_name, cache):
    """Return the HTML of a Pokémon's Marriland Pokédex page.

    The URL slug is the lower-cased name with spaces turned into hyphens
    and parentheses removed. The page is looked up in *cache* under
    ``marriland_<slug>`` first; on a miss it is downloaded and handed to
    ``update_cache``.

    Args:
        pokemon_name: Display name of the Pokémon.
        cache: Mapping of cache keys to previously fetched pages.

    Returns:
        The page text, or ``None`` when the request fails or returns an
        error status (the error is printed).
    """
    # One pass: ' ' -> '-', and '(' / ')' dropped.
    slug_table = str.maketrans({' ': '-', '(': None, ')': None})
    slug = pokemon_name.lower().translate(slug_table)

    key = f"marriland_{slug}"
    if key in cache:
        return cache[key]

    page_url = f"https://marriland.com/pokedex/{slug}/"
    try:
        reply = requests.get(page_url)
        reply.raise_for_status()  # turn 4xx/5xx statuses into an exception
        html = reply.text
        update_cache(key, html)
        return html
    except requests.RequestException as err:
        print(f"Error accessing the page for {pokemon_name}: {err}")
        return None
def is_event_pokemon(pokemon_name, cache):
    """Tell whether Marriland lists a Pokémon as event-only.

    The page from ``get_marriland_page`` is parsed; the Pokémon counts as
    event-only when the page has no ``table.location-table`` elements and
    its ``div.location-special`` text contains "Only available from events
    or promotions." (compared case-insensitively).

    Args:
        pokemon_name: Display name of the Pokémon.
        cache: Cache mapping forwarded to ``get_marriland_page``.

    Returns:
        ``True`` or ``False`` as described above; ``False`` when no page
        could be obtained; ``None`` when the page has no
        ``div#locations`` section (a message is printed).
    """
    html = get_marriland_page(pokemon_name, cache)
    if not html:
        return False

    document = BeautifulSoup(html, 'html.parser')

    # The "Where to Find" section must exist for the page to be usable.
    if not document.find('div', id='locations'):
        print(f"Could not find 'Where to Find' section for {pokemon_name}")
        return None

    special_block = document.find('div', class_='location-special')
    encounter_tables = document.find_all('table', class_='location-table')
    if encounter_tables or not special_block:
        return False

    marker = "Only available from events or promotions.".lower()
    return marker in special_block.get_text(strip=True).lower()
def check_alternative_sources(pokemon, cache):
    """Classify a record whose earliest game or obtain method is unknown.

    Checks are applied in order, using the species data from
    ``get_species_data``:

    1. ``is_mythical``  -> ``("Event", "Event")``
    2. ``is_legendary`` -> ``(pokemon['earliest_game'], "Legendary")``
    3. ``is_event_pokemon`` is truthy -> ``("Event", "Event")``

    Args:
        pokemon: Dict record; ``base_name``, ``name`` and ``earliest_game``
            are read.
        cache: Cache mapping forwarded to the lookups.

    Returns:
        A ``(game, method)`` tuple; ``("Unknown", "Unknown")`` when no
        species data is available or no check matches.
    """
    unresolved = ("Unknown", "Unknown")

    species = get_species_data(pokemon['base_name'], cache)
    if not species:
        return unresolved

    if species.get('is_mythical', False):
        return "Event", "Event"

    if species.get('is_legendary', False):
        return pokemon['earliest_game'], "Legendary"

    # NOTE(review): the Marriland check is assumed to run only when species
    # data was found -- confirm against the intended nesting.
    if is_event_pokemon(pokemon['name'], cache):
        return "Event", "Event"

    return unresolved
def handle_unknown_encounters(pokemon_list, cache):
    """Report entries of ``big_pokemon_list`` that still lack an earliest game.

    An entry is reported (one printed line) when its ``earliest_game`` is
    ``None`` or that object's ``method`` is ``None``. Nothing is modified.

    Args:
        pokemon_list: Unused. The function works on the module-level
            ``big_pokemon_list``; the parameter is kept so existing callers
            keep working.
        cache: Unused; kept for the same reason.

    Returns:
        ``None``.
    """
    for pokemon in big_pokemon_list:
        earliest = pokemon.earliest_game
        # Identity checks: ``None`` is a singleton, and ``is`` cannot be
        # redirected by a custom ``__eq__`` on the game object.
        if earliest is None or earliest.method is None:
            print(f"Checking alternative sources for {pokemon.name}")
    return None
# Script entry point: load the cache, run the pipeline, export the CSV.
if __name__ == "__main__":
    try:
        get_cached_data()
        pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000)
        # NOTE(review): pokemon_index is not read in this block, but it is a
        # module-level name other functions may use -- confirm before removing.
        pokemon_index = create_pokemon_index(big_pokemon_list)
        pokemon_list_with_games = determine_earliest_games(pokemon_list, cache)
        pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache)
        # The add_encounter_locations step is currently disabled.
        pokemon_list_final = handle_unknown_encounters(pokemon_list_adjusted, cache)
        save_to_csv(pokemon_list_final)
    finally:
        # Run even when a step above raises, so entries fetched so far are
        # persisted and the connection is always closed.
        try:
            save_cached_data()  # Save any remaining new entries
        finally:
            conn.close()  # Close the database connection
    print(f"Earliest obtainable games and encounter locations determined for {len(pokemon_list)} Pokémon and saved to pokemon_earliest_games.csv")