from __future__ import annotations
import csv
import requests
import time
import json
import os
import re
import sqlite3
from bs4 import BeautifulSoup , Tag , NavigableString
import copy
from typing import List , Optional
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from collections import defaultdict
import os
import sys
sys . path . append ( os . path . dirname ( os . path . dirname ( os . path . abspath ( __file__ ) ) ) )
from DataGatherers . cache_manager import CacheManager
# Every game (plus DLC and companion apps) that is recognised as a source of
# Pokémon. Entries are grouped by generation and, within a generation, the
# "special" releases come first. The order matters: the earliest-game search
# walks this list front to back and stops at the first game with an encounter.
all_games = [
    # Generation I
    "Yellow", "Red", "Blue",
    # Generation II
    "Crystal", "Gold", "Silver",
    # Generation III
    "Emerald", "FireRed", "LeafGreen", "Ruby", "Sapphire",
    # Generation IV
    "Platinum", "HeartGold", "SoulSilver", "Diamond", "Pearl",
    # Generation V
    "Black 2", "White 2", "Black", "White",
    # Generation VI
    "X", "Y", "Omega Ruby", "Alpha Sapphire",
    # Generation VII
    "Ultra Sun", "Ultra Moon", "Sun", "Moon",
    # Generation VIII
    "Sword", "Shield", "Expansion Pass",
    "Brilliant Diamond", "Shining Pearl",
    "Legends: Arceus",
    # Generation IX (base games and DLC, including version-specific DLC labels)
    "Scarlet", "Violet",
    "The Teal Mask",
    "The Hidden Treasure of Area Zero",
    "The Hidden Treasure of Area Zero (Scarlet)",
    "The Hidden Treasure of Area Zero (Violet)",
    "The Teal Mask (Scarlet)",
    "The Teal Mask (Violet)",
    # Fallbacks that are not main-series games
    "Unknown",
    "Pokémon Home",
    "Pokémon Go",
]
# Every Pokemon object created by read_pokemon_list, in the order the rows were read.
big_pokemon_list = [ ]
# Lower-cased Pokémon name -> list of Pokemon objects sharing that name (one per form).
# Replaced wholesale by create_pokemon_index and read by find_pokemon.
pokemon_index = { }
def create_pokemon_index(pokemon_list):
    """Rebuild the module-level ``pokemon_index`` from ``pokemon_list``.

    Each entry must expose a ``name`` attribute. Entries are grouped under
    their lower-cased name, keeping the order in which they appear in
    ``pokemon_list``. The previous index is discarded.
    """
    global pokemon_index
    grouped_by_name = defaultdict(list)
    for entry in pokemon_list:
        key = entry.name.lower()
        grouped_by_name[key].append(entry)
    pokemon_index = grouped_by_name
def _best_form_match(candidates, form, threshold):
    """Return the candidate whose form is most similar to ``form``.

    Candidates without a form are ignored. Returns ``None`` when no candidate
    scores strictly above zero or the best score is below ``threshold``.
    """
    wanted_form = form.lower()
    best_match = None
    best_score = 0
    for candidate in candidates:
        if not candidate.form:
            continue
        score = fuzz.ratio(wanted_form, candidate.form.lower())
        if score > best_score:
            best_score = score
            best_match = candidate
    if best_match is not None and best_score >= threshold:
        return best_match
    return None


def find_pokemon(name, form=None, threshold=80):
    """Look up a Pokémon in ``pokemon_index`` by name and, optionally, form.

    Args:
        name: Pokémon name; matched case-insensitively. ``None`` or an empty
            string yields ``None`` instead of raising.
        form: Optional form label. When given, the candidate with the most
            similar form (``fuzz.ratio``) is returned if it reaches
            ``threshold``.
        threshold: Minimum fuzzy score (0-100) for both name and form matches.

    Returns:
        The matching entry from ``pokemon_index``, or ``None`` if nothing
        matches well enough. Without ``form`` the first entry registered
        under the name is returned.
    """
    # Callers may pass a name scraped from HTML that turned out to be missing.
    if not name:
        return None

    name = name.lower()

    if name in pokemon_index:
        candidates = pokemon_index[name]
        if not form:
            return candidates[0] if candidates else None
        match = _best_form_match(candidates, form, threshold)
        if match is not None:
            return match
        # No form matched: fall through to the fuzzy name search, exactly as
        # the exact-name miss does.

    # No usable exact match, so pick the most similar known name.
    best_name = None
    best_name_score = 0
    for known_name in pokemon_index:
        score = fuzz.ratio(name, known_name)
        if score > best_name_score:
            best_name_score = score
            best_name = known_name

    if not best_name or best_name_score < threshold:
        return None

    candidates = pokemon_index[best_name]
    if not form:
        return candidates[0] if candidates else None
    return _best_form_match(candidates, form, threshold)
def roman_to_int(s):
    """Convert an upper-case Roman numeral string to an integer.

    A symbol is subtracted when the symbol immediately to its right is
    larger, and added otherwise. An empty string gives 0. Characters outside
    ``IVXLCDM`` raise ``KeyError``.
    """
    numeral_values = {'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100, 'D': 500, 'M': 1000}
    values = [numeral_values[symbol] for symbol in s]

    total = 0
    # Pair every value with its right-hand neighbour (0 after the last one).
    for value, right_neighbour in zip(values, values[1:] + [0]):
        if value >= right_neighbour:
            total += value
        else:
            total -= value
    return total
class Pokemon:
    """A Pokémon (optionally one specific form) and how it is first obtained."""

    def __init__(self, name: str, number: int, form: Optional[str] = None):
        """Store the identity of the Pokémon; everything else starts empty.

        Args:
            name: Base species name, without any form suffix.
            number: Pokédex number as supplied by the caller.
            form: Form label, or ``None`` for the default form.
        """
        self.name = name
        self.number = number
        self.form = form
        self.stage: Optional[str] = None
        self.evolution_chain: Optional[List['EvolutionStage']] = []
        self.is_baby = False
        self.encounter_information: Optional[List['EncounterInformation']] = []
        self.earliest_game: Optional['EncounterInformation'] = None
        self.obtain_method: Optional[str] = None
        self.introduced_in_gen = None

    def get_earliest_game_and_method(self):
        """Return ``(game, method)`` describing how this Pokémon is first obtained.

        Only the first stage of the evolution chain is consulted:

        * baby Pokémon report that stage's earliest game with method ``"Breed"``;
        * if that stage is this very Pokémon, its own earliest game is used;
        * otherwise that stage's earliest game is reported with ``"Evolve"``.

        When the chain is missing, or the consulted stage has no resolved
        Pokémon / earliest game, the Pokémon's own ``earliest_game`` is used.
        ``(None, None)`` is returned when nothing is known.
        """
        if self.evolution_chain:
            reference = self.evolution_chain[0].pokemon_reference
            if self.is_baby:
                origin = reference.earliest_game if reference is not None else None
                method = "Breed"
            elif reference is self:
                origin = self.earliest_game
                method = origin.method if origin is not None else None
            else:
                origin = reference.earliest_game if reference is not None else None
                method = "Evolve"
            # Previously a missing reference or earliest game raised
            # AttributeError here; fall back to our own data instead.
            if origin is not None:
                return origin.game, method

        if self.earliest_game:
            return self.earliest_game.game, self.earliest_game.method
        return None, None

    def __str__(self):
        return f"{self.name}{' ' if self.form else ''}{self.form if self.form else ''} (#{self.number})"

    def add_evolution_chain(self, evolution_chain: List['EvolutionStage']):
        """Replace the stored evolution chain."""
        self.evolution_chain = evolution_chain

    def add_stage(self, stage: str):
        """Record the evolution stage label and derive ``is_baby`` from it."""
        self.stage = stage
        self.is_baby = self.stage is not None and 'Baby' in self.stage

    @staticmethod
    def _location_text(location) -> str:
        """Return the searchable text of one location entry.

        Entries are either plain strings or the ``{"location": ..., "tag": ...}``
        dictionaries produced by ``record_location_info``.
        """
        if isinstance(location, dict):
            return location.get("location") or ""
        return str(location)

    def update_encounter_information(self, exclude_events=True, exclude_home=True, exclude_go=True):
        """Recompute ``method`` on every stored encounter.

        Each encounter's method is reset to ``None`` and then derived from its
        locations. Locations whose text mentions a non-catchable source
        (trades, transfers, ... plus events / Pokémon HOME / Pokémon GO when
        the matching ``exclude_*`` flag is set) are ignored. If several
        locations qualify, the last one decides the method.
        """
        if not self.encounter_information:
            return

        non_catchable_methods = ["trade", "global link", "poké transfer", "time capsule", "unobtainable"]
        if exclude_events:
            non_catchable_methods.append("event")
        if exclude_home:
            # Scraped text spells it "Pokémon HOME"; the unaccented variant
            # alone never matched, so both spellings are checked.
            non_catchable_methods.extend(("pokemon home", "pokémon home"))
        if exclude_go:
            non_catchable_methods.append("pokémon go")

        for encounter in self.encounter_information:
            encounter.method = None
            for location in encounter.locations:
                text = self._location_text(location).lower()
                if any(marker in text for marker in non_catchable_methods):
                    continue

                if "first partner" in text:
                    encounter.method = "Starter"
                elif "received" in text:
                    encounter.method = "Gift"
                elif "evolve" in text:
                    encounter.method = "Evolve"
                elif "event" in text:
                    encounter.method = "Event"
                else:
                    encounter.method = "Catchable"

    def parse_encoutners_for_games(self):
        """Set ``earliest_game`` to the first game in ``all_games`` with a usable encounter.

        An encounter is usable when its ``method`` is truthy. ``earliest_game``
        is left untouched when no game qualifies.
        """
        game_methods = {}
        for encounter in self.encounter_information or []:
            if encounter.method:
                game_methods[encounter.game.lower()] = encounter

        for game in all_games:
            key = game.lower()
            if key in game_methods:
                self.earliest_game = game_methods[key]
                return

    def determine_earliest_game(self):
        """Work out ``earliest_game``, relaxing the exclusion rules step by step.

        The strict rules are tried first; then events, Pokémon HOME and
        Pokémon GO are each allowed in turn (one relaxation at a time).
        ``earliest_game`` ends up ``None`` when nothing qualifies.
        """
        # Start from a clean slate so a previous result cannot leak through.
        self.earliest_game = None
        if not self.encounter_information:
            return

        relaxations = (
            {},
            {"exclude_events": False},
            {"exclude_home": False},
            {"exclude_go": False},
        )
        for overrides in relaxations:
            self.update_encounter_information(**overrides)
            self.parse_encoutners_for_games()
            if self.earliest_game is not None:
                return
class EvolutionStage:
    """One node of an evolution chain, linked to its neighbours and branches."""

    def __init__(self, pokemon: str, method: Optional[str] = None, stage: Optional[str] = None, form: Optional[str] = None):
        """Create a stage and resolve it against the known Pokémon.

        Args:
            pokemon: Name of the Pokémon at this stage.
            method: How this stage is reached from the previous one, if any.
            stage: Stage label; a label containing ``'Baby'`` marks a baby.
            form: Form label used for the lookup; if the lookup with the form
                finds nothing, it is repeated without a form.
        """
        self.pokemon = pokemon
        self.method = method
        # Links are filled in by whoever assembles the chain.
        self.next_stage: Optional[EvolutionStage] = None
        self.previous_stage: Optional[EvolutionStage] = None
        self.branches: List[EvolutionStage] = []
        self.stage = stage
        self.is_baby = stage is not None and 'Baby' in stage

        resolved = find_pokemon(pokemon, form)
        if resolved is None:
            resolved = find_pokemon(pokemon, None)
        self.pokemon_reference = resolved
        self.form = form

    def __str__(self):
        form_part = self.form if self.form else ''
        method_part = self.method if self.method else 'Base'
        return f"{self.pokemon} {form_part} ({method_part})"
class EncounterInformation:
    """The locations where a Pokémon is found in one game, plus the derived obtain method."""

    def __init__(self, game: str, locations: List[str]):
        """Remember the game and its locations; the method starts as ``"Unknown"``."""
        self.game, self.method = game, "Unknown"
        self.locations = locations
def _stage_from_td(td: Tag, method: Optional[str]) -> EvolutionStage:
    """Build the EvolutionStage described by a Pokémon-information cell."""
    pokemon_name = extract_pokemon_name(td)
    stage = extract_stage_form(td)
    evolution_form = extract_evolution_form(td, pokemon_name)
    return EvolutionStage(pokemon_name, method, stage, evolution_form)


def parse_evolution_chain(table: Tag, form: Optional[str] = None) -> List[EvolutionStage]:
    """Parse an evolution table into a list of linked EvolutionStage objects.

    The first row of the table's own ``tbody`` is the main chain; every later
    row is a branch. A cell that contains a nested table describes a Pokémon;
    any other cell holds the evolution method for the next Pokémon cell.

    Args:
        table: The evolution ``<table>`` element.
        form: Accepted for interface compatibility; not used by the parser.

    Returns:
        The main-chain stages in order. Branch stages are reachable through
        ``branches`` / ``next_stage``. An empty list is returned when the
        table has no direct ``tbody`` or the ``tbody`` has no rows.
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if not rows:
        # An empty tbody used to raise IndexError on rows[0].
        return []
    main_row, branch_rows = rows[0], rows[1:]

    # Main evolution chain.
    main_chain = []
    current_stage = None
    pending_method = None
    for td in main_row.find_all('td', recursive=False):
        if not td.find('table'):
            # Method cell: applies to the next Pokémon cell.
            pending_method = extract_evolution_method(td)
            continue

        new_stage = _stage_from_td(td, pending_method)
        pending_method = None
        if current_stage:
            current_stage.next_stage = new_stage
            new_stage.previous_stage = current_stage
        current_stage = new_stage
        main_chain.append(current_stage)

    # Branching evolutions.
    for row in branch_rows:
        branch_stage = None
        branch_method = None
        for td in row.find_all('td', recursive=False):
            if not td.find('table'):
                branch_method = extract_evolution_method(td)
                continue

            new_stage = _stage_from_td(td, branch_method)
            branch_method = None
            if branch_stage:
                branch_stage.next_stage = new_stage
                new_stage.previous_stage = branch_stage
            branch_stage = new_stage

            # Attach the branch to the main-chain stage of the same name when
            # the cell spans several rows.
            if td.get('rowspan'):
                for main_stage in main_chain:
                    if main_stage.pokemon == branch_stage.pokemon:
                        main_stage.branches.append(branch_stage)
                        branch_stage.previous_stage = main_stage
                        break

    return main_chain
def find_name_tag(td: Tag) -> Optional[Tag]:
    """Return the anchor carrying the Pokémon's name inside ``td``'s nested table.

    A link with class ``selflink`` is preferred; otherwise the first link that
    has a ``title`` attribute and whose class is not ``image`` is returned
    (``None`` if there is none).
    """
    info_table = td.find('table')

    self_link = info_table.find('a', class_='selflink')
    if self_link:
        return self_link

    return info_table.find('a', title=True, class_=lambda x: x != 'image')
def extract_pokemon_name(td: Tag) -> Optional[str]:
    """Return the stripped text of the name link in ``td``, or ``None`` if no link is found."""
    link = find_name_tag(td)
    if not link:
        return None
    return link.get_text(strip=True)
def extract_evolution_method(td: Tag) -> str:
    """Return the whitespace-stripped text of a method cell."""
    method_text = td.get_text(strip=True)
    return method_text
def extract_stage_form(td: Tag) -> Optional[str]:
    """Return the text of the first ``<small>`` in ``td``'s nested table, or ``None``."""
    label = td.find('table').find('small')
    if not label:
        return None
    return label.get_text(strip=True)
def extract_evolution_form(td: Tag, name: str) -> Optional[str]:
    """Return the form label shown next to the Pokémon's name, if any.

    The parent element of the name link is searched for ``<small>`` tags; a
    form is reported only when there is more than one, in which case the text
    of the first is returned. ``name`` is accepted but not used.
    """
    name_tag = find_name_tag(td)
    if not name_tag:
        return None

    small_tags = name_tag.parent.find_all('small')
    if len(small_tags) <= 1:
        return None
    return small_tags[0].get_text(strip=True)
def read_pokemon_list(filename, limit=50):
    """Load Pokémon from a CSV file into the module-level ``big_pokemon_list``.

    The CSV must provide ``name`` and ``number`` columns. A name such as
    ``"Unown (A)"`` is split into the base name ``"Unown"`` and the form
    ``"A"``; names without a trailing parenthesised part get ``form=None``.

    Args:
        filename: Path of the UTF-8 encoded CSV file.
        limit: Maximum number of rows to read; ``None`` reads every row.

    Returns:
        ``big_pokemon_list`` itself (new entries are appended to whatever it
        already contained).
    """
    # "<base name> (<form>)" where the parenthesised form is optional.
    name_pattern = re.compile(r'(.*?)\s*(\(.*\))?$')

    with open(filename, 'r', newline='', encoding='utf-8') as csvfile:
        for i, row in enumerate(csv.DictReader(csvfile)):
            if limit is not None and i >= limit:
                break

            match = name_pattern.match(row['name'])
            base_name, form = match.groups() if match else (row['name'], None)
            base_name = base_name.strip()
            form = form.strip('()') if form else None

            big_pokemon_list.append(Pokemon(base_name, row['number'], form))

    return big_pokemon_list
def get_pokemon_data_bulbapedia(pokemon_name, cache: CacheManager):
    """Fetch the Bulbapedia article for ``pokemon_name`` through ``cache``.

    Returns whatever ``cache.fetch_url`` returns for the article URL.
    """
    article = f"{pokemon_name}_(Pokémon)"
    return cache.fetch_url("https://bulbapedia.bulbagarden.net/wiki/" + article)
def split_td_contents(td):
    """Split the children of ``td`` into groups separated by ``<br>`` / ``<p>`` tags.

    Each group is returned as a fresh ``<div>`` holding copies of the nodes
    collected since the previous separator. When the separator that closes a
    non-empty group is a ``<p>``, a copy of that ``<p>`` is added to the group
    as well. Separators that follow an empty group produce nothing.
    """
    groups = []
    pending = []

    def wrap_pending():
        # Copy the pending nodes into a new <div> so the source tree is untouched.
        wrapper = BeautifulSoup('', 'html.parser').new_tag('div')
        groups.append(wrapper)
        for node in pending:
            wrapper.append(copy.copy(node))
        return wrapper

    for node in td.contents:
        is_separator = isinstance(node, Tag) and node.name in ('br', 'p')
        if not is_separator:
            pending.append(node)
            continue
        if pending:
            wrapper = wrap_pending()
            if node.name == 'p':
                wrapper.append(copy.copy(node))
            pending.clear()

    if pending:
        wrap_pending()
    return groups
def parse_form_information(html_content):
    """Extract the form labels mentioned in one location snippet.

    Returns a list of ``{"main_form": ..., "sub_form": ...}`` dictionaries.

    * When the snippet has a ``<small>`` tag, every ``<b>`` inside it is a
      label. ``"A/B Suffix"`` expands to ``"A Suffix"`` and ``"B Suffix"``;
      ``"Main (Sub)"`` yields a main form and a sub form; labels whose main
      part contains "factor" are skipped because they are not forms.
    * Otherwise every ``<b>`` whose text contains "form" (and whose parent is
      not a ``<sup>``) is reported as a main form.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    forms = []

    small_tag = soup.find('small')
    if not small_tag:
        # No <small> wrapper (e.g. Gimmighoul): fall back to bold headings.
        for heading in soup.find_all('b'):
            if heading.parent.name == 'sup':
                continue
            label = heading.get_text(strip=True)
            if "form" not in label.lower():
                continue
            forms.append({"main_form": label, "sub_form": None})
        return forms

    # Form info is in bold inside the small tag.
    for bold_tag in small_tag.find_all('b'):
        # Drop surrounding parentheses.
        text = bold_tag.get_text(strip=True).strip('()')

        if "/" in text:
            # "Red/Blue Flower": the last word is shared by every alternative.
            shared_suffix = text.split()[-1]
            alternatives = text.replace(shared_suffix, "").strip().split('/')
            for alternative in alternatives:
                forms.append({
                    "main_form": alternative.strip() + " " + shared_suffix,
                    "sub_form": None,
                })
            continue

        # "Main (Sub)": split off the optional bracketed part.
        pieces = text.split('(')
        main_form = pieces[0].strip()

        # "Factor"s are properties of an encounter, not actual forms.
        if main_form and "factor" in main_form.lower():
            continue

        sub_form = pieces[1].strip(')') if len(pieces) > 1 else None
        forms.append({"main_form": main_form, "sub_form": sub_form})

    return forms
def get_evolution_data_from_bulbapedia(pokemon_name, form, cache: CacheManager, gender: Optional[str] = None):
    """Fetch and parse the evolution chain of a Pokémon from its Bulbapedia page.

    Args:
        pokemon_name: Species name used to build the article URL.
        form: Optional form label. When given, the table following the ``h4``
            heading that mentions the form (with the word "Form"/"form"
            removed) is used; the search stops at the next ``h3``.
        cache: Object used to fetch the page.
        gender: Accepted for interface compatibility; currently unused.

    Returns:
        The parsed chain (a list of EvolutionStage), or ``None`` when the
        page, the "Evolution data" section or a suitable table is missing.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')
    evolution_section = soup.find('span', id='Evolution_data')
    if not evolution_section:
        return None
    section_heading = evolution_section.parent

    evolution_table = None
    if form:
        form_keyword = form.replace('Form', '').replace('form', '').strip()
        for tag in section_heading.find_next_siblings():
            if tag.name == 'h4' and form_keyword in tag.get_text(strip=True):
                evolution_table = tag.find_next('table')
                break
            if tag.name == 'h3':
                # Reached the next section without finding the form.
                break
    else:
        evolution_table = section_heading.find_next('table')

    if not evolution_table:
        return None

    # Eevee's table has its own layout and therefore its own parser.
    if pokemon_name == "Eevee":
        return parse_eevee_evolution_chain(evolution_table)
    return parse_evolution_chain(evolution_table, form)
# Layout of an evolution table (it is a little odd):
# - the first TR contains the full main evolution chain
# - the other TRs contain branching evolution chains
# - any TDs in the first TR with a rowspan are part of the main evolution chain
# - any other TDs are part of the branching evolution chains
# - a TD containing a table holds information about the Pokémon at that evolution stage
# - a TD without a table describes how to trigger the next evolution
def parse_pokemon_subtable(td):
    """Return ``(pokemon_name, stage)`` for a Pokémon cell, or ``(None, None)``.

    A cell counts as a Pokémon cell only when it contains a nested table.
    """
    if not td.find('table'):
        return None, None
    return extract_pokemon_name(td), extract_stage_form(td)
def parse_eevee_evolution_chain(table):
    """Parse Eevee's evolution table into a single-stage chain with branches.

    The table is read by row position: row 1 holds Eevee, row 2 the evolution
    methods and row 3 the evolutions, with methods and evolutions paired by
    column order (row 0 is skipped).

    Returns:
        ``[eevee_stage]`` with every evolution in ``eevee_stage.branches``, or
        an empty list when the table does not have the expected shape (no
        direct ``tbody``, fewer than four rows, or no Eevee cell).
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if len(rows) < 4:
        # Indexing rows[1..3] on a shorter table used to raise IndexError.
        return []
    eevee_row, method_row, eeveelutions_row = rows[1], rows[2], rows[3]

    eevee_td = eevee_row.find('td', recursive=False)
    if eevee_td is None:
        return []
    pokemon_name, stage = parse_pokemon_subtable(eevee_td)
    eevee_stage = EvolutionStage(pokemon_name, None, stage, None)

    methods = [
        extract_evolution_method(method_td)
        for method_td in method_row.find_all('td', recursive=False)
    ]

    eeveelutions = []
    for index, eeveelution_td in enumerate(eeveelutions_row.find_all('td', recursive=False)):
        pokemon_name, stage = parse_pokemon_subtable(eeveelution_td)
        # Tolerate an evolution column without a matching method cell.
        method = methods[index] if index < len(methods) else None
        eeveelution_stage = EvolutionStage(pokemon_name, method, stage, None)
        eeveelution_stage.previous_stage = eevee_stage  # back link to Eevee
        eeveelutions.append(eeveelution_stage)

    # Branches are set directly as a flat list, not as a nested list.
    eevee_stage.branches = eeveelutions
    return [eevee_stage]
def get_intro_generation(pokemon_name, form, cache: CacheManager):
    """Return the first generation listed in a Pokémon's "Game locations" table.

    The nested tables are walked in document order and the first row whose
    text matches ``Generation <Roman numeral>`` decides the result.

    Args:
        pokemon_name: Species name used to build the article URL.
        form: Accepted for interface compatibility; not used.
        cache: Object used to fetch the page.

    Returns:
        The generation as an integer, or ``None`` when the page, the section,
        the table or a generation heading cannot be found.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')
    locations_section = soup.find('span', id='Game_locations')
    if not locations_section:
        return None

    locations_table = locations_section.find_next('table', class_='roundy')
    if not locations_table:
        return None

    generation_tbody = locations_table.find('tbody', recursive=False)
    if not generation_tbody:
        return None

    generation_pattern = re.compile(r"Generation\s+([IVXLCDM]+)")

    for generation_row in generation_tbody.find_all('tr', recursive=False):
        nested_td = generation_row.find('td', recursive=False)
        if not nested_td:
            continue
        nested_table = nested_td.find('table', recursive=False)
        if not nested_table:
            continue
        nested_tbody = nested_table.find('tbody', recursive=False)
        if not nested_tbody:
            continue

        for nested_row in nested_tbody.find_all('tr', recursive=False):
            match = generation_pattern.search(nested_row.get_text(strip=True))
            if match:
                # group(1) is just the Roman numeral.
                return roman_to_int(match.group(1))

    return None
def compare_forms(a, b):
    """Tell whether two form labels refer to the same form.

    ``None`` never matches anything. Otherwise the labels match when they are
    identical, or identical after normalisation: lower-casing, removing the
    words "forme"/"form", replacing "é" with "e", trimming, and unifying a few
    known spelling variants.
    """
    if a is None or b is None:
        return False
    if a == b:
        return True

    spelling_fixes = (
        ("deputante", "debutante"),
        ("p'au", "pa'u"),
        ("blood moon", "bloodmoon"),
    )

    def normalise(label):
        text = label.lower()
        for filler in ("forme", "form"):
            text = text.replace(filler, "")
        text = text.replace("é", "e").strip()
        for variant, canonical in spelling_fixes:
            text = text.replace(variant, canonical)
        return text

    return normalise(a) == normalise(b)
# Labels that mean "applies to the default / every form".
_DEFAULT_FORM_LABELS = ("All Forms", "Kantonian Form", "All Sizes")


def _direct_table_rows(table):
    """Return the ``tr`` rows directly under ``table``'s own ``tbody`` (empty if it has none)."""
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []
    return tbody.find_all('tr', recursive=False)


def _collect_raw_game_locations(locations_table):
    """Walk the nested "Game locations" tables and return ``{game: [location group, ...]}``.

    The table is a bit of a mess: each outer row holds a nested table for one
    generation; inside it, each row holds another nested table whose rows pair
    game headers (``th``) with yet another nested table of locations.
    Only games listed in ``all_games`` are kept.
    """
    raw_game_locations = {}
    for generation_row in _direct_table_rows(locations_table):
        generation_td = generation_row.find('td', recursive=False)
        if not generation_td:
            continue
        generation_table = generation_td.find('table', recursive=False)
        if not generation_table:
            continue

        for nested_row in _direct_table_rows(generation_table):
            # Skip the "Generation N" heading rows.
            if 'Generation' in nested_row.get_text(strip=True):
                continue
            games_container_td = nested_row.find('td', recursive=False)
            if not games_container_td:
                continue
            games_container_table = games_container_td.find('table', recursive=False)
            if not games_container_table:
                continue

            for games_container_row in _direct_table_rows(games_container_table):
                for game in games_container_row.find_all('th'):
                    raw_game = game.get_text(strip=True)
                    if raw_game not in all_games:
                        continue

                    locations_container_td = games_container_row.find('td', recursive=False)
                    if not locations_container_td:
                        continue
                    locations_container_table = locations_container_td.find('table', recursive=False)
                    if not locations_container_table:
                        continue
                    locations_container_tbody = locations_container_table.find('tbody', recursive=False)
                    if not locations_container_tbody:
                        continue

                    for location in locations_container_tbody.find_all('td'):
                        for group in split_td_contents(location):
                            raw_game_locations.setdefault(raw_game, []).append(group)
    return raw_game_locations


def _collect_event_tables(soup):
    """Return ``{variant heading: table}`` for the "In events" section.

    Siblings after the section header are scanned until an ``h4`` or an
    element whose text mentions "held" is reached. ``h5`` headings name the
    variant for the tables that follow; tables before any ``h5`` use ``""``.
    """
    event_tables = {}
    events_section = soup.find('span', id='In_events')
    if not events_section:
        return event_tables

    variant = ""
    for sibling in events_section.parent.find_next_siblings():
        if sibling.name == 'h4' or "held" in sibling.get_text(strip=True).lower():
            break
        if sibling.name == 'h5':
            variant = sibling.get_text(strip=True)
        if sibling.name == 'table':
            event_tables[variant] = sibling
    return event_tables


def _form_matches(form, candidate):
    """Tell whether ``candidate`` names ``form``, exactly or by fuzzy partial match (>= 80)."""
    if compare_forms(form, candidate):
        return True
    if not candidate:
        return False
    return fuzz.partial_ratio(form.lower(), candidate.lower()) >= 80


def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager, default_forms=None):
    """Collect, per game, the locations where a Pokémon (or one of its forms) is found.

    Args:
        pokemon_name: Species name used to build the article URL.
        form: Form to look for, or ``None`` for the default form.
        cache: Object used to fetch the page.
        default_forms: Optional collection of form labels that should be
            treated as the default form when ``form`` is ``None``.

    Returns:
        ``{game: [{"location": text, "tag": html}, ...]}``, or ``None`` when
        the page, the "Game locations" section or its table is missing.
        Games with matching event distributions get an ``"Event"`` entry.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')
    locations_section = soup.find('span', id='Game_locations')
    if not locations_section:
        return None

    locations_table = locations_section.find_next('table', class_='roundy')
    if not locations_table:
        return None

    raw_game_locations = _collect_raw_game_locations(locations_table)
    event_tables = _collect_event_tables(soup)

    game_locations = {}
    for raw_game, raw_locations in raw_game_locations.items():
        for raw_location in raw_locations:
            forms = parse_form_information(str(raw_location))

            if form is None:
                raw_text = raw_location.get_text()
                if not forms:
                    # No form markers at all: the location applies to everyone.
                    record_location_info(raw_game, game_locations, raw_location, raw_text)
                    continue
                for form_info in forms:
                    main_form = form_info["main_form"]
                    if default_forms and main_form and main_form in default_forms:
                        main_form = None
                    # Skip locations that are specific to some other form.
                    if main_form and main_form not in _DEFAULT_FORM_LABELS:
                        continue
                    record_location_info(raw_game, game_locations, raw_location, raw_text)
                continue

            for form_info in forms:
                main_form = form_info["main_form"]
                if not main_form:
                    continue
                if main_form in _DEFAULT_FORM_LABELS:
                    # "Every form" labels always match the requested form.
                    main_form = form
                if _form_matches(form, main_form) or _form_matches(form, form_info["sub_form"]):
                    record_location_info(raw_game, game_locations, raw_location, raw_location.get_text())

    # Event distributions.
    for variant, event_table in event_tables.items():
        applies = (variant == pokemon_name and form is None) or (form and form in variant)
        if not applies:
            continue
        for game_row in event_table.find_all('tr'):
            entries = game_row.find_all('td')
            if len(entries) <= 1:
                continue
            link = entries[0].find('a')
            games_string = link.get('title') if link else None
            if not games_string:
                # A first cell without a titled link used to raise here.
                continue
            for game in all_games:
                if game in games_string:
                    record_location_info(game, game_locations, "Event", "Event")

    return game_locations
def record_location_info(raw_game, game_locations, raw_location, raw_text):
    """Append one location record to ``game_locations[raw_game]`` in place.

    The record stores ``raw_text`` under ``"location"`` and the string form of
    ``raw_location`` under ``"tag"``. The per-game list is created on demand.
    """
    entry = {"location": raw_text, "tag": str(raw_location)}
    game_locations.setdefault(raw_game, []).append(entry)
def split_outside_brackets(str):
    """Split a string on commas that are not inside round brackets.

    A comma is treated as "inside" when a closing bracket follows it with no
    other bracket in between. The parameter name shadows the built-in ``str``
    and is kept as is for callers.
    """
    text = str
    comma_outside_brackets = re.compile(r',(?![^()]*\))')
    return comma_outside_brackets.split(text)
def handle_unown(pokemon, encounter_data):
    """Give an Unown form its encounter information; other Pokémon are left alone.

    * Forms other than "!" and "?" (or calls without ``encounter_data``) share
      the encounter list of the Unown found by ``find_pokemon(name, None)``.
    * For "!" and "?" (added in HeartGold and SoulSilver per the original
      note) the form's own ``encounter_data`` is appended, followed by that
      Unown's encounters for every game listed in ``all_games`` after both
      "HeartGold" and "SoulSilver".
    """
    if pokemon.name != "Unown":
        return

    one_form_unown = find_pokemon(pokemon.name, None)
    if not one_form_unown:
        return

    is_punctuation_form = pokemon.form in ("!", "?")
    if not (is_punctuation_form and encounter_data):
        pokemon.encounter_information = one_form_unown.encounter_information
        return

    for game_name in encounter_data:
        pokemon.encounter_information.append(
            EncounterInformation(game_name, encounter_data[game_name])
        )

    seen_heartgold = False
    seen_soulsilver = False
    for game in all_games:
        if game == "HeartGold":
            seen_heartgold = True
            continue
        if game == "SoulSilver":
            seen_soulsilver = True
            continue
        if not (seen_heartgold and seen_soulsilver):
            continue
        # Copy the first encounter recorded for this later game, if any.
        for encounter in one_form_unown.encounter_information:
            if game == encounter.game:
                pokemon.encounter_information.append(encounter)
                break
# Pokémon whose non-default forms reuse the encounter information of the
# default form (see handle_form_shift).
list_of_shifting_form_pokemon = [
    "Deoxys", "Burmy", "Wormadam", "Rotom", "Shaymin", "Keldeo",
    "Furfrou", "Hoopa", "Pumpkaboo", "Gourgeist", "Zygarde", "Magearna",
    "Vivillon", "Minior", "Urshifu", "Oinkologne", "Basculegion", "Enamorus",
]
def handle_form_shift(pokemon, encounter_data):
    """Let a non-default form of a form-shifting Pokémon share the default form's encounters.

    Nothing happens unless the Pokémon is listed in
    ``list_of_shifting_form_pokemon``, has a form, and its default entry can
    be found. ``encounter_data`` is accepted but not used.
    """
    applies = pokemon.name in list_of_shifting_form_pokemon and pokemon.form is not None
    if not applies:
        return

    normal_form_pokemon = find_pokemon(pokemon.name, None)
    if normal_form_pokemon:
        # The list object itself is shared, not copied.
        pokemon.encounter_information = normal_form_pokemon.encounter_information
# Tea Pokémon, grouped by the names of their two forms.
phony_authentic = ["Sinistea", "Polteageist"]        # Phony / Authentic
countefieit_atrisan = ["Poltchageist"]               # Counterfeit / Artisan
unremarkable_masterpiece = ["Sinistcha"]             # Unremarkable / Masterpiece
bad_tea_pokemon = [*phony_authentic, *countefieit_atrisan, *unremarkable_masterpiece]
def get_bad_tea_form(pokemon):
    """Return the form label to search for when the Pokémon is one of the tea Pokémon.

    * Sinistea / Polteageist: no form -> "Phony Form"; "Authentic Form" -> None.
    * Poltchageist: no form -> "Counterfeit Form"; "Artisan Form" -> None.
    * Sinistcha: no form -> "Unremarkable Form"; any other form is returned as is.

    Every other case (including non-tea Pokémon) yields ``None``.
    """
    name, form = pokemon.name, pokemon.form
    if name not in bad_tea_pokemon:
        return

    if name in phony_authentic:
        if form is None:
            return "Phony Form"
        if form == "Authentic Form":
            return None

    if name in countefieit_atrisan:
        if form is None:
            return "Counterfeit Form"
        if form == "Artisan Form":
            return None

    if name in unremarkable_masterpiece:
        return "Unremarkable Form" if form is None else form
def determine_earliest_games(cache: CacheManager):
    """Fill in generation, encounters and earliest game for every Pokémon in ``big_pokemon_list``.

    For each Pokémon the form to search for is adjusted for a few special
    cases, the introduction generation and the per-game locations are
    scraped, the Unown / form-shift special cases are applied, and finally
    ``Pokemon.determine_earliest_game`` is run. Progress is printed.

    A Pokémon whose locations could not be scraped, or for which no earliest
    game is found, is reported and skipped instead of aborting the run.
    """
    for pokemon in big_pokemon_list:
        print(f"Processing {pokemon}")

        # Work out which form label the location tables use for this Pokémon.
        form_to_find = pokemon.form
        if pokemon.name == "Minior" and pokemon.form == "Orange Core":
            form_to_find = None
        if pokemon.name == "Squawkabilly" and pokemon.form:
            form_to_find = pokemon.form.replace("Plumage", "").strip()
        if pokemon.name == "Alcremie":
            form_to_find = None
        if pokemon.name in bad_tea_pokemon:
            form_to_find = get_bad_tea_form(pokemon)

        pokemon.introduced_in_gen = get_intro_generation(pokemon.name, form_to_find, cache)

        # The scraper returns None when the page or its table is missing.
        encounter_data = get_locations_from_bulbapedia(pokemon.name, form_to_find, cache) or {}
        for game, locations in encounter_data.items():
            pokemon.encounter_information.append(EncounterInformation(game, locations))

        handle_unown(pokemon, encounter_data)
        handle_form_shift(pokemon, encounter_data)

        if pokemon.name == "Gimmighoul" and pokemon.form == "Roaming Form":
            encounter_information = EncounterInformation("Pokémon Go", ["Pokémon Go"])
            pokemon.encounter_information.append(encounter_information)

        pokemon.determine_earliest_game()

        earliest = pokemon.earliest_game
        if earliest is None:
            print(f"Processed {pokemon}: no obtainable game found")
        else:
            print(f"Processed {pokemon}: {earliest.game} ({earliest.method})")
def get_base_form(evolution_chain: List[EvolutionStage]):
    """Return the name of the base (non-baby) Pokémon of an evolution chain.

    Stages are examined in order: a stage labelled ``"Unevolved"`` is the base
    form; for a baby stage the stage that follows it is the base form. A baby
    stage without a successor is skipped. ``None`` is returned for an empty
    chain or when no base form can be identified.
    """
    if not evolution_chain:
        return None

    for stage in evolution_chain:
        if stage.stage == "Unevolved":
            return stage.pokemon
        if stage.is_baby:
            if stage.next_stage is None:
                # Nothing follows the baby; keep looking instead of crashing.
                continue
            return stage.next_stage.pokemon
    return None
def adjust_for_evolution(cache: CacheManager):
    """Attach an evolution chain to every Pokémon in ``big_pokemon_list`` and print the outcome.

    The printed game and method come from ``Pokemon.get_earliest_game_and_method``.
    """
    for pokemon in big_pokemon_list:
        chain = get_evolution_data_from_bulbapedia(pokemon.name, pokemon.form, cache)
        pokemon.add_evolution_chain(chain)

        earliest_game, obtain_method = pokemon.get_earliest_game_and_method()
        print(f"Adjusted {pokemon}: {earliest_game} ({obtain_method})")
def save_to_csv(filename='pokemon_earliest_games.csv'):
    """Write one CSV row per Pokémon in ``big_pokemon_list``.

    Columns: number, name, introduced_in_gen, earliest_game, obtain_method and
    encounter_locations (the locations of the earliest game, joined with
    `` | ``). Pokémon without an earliest game get empty game, method and
    location fields instead of aborting the export.

    Args:
        filename: Destination path; the file is overwritten.
    """
    fieldnames = ['number', 'name', 'introduced_in_gen', 'earliest_game', 'obtain_method', 'encounter_locations']

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for pokemon in big_pokemon_list:
            earliest = pokemon.earliest_game

            encounter_locations = []
            if earliest is not None:
                for encounter in pokemon.encounter_information or []:
                    if encounter.game == earliest.game:
                        encounter_locations.extend(encounter.locations)

            writer.writerow({
                'number': pokemon.number,
                # NOTE(review): a Pokémon without a form is written as
                # "Name (None)"; kept as is in case consumers rely on it.
                'name': f"{pokemon.name} ({pokemon.form})",
                'introduced_in_gen': pokemon.introduced_in_gen,
                'earliest_game': earliest.game if earliest is not None else None,
                'obtain_method': earliest.method if earliest is not None else None,
                'encounter_locations': ' | '.join(str(item) for item in encounter_locations),
            })
def handle_unknown_encounters(cache):
    """Print a notice for every Pokémon that has no earliest game or no obtain method.

    ``cache`` is accepted but not used.
    """
    for pokemon in big_pokemon_list:
        earliest = pokemon.earliest_game
        if earliest is None or earliest.method is None:
            print(f"Checking alternative sources for {pokemon.name}")
# Script entry point: load the Pokémon list, determine each Pokémon's earliest
# game, attach evolution data, report unresolved entries and export the CSV.
if __name__ == "__main__":
    cache = CacheManager()
    try:
        pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000)
        create_pokemon_index(big_pokemon_list)
        determine_earliest_games(cache)
        adjust_for_evolution(cache)
        handle_unknown_encounters(cache)
        save_to_csv()
    finally:
        # Close the cache even when one of the steps above fails.
        cache.close()
    print(f"Earliest obtainable games and encounter locations determined for {len(pokemon_list)} Pokémon and saved to pokemon_earliest_games.csv")