You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

663 lines
26 KiB

import csv
import requests
import time
import json
import os
import re
import sqlite3
from bs4 import BeautifulSoup, Tag, NavigableString
import copy
# Initialize the database connection.
# NOTE: this runs at import time. sqlite3.connect() opens the file
# 'pokemon_cache.db' (creating it if it does not exist); the path is relative,
# so it is resolved against the current working directory.
conn = sqlite3.connect('pokemon_cache.db')
# Single module-wide cursor shared by get_cached_data() and save_cached_data().
cursor = conn.cursor()
# Create the cache table if it doesn't exist.
# Each row maps a cache key to a JSON-encoded payload (see save_cached_data(),
# which writes json.dumps(value) into the `value` column).
cursor.execute('''
CREATE TABLE IF NOT EXISTS cache (
key TEXT PRIMARY KEY,
value TEXT
)
''')
conn.commit()
# List of all main series Pokémon games in chronological order, with special games first in each generation.
# The ORDER of this list is significant:
#   * get_earliest_game() walks it front to back and returns the first game that
#     has a recorded obtain method, so earlier entries take priority.
#   * adjust_for_evolution() compares positions via all_games.index(...).
# "Unknown" is kept as the last entry so that it is a valid argument to
# all_games.index() and always sorts after every real game.
all_games = [
"Yellow", "Red", "Blue",
"Crystal", "Gold", "Silver",
"Emerald", "FireRed", "LeafGreen", "Ruby", "Sapphire",
"Platinum", "HeartGold", "SoulSilver", "Diamond", "Pearl",
"Black 2", "White 2", "Black", "White",
"X", "Y", "Omega Ruby", "Alpha Sapphire",
"Ultra Sun", "Ultra Moon", "Sun", "Moon",
"Sword", "Shield", "Expansion Pass",
"Brilliant Diamond", "Shining Pearl",
"Legends: Arceus",
"Scarlet", "Violet", "The Teal Mask", "The Hidden Treasure of Area Zero",
"Unknown"
]
# In-memory mirror of the SQLite `cache` table: cache key -> decoded payload.
# Filled by get_cached_data() and extended by update_cache().
cache = {}
# Number of entries added to `cache` since the last save_cached_data() call.
new_entries_count = 0
def get_cached_data():
    """Populate the in-memory ``cache`` dict from the SQLite ``cache`` table.

    Every stored value is decoded from JSON. Keys that already exist in the
    dict are overwritten with the value read from the database.
    """
    global cache
    cursor.execute("SELECT key, value FROM cache")
    stored_rows = cursor.fetchall()
    # dict.update consumes the pairs lazily, so entries are inserted one at a
    # time exactly as a plain loop would do.
    cache.update((stored_key, json.loads(raw)) for stored_key, raw in stored_rows)
def save_cached_data():
    """Write the whole in-memory cache to SQLite when there are unsaved entries.

    Does nothing if ``new_entries_count`` is zero. Otherwise every entry of
    ``cache`` is upserted (JSON-encoded), the transaction is committed and the
    unsaved-entry counter is reset.
    """
    global cache, new_entries_count
    if new_entries_count <= 0:
        return
    encoded_rows = (
        (entry_key, json.dumps(entry_value))
        for entry_key, entry_value in cache.items()
    )
    cursor.executemany(
        "INSERT OR REPLACE INTO cache (key, value) VALUES (?, ?)",
        encoded_rows,
    )
    conn.commit()
    new_entries_count = 0
def update_cache(key, value):
    """Add a freshly fetched payload to the cache and persist it immediately.

    Args:
        key: Cache key string.
        value: JSON-serialisable payload (parsed API response or page text).

    Keys that are already cached are left untouched. For a new key the value
    is stored in memory, written to SQLite and committed, and the function
    then sleeps for one second; since this is called right after a network
    fetch, the pause throttles requests to the remote sites.

    Only the new row is written. Previously each new entry caused every
    cached entry to be re-serialised and re-inserted, so the total amount of
    database work grew quadratically with the cache size. All other entries
    are already in the database (they were either loaded from it or written
    when they were added), so writing the single new row is sufficient and
    ``new_entries_count`` does not need to be incremented.
    """
    if key in cache:
        return
    cache[key] = value
    cursor.execute(
        "INSERT OR REPLACE INTO cache (key, value) VALUES (?, ?)",
        (key, json.dumps(value)),
    )
    conn.commit()
    # Be polite to the upstream servers between consecutive fetches.
    time.sleep(1)
def read_pokemon_list(filename, limit=50):
    """Read up to *limit* rows from a CSV file and split each name into base name and form.

    The CSV must have a ``name`` column. A trailing parenthesised suffix, e.g.
    ``"Vulpix (Alolan Form)"``, is treated as the form.

    Args:
        filename: Path of the UTF-8 encoded CSV file.
        limit: Maximum number of data rows to return.

    Returns:
        A list of row dicts, each extended with ``base_name`` (str) and
        ``form`` (str, or None when the name has no parenthesised suffix).
    """
    name_pattern = re.compile(r'(.*?)\s*(\(.*\))?$')
    entries = []
    with open(filename, 'r', newline='', encoding='utf-8') as csvfile:
        for index, row in enumerate(csv.DictReader(csvfile)):
            if index >= limit:
                break
            parsed = name_pattern.match(row['name'])
            if parsed:
                base_name, form = parsed.groups()
            else:
                base_name, form = row['name'], None
            row['base_name'] = base_name.strip()
            # Drop the surrounding parentheses (and stray spaces) from the form.
            row['form'] = form.strip('() ') if form else None
            entries.append(row)
    return entries
# Substring substitutions applied, in this order, to a lower-cased form label
# to turn it into the suffix used in PokeAPI resource names.
_FORM_REPLACEMENTS = {
    'forme': '',
    'form': '',
    'alolan': 'alola',
    'galarian': 'galar',
    'hisuian': 'hisui',
    'paldean': 'paldea',
    'size': '',
    '10%': '10 power construct',
    'hoopa': '',
    'style': '',
    'core': '',
    'color': '',
    'blood moon': 'bloodmoon',
}

# Species whose form label is dropped entirely, so the plain species name is used.
# 'wormadam' is deliberately NOT listed here: it has its own "cloak" handling
# below, and listing it made that handling crash on a None form.
_FORMLESS_SPECIES = frozenset({
    'burmy',
    'shellos',
    'gastrodon',
    'unown',
    'deerling',
    'sawsbuck',
    'vivillon',
    'flabébé',
    'floette',
    'florges',
    'furfrou',
    'sinistea',
    'polteageist',
    'alcremie',
    'poltchageist',
    'sinistcha',
})

# Form suffix to use when a species is requested without an explicit form.
_DEFAULT_FORMS = {
    'deoxys': 'normal',
    'wormadam': 'plant',
    'giratina': 'origin',
    'tornadus': 'incarnate',
    'shaymin': 'land',
    'basculin': 'red-striped',
    'darmanitan': 'standard',
    'thundurus': 'incarnate',
    'landorus': 'incarnate',
    'enamorus': 'incarnate',
    'keldeo': 'ordinary',
    'meloetta': 'aria',
    'meowstic': 'male',
    'aegislash': 'shield',
    'pumpkaboo': 'average',
    'gourgeist': 'average',
    'minior': 'red-meteor',
    'zygarde': '50 power construct',
    'oricorio': 'baile',
    'lycanroc': 'midday',
    'wishiwashi': 'solo',
    'mimikyu': 'disguised',
    'cramorant': 'gulping',
    'toxtricity': 'low-key',
    'eiscue': 'ice',
    'indeedee': 'male',
    'urshifu': 'single-strike',
    'morpeko': 'full belly',
    'oinkologne': 'male',
    'maushold': 'family of three',
    'squawkabilly': 'green plumage',
    'palafin': 'zero',
    'tatsugiri': 'curly',
    'dudunsparce': 'two segment',
    'basculegion': 'male',
}


def sanitize_name_and_form(name, form):
    """Build the PokeAPI resource name for a species and optional form label.

    Args:
        name: Species display name, e.g. ``"Mr. Mime"``.
        form: Form label as found in the input list, e.g. ``"Alolan Form"``,
            or None/empty when no form was given.

    Returns:
        A lower-case, hyphen-separated identifier such as ``"vulpix-alola"``.
        Without a form, species listed in ``_DEFAULT_FORMS`` get their default
        suffix (``"deoxys-normal"``); everything else is just the species name.

    Fix: a Wormadam form used to raise ``AttributeError`` because the species
    was in the "drop the form" list (setting the form to None) and was then
    passed to ``.replace('cloak', '')``. Wormadam is now handled only by its
    dedicated branch, yielding e.g. ``"wormadam-sandy"``.
    """
    species = name.lower()
    adjusted_form = None

    if form:
        adjusted_form = form.lower()

        # Paldean Tauros breeds use dedicated suffixes.
        if species == 'tauros':
            if adjusted_form == 'paldean form':
                adjusted_form = 'paldea combat breed'
            elif 'blaze' in adjusted_form:
                adjusted_form = 'paldea blaze breed'
            elif 'aqua' in adjusted_form:
                adjusted_form = 'paldea aqua breed'

        for old, new in _FORM_REPLACEMENTS.items():
            adjusted_form = adjusted_form.replace(old, new).strip()

        # Species-specific post-processing. The species are mutually
        # exclusive, so at most one branch applies.
        if species in _FORMLESS_SPECIES:
            adjusted_form = None
        elif species == 'wormadam':
            adjusted_form = adjusted_form.replace('cloak', '').strip()
        elif species == 'rotom':
            adjusted_form = adjusted_form.replace('rotom', '').strip()
        elif species == 'darmanitan':
            adjusted_form = adjusted_form + ' standard'
    else:
        adjusted_form = _DEFAULT_FORMS.get(species)

    api_name = f"{species}-{adjusted_form}" if adjusted_form else species
    api_name = (
        api_name.replace(' ', '-')
        .replace("'", "")
        .replace(".", "")
        .replace('é', 'e')
        .replace(':', '')
    )

    # Special case kept from the original: male Oinkologne is requested by
    # numeric id rather than by name.
    if api_name == 'oinkologne-male':
        api_name = '916'
    return api_name
def get_pokemon_data(pokemon_name, form, cache):
    """Return the PokeAPI ``pokemon`` record for a species/form, using the cache.

    Args:
        pokemon_name: Species display name.
        form: Form label or None.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        The decoded JSON payload, or None when the request does not return
        HTTP 200.
    """
    if form:
        cache_key = f"pokemon_{pokemon_name}_{form}"
    else:
        cache_key = f"pokemon_{pokemon_name}"
    if cache_key in cache:
        return cache[cache_key]

    api_name = sanitize_name_and_form(pokemon_name, form)
    url = f"https://pokeapi.co/api/v2/pokemon/{api_name}"
    print(f"Fetching Pokémon data for {pokemon_name}: {url}")
    response = requests.get(url)
    if response.status_code != 200:
        return None
    data = response.json()
    update_cache(cache_key, data)
    return data
def get_pokemon_data_bulbapedia(pokemon_name, cache):
    """Return the raw HTML of a species' Bulbapedia page, using the cache.

    Args:
        pokemon_name: Species name as used in the Bulbapedia page title.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        The page text, or None when the request does not return HTTP 200.
    """
    cache_key = f"pokemon_{pokemon_name}_bulbapedia"
    if cache_key in cache:
        return cache[cache_key]

    url = f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pokémon)"
    print(f"Fetching Pokémon data for {pokemon_name}: {url}")
    response = requests.get(url)
    if response.status_code != 200:
        return None
    data = response.text
    update_cache(cache_key, data)
    return data
def get_pokemon_encounter_data(pokemon_name, form, cache):
    """Return the PokeAPI encounter list for a species/form, using the cache.

    Args:
        pokemon_name: Species display name.
        form: Form label or None.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        The decoded JSON payload, or None when the request does not return
        HTTP 200.
    """
    if form:
        cache_key = f"pokemon_encounter_{pokemon_name}_{form}"
    else:
        cache_key = f"pokemon_encounter_{pokemon_name}"
    if cache_key in cache:
        return cache[cache_key]

    api_name = sanitize_name_and_form(pokemon_name, form)
    url = f"https://pokeapi.co/api/v2/pokemon/{api_name}/encounters"
    print(f"Fetching encounter data for {pokemon_name}: {url}")
    response = requests.get(url)
    if response.status_code != 200:
        return None
    data = response.json()
    update_cache(cache_key, data)
    return data
def split_td_contents(td):
    """Split the children of a table cell into groups separated by ``<br>`` tags.

    Args:
        td: An element exposing a ``contents`` sequence of child nodes.

    Returns:
        A list of new ``<div>`` tags, one per non-empty run of children
        between ``<br>`` elements. Children are copied, so *td* is not
        modified.
    """
    def wrap(nodes):
        # Each group lives in its own detached <div>.
        wrapper = BeautifulSoup('', 'html.parser').new_tag('div')
        for node in nodes:
            wrapper.append(copy.copy(node))
        return wrapper

    groups = []
    pending = []
    for child in td.contents:
        is_break = isinstance(child, Tag) and child.name == 'br'
        if not is_break:
            pending.append(child)
        elif pending:
            groups.append(wrap(pending))
            pending = []
    if pending:
        groups.append(wrap(pending))
    return groups
def parse_form_information(html_content):
    """Extract the form (and optional breed) named in the first ``<small>`` tag.

    Args:
        html_content: HTML fragment as a string.

    Returns:
        ``(main_form, breed)``. Both are None when the fragment contains no
        ``<small>`` element; ``breed`` is None when the text has no inner
        parenthesised part.
    """
    small_tag = BeautifulSoup(html_content, 'html.parser').find('small')
    if not small_tag:
        return None, None

    # Drop the outer parentheses, then split "Main (Breed" on the inner "(".
    text = small_tag.get_text(strip=True).strip('()')
    head, *rest = text.split('(')
    breed = rest[0].strip(')') if rest else None
    return head.strip(), breed
def _table_body(table):
    """Return the direct ``<tbody>`` of *table*, or *table* itself if it has none."""
    body = table.find('tbody', recursive=False)
    # html.parser does not synthesise a <tbody>, so markup without one must
    # be read from the table element directly.
    return table if body is None else body


def _direct_rows(table):
    """Return the ``<tr>`` elements that are direct children of the table body."""
    return _table_body(table).find_all('tr', recursive=False)


def _nested_table(row):
    """Return the table nested directly in the first direct ``<td>`` of *row*, or None."""
    cell = row.find('td', recursive=False)
    if not cell:
        return None
    nested = cell.find('table', recursive=False)
    return nested if nested else None


def _collect_raw_game_locations(locations_table):
    """Map each known game title to the location groups listed for it.

    The "Game locations" table is nested several levels deep: one row per
    generation, holding a table with one row per game group, holding a table
    whose rows carry the game names in ``<th>`` cells next to a further table
    with the location cells.
    """
    raw_game_locations = {}
    for generation_row in _direct_rows(locations_table):
        generation_table = _nested_table(generation_row)
        if generation_table is None:
            continue
        for nested_row in _direct_rows(generation_table):
            # Skip the "Generation N" heading rows.
            if 'Generation' in nested_row.get_text(strip=True):
                continue
            games_table = _nested_table(nested_row)
            if games_table is None:
                continue
            for games_row in _direct_rows(games_table):
                locations_container = _nested_table(games_row)
                if locations_container is None:
                    continue
                location_cells = _table_body(locations_container).find_all('td')
                for header in games_row.find_all('th'):
                    raw_game = header.get_text(strip=True)
                    if raw_game not in all_games:
                        continue
                    for cell in location_cells:
                        for group in split_td_contents(cell):
                            raw_game_locations.setdefault(raw_game, []).append(group)
    return raw_game_locations


def _collect_event_tables(soup):
    """Return the tables of the "In events" section keyed by their ``<h5>`` variant heading.

    Tables that appear before any ``<h5>`` are stored under the empty string.
    Scanning stops at the next ``<h4>`` or at the first sibling whose text
    contains "held".
    """
    event_tables = {}
    events_section = soup.find('span', id='In_events')
    if not events_section:
        return event_tables
    variant = ""
    for sibling in events_section.parent.find_next_siblings():
        if sibling.name == 'h4' or "held" in sibling.get_text(strip=True).lower():
            break
        if sibling.name == 'h5':
            variant = sibling.get_text(strip=True)
        if sibling.name == 'table':
            event_tables[variant] = sibling
    return event_tables


def get_locations_from_bulbapedia(pokemon_name, form, cache):
    """Scrape per-game location strings for a species/form from Bulbapedia.

    Args:
        pokemon_name: Species name used for the Bulbapedia page title.
        form: Form label to filter on, or None to accept every listed location.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        A dict mapping game title to a list of location strings, with
        ``"Event"`` appended for games in which a matching event distribution
        is listed. Returns None when the page cannot be fetched or has no
        "Game locations" table.

    Compared with the previous implementation this no longer raises
    ``AttributeError``/``TypeError`` when a table lacks an explicit
    ``<tbody>`` or when an event row has no link or no ``title`` attribute.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')
    locations_section = soup.find('span', id='Game_locations')
    if not locations_section:
        return None
    locations_table = locations_section.find_next('table', class_='roundy')
    if not locations_table:
        return None

    raw_game_locations = _collect_raw_game_locations(locations_table)
    event_tables = _collect_event_tables(soup)

    game_locations = {}
    for raw_game, raw_locations in raw_game_locations.items():
        for raw_location in raw_locations:
            if form is not None:
                # Only keep groups whose <small> label names the requested form.
                main_form, _sub_form = parse_form_information(str(raw_location))
                if main_form != form:
                    continue
            for location in raw_location.get_text().split(','):
                game_locations.setdefault(raw_game, []).append(location.strip())

    for variant, table in event_tables.items():
        applies = (variant == pokemon_name and form is None) or (form and form in variant)
        if not applies:
            continue
        for game_row in table.find_all('tr'):
            entries = game_row.find_all('td')
            if len(entries) <= 1:
                continue
            link = entries[0].find('a')
            games_string = link.get('title') if link is not None else None
            if not games_string:
                continue
            # NOTE(review): this is a substring match, so a short title such
            # as "X" or "Red" also matches longer link titles containing it.
            for game in all_games:
                if game in games_string:
                    game_locations.setdefault(game, []).append("Event")
    return game_locations
def get_earliest_game(encounter_data, pokemon_name, form):
    """Pick the earliest game in which the Pokémon can be obtained, and how.

    Args:
        encounter_data: Mapping of game title to a list of location strings,
            or a falsy value when nothing is known.
        pokemon_name: Unused; kept for interface compatibility.
        form: Unused; kept for interface compatibility.

    Returns:
        ``(game, method)`` where *game* is the first entry of ``all_games``
        with a usable location and *method* is one of ``"Catchable"``,
        ``"Starter"``, ``"Gift"`` or ``"Evolve"``. Returns
        ``("Unknown", "Unknown")`` when no usable location exists.

    Locations mentioning trades, events, transfers and similar are ignored.
    Per game, the first classified method is kept unless a later location is
    plainly catchable, in which case ``"Catchable"`` wins.

    Fix: the "already recorded" test compared the original-case game title
    against lower-cased keys, so it never matched and the last location
    always overwrote earlier ones.
    """
    if not encounter_data:
        return "Unknown", "Unknown"

    non_catchable_methods = (
        "trade", "event", "global link", "poké transfer",
        "time capsule", "unobtainable", "pokémon home",
    )

    game_methods = {}
    for game, locations in encounter_data.items():
        game_key = game.lower()
        for location in locations:
            text = location.lower()
            if any(marker in text for marker in non_catchable_methods):
                continue
            if "first partner" in text:
                method = "Starter"
            elif "received" in text:
                method = "Gift"
            elif "evolve" in text:
                method = "Evolve"
            else:
                method = "Catchable"
            if game_key not in game_methods or method == "Catchable":
                game_methods[game_key] = method

    # all_games is ordered by priority, so the first hit is the earliest game.
    for game in all_games:
        method = game_methods.get(game.lower())
        if method is not None:
            return game, method
    return "Unknown", "Unknown"
def determine_earliest_games(pokemon_list, cache):
    """Annotate every entry with its earliest game and obtain method.

    For each entry the Bulbapedia locations are looked up and the result of
    get_earliest_game() is stored under ``earliest_game`` and
    ``obtain_method``. Progress is printed before and after each entry.

    Args:
        pokemon_list: List of entry dicts with ``name``, ``number``,
            ``base_name`` and ``form`` keys.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        The same list object, with its entries updated in place.
    """
    # An earlier PokeAPI-based lookup (get_pokemon_data /
    # get_pokemon_encounter_data) is no longer used here.
    for pokemon in pokemon_list:
        print(f"Processing {pokemon['name']} (#{pokemon['number']})")
        encounter_data = get_locations_from_bulbapedia(
            pokemon['base_name'], pokemon['form'], cache
        )
        earliest, method = get_earliest_game(
            encounter_data, pokemon['base_name'], pokemon['form']
        )
        pokemon['earliest_game'] = earliest
        pokemon['obtain_method'] = method
        print(f"Processed {pokemon['name']} (#{pokemon['number']}): {pokemon['earliest_game']} ({pokemon['obtain_method']})")
    return pokemon_list
def get_species_data(pokemon_name, cache):
    """Return the PokeAPI ``pokemon-species`` record for a species, using the cache.

    Args:
        pokemon_name: Species name.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        The decoded JSON payload, or None when the request does not return
        HTTP 200.
    """
    cache_key = f"species_{pokemon_name}"
    if cache_key in cache:
        return cache[cache_key]

    api_name = sanitize_name_and_form(pokemon_name, None)
    url = f"https://pokeapi.co/api/v2/pokemon-species/{api_name}/"
    print(f"Fetching species data for {pokemon_name}: {url}")
    response = requests.get(url)
    if response.status_code != 200:
        return None
    data = response.json()
    update_cache(cache_key, data)
    return data
def get_evolution_chain(pokemon_name, cache):
    """Return the PokeAPI evolution chain that a species belongs to, using the cache.

    The chain URL is read from the species record's ``evolution_chain.url``
    field and also serves as part of the cache key.

    Args:
        pokemon_name: Species name.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        The decoded JSON payload, or None when the species record is
        unavailable or the chain request does not return HTTP 200.
    """
    species_data = get_species_data(pokemon_name, cache)
    if not species_data:
        return None

    chain_url = species_data['evolution_chain']['url']
    cache_key = f"evolution_{chain_url}"
    if cache_key in cache:
        return cache[cache_key]

    evolution_response = requests.get(chain_url)
    if evolution_response.status_code != 200:
        return None
    evolution_data = evolution_response.json()
    update_cache(cache_key, evolution_data)
    return evolution_data
def get_base_form(evolution_chain, cache):
    """Return the name of the first non-baby species in an evolution chain.

    The chain is followed along the first ``evolves_to`` branch only. If no
    non-baby species is found, the last species reached is returned.

    Args:
        evolution_chain: Decoded evolution-chain payload, or a falsy value.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        The species name, or None when the payload is missing, has no
        ``chain`` key, or its chain is empty.
    """
    if not (evolution_chain and 'chain' in evolution_chain):
        return None

    node = evolution_chain['chain']
    while node:
        species_name = node['species']['name']
        species_data = get_species_data(species_name, cache)
        is_non_baby = bool(species_data) and not species_data.get('is_baby', False)
        # Stop at the first non-baby stage, or at the end of the branch.
        if is_non_baby or not node['evolves_to']:
            return species_name
        node = node['evolves_to'][0]
    return None
def adjust_for_evolution(pokemon_list, cache):
    """Re-label babies as bred and evolved species as obtained by evolution.

    Baby species get ``obtain_method = 'Breed'``. For any other entry whose
    chain's base form is also in the list (same form label), the base form's
    earliest game is adopted and the method set to ``'Evolve'`` whenever the
    base form is available no later than the entry itself and is a different
    Pokédex number.

    Args:
        pokemon_list: Entries already annotated with ``earliest_game`` and
            ``obtain_method``.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        The same list object, with its entries updated in place.
    """
    pokemon_dict = {f"{pokemon['base_name']}_{pokemon['form']}".lower(): pokemon for pokemon in pokemon_list}

    for pokemon in pokemon_list:
        species_data = get_species_data(pokemon['base_name'], cache)
        evolution_chain = get_evolution_chain(pokemon['base_name'], cache)
        base_form = get_base_form(evolution_chain, cache)

        if species_data and species_data.get('is_baby', False):
            pokemon['obtain_method'] = 'Breed'
        elif base_form:
            base_pokemon = pokemon_dict.get(f"{base_form}_{pokemon['form']}".lower())
            if base_pokemon is not None:
                base_rank = all_games.index(base_pokemon['earliest_game'])
                own_rank = all_games.index(pokemon['earliest_game'])
                if base_rank <= own_rank and base_pokemon['number'] != pokemon['number']:
                    pokemon['earliest_game'] = base_pokemon['earliest_game']
                    pokemon['obtain_method'] = 'Evolve'
        print(f"Adjusted {pokemon['name']} (#{pokemon['number']}): {pokemon['earliest_game']} ({pokemon['obtain_method']})")
    return pokemon_list
def save_to_csv(pokemon_list, filename='pokemon_earliest_games.csv'):
    """Write the annotated entries to a UTF-8 CSV file with a header row.

    Only the columns ``number``, ``name``, ``earliest_game``,
    ``obtain_method`` and ``encounter_locations`` are written; every entry
    must contain all of them.

    Args:
        pokemon_list: Entries to write.
        filename: Destination path; an existing file is overwritten.
    """
    fieldnames = ['number', 'name', 'earliest_game', 'obtain_method', 'encounter_locations']
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for pokemon in pokemon_list:
            # Project each entry onto the output columns; extra keys are dropped.
            writer.writerow({column: pokemon[column] for column in fieldnames})
def parse_encounter_locations(encounter_data, game):
    """Summarise where a Pokémon is encountered in one game.

    Args:
        encounter_data: List of PokeAPI location-area encounter records, or
            None/empty when no data is available.
        game: Game title as used in ``all_games`` (e.g. ``"Black 2"``) or a
            PokeAPI version name (e.g. ``"black-2"``).

    Returns:
        A list of unique strings of the form ``"area (method[, condition][, time])"``
        in first-seen order; empty when nothing matches.

    Fixes:
        * ``encounter_data`` may be None (the fetcher returns None on a failed
          request); that used to raise ``TypeError``.
        * PokeAPI version names are lower-case and hyphenated, so titles with
          spaces or a colon ("Black 2", "Legends: Arceus") never matched. The
          title is now converted to that form before comparing.
        * The local previously named ``time`` shadowed the ``time`` module.
    """
    if not encounter_data:
        return []

    version_name = game.lower().replace(':', '').replace(' ', '-')

    locations = []
    for location_area in encounter_data:
        for version_detail in location_area['version_details']:
            if version_detail['version']['name'] != version_name:
                continue
            location_name = location_area['location_area']['name']
            for encounter_detail in version_detail['encounter_details']:
                method = encounter_detail['method']['name']
                # NOTE(review): 'condition' and 'time' are read as-is from the
                # record; confirm these keys against the PokeAPI encounter
                # schema, which may expose them under different names.
                condition = encounter_detail.get('condition', 'Any')
                time_of_day = ', '.join(encounter_detail.get('time', ['Any']))
                encounter_info = f"{location_name} ({method}"
                if condition != 'Any':
                    encounter_info += f", {condition}"
                if time_of_day != 'Any':
                    encounter_info += f", {time_of_day}"
                encounter_info += ")"
                if encounter_info not in locations:
                    locations.append(encounter_info)
    return locations
def add_encounter_locations(pokemon_list, cache):
    """Fill in ``encounter_locations`` for every entry.

    Entries whose obtain method is ``'Catchable'`` get the PokeAPI encounter
    summary for their earliest game joined with ``' | '`` (or ``'Unknown'``
    when the summary is empty); all other entries get ``'N/A'``.

    Args:
        pokemon_list: Entries annotated with ``earliest_game`` and
            ``obtain_method``.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        The same list object, with its entries updated in place.
    """
    for pokemon in pokemon_list:
        if pokemon['obtain_method'] != 'Catchable':
            pokemon['encounter_locations'] = 'N/A'
        else:
            encounter_data = get_pokemon_encounter_data(
                pokemon['base_name'], pokemon['form'], cache
            )
            found = parse_encounter_locations(encounter_data, pokemon['earliest_game'])
            pokemon['encounter_locations'] = ' | '.join(found) if found else 'Unknown'
        print(f"Added encounter locations for {pokemon['name']} (#{pokemon['number']}) in {pokemon['earliest_game']}")
    return pokemon_list
def get_marriland_page(pokemon_name, cache):
    """Return the raw HTML of a Pokémon's Marriland Pokédex page, using the cache.

    The URL slug is the lower-cased name with spaces turned into hyphens and
    parentheses removed.

    Args:
        pokemon_name: Display name, optionally including a parenthesised form.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        The page text, or None when the request fails or returns an error
        status (the error is printed).
    """
    url_name = pokemon_name.lower().replace(' ', '-').replace('(', '').replace(')', '')
    cache_key = f"marriland_{url_name}"
    if cache_key in cache:
        return cache[cache_key]

    url = f"https://marriland.com/pokedex/{url_name}/"
    try:
        response = requests.get(url)
        # Turn 4xx/5xx responses into exceptions handled below.
        response.raise_for_status()
        data = response.text
        update_cache(cache_key, data)
    except requests.RequestException as e:
        print(f"Error accessing the page for {pokemon_name}: {e}")
        return None
    return data
def is_event_pokemon(pokemon_name, cache):
    """Tell whether Marriland lists a Pokémon as available only through events.

    Args:
        pokemon_name: Display name used to build the Marriland URL.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        True when the page has no location tables and its special-location
        note says the Pokémon is event-only; False when the page is
        unavailable or does not say so; None when the page lacks a
        ``locations`` section altogether (a message is printed).
    """
    page_data = get_marriland_page(pokemon_name, cache)
    if not page_data:
        return False

    soup = BeautifulSoup(page_data, 'html.parser')

    # The "Where to Find" section must exist for the page to be meaningful.
    if not soup.find('div', id='locations'):
        print(f"Could not find 'Where to Find' section for {pokemon_name}")
        return None

    special_section = soup.find('div', class_='location-special')
    location_tables = soup.find_all('table', class_='location-table')
    event_only = "Only available from events or promotions.".lower()

    if len(location_tables) != 0 or not special_section:
        return False
    return event_only in special_section.get_text(strip=True).lower()
def check_alternative_sources(pokemon, cache):
    """Try fallback sources for an entry whose game or method is still unknown.

    Args:
        pokemon: Entry dict with ``base_name``, ``name`` and ``earliest_game``.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        ``(game, method)``:
        ``("Event", "Event")`` for mythical species or ones Marriland marks as
        event-only, ``(current earliest game, "Legendary")`` for legendary
        species, and ``("Unknown", "Unknown")`` otherwise.
    """
    # Further fallbacks (Bulbapedia locations, generation of first appearance)
    # were sketched in earlier revisions but are not active.
    species_data = get_species_data(pokemon['base_name'], cache)
    if not species_data:
        return "Unknown", "Unknown"

    if species_data.get('is_mythical', False):
        return "Event", "Event"
    if species_data.get('is_legendary', False):
        return pokemon['earliest_game'], "Legendary"
    if is_event_pokemon(pokemon['name'], cache):
        return "Event", "Event"
    return "Unknown", "Unknown"
def handle_unknown_encounters(pokemon_list, cache):
    """Resolve entries whose earliest game or obtain method is still "Unknown".

    Each such entry is passed to check_alternative_sources(); when that yields
    a game other than "Unknown", the entry's game and method are replaced and
    its encounter locations set to ``'N/A'``.

    Args:
        pokemon_list: Fully annotated entries.
        cache: Mapping of cache keys to previously fetched payloads.

    Returns:
        The same list object, with its entries updated in place.
    """
    for pokemon in pokemon_list:
        is_resolved = (
            pokemon['earliest_game'] != "Unknown"
            and pokemon['obtain_method'] != "Unknown"
        )
        if is_resolved:
            continue
        new_game, new_method = check_alternative_sources(pokemon, cache)
        if new_game != "Unknown":
            pokemon.update(
                earliest_game=new_game,
                obtain_method=new_method,
                encounter_locations='N/A',
            )
        print(f"Checked alternative sources for {pokemon['name']} (#{pokemon['number']}): {pokemon['earliest_game']} ({pokemon['obtain_method']})")
    return pokemon_list
# Script entry point: load the cache, run every annotation stage over the input
# list, write the CSV, then flush the cache and close the database.
if __name__ == "__main__":
    get_cached_data()
    pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000)

    # Each stage takes (list, cache) and returns the list it was given.
    stages = (
        determine_earliest_games,
        adjust_for_evolution,
        add_encounter_locations,
        handle_unknown_encounters,
    )
    pokemon_list_final = pokemon_list
    for stage in stages:
        pokemon_list_final = stage(pokemon_list_final, cache)

    save_to_csv(pokemon_list_final)
    save_cached_data()  # Save any remaining new entries
    conn.close()  # Close the database connection
    print(f"Earliest obtainable games and encounter locations determined for {len(pokemon_list)} Pokémon and saved to pokemon_earliest_games.csv")