Browse Source

- Start to add better handling for Pokémon with unknown games and locations.

main
Quildra 1 year ago
parent
commit
4390359134
  1. 164
      Utilities/DetermineOriginGame.py

164
Utilities/DetermineOriginGame.py

@ -5,6 +5,7 @@ import json
import os
import re
import sqlite3
from bs4 import BeautifulSoup
# Initialize the database connection
conn = sqlite3.connect('pokemon_cache.db')
@ -203,6 +204,19 @@ def get_pokemon_data(pokemon_name, form, cache):
return data
return None
def get_pokemon_data_bulbapedia(pokemon_name, cache):
    """Return the raw HTML of a Pokémon's Bulbapedia article.

    The cache is consulted first; on a miss the page is downloaded and the
    text is handed to ``update_cache`` under the same key.

    Args:
        pokemon_name: Species name as it appears in the Bulbapedia article
            title (the ``_(Pokémon)`` suffix is appended here).
        cache: Container of previously fetched pages, looked up by key.

    Returns:
        The page HTML as a string, or None when the request fails or the
        server answers with anything other than HTTP 200.
    """
    cache_key = f"pokemon_{pokemon_name}_bulbapedia"
    if cache_key in cache:
        return cache[cache_key]

    url = f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pokémon)"
    print(f"Fetching Pokémon data for {pokemon_name}: {url}")
    try:
        response = requests.get(url)
    except requests.RequestException as e:
        # A single unreachable page must not abort the whole run; report it
        # the same way get_marriland_page does and let the caller fall back.
        print(f"Error accessing the page for {pokemon_name}: {e}")
        return None

    if response.status_code != 200:
        return None

    data = response.text
    update_cache(cache_key, data)
    return data
def get_pokemon_encounter_data(pokemon_name, form, cache):
cache_key = f"pokemon_encounter_{pokemon_name}_{form}" if form else f"pokemon_encounter_{pokemon_name}"
if cache_key in cache:
@ -371,6 +385,153 @@ def add_encounter_locations(pokemon_list, cache):
print(f"Added encounter locations for {pokemon['name']} (#{pokemon['number']}) in {pokemon['earliest_game']}")
return pokemon_list
def get_marriland_page(pokemon_name, cache):
    """Return the HTML of a Pokémon's Marriland Pokédex page.

    The cache is checked first. On a miss the page is downloaded, passed to
    ``update_cache`` and returned.

    Args:
        pokemon_name: Display name of the Pokémon; it is converted to the
            lower-case, hyphenated form used in the page URL.
        cache: Container of previously fetched pages, looked up by key.

    Returns:
        The page HTML as a string, or None if the request failed or the
        server returned an error status.
    """
    # Lower-case the name, turn spaces into hyphens and drop parentheses.
    slug_table = str.maketrans({' ': '-', '(': None, ')': None})
    slug = pokemon_name.lower().translate(slug_table)

    cache_key = f"marriland_{slug}"
    if cache_key in cache:
        return cache[cache_key]

    try:
        response = requests.get(f"https://marriland.com/pokedex/{slug}/")
        # Turn 4xx/5xx answers into an exception handled below.
        response.raise_for_status()
        page_html = response.text
        update_cache(cache_key, page_html)
    except requests.RequestException as e:
        print(f"Error accessing the page for {pokemon_name}: {e}")
        return None
    return page_html
def is_event_pokemon(pokemon_name, cache):
    """Tell whether Marriland lists a Pokémon as obtainable only via events.

    Args:
        pokemon_name: Name passed on to ``get_marriland_page``.
        cache: Page cache passed on to ``get_marriland_page``.

    Returns:
        True when the page has no location tables and its special-location
        block carries the "events or promotions" notice; False when the page
        is unavailable or the notice does not apply; None when the page has
        no ``div#locations`` section at all.
    """
    html = get_marriland_page(pokemon_name, cache)
    if not html:
        return False

    soup = BeautifulSoup(html, 'html.parser')

    # The "Where to Find" section must exist before anything else is checked.
    if not soup.find('div', id='locations'):
        print(f"Could not find 'Where to Find' section for {pokemon_name}")
        return None

    # Any regular location table means the Pokémon can be found in-game.
    if soup.find_all('table', class_='location-table'):
        return False

    special = soup.find('div', class_='location-special')
    if not special:
        return False

    event_only = "Only available from events or promotions.".lower()
    return event_only in special.get_text(strip=True).lower()
def _direct_table_rows(table):
    """Return the <tr> elements that are immediate rows of *table*.

    Rows are searched under the table's direct <tbody> when it has one and
    directly under the table otherwise, so markup without an explicit
    <tbody> does not raise AttributeError.
    """
    tbody = table.find('tbody', recursive=False)
    container = table if tbody is None else tbody
    return container.find_all('tr', recursive=False)


def get_locations_from_bulbapedia(pokemon_name, cache):
    """Scrape the "Game locations" table of a Pokémon's Bulbapedia article.

    Args:
        pokemon_name: Species name passed to ``get_pokemon_data_bulbapedia``.
        cache: Page cache passed to ``get_pokemon_data_bulbapedia``.

    Returns:
        A dict mapping each game header text to the location text found next
        to it, filled in the order the rows appear on the page. The dict may
        be empty. None is returned when the page, the ``Game_locations``
        heading, or the following ``roundy`` table cannot be found.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')

    locations_section = soup.find('span', id='Game_locations')
    if not locations_section:
        return None

    locations_table = locations_section.find_next('table', class_='roundy')
    if not locations_table:
        return None

    game_locations = {}

    # The table is a stack of nested tables:
    #   * each outer row holds a nested table for one generation;
    #   * inside it is another nested table (purpose unclear) whose rows,
    #     apart from the "Generation" heading row, each wrap yet another
    #     table;
    #   * that table's rows have the game header cells (a release pair, or a
    #     single game spanning two columns) next to a cell containing a
    #     final nested table with the locations.
    for generation_row in _direct_table_rows(locations_table):
        generation_td = generation_row.find('td', recursive=False)
        if not generation_td:
            continue
        generation_table = generation_td.find('table', recursive=False)
        if not generation_table:
            continue

        for nested_row in _direct_table_rows(generation_table):
            # Skip the heading row that only names the generation.
            if 'Generation' in nested_row.get_text(strip=True):
                continue

            games_container_td = nested_row.find('td', recursive=False)
            if not games_container_td:
                continue
            games_container_table = games_container_td.find('table', recursive=False)
            if not games_container_table:
                continue

            for games_container_row in _direct_table_rows(games_container_table):
                # The location cell is shared by every game header in this
                # row, so resolve it once instead of once per game.
                locations_container_td = games_container_row.find('td', recursive=False)
                if not locations_container_td:
                    continue
                locations_container_table = locations_container_td.find('table', recursive=False)
                if not locations_container_table:
                    continue
                locations_container = locations_container_table.find('tbody', recursive=False)
                if locations_container is None:
                    locations_container = locations_container_table
                locations = locations_container.find_all('td')

                for game in games_container_row.find_all('th'):
                    game_name = game.get_text(strip=True)
                    for location in locations:
                        # NOTE(review): every cell overwrites the previous
                        # one, so only the last cell's text is kept per game
                        # - confirm that this is intended.
                        game_locations[game_name] = location.get_text()
                        print(f'{game.get_text(strip=True)}: {location.get_text()}')

    return game_locations
def check_alternative_sources(pokemon, cache):
    """Consult fallback sources for a Pokémon with an "Unknown" origin.

    Sources are tried in order: the species data flags (mythical, then
    legendary), the Marriland event-only notice, and finally the Bulbapedia
    game-locations table.

    Args:
        pokemon: Dict describing the Pokémon; reads ``base_name``, ``name``
            and ``earliest_game``.
        cache: Page/data cache passed through to the lookup helpers.

    Returns:
        A ``(game, obtain_method)`` tuple; ``("Unknown", "Unknown")`` when
        none of the sources yields an answer.
    """
    species_data = get_species_data(pokemon['base_name'], cache)
    if species_data:
        # Mythical Pokémon are treated as event distributions.
        if species_data.get('is_mythical', False):
            return "Event", "Event"

        # Legendary Pokémon keep whatever game was already determined.
        if species_data.get('is_legendary', False):
            return pokemon['earliest_game'], "Legendary"

    # The remaining sources do not need the species data, so they are tried
    # even when that lookup returned nothing.
    if is_event_pokemon(pokemon['name'], cache):
        return "Event", "Event"

    bulb_locations = get_locations_from_bulbapedia(pokemon['base_name'], cache)
    if bulb_locations:
        # The result is a dict keyed by game name (not a sequence), filled in
        # page order, so the first key is the first game listed. Indexing it
        # with [0] raised KeyError.
        first_game = next(iter(bulb_locations))
        return first_game, "Bulbapedia"

    # TODO: fall back to the first game of the generation that introduced
    # the species (species_data['generation']) once a game list is available.
    return "Unknown", "Unknown"
def handle_unknown_encounters(pokemon_list, cache):
    """Fill in entries whose game or obtain method is still "Unknown".

    Each such entry is passed to ``check_alternative_sources``; when that
    yields a game other than "Unknown" the entry is updated in place and its
    encounter locations are set to 'N/A'.

    Args:
        pokemon_list: List of Pokémon dicts; modified in place.
        cache: Cache passed through to ``check_alternative_sources``.

    Returns:
        The same ``pokemon_list`` object.
    """
    for pokemon in pokemon_list:
        # Entries with both fields resolved need no further lookup.
        if pokemon['earliest_game'] != "Unknown" and pokemon['obtain_method'] != "Unknown":
            continue

        resolved_game, resolved_method = check_alternative_sources(pokemon, cache)
        if resolved_game != "Unknown":
            pokemon.update(
                earliest_game=resolved_game,
                obtain_method=resolved_method,
                encounter_locations='N/A',
            )
        print(f"Checked alternative sources for {pokemon['name']} (#{pokemon['number']}): {pokemon['earliest_game']} ({pokemon['obtain_method']})")

    return pokemon_list
# Update the main function
if __name__ == "__main__":
get_cached_data()
@ -379,7 +540,8 @@ if __name__ == "__main__":
pokemon_list_with_games = determine_earliest_games(pokemon_list, cache)
pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache)
pokemon_list_with_locations = add_encounter_locations(pokemon_list_adjusted, cache)
save_to_csv(pokemon_list_with_locations)
pokemon_list_final = handle_unknown_encounters(pokemon_list_with_locations, cache)
save_to_csv(pokemon_list_final)
save_cached_data() # Save any remaining new entries
conn.close() # Close the database connection

Loading…
Cancel
Save