|
|
|
@ -5,6 +5,7 @@ import json |
|
|
|
import os |
|
|
|
import re |
|
|
|
import sqlite3 |
|
|
|
from bs4 import BeautifulSoup |
|
|
|
|
|
|
|
# Initialize the database connection |
|
|
|
conn = sqlite3.connect('pokemon_cache.db') |
|
|
|
@ -203,6 +204,19 @@ def get_pokemon_data(pokemon_name, form, cache): |
|
|
|
return data |
|
|
|
return None |
|
|
|
|
|
|
|
def get_pokemon_data_bulbapedia(pokemon_name, cache):
    """Return the raw HTML of a Pokémon's Bulbapedia article, using the cache.

    Args:
        pokemon_name: Species name; it is interpolated unchanged into both
            the cache key and the article URL.
        cache: Mapping consulted with ``in`` and indexing for a previously
            fetched copy of the page.

    Returns:
        The page HTML as a string, or None when the request fails or the
        server answers with anything other than HTTP 200.
    """
    cache_key = f"pokemon_{pokemon_name}_bulbapedia"
    if cache_key in cache:
        return cache[cache_key]

    url = f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pokémon)"
    print(f"Fetching Pokémon data for {pokemon_name}: {url}")

    try:
        # Without a timeout a single stalled connection blocks the run forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as e:
        # Same best-effort handling as get_marriland_page: report and move on.
        print(f"Error accessing the page for {pokemon_name}: {e}")
        return None

    if response.status_code != 200:
        return None

    data = response.text
    # Handed to update_cache so later lookups can skip the network
    # (presumably it records the entry in the shared cache - confirm there).
    update_cache(cache_key, data)
    return data
|
|
|
|
|
|
|
def get_pokemon_encounter_data(pokemon_name, form, cache): |
|
|
|
cache_key = f"pokemon_encounter_{pokemon_name}_{form}" if form else f"pokemon_encounter_{pokemon_name}" |
|
|
|
if cache_key in cache: |
|
|
|
@ -371,6 +385,153 @@ def add_encounter_locations(pokemon_list, cache): |
|
|
|
print(f"Added encounter locations for {pokemon['name']} (#{pokemon['number']}) in {pokemon['earliest_game']}") |
|
|
|
return pokemon_list |
|
|
|
|
|
|
|
def get_marriland_page(pokemon_name, cache):
    """Return the raw HTML of a Pokémon's Marriland Pokédex page, using the cache.

    The name is turned into a URL slug by lower-casing it, replacing spaces
    with hyphens and dropping parentheses; the same slug forms the cache key.

    Args:
        pokemon_name: Display name of the Pokémon (may contain spaces and
            parentheses).
        cache: Mapping consulted with ``in`` and indexing for a previously
            fetched copy of the page.

    Returns:
        The page HTML as a string, or None when the request fails or the
        server reports an error status.
    """
    url_name = pokemon_name.lower().replace(' ', '-').replace('(', '').replace(')', '')
    cache_key = f"marriland_{url_name}"
    if cache_key in cache:
        return cache[cache_key]

    url = f"https://marriland.com/pokedex/{url_name}/"

    try:
        # Without a timeout a single stalled connection blocks the run forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.RequestException as e:
        print(f"Error accessing the page for {pokemon_name}: {e}")
        return None

    data = response.text
    # Handed to update_cache so later lookups can skip the network
    # (presumably it records the entry in the shared cache - confirm there).
    update_cache(cache_key, data)
    return data
|
|
|
|
|
|
|
def is_event_pokemon(pokemon_name, cache):
    """Tell whether Marriland lists a Pokémon as obtainable only through events.

    Returns:
        True when the page has no ``location-table`` tables and its
        ``location-special`` block contains the event-only notice;
        False when the page could not be fetched or the notice does not apply;
        None when the page has no ``div#locations`` section at all.
    """
    html = get_marriland_page(pokemon_name, cache)
    if not html:
        return False

    document = BeautifulSoup(html, 'html.parser')

    # The "Where to Find" section must exist before anything can be concluded.
    if not document.find('div', id='locations'):
        print(f"Could not find 'Where to Find' section for {pokemon_name}")
        return None

    notice = "Only available from events or promotions.".lower()
    tables = document.find_all('table', class_='location-table')
    special = document.find('div', class_='location-special')

    # Any regular location table, or a missing special block, rules out
    # an event-only Pokémon.
    if tables or not special:
        return False

    return notice in special.get_text(strip=True).lower()
|
|
|
|
|
|
|
def _direct_child_rows(table):
    """Return the <tr> elements that belong directly to *table*.

    Rows are taken from the table's own <tbody> when it has one and from the
    <table> itself otherwise: 'html.parser' does not add a <tbody> that is
    absent from the markup, so both layouts have to be accepted.
    """
    body = table.find('tbody', recursive=False)
    if body is None:
        body = table
    return body.find_all('tr', recursive=False)


def _first_nested_table(row):
    """Return the table nested in *row*'s first direct <td>, or None."""
    cell = row.find('td', recursive=False)
    if cell is None:
        return None
    return cell.find('table', recursive=False)


def get_locations_from_bulbapedia(pokemon_name, cache):
    """Scrape the "Game locations" table of a Pokémon's Bulbapedia article.

    Args:
        pokemon_name: Species name passed on to get_pokemon_data_bulbapedia.
        cache: Cache object passed on to get_pokemon_data_bulbapedia.

    Returns:
        A dict mapping each game heading (<th> text, stripped) to the text of
        its location cell, in the order the games appear on the page.  The
        dict may be empty.  None is returned when the page is unavailable or
        the ``Game_locations`` heading or the ``roundy`` table after it
        cannot be found.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')

    locations_section = soup.find('span', id='Game_locations')
    if not locations_section:
        return None

    locations_table = locations_section.find_next('table', class_='roundy')
    if not locations_table:
        return None

    game_locations = {}

    # Ok so the table is a bit of a mess. It has some nested tables and stuff.
    # In each row is a nested table with all the games in a generation.
    # Next is another nested table, but I can't tell what for.
    # Within that nested table is another nested table with the games, either
    # the release pair or a single game spanning two columns.
    # Next to that is another nested table with the locations.
    for generation_row in _direct_child_rows(locations_table):
        generation_table = _first_nested_table(generation_row)
        if generation_table is None:
            continue

        for nested_row in _direct_child_rows(generation_table):
            # Skip the "Generation N" heading row of each block.
            if 'Generation' in nested_row.get_text(strip=True):
                continue

            games_container_table = _first_nested_table(nested_row)
            if games_container_table is None:
                continue

            for games_container_row in _direct_child_rows(games_container_table):
                games = games_container_row.find_all('th')
                if not games:
                    continue

                # The location cells are shared by every game heading in the
                # row, so they are looked up once per row.
                locations_container_table = _first_nested_table(games_container_row)
                if locations_container_table is None:
                    continue

                locations_container = locations_container_table.find('tbody', recursive=False)
                if locations_container is None:
                    locations_container = locations_container_table
                locations = locations_container.find_all('td')

                for game in games:
                    game_name = game.get_text(strip=True)
                    for location in locations:
                        location_text = location.get_text()
                        # NOTE(review): with several cells the last one wins;
                        # confirm whether they should be combined instead.
                        game_locations[game_name] = location_text
                        print(f'{game_name}: {location_text}')

    return game_locations
|
|
|
|
|
|
|
|
|
|
|
def check_alternative_sources(pokemon, cache):
    """Look up a fallback game/obtain method for a Pokémon with unknown data.

    The sources are tried in order and the first hit wins: the species'
    mythical flag, its legendary flag, Marriland's event-only notice, and
    finally Bulbapedia's game-locations table.

    Args:
        pokemon: Entry with at least the keys ``base_name``, ``name`` and
            ``earliest_game``.
        cache: Cache object forwarded to every lookup helper.

    Returns:
        A ``(earliest_game, obtain_method)`` tuple;
        ``("Unknown", "Unknown")`` when no source yields an answer or no
        species data is available.
    """
    species_data = get_species_data(pokemon['base_name'], cache)
    if not species_data:
        return "Unknown", "Unknown"

    # Check if it's a mythical Pokémon
    if species_data.get('is_mythical', False):
        return "Event", "Event"

    # Check if it's a legendary Pokémon
    if species_data.get('is_legendary', False):
        return pokemon['earliest_game'], "Legendary"

    if is_event_pokemon(pokemon['name'], cache):
        return "Event", "Event"

    bulb_locations = get_locations_from_bulbapedia(pokemon['base_name'], cache)
    if bulb_locations:
        # The result is a dict keyed by game name, so indexing it with 0
        # raises KeyError; take the first game listed on the page instead.
        return next(iter(bulb_locations)), "Bulbapedia"

    # Check generation introduced
    #generation = species_data.get('generation', {}).get('name', '')
    #if generation:
    #    gen_number = int(generation.split('-')[1])
    #    for game in all_games:
    #        if game != "Unknown" and get_generation(game) == gen_number:
    #            return game, "First appearance"

    return "Unknown", "Unknown"
|
|
|
|
|
|
|
def handle_unknown_encounters(pokemon_list, cache):
    """Fill in entries whose game or obtain method is still "Unknown".

    Each such entry is passed to check_alternative_sources; when that yields
    a game other than "Unknown", the entry's ``earliest_game`` and
    ``obtain_method`` are overwritten and ``encounter_locations`` is set to
    'N/A'.  Entries are modified in place and the same list is returned.
    """
    for entry in pokemon_list:
        # Nothing to do for entries that already have both fields resolved.
        if entry['earliest_game'] != "Unknown" and entry['obtain_method'] != "Unknown":
            continue

        found_game, found_method = check_alternative_sources(entry, cache)
        if found_game != "Unknown":
            entry['earliest_game'], entry['obtain_method'] = found_game, found_method
            entry['encounter_locations'] = 'N/A'

        print(f"Checked alternative sources for {entry['name']} (#{entry['number']}): {entry['earliest_game']} ({entry['obtain_method']})")

    return pokemon_list
|
|
|
|
|
|
|
# Update the main function |
|
|
|
if __name__ == "__main__": |
|
|
|
get_cached_data() |
|
|
|
@ -379,7 +540,8 @@ if __name__ == "__main__": |
|
|
|
pokemon_list_with_games = determine_earliest_games(pokemon_list, cache) |
|
|
|
pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache) |
|
|
|
pokemon_list_with_locations = add_encounter_locations(pokemon_list_adjusted, cache) |
|
|
|
save_to_csv(pokemon_list_with_locations) |
|
|
|
pokemon_list_final = handle_unknown_encounters(pokemon_list_with_locations, cache) |
|
|
|
save_to_csv(pokemon_list_final) |
|
|
|
|
|
|
|
save_cached_data() # Save any remaining new entries |
|
|
|
conn.close() # Close the database connection |
|
|
|
|