You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
830 lines
31 KiB
830 lines
31 KiB
|
1 year ago
|
from __future__ import annotations
|
||
|
1 year ago
|
import csv
|
||
|
|
import requests
|
||
|
|
import time
|
||
|
|
import json
|
||
|
|
import os
|
||
|
|
import re
|
||
|
|
import sqlite3
|
||
|
1 year ago
|
from bs4 import BeautifulSoup, Tag, NavigableString
|
||
|
|
import copy
|
||
|
1 year ago
|
from typing import List, Optional
|
||
|
1 year ago
|
from fuzzywuzzy import fuzz
|
||
|
|
from fuzzywuzzy import process
|
||
|
|
from collections import defaultdict
|
||
|
1 year ago
|
|
||
|
1 year ago
|
from DataGatherers.cache_manager import CacheManager
|
||
|
1 year ago
|
|
||
|
|
# Every main-series Pokémon game (plus DLC and a few pseudo-sources), listed
# chronologically with the "special" releases first inside each generation.
# Code that scans this list front-to-back relies on that ordering to pick the
# earliest game.
all_games = [
    "Yellow",
    "Red",
    "Blue",
    "Crystal",
    "Gold",
    "Silver",
    "Emerald",
    "FireRed",
    "LeafGreen",
    "Ruby",
    "Sapphire",
    "Platinum",
    "HeartGold",
    "SoulSilver",
    "Diamond",
    "Pearl",
    "Black 2",
    "White 2",
    "Black",
    "White",
    "X",
    "Y",
    "Omega Ruby",
    "Alpha Sapphire",
    "Ultra Sun",
    "Ultra Moon",
    "Sun",
    "Moon",
    "Sword",
    "Shield",
    "Expansion Pass",
    "Brilliant Diamond",
    "Shining Pearl",
    "Legends: Arceus",
    "Scarlet",
    "Violet",
    "The Teal Mask",
    "The Hidden Treasure of Area Zero",
    "The Hidden Treasure of Area Zero (Scarlet)",
    "The Hidden Treasure of Area Zero (Violet)",
    "The Teal Mask (Scarlet)",
    "The Teal Mask (Violet)",
    "Unknown",
    "Pokémon Home",
    "Pokémon Go",
]

# All Pokemon objects created by read_pokemon_list(), in the order they were read.
big_pokemon_list = []

# Lower-cased Pokémon name -> list of Pokemon objects sharing that name
# (one entry per form); rebuilt by create_pokemon_index().
pokemon_index = {}
|
||
|
1 year ago
|
|
||
|
|
def create_pokemon_index(pokemon_list):
    """Rebuild the module-level ``pokemon_index`` from *pokemon_list*.

    Each entry must expose a ``name`` attribute. Entries are grouped under
    their lower-cased name, preserving the order in which they appear, so all
    forms of one species end up in the same bucket.
    """
    global pokemon_index

    grouped = defaultdict(list)
    for entry in pokemon_list:
        key = entry.name.lower()
        grouped[key].append(entry)

    pokemon_index = grouped
|
||
|
1 year ago
|
|
||
|
|
def _best_form_match(candidates, form, threshold):
    """Return the candidate whose form best fuzzy-matches *form*, or None.

    Candidates without a form are ignored. The best candidate is returned only
    when its ``fuzz.ratio`` score reaches *threshold*.
    """
    wanted = form.lower()
    best_match = None
    best_score = 0
    for pokemon in candidates:
        if not pokemon.form:
            continue
        score = fuzz.ratio(wanted, pokemon.form.lower())
        if score > best_score:
            best_score = score
            best_match = pokemon

    if best_match is not None and best_score >= threshold:
        return best_match
    return None


def find_pokemon(name, form=None, threshold=80):
    """Look up a Pokemon in ``pokemon_index`` by name and optional form.

    Args:
        name: Pokémon name; matched case-insensitively. ``None`` or an empty
            string yields ``None`` instead of raising.
        form: Optional form name. When omitted, the first entry stored under
            the matched name is returned.
        threshold: Minimum ``fuzz.ratio`` score (0-100) for a fuzzy name or
            form match to be accepted.

    Returns:
        The matching Pokemon object, or ``None`` when nothing scores high
        enough.
    """
    global pokemon_index

    # Callers may pass the result of an HTML lookup that found nothing.
    if not name:
        return None

    name = name.lower()
    if name in pokemon_index:
        candidates = pokemon_index[name]
        if not form:
            return candidates[0] if candidates else None

        match = _best_form_match(candidates, form, threshold)
        if match is not None:
            return match

    # If no exact name match (or no acceptable form under the exact name),
    # try fuzzy matching on names.
    best_name_match = None
    best_name_score = 0
    for pokemon_name in pokemon_index:
        score = fuzz.ratio(name, pokemon_name)
        if score > best_name_score:
            best_name_score = score
            best_name_match = pokemon_name

    if best_name_match and best_name_score >= threshold:
        candidates = pokemon_index[best_name_match]
        if not form:
            return candidates[0]
        return _best_form_match(candidates, form, threshold)

    return None
|
||
|
|
|
||
|
1 year ago
|
def roman_to_int(s):
    """Convert a Roman numeral string to an integer.

    The numeral is read right-to-left: a symbol smaller than the one to its
    right is subtracted (the ``I`` in ``IV``), otherwise it is added.

    Args:
        s: Roman numeral such as ``"IV"`` or ``"xii"``. Letter case and
            surrounding whitespace are ignored.

    Returns:
        The integer value; ``0`` for an empty string.

    Raises:
        KeyError: If *s* contains a character that is not a Roman numeral.
    """
    roman_values = {
        'I': 1,
        'V': 5,
        'X': 10,
        'L': 50,
        'C': 100,
        'D': 500,
        'M': 1000,
    }

    total = 0
    prev_value = 0
    # Normalise so that "iv" and " IV " are accepted like "IV".
    for char in reversed(s.strip().upper()):
        current_value = roman_values[char]
        if current_value >= prev_value:
            total += current_value
        else:
            total -= current_value
        prev_value = current_value

    return total
|
||
|
|
|
||
|
1 year ago
|
class Pokemon:
    """One Pokémon (optionally a specific form) plus what is known about obtaining it.

    Attributes are filled in progressively: encounter data is attached first,
    ``determine_earliest_game`` then picks the earliest game, and the
    evolution chain is attached afterwards.
    """

    def __init__(self, name: str, number: int, form: Optional[str] = None):
        self.name = name
        self.number = number
        self.form = form
        self.stage: Optional[str] = None
        self.evolution_chain: Optional[List['EvolutionStage']] = []
        self.is_baby = False
        self.encounter_information: Optional[List['EncounterInformation']] = []
        self.earliest_game: Optional['EncounterInformation'] = None
        self.obtain_method: Optional[str] = None
        self.introduced_in_gen = None

    def get_earliest_game_and_method(self):
        """Return ``(game, method)`` for obtaining this Pokémon.

        Only the first stage of the evolution chain is consulted. A baby is
        reported as ``"Breed"`` from that stage's earliest game; any other
        Pokémon that is not itself the first stage is reported as
        ``"Evolve"``. When the first stage is this Pokémon, or its data is
        missing, this Pokémon's own earliest game is used. Returns
        ``(None, None)`` when nothing is known.
        """
        if self.evolution_chain:
            reference = self.evolution_chain[0].pokemon_reference
            is_own_entry = not self.is_baby and reference is self
            # Guard against stages that could not be resolved to a Pokemon or
            # whose earliest game was never determined.
            if not is_own_entry and reference is not None and reference.earliest_game is not None:
                method = "Breed" if self.is_baby else "Evolve"
                return reference.earliest_game.game, method

        if self.earliest_game:
            return self.earliest_game.game, self.earliest_game.method
        return None, None

    def __str__(self):
        return f"{self.name}{' ' if self.form else ''}{self.form if self.form else ''} (#{self.number})"

    def add_evolution_chain(self, evolution_chain: List['EvolutionStage']):
        """Attach the parsed evolution chain."""
        self.evolution_chain = evolution_chain

    def add_stage(self, stage: str):
        """Record the evolution stage label and derive ``is_baby`` from it."""
        self.stage = stage
        self.is_baby = self.stage is not None and 'Baby' in self.stage

    def update_encounter_information(self, exclude_events=True, exclude_home=True, exclude_go=True):
        """Classify every encounter's ``method`` from its location strings.

        Locations mentioning a non-catchable source (trades, transfers, and
        optionally events / Pokémon HOME / Pokémon GO) are ignored. An
        encounter whose locations are all ignored ends with ``method = None``;
        otherwise the last non-ignored location decides the method.
        """
        if not self.encounter_information:
            return

        non_catchable_methods = ["trade", "global link", "poké transfer", "time capsule", "unobtainable"]
        if exclude_events:
            non_catchable_methods.append("event")
        if exclude_home:
            # Match the accented spelling too; the unaccented marker alone
            # never matches text written as "Pokémon HOME".
            non_catchable_methods.extend(["pokémon home", "pokemon home"])
        if exclude_go:
            non_catchable_methods.append("pokémon go")

        for encounter in self.encounter_information:
            encounter.method = None
            for location in encounter.locations:
                lowered = location.lower()
                if any(marker in lowered for marker in non_catchable_methods):
                    continue

                if "first partner" in lowered:
                    encounter.method = "Starter"
                elif "received" in lowered:
                    encounter.method = "Gift"
                elif "evolve" in lowered:
                    encounter.method = "Evolve"
                elif "event" in lowered:
                    encounter.method = "Event"
                else:
                    encounter.method = "Catchable"

    def parse_encoutners_for_games(self):
        """Set ``earliest_game`` to the first game in ``all_games`` with a usable encounter.

        Leaves ``earliest_game`` untouched when no encounter has a method.
        """
        game_methods = {}
        for encounter in self.encounter_information:
            if encounter.method:
                game_methods[encounter.game.lower()] = encounter

        for game in all_games:
            found = game_methods.get(game.lower())
            if found is not None:
                self.earliest_game = found
                return

    def determine_earliest_game(self):
        """Determine ``earliest_game``, relaxing the exclusion rules step by step.

        The strict pass excludes events, HOME and GO; the following passes
        re-admit events, then HOME, then GO (one at a time) until a game is
        found. ``earliest_game`` is ``None`` when every pass fails.
        """
        if not self.encounter_information:
            self.earliest_game = None
            return

        relaxation_steps = (
            {},
            {"exclude_events": False},
            {"exclude_home": False},
            {"exclude_go": False},
        )
        for overrides in relaxation_steps:
            self.update_encounter_information(**overrides)
            self.parse_encoutners_for_games()
            if self.earliest_game is not None:
                return

        self.earliest_game = None
|
||
|
|
|
||
|
1 year ago
|
class EvolutionStage:
    """A node in a parsed evolution chain.

    Nodes are linked forwards (``next_stage``), backwards (``previous_stage``)
    and sideways (``branches``). ``pokemon_reference`` points at the matching
    Pokemon from the index, looked up first with the form and, if that finds
    nothing, again without it.
    """

    def __init__(self, pokemon: str, method: Optional[str] = None, stage: Optional[str] = None, form: Optional[str] = None):
        # Identity of this stage.
        self.pokemon = pokemon
        self.form = form
        self.method = method
        self.stage = stage
        self.is_baby = stage is not None and 'Baby' in stage

        # Links to neighbouring stages; filled in by the chain parsers.
        self.next_stage: Optional[EvolutionStage] = None
        self.previous_stage: Optional[EvolutionStage] = None
        self.branches: List[EvolutionStage] = []

        # Resolve to an indexed Pokemon, retrying without the form on a miss.
        reference = find_pokemon(pokemon, form)
        if reference is None:
            reference = find_pokemon(pokemon, None)
        self.pokemon_reference = reference

    def __str__(self):
        form_text = self.form if self.form else ''
        method_text = self.method if self.method else 'Base'
        return f"{self.pokemon} {form_text} ({method_text})"
|
||
|
|
|
||
|
1 year ago
|
class EncounterInformation:
    """Where a Pokémon can be found in one game.

    ``method`` starts as ``"Unknown"`` and is meant to be overwritten once the
    location strings have been classified.
    """

    def __init__(self, game: str, locations: List[str]):
        self.game, self.locations = game, locations
        self.method = "Unknown"
|
||
|
|
|
||
|
1 year ago
|
def parse_evolution_chain(table: Tag, form: Optional[str] = None) -> List[EvolutionStage]:
    """Parse an evolution table into a list of linked EvolutionStage objects.

    The first row of the table is treated as the main chain and every later
    row as a branch. Within a row, a cell that contains a nested table
    describes a Pokémon; any other cell holds the evolution method for the
    next Pokémon in that row.

    Args:
        table: The evolution ``<table>`` element.
        form: Accepted for interface compatibility; not used by the parser.

    Returns:
        The main-chain stages in order (branches hang off their ``branches``
        lists). Empty when the table has no ``<tbody>`` or no rows.
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if not rows:
        # An empty tbody used to raise IndexError on rows[0].
        return []
    main_row, branch_rows = rows[0], rows[1:]

    # Parse main evolution chain
    main_chain = []
    current_stage = None
    pending_method = None
    for td in main_row.find_all('td', recursive=False):
        if not td.find('table'):
            # This TD contains evolution method for the next Pokemon
            pending_method = extract_evolution_method(td)
            continue

        # This TD contains Pokemon information
        pokemon_name = extract_pokemon_name(td)
        stage = extract_stage_form(td)
        evolution_form = extract_evolution_form(td, pokemon_name)
        new_stage = EvolutionStage(pokemon_name, pending_method, stage, evolution_form)
        pending_method = None
        if current_stage is not None:
            current_stage.next_stage = new_stage
            new_stage.previous_stage = current_stage  # back link
        current_stage = new_stage
        main_chain.append(current_stage)

    # Parse branching evolutions
    for row in branch_rows:
        branch_stage = None
        branch_method = None
        for td in row.find_all('td', recursive=False):
            if not td.find('table'):
                branch_method = extract_evolution_method(td)
                continue

            pokemon_name = extract_pokemon_name(td)
            stage = extract_stage_form(td)
            evolution_form = extract_evolution_form(td, pokemon_name)
            new_stage = EvolutionStage(pokemon_name, branch_method, stage, evolution_form)
            branch_method = None
            if branch_stage is not None:
                branch_stage.next_stage = new_stage
                new_stage.previous_stage = branch_stage  # back link
            branch_stage = new_stage

            # Find which main chain Pokemon this branches from: a rowspan
            # cell naming a main-chain Pokémon is attached to that stage.
            if td.get('rowspan'):
                for main_stage in main_chain:
                    if main_stage.pokemon == pokemon_name:
                        main_stage.branches.append(branch_stage)
                        branch_stage.previous_stage = main_stage  # back link to the main chain
                        break

    return main_chain
|
||
|
|
|
||
|
1 year ago
|
def find_name_tag(td: Tag) -> Optional[Tag]:
    """Locate the ``<a>`` element that carries the Pokémon name inside *td*.

    Within the cell's first nested table, a link with class ``selflink`` is
    preferred; otherwise the first link that has a ``title`` and whose class
    is not ``image`` is used. Returns ``None`` when neither exists.
    """
    nested = td.find('table')
    return (
        nested.find('a', class_='selflink')
        or nested.find('a', title=True, class_=lambda x: x != 'image')
    )
|
||
|
|
|
||
|
|
def extract_pokemon_name(td: Tag) -> Optional[str]:
    """Return the stripped text of the cell's name link, or ``None`` if there is no such link."""
    link = find_name_tag(td)
    if not link:
        return None
    return link.get_text(strip=True)
|
||
|
1 year ago
|
|
||
|
|
def extract_evolution_method(td: Tag) -> str:
    """Return the cell's whitespace-stripped text, used as the evolution method."""
    method_text = td.get_text(strip=True)
    return method_text
|
||
|
|
|
||
|
|
def extract_stage_form(td: Tag) -> Optional[str]:
    """Return the text of the first ``<small>`` in the cell's nested table, or ``None``."""
    nested = td.find('table')
    small = nested.find('small')
    return small.get_text(strip=True) if small else None
|
||
|
|
|
||
|
1 year ago
|
def extract_evolution_form(td: Tag, name: str) -> Optional[str]:
    """Return the form label shown next to the Pokémon name, if any.

    The name link's parent element is inspected: when it holds more than one
    ``<small>`` tag, the first one's text is returned. In every other case the
    result is ``None``. *name* is accepted but not used.
    """
    link = find_name_tag(td)
    if not link:
        return None

    smalls = link.parent.find_all('small')
    if len(smalls) <= 1:
        return None
    return smalls[0].get_text(strip=True)
|
||
|
1 year ago
|
|
||
|
1 year ago
|
def read_pokemon_list(filename, limit=50):
    """Read up to *limit* rows from a CSV file and register a Pokemon for each.

    Each row's ``name`` column is split into a base name and an optional
    trailing parenthesised form. A Pokemon built from the base name, the row's
    ``number`` column (as read, not converted) and the form is appended to the
    module-level ``big_pokemon_list``.

    Returns:
        ``big_pokemon_list`` itself (the shared module-level list).
    """
    name_pattern = r'(.*?)\s*(\(.*\))?$'

    with open(filename, 'r', newline='', encoding='utf-8') as csvfile:
        for index, row in enumerate(csv.DictReader(csvfile)):
            if index >= limit:
                break

            # Split the name into base name and form
            parsed = re.match(name_pattern, row['name'])
            if parsed:
                base_name, form = parsed.groups()
            else:
                base_name, form = row['name'], None

            cleaned_form = form.strip('() ') if form else None
            big_pokemon_list.append(Pokemon(base_name.strip(), row['number'], cleaned_form))

    return big_pokemon_list
|
||
|
1 year ago
|
|
||
|
1 year ago
|
def get_pokemon_data_bulbapedia(pokemon_name, cache: CacheManager):
    """Fetch the Bulbapedia species page for *pokemon_name* through *cache*.

    Returns whatever ``cache.fetch_url`` returns for the page URL.
    """
    page_url = f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pokémon)"
    page = cache.fetch_url(page_url)
    return page
|
||
|
1 year ago
|
|
||
|
|
def split_td_contents(td):
    """Split a cell's children into groups separated by ``<br>`` tags.

    Each non-empty run of children between ``<br>`` tags is copied into a
    fresh ``<div>``; the original nodes are left in place. Returns the list of
    those ``<div>`` elements in document order.
    """
    groups = []
    pending = []

    def flush():
        # Turn the pending run (if any) into a <div> holding copies of its nodes.
        if not pending:
            return
        wrapper = BeautifulSoup('', 'html.parser').new_tag('div')
        for node in pending:
            wrapper.append(copy.copy(node))
        groups.append(wrapper)
        pending.clear()

    for node in td.contents:
        if isinstance(node, Tag) and node.name == 'br':
            flush()
        else:
            pending.append(node)

    flush()
    return groups
|
||
|
1 year ago
|
|
||
|
1 year ago
|
def parse_form_information(html_content):
    """Extract ``(main_form, breed)`` from the first ``<small>`` tag in *html_content*.

    The tag's text has leading/trailing parentheses stripped and is then split
    on ``(``: the first piece is the main form, the second (if present, with
    trailing ``)`` removed) is the breed. Returns ``(None, None)`` when the
    markup has no ``<small>`` tag.
    """
    small = BeautifulSoup(html_content, 'html.parser').find('small')
    if not small:
        return None, None

    # Remove parentheses
    text = small.get_text(strip=True).strip('()')

    # Split the text into main form and breed (if present)
    pieces = text.split('(')
    main_form = pieces[0].strip()
    if len(pieces) > 1:
        breed = pieces[1].strip(')')
    else:
        breed = None

    return main_form, breed
|
||
|
1 year ago
|
|
||
|
1 year ago
|
def get_evolution_data_from_bulbapedia(pokemon_name, form, cache: CacheManager, gender: Optional[str] = None):
    """Fetch and parse the evolution chain for a Pokémon from its Bulbapedia page.

    Without a form, the first table after the "Evolution data" heading is
    used. With a form, the headings following that section are scanned for an
    ``h4`` whose text contains the form name (with "Form"/"form" removed); the
    scan stops at the next ``h3``. Eevee's table is handed to a dedicated
    parser. *gender* is accepted but not used.

    Returns:
        The parsed chain, or ``None`` when the page, the section or the table
        cannot be found.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')
    evolution_section = soup.find('span', id='Evolution_data')
    if not evolution_section:
        return None

    heading = evolution_section.parent
    evolution_table = None
    # NOTE(review): the original nesting of this if/else was reconstructed
    # from unindented text; confirm a form with no matching h4 should yield None.
    if not form:
        evolution_table = heading.find_next('table')
    else:
        form_without_form = form.replace('Form', '').replace('form', '').strip()
        for tag in heading.find_next_siblings():
            if tag.name == 'h4' and form_without_form in tag.get_text(strip=True):
                evolution_table = tag.find_next('table')
                break
            if tag.name == 'h3':
                break

    if not evolution_table:
        return None

    if pokemon_name == "Eevee":
        return parse_eevee_evolution_chain(evolution_table)
    return parse_evolution_chain(evolution_table, form)
|
||
|
|
|
||
|
|
# Layout of the evolution table (it is a little odd):
#   - The first TR contains a full evolution chain.
#   - Other TRs contain branching evolution chains.
#   - Any TDs in the first TR with a rowspan are part of the main evolution chain.
#   - Any other TDs are part of the branching evolution chains.
#   - A table in a TD is information about the current Pokémon in that evolution stage.
#   - A TD without a table is information on how to trigger the next evolution.
|
||
|
|
|
||
|
1 year ago
|
def parse_pokemon_subtable(td):
    """Return ``(pokemon_name, stage)`` for a cell that describes a Pokémon.

    A cell without a nested table carries no Pokémon information and yields
    ``(None, None)``.
    """
    if not td.find('table'):
        return None, None

    # This TD contains Pokemon information
    return extract_pokemon_name(td), extract_stage_form(td)
|
||
|
|
|
||
|
|
def parse_eevee_evolution_chain(table):
    """Parse Eevee's evolution table, which fans out instead of forming a chain.

    The table is read positionally: row 1 holds Eevee, row 2 the evolution
    methods and row 3 the evolved forms, with method and form paired by
    column position.

    Returns:
        A one-element list with Eevee's stage, whose ``branches`` hold the
        evolved stages. Empty when the table lacks a ``<tbody>``, has fewer
        than four rows, or Eevee's own cell cannot be parsed.
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if len(rows) < 4:
        # Indexing rows[1..3] below would raise on an unexpected layout.
        return []
    eevee_row, method_row, eeveelutions_row = rows[1], rows[2], rows[3]

    eevee_td = eevee_row.find('td', recursive=False)
    if eevee_td is None:
        return []
    pokemon_name, stage = parse_pokemon_subtable(eevee_td)
    if not pokemon_name:
        return []
    eevee_stage = EvolutionStage(pokemon_name, None, stage, None)

    methods = [
        extract_evolution_method(method_td)
        for method_td in method_row.find_all('td', recursive=False)
    ]

    eeveelutions = []
    for index, eeveelution_td in enumerate(eeveelutions_row.find_all('td', recursive=False)):
        pokemon_name, stage = parse_pokemon_subtable(eeveelution_td)
        if not pokemon_name:
            # Spacer cell: skip it but keep the column index aligned.
            continue
        # Tolerate a methods row that is shorter than the forms row.
        method = methods[index] if index < len(methods) else None
        eeveelution_stage = EvolutionStage(pokemon_name, method, stage, None)
        eeveelution_stage.previous_stage = eevee_stage  # back link to Eevee
        eeveelutions.append(eeveelution_stage)

    eevee_stage.branches = eeveelutions  # a flat list, not a nested one

    return [eevee_stage]
|
||
|
1 year ago
|
|
||
|
1 year ago
|
def get_intro_generation(pokemon_name, form, cache: CacheManager):
    """Return the first generation number listed in a Pokémon's game-locations table.

    The "Game locations" table on the Bulbapedia page is scanned row by row,
    and the first ``Generation <roman numeral>`` label found is converted to
    an integer. *form* is accepted but not used.

    Returns:
        The generation as an ``int``, or ``None`` when the page, the section,
        the table or a generation label is missing.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')
    locations_section = soup.find('span', id='Game_locations')
    if not locations_section:
        return None

    locations_table = locations_section.find_next('table', class_='roundy')
    if not locations_table:
        return None

    generation_tbody = locations_table.find('tbody', recursive=False)
    if not generation_tbody:
        return None

    # Built once instead of on every row.
    generation_pattern = re.compile(r"Generation\s+([IVXLCDM]+)")

    for generation_row in generation_tbody.find_all('tr', recursive=False):
        nested_td = generation_row.find('td', recursive=False)
        if not nested_td:
            continue
        nested_table = nested_td.find('table', recursive=False)
        if not nested_table:
            continue
        nested_tbody = nested_table.find('tbody', recursive=False)
        if not nested_tbody:
            continue

        for nested_row in nested_tbody.find_all('tr', recursive=False):
            match = generation_pattern.search(nested_row.get_text(strip=True))
            if match:
                # group(1) is just the Roman numeral.
                return roman_to_int(match.group(1))

    return None
|
||
|
|
|
||
|
1 year ago
|
def _direct_rows(table):
    """Return the ``<tr>`` elements directly under *table*'s own ``<tbody>`` (empty if none)."""
    tbody = table.find('tbody', recursive=False)
    return tbody.find_all('tr', recursive=False) if tbody else []


def _split_location_text(text, strip_hash=False):
    """Split a location blurb on commas and on the standalone word "and".

    Only whole-word "and" is treated as a separator, so names that merely
    contain those letters are left intact.
    """
    if strip_hash:
        text = text.replace('#', '')
    text = re.sub(r'\band\b', ',', text)
    return [piece.strip() for piece in text.split(',')]


def _collect_raw_game_locations(locations_table):
    """Walk the nested game-locations table and return ``{game: [location <div>, ...]}``.

    The table nests several levels deep: each outer row wraps a table for one
    generation; its rows (other than the "Generation" header row) wrap a
    table whose rows pair game names (``<th>``) with a table of location
    cells. Only game names present in ``all_games`` are kept.
    """
    raw_game_locations = {}
    for generation_row in _direct_rows(locations_table):
        generation_td = generation_row.find('td', recursive=False)
        if not generation_td:
            continue
        generation_table = generation_td.find('table', recursive=False)
        if not generation_table:
            continue

        for nested_row in _direct_rows(generation_table):
            if 'Generation' in nested_row.get_text(strip=True):
                continue
            games_container_td = nested_row.find('td', recursive=False)
            if not games_container_td:
                continue
            games_container_table = games_container_td.find('table', recursive=False)
            if not games_container_table:
                continue

            for games_container_row in _direct_rows(games_container_table):
                for game in games_container_row.find_all('th'):
                    raw_game = game.get_text(strip=True)
                    if raw_game not in all_games:
                        continue
                    locations_container_td = games_container_row.find('td', recursive=False)
                    if not locations_container_td:
                        continue
                    locations_container_table = locations_container_td.find('table', recursive=False)
                    if not locations_container_table:
                        continue
                    locations_container_tbody = locations_container_table.find('tbody', recursive=False)
                    if not locations_container_tbody:
                        continue
                    for location in locations_container_tbody.find_all('td'):
                        for group in split_td_contents(location):
                            raw_game_locations.setdefault(raw_game, []).append(group)
    return raw_game_locations


def _collect_event_tables(soup):
    """Return ``{variant heading: table}`` for the tables in the "In events" section.

    Siblings after the section heading are scanned until an ``h4`` or any
    element whose text mentions "held"; each ``h5`` sets the current variant
    name (initially the empty string) and each table is stored under it.
    """
    event_tables = {}
    events_section = soup.find('span', id='In_events')
    if not events_section:
        return event_tables

    variant = ""
    for sibling in events_section.parent.find_next_siblings():
        if sibling.name == 'h4' or "held" in sibling.get_text(strip=True).lower():
            break
        if sibling.name == 'h5':
            variant = sibling.get_text(strip=True)
        if sibling.name == 'table':
            event_tables[variant] = sibling
    return event_tables


def _location_matches_form(raw_location, form):
    """Tell whether a location entry's ``<small>`` form label fuzzy-matches *form*."""
    main_form, sub_form = parse_form_information(str(raw_location))
    if not main_form:
        return False
    if main_form == "All Forms":
        main_form = form

    wanted = form.lower()
    if fuzz.partial_ratio(wanted, main_form.lower()) >= 80:
        return True
    return bool(sub_form) and fuzz.partial_ratio(wanted, sub_form.lower()) >= 80


def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager):
    """Collect, per game, the location strings where a Pokémon (form) can be obtained.

    Args:
        pokemon_name: Species name used to build the Bulbapedia URL.
        form: Form name, or ``None``. With a form, only location entries
            whose form label fuzzy-matches it are kept.
        cache: Cache used to fetch the page.

    Returns:
        ``{game: [location, ...]}``, with the literal location ``"Event"``
        added for games named in a matching event table; ``None`` when the
        page, the "Game locations" section or its table is missing.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')
    locations_section = soup.find('span', id='Game_locations')
    if not locations_section:
        return None

    locations_table = locations_section.find_next('table', class_='roundy')
    if not locations_table:
        return None

    raw_game_locations = _collect_raw_game_locations(locations_table)
    event_tables = _collect_event_tables(soup)

    game_locations = {}
    for raw_game, raw_locations in raw_game_locations.items():
        for raw_location in raw_locations:
            if form is None:
                pieces = _split_location_text(raw_location.get_text())
            elif _location_matches_form(raw_location, form):
                pieces = _split_location_text(raw_location.get_text(), strip_hash=True)
            else:
                continue
            game_locations.setdefault(raw_game, []).extend(pieces)

    # Event distributions: credit every game named in a matching event table.
    for variant, event_table in event_tables.items():
        matches_variant = (variant == pokemon_name and form is None) or (form and form in variant)
        if not matches_variant:
            continue
        for game_row in event_table.find_all('tr'):
            entries = game_row.find_all('td')
            if len(entries) <= 1:
                continue
            link = entries[0].find('a')
            games_string = link.get('title') if link else None
            if not games_string:
                # No link or no title on this row: nothing to match against.
                continue
            for game in all_games:
                if game in games_string:
                    game_locations.setdefault(game, []).append("Event")

    return game_locations
|
||
|
1 year ago
|
|
||
|
1 year ago
|
def handle_unown(pokemon, encounter_data):
    """Give an Unown form its encounter information; no-op for any other species.

    For the ``!`` and ``?`` forms with encounter data, the form's own
    encounters are appended, followed by the form-less Unown's encounters for
    every game listed in ``all_games`` after both HeartGold and SoulSilver.
    Every other Unown shares the form-less Unown's encounter list object.
    """
    if pokemon.name != "Unown":
        return

    base_unown = find_pokemon(pokemon.name, None)
    if not base_unown:
        return

    # The ! and ? forms were added in HeartGold and SoulSilver.
    is_late_form = pokemon.form in ("!", "?")
    if not (is_late_form and encounter_data):
        pokemon.encounter_information = base_unown.encounter_information
        return

    # NOTE(review): the caller also appends these entries before calling this
    # function, so they end up in the list twice - confirm that is intended.
    for game_name, locations in encounter_data.items():
        pokemon.encounter_information.append(EncounterInformation(game_name, locations))

    seen_markers = set()
    for game in all_games:
        if game in ("HeartGold", "SoulSilver"):
            seen_markers.add(game)
            continue
        if len(seen_markers) < 2:
            continue
        inherited = next(
            (entry for entry in base_unown.encounter_information if entry.game == game),
            None,
        )
        if inherited is not None:
            pokemon.encounter_information.append(inherited)
|
||
|
|
|
||
|
1 year ago
|
# Species whose non-default forms reuse the form-less entry's encounter
# information (see handle_form_shift).
list_of_shifting_form_pokemon = [
    "Deoxys", "Burmy", "Wormadam", "Rotom", "Shaymin", "Keldeo",
    "Furfrou", "Hoopa", "Pumpkaboo", "Gourgeist", "Zygarde", "Magearna",
    "Vivillon", "Minior", "Urshifu", "Oinkologne", "Basculegion", "Enamorus",
]
|
||
|
|
|
||
|
|
def handle_form_shift(pokemon, encounter_data):
    """Point a shifting-form Pokémon at its form-less entry's encounter list.

    Applies only to species in ``list_of_shifting_form_pokemon`` that have a
    form set and for which a form-less entry exists. *encounter_data* is
    accepted but not used.
    """
    is_shifting = pokemon.name in list_of_shifting_form_pokemon
    if not is_shifting or pokemon.form is None:
        return

    normal_form_pokemon = find_pokemon(pokemon.name, None)
    if normal_form_pokemon:
        pokemon.encounter_information = normal_form_pokemon.encounter_information
|
||
|
|
|
||
|
|
# Species grouped by the pair of form names they come in; get_bad_tea_form()
# uses these groups to translate a stored form into the one to look up.
phony_authentic = [
    "Sinistea",
    "Polteageist",
]
countefieit_atrisan = [
    "Poltchageist",
]
unremarkable_masterpiece = [
    "Sinistcha",
]
bad_tea_pokemon = [*phony_authentic, *countefieit_atrisan, *unremarkable_masterpiece]
|
||
|
1 year ago
|
|
||
|
|
def get_bad_tea_form(pokemon):
    """Return the form name to search for, for the tea-themed species.

    A form-less entry maps to the group's default form name ("Phony Form",
    "Counterfeit Form" or "Unremarkable Form"). "Authentic Form" and
    "Artisan Form" map to ``None``; Sinistcha's other forms are returned
    unchanged. Species outside ``bad_tea_pokemon`` yield ``None``.
    """
    name, form = pokemon.name, pokemon.form
    if name not in bad_tea_pokemon:
        return

    if name in phony_authentic:
        if form is None:
            return "Phony Form"
        if form == "Authentic Form":
            return None

    if name in countefieit_atrisan:
        if form is None:
            return "Counterfeit Form"
        if form == "Artisan Form":
            return None

    if name in unremarkable_masterpiece:
        return "Unremarkable Form" if form is None else form
|
||
|
1 year ago
|
|
||
|
1 year ago
|
def determine_earliest_games(cache: CacheManager):
    """Scrape encounter data for every Pokémon in ``big_pokemon_list`` and pick its earliest game.

    For each Pokémon this sets ``introduced_in_gen``, fills
    ``encounter_information`` from Bulbapedia (after a few species-specific
    form-name adjustments), applies the Unown / shifting-form / Gimmighoul
    special cases, and finally calls ``determine_earliest_game``. Progress is
    printed to stdout.
    """
    for pokemon in big_pokemon_list:
        print(f"Processing {pokemon}")

        # Some species list their forms under a different (or no) label.
        form_to_find = pokemon.form
        if pokemon.name == "Minior" and pokemon.form == "Orange Core":
            form_to_find = None
        if pokemon.name == "Squawkabilly" and pokemon.form:
            form_to_find = pokemon.form.replace("Plumage", "").strip()
        if pokemon.name == "Alcremie":
            form_to_find = None
        if pokemon.name in bad_tea_pokemon:
            form_to_find = get_bad_tea_form(pokemon)

        pokemon.introduced_in_gen = get_intro_generation(pokemon.name, form_to_find, cache)

        # The scraper returns None when the page or section is missing;
        # treat that as "no encounters" instead of crashing on iteration.
        encounter_data = get_locations_from_bulbapedia(pokemon.name, form_to_find, cache) or {}
        for game_name, locations in encounter_data.items():
            pokemon.encounter_information.append(EncounterInformation(game_name, locations))

        handle_unown(pokemon, encounter_data)
        handle_form_shift(pokemon, encounter_data)

        if pokemon.name == "Gimmighoul" and pokemon.form == "Roaming Form":
            encounter_information = EncounterInformation("Pokémon Go", ["Pokémon Go"])
            pokemon.encounter_information.append(encounter_information)

        pokemon.determine_earliest_game()
        if pokemon.earliest_game is None:
            # Nothing obtainable was found; report it rather than raising.
            print(f"Processed {pokemon}: no earliest game found")
        else:
            print(f"Processed {pokemon}: {pokemon.earliest_game.game} ({pokemon.earliest_game.method})")
|
||
|
|
|
||
|
1 year ago
|
def get_base_form(evolution_chain: "List[EvolutionStage]"):
    """Return the name of the chain's base (unevolved) Pokémon.

    Stages are scanned in order. The first stage marked ``"Unevolved"`` gives
    the answer directly; a baby stage gives the name of the stage that
    follows it. A baby stage without a following stage is skipped.

    Returns:
        The base Pokémon's name, or ``None`` for an empty chain or when no
        stage qualifies.
    """
    if not evolution_chain:
        return None

    for stage in evolution_chain:
        if stage.stage == "Unevolved":
            return stage.pokemon
        # Guard: a baby at the end of a chain has no next stage to report.
        if stage.is_baby and stage.next_stage is not None:
            return stage.next_stage.pokemon

    return None
|
||
|
|
|
||
|
1 year ago
|
def adjust_for_evolution(cache: CacheManager):
    """Attach an evolution chain to every Pokémon and print its resulting game/method.

    Iterates ``big_pokemon_list``; the chain comes from Bulbapedia via *cache*.
    """
    for pokemon in big_pokemon_list:
        chain = get_evolution_data_from_bulbapedia(pokemon.name, pokemon.form, cache)
        pokemon.add_evolution_chain(chain)

        earliest = pokemon.get_earliest_game_and_method()
        game, method = earliest
        print(f"Adjusted {pokemon}: {game} ({method})")
|
||
|
1 year ago
|
|
||
|
1 year ago
|
def save_to_csv(filename='pokemon_earliest_games.csv'):
    """Write one CSV row per Pokémon in ``big_pokemon_list``.

    Columns: number, name (with the form in parentheses when there is one),
    introduced_in_gen, earliest_game, obtain_method and encounter_locations
    (the locations of every encounter in the earliest game, joined by
    ``" | "``). A Pokémon without an earliest game gets empty values for the
    last three columns instead of aborting the export.

    Args:
        filename: Destination path; the file is overwritten.
    """
    fieldnames = ['number', 'name', 'introduced_in_gen', 'earliest_game', 'obtain_method', 'encounter_locations']

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for pokemon in big_pokemon_list:
            earliest = pokemon.earliest_game
            earliest_game = earliest.game if earliest is not None else None
            obtain_method = earliest.method if earliest is not None else None

            encounter_locations = []
            if earliest is not None:
                for encounter in pokemon.encounter_information:
                    if encounter.game == earliest_game:
                        encounter_locations.extend(encounter.locations)

            # Only mention the form when there is one, mirroring
            # Pokemon.__str__; avoids writing a literal "(None)".
            display_name = f"{pokemon.name} ({pokemon.form})" if pokemon.form else pokemon.name

            writer.writerow({
                'number': pokemon.number,
                'name': display_name,
                'introduced_in_gen': pokemon.introduced_in_gen,
                'earliest_game': earliest_game,
                'obtain_method': obtain_method,
                'encounter_locations': ' | '.join(str(item) for item in encounter_locations),
            })
|
||
|
|
|
||
|
1 year ago
|
def handle_unknown_encounters(cache):
    """Print a notice for every Pokémon lacking an earliest game or obtain method.

    Currently only reports; *cache* is accepted but not used.
    """
    for pokemon in big_pokemon_list:
        earliest = pokemon.earliest_game
        unresolved = earliest is None or earliest.method is None
        if unresolved:
            print(f"Checking alternative sources for {pokemon.name}")
|
||
|
|
|
||
|
1 year ago
|
# Script entry point: load the Pokémon list, scrape, adjust, and export.
if __name__ == "__main__":
    cache = CacheManager()
    try:
        pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000)

        create_pokemon_index(big_pokemon_list)

        determine_earliest_games(cache)
        adjust_for_evolution(cache)
        handle_unknown_encounters(cache)
        save_to_csv()
    finally:
        # Close the cache even when a step above raises.
        cache.close()

    print(f"Earliest obtainable games and encounter locations determined for {len(pokemon_list)} Pokémon and saved to pokemon_earliest_games.csv")
|