You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
873 lines
32 KiB
873 lines
32 KiB
from __future__ import annotations
|
|
import csv
|
|
import requests
|
|
import time
|
|
import json
|
|
import os
|
|
import re
|
|
import sqlite3
|
|
from bs4 import BeautifulSoup, Tag, NavigableString
|
|
import copy
|
|
from typing import List, Optional
|
|
from fuzzywuzzy import fuzz
|
|
from fuzzywuzzy import process
|
|
from collections import defaultdict
|
|
|
|
from cache_manager import CacheManager
|
|
|
|
# Every main-series Pokémon game in chronological order, except that within a
# generation the special releases are listed first. Code that scans this list
# front to back therefore treats earlier entries as higher priority.
all_games = [
    "Yellow",
    "Red",
    "Blue",
    "Crystal",
    "Gold",
    "Silver",
    "Emerald",
    "FireRed",
    "LeafGreen",
    "Ruby",
    "Sapphire",
    "Platinum",
    "HeartGold",
    "SoulSilver",
    "Diamond",
    "Pearl",
    "Black 2",
    "White 2",
    "Black",
    "White",
    "X",
    "Y",
    "Omega Ruby",
    "Alpha Sapphire",
    "Ultra Sun",
    "Ultra Moon",
    "Sun",
    "Moon",
    "Sword",
    "Shield",
    "Expansion Pass",
    "Brilliant Diamond",
    "Shining Pearl",
    "Legends: Arceus",
    "Scarlet",
    "Violet",
    "The Teal Mask",
    "The Hidden Treasure of Area Zero",
    "The Hidden Treasure of Area Zero (Scarlet)",
    "The Hidden Treasure of Area Zero (Violet)",
    "The Teal Mask (Scarlet)",
    "The Teal Mask (Violet)",
    "Unknown",
    "Pokémon Home",
    "Pokémon Go",
]

# Flat list of every Pokemon object loaded from the CSV.
big_pokemon_list = []
# Lower-cased species name -> list of Pokemon objects (one per form).
pokemon_index = {}
|
|
|
|
def create_pokemon_index(pokemon_list):
    """Rebuild the module-level ``pokemon_index`` from ``pokemon_list``.

    Every entry is filed under its lower-cased ``name``, so all entries that
    share a name end up in the same bucket, in the order they were supplied.
    """
    global pokemon_index

    by_name = defaultdict(list)
    for entry in pokemon_list:
        bucket = by_name[entry.name.lower()]
        bucket.append(entry)

    pokemon_index = by_name
|
|
|
|
def _best_form_match(candidates, form, threshold):
    """Return the candidate whose form is the closest fuzzy match to ``form``.

    Candidates without a form are ignored. Returns ``None`` when no candidate
    reaches ``threshold`` (a ``fuzz.ratio`` score).
    """
    wanted = form.lower()
    best_match = None
    best_score = 0
    for pokemon in candidates:
        if not pokemon.form:
            continue
        score = fuzz.ratio(wanted, pokemon.form.lower())
        # Strict ">" keeps the first candidate when several tie.
        if score > best_score:
            best_score = score
            best_match = pokemon

    if best_match is not None and best_score >= threshold:
        return best_match
    return None


def find_pokemon(name, form=None, threshold=80):
    """Look up a Pokémon in the module-level ``pokemon_index``.

    The name is compared case-insensitively. An exact name hit is tried
    first; if that yields nothing, the closest known name by ``fuzz.ratio``
    is used, provided it scores at least ``threshold``.

    Args:
        name: Species name to look for.
        form: Optional form name. When falsy, the first entry stored for the
            matched name is returned. Otherwise the entry whose form best
            matches is returned, subject to ``threshold``.
        threshold: Minimum fuzzy score for both name and form matches.

    Returns:
        The matching entry, or ``None`` when nothing matches well enough.
    """
    name = name.lower()

    if name in pokemon_index:
        candidates = pokemon_index[name]
        if not form:
            return candidates[0] if candidates else None

        match = _best_form_match(candidates, form, threshold)
        if match is not None:
            return match
        # No form was close enough; fall through to the fuzzy name search,
        # exactly as the original control flow did.

    best_name_match = None
    best_name_score = 0
    for pokemon_name in pokemon_index:
        score = fuzz.ratio(name, pokemon_name)
        if score > best_name_score:
            best_name_score = score
            best_name_match = pokemon_name

    if best_name_match and best_name_score >= threshold:
        candidates = pokemon_index[best_name_match]
        if not form:
            return candidates[0]
        return _best_form_match(candidates, form, threshold)

    return None
|
|
|
|
def roman_to_int(s):
    """Convert a Roman numeral written with upper-case I, V, X, L, C, D, M to an int.

    A symbol is subtracted when the symbol to its right is larger (``IV``,
    ``XC``) and added otherwise. An empty string yields 0.

    Raises:
        KeyError: if ``s`` contains any other character.
    """
    symbol_values = dict(zip("IVXLCDM", (1, 5, 10, 50, 100, 500, 1000)))

    # Work right-to-left so each symbol can be paired with its right-hand
    # neighbour; the rightmost symbol is paired with 0 and so always added.
    values = [symbol_values[symbol] for symbol in reversed(s)]
    right_neighbours = [0] + values[:-1]

    return sum(
        value if value >= neighbour else -value
        for value, neighbour in zip(values, right_neighbours)
    )
|
|
|
|
class Pokemon:
    """One Pokémon entry (species plus optional form) and how it can be obtained."""

    def __init__(self, name: str, number: int, form: Optional[str] = None):
        self.name = name
        self.number = number
        self.form = form
        self.stage: Optional[str] = None
        self.evolution_chain: Optional[List['EvolutionStage']] = []
        self.is_baby = False
        self.encounter_information: Optional[List['EncounterInformation']] = []
        # Encounter record chosen by determine_earliest_game(), if any.
        self.earliest_game: Optional['EncounterInformation'] = None
        self.obtain_method: Optional[str] = None
        self.introduced_in_gen = None

    def get_earliest_game_and_method(self):
        """Return ``(game, method)`` describing how this Pokémon is first obtained.

        Only the first stage of the evolution chain is consulted:

        * if this Pokémon is flagged as a baby, the first stage's game is
          reported with method ``"Breed"``;
        * if the first stage is this Pokémon itself, its own earliest game
          and method are reported;
        * otherwise the first stage's game is reported with ``"Evolve"``.

        When the first stage has no resolved reference or no earliest game,
        this Pokémon's own earliest game is used instead of raising.
        ``(None, None)`` is returned when nothing is known.
        """
        if self.evolution_chain:
            reference = self.evolution_chain[0].pokemon_reference
            reference_game = getattr(reference, "earliest_game", None)

            if self.is_baby:
                if reference_game is not None:
                    return reference_game.game, "Breed"
            elif reference == self:
                pass  # handled by the own-data fallback below
            elif reference_game is not None:
                return reference_game.game, "Evolve"

        if self.earliest_game:
            return self.earliest_game.game, self.earliest_game.method
        return None, None

    def __str__(self):
        return f"{self.name}{' ' if self.form else ''}{self.form if self.form else ''} (#{self.number})"

    def add_evolution_chain(self, evolution_chain: List['EvolutionStage']):
        """Store ``evolution_chain`` on this Pokémon."""
        self.evolution_chain = evolution_chain

    def add_stage(self, stage: str):
        """Record the evolution stage label and derive ``is_baby`` from it."""
        self.stage = stage
        self.is_baby = self.stage is not None and 'Baby' in self.stage

    @staticmethod
    def _location_text(location) -> str:
        """Return the human-readable text of one location entry.

        Entries may be plain strings or dicts carrying the text under the
        ``"location"`` key; both shapes are produced elsewhere in this module.
        """
        if isinstance(location, dict):
            return str(location.get("location") or "")
        return str(location)

    def update_encounter_information(self, exclude_events=True, exclude_home=True, exclude_go=True):
        """Recompute ``method`` on every encounter record.

        Each record's method is reset to ``None`` and then set from its
        locations. Locations whose text contains a non-catchable marker are
        skipped; the last remaining location decides the method.

        Args:
            exclude_events: Skip locations mentioning "event".
            exclude_home: Skip locations mentioning Pokémon HOME.
            exclude_go: Skip locations mentioning Pokémon GO.
        """
        if not self.encounter_information:
            return

        non_catchable_methods = ["trade", "global link", "poké transfer", "time capsule", "unobtainable"]
        if exclude_events:
            non_catchable_methods.append("event")
        if exclude_home:
            # Accept both spellings: the unaccented one alone never matches
            # text written as "Pokémon HOME".
            non_catchable_methods.extend(["pokemon home", "pokémon home"])
        if exclude_go:
            non_catchable_methods.append("pokémon go")

        for encounter in self.encounter_information:
            encounter.method = None
            for location in encounter.locations:
                text = self._location_text(location).lower()
                if any(marker in text for marker in non_catchable_methods):
                    continue

                if "first partner" in text:
                    encounter.method = "Starter"
                elif "received" in text:
                    encounter.method = "Gift"
                elif "evolve" in text:
                    encounter.method = "Evolve"
                elif "event" in text:
                    encounter.method = "Event"
                else:
                    encounter.method = "Catchable"

    def parse_encoutners_for_games(self):
        """Set ``earliest_game`` to the first game in ``all_games`` that has a usable encounter.

        Only records whose ``method`` is set are considered. If none match,
        ``earliest_game`` is left unchanged.
        """
        game_methods = {}
        for encounter in self.encounter_information:
            if encounter.method:
                game_methods[encounter.game.lower()] = encounter

        for game in all_games:
            if game.lower() in game_methods:
                self.earliest_game = game_methods[game.lower()]
                return

    def determine_earliest_game(self):
        """Pick the earliest game, relaxing one exclusion at a time until one is found.

        The passes are: all exclusions on, then events allowed, then HOME
        allowed, then GO allowed. Each later pass relaxes only its own
        exclusion; the others are back at their defaults.
        """
        if not self.encounter_information:
            self.earliest_game = None
            return

        passes = (
            {},
            {"exclude_events": False},
            {"exclude_home": False},
            {"exclude_go": False},
        )
        for overrides in passes:
            self.update_encounter_information(**overrides)
            self.parse_encoutners_for_games()
            if self.earliest_game is not None:
                return

        self.earliest_game = None
|
|
|
|
class EvolutionStage:
    """One node of an evolution chain, linked to its neighbours and branches."""

    def __init__(self, pokemon: str, method: Optional[str] = None, stage: Optional[str] = None, form: Optional[str] = None):
        self.pokemon = pokemon
        self.method = method

        # Links are filled in later by whoever assembles the chain.
        self.next_stage: Optional[EvolutionStage] = None
        self.previous_stage: Optional[EvolutionStage] = None
        self.branches: List[EvolutionStage] = []

        self.stage = stage
        self.is_baby = stage is not None and 'Baby' in stage

        # Prefer the entry for this exact form; if there is none, settle for
        # whatever entry exists under the bare name.
        reference = find_pokemon(pokemon, form)
        if reference is None:
            reference = find_pokemon(pokemon, None)
        self.pokemon_reference = reference

        self.form = form

    def __str__(self):
        return f"{self.pokemon} {self.form if self.form else ''} ({self.method if self.method else 'Base'})"
|
|
|
|
class EncounterInformation:
    """The locations recorded for a Pokémon in a single game."""

    def __init__(self, game: str, locations: List[str]):
        # Name of the game these locations belong to.
        self.game = game
        # Placeholder until an obtain method is worked out from the locations.
        self.method = "Unknown"
        # Location entries, stored exactly as passed in.
        self.locations = locations
|
|
|
|
def _evolution_stage_from_td(td: Tag, method: Optional[str]) -> EvolutionStage:
    """Build an EvolutionStage from a cell that holds a Pokémon sub-table."""
    pokemon_name = extract_pokemon_name(td)
    stage = extract_stage_form(td)
    evolution_form = extract_evolution_form(td, pokemon_name)
    return EvolutionStage(pokemon_name, method, stage, evolution_form)


def parse_evolution_chain(table: Tag, form: Optional[str] = None) -> List[EvolutionStage]:
    """Parse an evolution table into a list of linked EvolutionStage objects.

    The first row is the main chain; every further row is a branch. In each
    row, a cell containing a nested table describes a Pokémon, and a cell
    without one holds the evolution method for the next Pokémon in that row.

    Args:
        table: The evolution table element.
        form: Accepted for interface compatibility; not used by the parser.

    Returns:
        The main-chain stages in order. Branch stages are reachable through
        ``branches`` on the main-chain stage they attach to. An empty list
        is returned when the table has no ``tbody`` or no rows.
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if not rows:
        # Nothing to parse; indexing rows[0] would raise.
        return []
    main_row, branch_rows = rows[0], rows[1:]

    # Parse main evolution chain
    main_chain = []
    current_stage = None
    pending_method = None
    for td in main_row.find_all('td', recursive=False):
        if not td.find('table'):
            # This cell holds the evolution method for the next Pokémon.
            pending_method = extract_evolution_method(td)
            continue

        new_stage = _evolution_stage_from_td(td, pending_method)
        pending_method = None
        if current_stage is not None:
            current_stage.next_stage = new_stage
            new_stage.previous_stage = current_stage
        current_stage = new_stage
        main_chain.append(new_stage)

    # Parse branching evolutions
    for row in branch_rows:
        branch_stage = None
        branch_method = None
        for td in row.find_all('td', recursive=False):
            if not td.find('table'):
                branch_method = extract_evolution_method(td)
                continue

            new_stage = _evolution_stage_from_td(td, branch_method)
            branch_method = None
            if branch_stage is not None:
                branch_stage.next_stage = new_stage
                new_stage.previous_stage = branch_stage
            branch_stage = new_stage

            # A branch cell is attached to a main-chain stage only when it has
            # a rowspan and shares that stage's Pokémon name.
            if td.get('rowspan'):
                for main_stage in main_chain:
                    if main_stage.pokemon == new_stage.pokemon:
                        main_stage.branches.append(branch_stage)
                        branch_stage.previous_stage = main_stage
                        break

    return main_chain
|
|
|
|
def find_name_tag(td: Tag) -> Optional[Tag]:
    """Return the link that names the Pokémon inside ``td``'s nested table.

    A link with class ``selflink`` wins. Otherwise the first link that has a
    ``title`` attribute and whose class is not ``image`` is used. Returns
    ``None`` when neither exists.
    """
    table = td.find('table')
    return (
        table.find('a', class_='selflink')
        or table.find('a', title=True, class_=lambda x: x != 'image')
    )
|
|
|
|
def extract_pokemon_name(td: Tag) -> Optional[str]:
    """Return the stripped text of the cell's name link, or ``None`` if there is no link."""
    name_tag = find_name_tag(td)
    return name_tag.get_text(strip=True) if name_tag else None
|
|
|
|
def extract_evolution_method(td: Tag) -> str:
    """Return the whitespace-stripped text of a cell that describes an evolution method."""
    method_text = td.get_text(strip=True)
    return method_text
|
|
|
|
def extract_stage_form(td: Tag) -> Optional[str]:
    """Return the text of the first ``<small>`` in the cell's nested table, if any."""
    stage_tag = td.find('table').find('small')
    return stage_tag.get_text(strip=True) if stage_tag else None
|
|
|
|
def extract_evolution_form(td: Tag, name: str) -> Optional[str]:
    """Return the form label shown next to the Pokémon's name, if there is one.

    The parent element of the name link is searched for ``<small>`` tags. A
    form is reported only when more than one is present, in which case the
    text of the first one is returned. ``name`` is not used.
    """
    name_tag = find_name_tag(td)
    if not name_tag:
        return None

    small_tags = name_tag.parent.find_all('small')
    if len(small_tags) <= 1:
        return None
    return small_tags[0].get_text(strip=True)
|
|
|
|
def read_pokemon_list(filename, limit=50):
    """Load Pokémon from a CSV file into the module-level ``big_pokemon_list``.

    The file must have ``name`` and ``number`` columns. A trailing
    parenthesised part of ``name`` is treated as the form; the rest is the
    base name.

    Args:
        filename: Path of the CSV file to read.
        limit: Maximum number of data rows to load.

    Returns:
        ``big_pokemon_list`` itself, with the new entries appended.
    """
    # Base name, then an optional "(form)" suffix at the end of the string.
    name_pattern = re.compile(r'(.*?)\s*(\(.*\))?$')

    with open(filename, 'r', newline='', encoding='utf-8') as csvfile:
        for index, row in enumerate(csv.DictReader(csvfile)):
            if index >= limit:
                break

            full_name = row['name']
            parsed = name_pattern.match(full_name)
            if parsed:
                base_name, form = parsed.groups()
            else:
                base_name, form = full_name, None

            base_name = base_name.strip()
            form = form.strip('() ') if form else None

            big_pokemon_list.append(Pokemon(base_name, row['number'], form))

    return big_pokemon_list
|
|
|
|
def get_pokemon_data_bulbapedia(pokemon_name, cache: CacheManager):
    """Fetch the Bulbapedia article for ``pokemon_name`` through ``cache``.

    Returns whatever ``cache.fetch_url`` returns for the article URL.
    """
    return cache.fetch_url(f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pokémon)")
|
|
|
|
def split_td_contents(td):
    """Split a cell's children into groups separated by ``<br>`` tags.

    Each non-empty run of children between ``<br>`` tags becomes a fresh
    ``<div>`` holding copies of those children, so the original cell is left
    untouched. Empty runs (for example consecutive ``<br>`` tags) produce
    nothing.

    Returns:
        The list of ``<div>`` tags, in document order.
    """
    def wrap(nodes):
        wrapper = BeautifulSoup('', 'html.parser').new_tag('div')
        for node in nodes:
            wrapper.append(copy.copy(node))
        return wrapper

    groups = []
    pending = []
    for node in td.contents:
        is_break = isinstance(node, Tag) and node.name == 'br'
        if not is_break:
            pending.append(node)
        elif pending:
            groups.append(wrap(pending))
            pending = []

    # Flush whatever follows the last <br>.
    if pending:
        groups.append(wrap(pending))

    return groups
|
|
|
|
def parse_form_information(html_content):
    """Extract ``(main_form, breed)`` from a location snippet.

    The form is expected as bold text inside the first ``<small>`` tag,
    optionally followed by a breed in parentheses.

    Returns:
        ``(None, None)`` when there is no such bold text or when the main
        form mentions "factor"; otherwise the main form and the breed
        (``None`` if no breed is present).
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    small_tag = soup.find('small')
    bold_tag = small_tag.find('b') if small_tag else None
    if not bold_tag:
        return None, None

    # Drop any enclosing parentheses, then separate "Form (Breed" at the
    # first remaining opening parenthesis.
    form_text = bold_tag.get_text(strip=True).strip('()')
    main_part, *rest = form_text.split('(')
    main_form = main_part.strip()

    # "Factor"s are properties of an encounterable Pokémon, not actual forms.
    if main_form and "factor" in main_form.lower():
        return None, None

    breed = rest[0].strip(')') if rest else None
    return main_form, breed
|
|
|
|
def get_evolution_data_from_bulbapedia(pokemon_name, form, cache: CacheManager, gender: Optional[str] = None):
    """Fetch and parse the evolution chain from a Pokémon's Bulbapedia page.

    Without a form, the first table after the "Evolution data" heading is
    parsed. With a form, the ``h4`` sub-headings following that heading are
    scanned (stopping at the next ``h3``) for one containing the form name
    with the word "Form"/"form" removed, and the table after it is parsed.

    Args:
        pokemon_name: Species name used to build the page URL.
        form: Optional form name.
        cache: Object used to fetch the page.
        gender: Not used.

    Returns:
        The parsed chain, or ``None`` when the page, the section or a
        suitable table cannot be found.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')
    evolution_section = soup.find('span', id='Evolution_data')
    if not evolution_section:
        return None

    heading = evolution_section.parent
    evolution_table = None
    if not form:
        evolution_table = heading.find_next('table')
    else:
        form_keyword = form.replace('Form', '').replace('form', '').strip()
        for tag in heading.find_next_siblings():
            if tag.name == 'h4' and form_keyword in tag.get_text(strip=True):
                evolution_table = tag.find_next('table')
                break
            if tag.name == 'h3':
                break

    if not evolution_table:
        return None

    # Eevee's table has its own layout and needs the dedicated parser.
    if pokemon_name == "Eevee":
        return parse_eevee_evolution_chain(evolution_table)
    return parse_evolution_chain(evolution_table, form)
|
|
|
|
# This is going to be a little odd. Layout of an evolution table:
# - The first TR contains a full evolution chain.
# - The other TRs contain branching evolution chains.
# - Any TDs in the first TR with a rowspan are part of the main evolution chain.
# - Any other TDs are part of the branching evolution chains.
# - A table in a TD is information about the current Pokémon in that evolution stage.
# - A TD without a table is information on how to trigger the next evolution.
|
|
|
|
def parse_pokemon_subtable(td):
    """Return ``(name, stage)`` for a cell that contains a Pokémon sub-table.

    Cells without a nested table yield ``(None, None)``.
    """
    if not td.find('table'):
        return None, None
    return extract_pokemon_name(td), extract_stage_form(td)
|
|
|
|
def parse_eevee_evolution_chain(table):
    """Parse Eevee's evolution table into a single stage with one branch per evolution.

    The parser reads the second row as Eevee itself, the third row as one
    evolution method per cell and the fourth row as one evolved Pokémon per
    cell; methods and evolutions are paired by cell position.

    Returns:
        A one-element list holding Eevee's stage, whose ``branches`` are the
        evolutions. An empty list is returned when the table does not have
        the expected rows or Eevee's own cell cannot be read.
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if len(rows) < 4:
        # Rows 1-3 are indexed below; a shorter table cannot be parsed.
        return []
    eevee_row, method_row, eeveelutions_row = rows[1], rows[2], rows[3]

    eevee_td = eevee_row.find('td', recursive=False)
    if eevee_td is None:
        return []
    pokemon_name, stage = parse_pokemon_subtable(eevee_td)
    if pokemon_name is None:
        return []
    eevee_stage = EvolutionStage(pokemon_name, None, stage, None)

    methods = [
        extract_evolution_method(method_td)
        for method_td in method_row.find_all('td', recursive=False)
    ]

    eeveelutions = []
    for index, eeveelution_td in enumerate(eeveelutions_row.find_all('td', recursive=False)):
        pokemon_name, stage = parse_pokemon_subtable(eeveelution_td)
        if pokemon_name is None:
            # Not a Pokémon cell; keep positions aligned and move on.
            continue
        method = methods[index] if index < len(methods) else None
        eeveelution_stage = EvolutionStage(pokemon_name, method, stage, None)
        eeveelution_stage.previous_stage = eevee_stage
        eeveelutions.append(eeveelution_stage)

    # Branches are stored as a flat list, not a nested one.
    eevee_stage.branches = eeveelutions

    return [eevee_stage]
|
|
|
|
def get_intro_generation(pokemon_name, form, cache: CacheManager):
    """Return the number of the first generation listed in a Pokémon's game locations table.

    The "Game locations" table on the Pokémon's Bulbapedia page is scanned
    row by row, and the first "Generation <Roman numeral>" text found is
    converted to an int.

    Args:
        pokemon_name: Species name used to build the page URL.
        form: Not used.
        cache: Object used to fetch the page.

    Returns:
        The generation number, or ``None`` when the page, section, table or
        a generation heading cannot be found.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')

    locations_section = soup.find('span', id='Game_locations')
    if not locations_section:
        return None

    locations_table = locations_section.find_next('table', class_='roundy')
    if not locations_table:
        return None

    generation_tbody = locations_table.find('tbody', recursive=False)
    if not generation_tbody:
        return None

    # Compiled once; the capture group is just the Roman numeral.
    generation_pattern = re.compile(r"Generation\s+([IVXLCDM]+)")

    for generation_row in generation_tbody.find_all('tr', recursive=False):
        nested_td = generation_row.find('td', recursive=False)
        if not nested_td:
            continue
        nested_table = nested_td.find('table', recursive=False)
        if not nested_table:
            continue
        nested_tbody = nested_table.find('tbody', recursive=False)
        if not nested_tbody:
            continue

        for nested_row in nested_tbody.find_all('tr', recursive=False):
            match = generation_pattern.search(nested_row.get_text(strip=True))
            if match:
                return roman_to_int(match.group(1))

    return None
|
|
|
|
def compare_forms(a, b):
    """Tell whether two form names refer to the same form.

    ``None`` never matches anything. Otherwise the names match when they are
    equal as given, or equal after lower-casing, removing "forme"/"form",
    replacing "é" with "e", trimming, and smoothing over a few known
    spelling variants.
    """
    if a is None or b is None:
        return False
    if a == b:
        return True

    def canonical(form):
        text = form.lower().replace("forme", "").replace("form", "").replace("é", "e").strip()
        # Known spelling variants that should compare equal.
        return text.replace("deputante", "debutante").replace("p'au", "pa'u").replace("blood moon", "bloodmoon")

    return canonical(a) == canonical(b)
|
|
|
|
def _location_child_rows(table):
    """Return the direct ``tr`` rows of ``table``'s own ``tbody`` (empty if it has none)."""
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []
    return tbody.find_all('tr', recursive=False)


def _location_nested_table(row):
    """Return the table sitting directly in ``row``'s first direct ``td``, or ``None``."""
    td = row.find('td', recursive=False)
    if not td:
        return None
    return td.find('table', recursive=False)


def _collect_raw_game_locations(locations_table):
    """Map each recognised game name to the location groups listed for it.

    The table nests one table per generation; inside that, one table per
    game row, whose ``th`` cells name the games and whose nested table holds
    the location cells. Rows whose text contains "Generation" are headings
    and are skipped, as are games not present in ``all_games``.
    """
    raw_game_locations = {}
    for generation_row in _location_child_rows(locations_table):
        generation_table = _location_nested_table(generation_row)
        if not generation_table:
            continue

        for nested_row in _location_child_rows(generation_table):
            if 'Generation' in nested_row.get_text(strip=True):
                continue
            games_table = _location_nested_table(nested_row)
            if not games_table:
                continue

            for games_row in _location_child_rows(games_table):
                for game in games_row.find_all('th'):
                    raw_game = game.get_text(strip=True)
                    if raw_game not in all_games:
                        continue
                    locations_container = _location_nested_table(games_row)
                    if not locations_container:
                        continue
                    locations_tbody = locations_container.find('tbody', recursive=False)
                    if not locations_tbody:
                        continue
                    for location in locations_tbody.find_all('td'):
                        # Split per game so every game owns its own copies.
                        groups = split_td_contents(location)
                        if groups:
                            raw_game_locations.setdefault(raw_game, []).extend(groups)
    return raw_game_locations


def _collect_event_tables(soup):
    """Map each variant heading in the "In events" section to the table under it.

    Siblings after the section heading are scanned until an ``h4`` or any
    element whose text contains "held". ``h5`` headings name the variant for
    the tables that follow; tables before any ``h5`` are stored under "".
    """
    event_tables = {}
    events_section = soup.find('span', id='In_events')
    if not events_section:
        return event_tables

    variant = ""
    for sibling in events_section.parent.find_next_siblings():
        if sibling.name == 'h4' or "held" in sibling.get_text(strip=True).lower():
            break
        if sibling.name == 'h5':
            variant = sibling.get_text(strip=True)
        if sibling.name == 'table':
            event_tables[variant] = sibling
    return event_tables


def _location_applies_to_form(raw_location, form):
    """Decide whether a location group is relevant for ``form``.

    With ``form`` of ``None``, a group applies when it names no form or is
    labelled "All Forms" or "Kantonian Form". With a form, the group must
    name a form other than "Kantonian Form", and either its main form or its
    sub-form must match ``form`` (exactly via ``compare_forms`` or with a
    fuzzy partial ratio of at least 80). "All Forms" matches any form.
    """
    main_form, sub_form = parse_form_information(str(raw_location))

    if form is None:
        return not main_form or main_form in ("All Forms", "Kantonian Form")

    if not main_form or main_form == "Kantonian Form":
        return False
    if main_form == "All Forms":
        main_form = form

    wanted = form.lower()
    if compare_forms(form, main_form):
        return True
    if fuzz.partial_ratio(wanted, main_form.lower()) >= 80:
        return True
    if compare_forms(form, sub_form):
        return True
    return bool(sub_form) and fuzz.partial_ratio(wanted, sub_form.lower()) >= 80


def _add_event_locations(game_locations, event_tables, pokemon_name, form):
    """Append an "Event" entry for every game named in the matching event tables.

    A table matches when its variant equals ``pokemon_name`` and no form was
    requested, or when the requested form occurs in the variant text. Rows
    whose first cell has no link, or whose link has no title, are skipped.
    """
    for variant, table in event_tables.items():
        if not ((variant == pokemon_name and form is None) or (form and form in variant)):
            continue
        for game_row in table.find_all('tr'):
            entries = game_row.find_all('td')
            if len(entries) <= 1:
                continue
            link = entries[0].find('a')
            games_string = link.get('title') if link else None
            if not games_string:
                continue
            for game in all_games:
                # Substring test, as in the original lookup.
                if game in games_string:
                    game_locations.setdefault(game, []).append({"location": "Event", "tag": None})


def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager):
    """Collect per-game location entries for a Pokémon from its Bulbapedia page.

    Args:
        pokemon_name: Species name used to build the page URL.
        form: Form to filter locations by, or ``None`` for the default form.
        cache: Object used to fetch the page.

    Returns:
        A dict mapping game name to a list of ``{"location": text,
        "tag": html}`` dicts (event entries use ``"Event"`` and ``None``),
        or ``None`` when the page, the "Game locations" section or its table
        cannot be found.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')

    locations_section = soup.find('span', id='Game_locations')
    if not locations_section:
        return None

    locations_table = locations_section.find_next('table', class_='roundy')
    if not locations_table:
        return None

    raw_game_locations = _collect_raw_game_locations(locations_table)
    event_tables = _collect_event_tables(soup)

    game_locations = {}
    for raw_game, raw_locations in raw_game_locations.items():
        for raw_location in raw_locations:
            if not _location_applies_to_form(raw_location, form):
                continue
            info = {"location": raw_location.get_text(), "tag": str(raw_location)}
            game_locations.setdefault(raw_game, []).append(info)

    _add_event_locations(game_locations, event_tables, pokemon_name, form)

    return game_locations
|
|
|
|
def split_outside_brackets(str):
    """Split ``str`` on commas, ignoring commas that sit inside parentheses."""
    # A comma counts only if it is not followed by a ")" with no other
    # parenthesis in between.
    top_level_comma = re.compile(r',(?![^()]*\))')
    return top_level_comma.split(str)
|
|
|
|
def handle_unown(pokemon, encounter_data):
    """Give an Unown form its encounter information; other Pokémon are ignored.

    The "!" and "?" forms, when ``encounter_data`` is non-empty, get a record
    per game in ``encounter_data`` plus the reference Unown's record for
    every game listed in ``all_games`` after both "HeartGold" and
    "SoulSilver". Every other case shares the reference Unown's encounter
    list as is. The reference Unown is whatever ``find_pokemon`` returns for
    the bare name; nothing happens if it returns nothing.
    """
    if pokemon.name != "Unown":
        return

    one_form_unown = find_pokemon(pokemon.name, None)
    if not one_form_unown:
        return

    # The ! and ? forms were added in HeartGold and SoulSilver.
    is_late_form = pokemon.form in ("!", "?")
    if not (is_late_form and encounter_data):
        pokemon.encounter_information = one_form_unown.encounter_information
        return

    for game_name in encounter_data:
        pokemon.encounter_information.append(
            EncounterInformation(game_name, encounter_data[game_name])
        )

    found_heartgold = False
    found_soulsilver = False
    for game in all_games:
        if game == "HeartGold":
            found_heartgold = True
            continue
        if game == "SoulSilver":
            found_soulsilver = True
            continue
        if not (found_heartgold and found_soulsilver):
            continue

        # Take the reference Unown's first record for this game, if any.
        shared = next(
            (record for record in one_form_unown.encounter_information if game == record.game),
            None,
        )
        if shared is not None:
            pokemon.encounter_information.append(shared)
|
|
|
|
# Species for which handle_form_shift() makes every form reuse the encounter
# information of the entry found under the bare species name.
list_of_shifting_form_pokemon = [
    "Deoxys", "Burmy", "Wormadam", "Rotom", "Shaymin", "Keldeo",
    "Furfrou", "Hoopa", "Pumpkaboo", "Gourgeist", "Zygarde", "Magearna",
    "Vivillon", "Minior", "Urshifu", "Oinkologne", "Basculegion", "Enamorus",
]


def handle_form_shift(pokemon, encounter_data):
    """Point a listed species' form at the encounter list of its bare-name entry.

    Nothing happens for species outside ``list_of_shifting_form_pokemon``,
    for entries without a form, or when ``find_pokemon`` finds no entry for
    the bare name. ``encounter_data`` is not used.
    """
    if pokemon.name not in list_of_shifting_form_pokemon or pokemon.form is None:
        return

    normal_form_pokemon = find_pokemon(pokemon.name, None)
    if normal_form_pokemon:
        pokemon.encounter_information = normal_form_pokemon.encounter_information
|
|
|
|
# Species whose form names need translating before looking them up; grouped
# by the pair of form names each group uses.
phony_authentic = ["Sinistea", "Polteageist"]
countefieit_atrisan = ["Poltchageist"]
unremarkable_masterpiece = ["Sinistcha"]
bad_tea_pokemon = phony_authentic + countefieit_atrisan + unremarkable_masterpiece


def get_bad_tea_form(pokemon):
    """Return the form name to search for, for the species in ``bad_tea_pokemon``.

    An entry without a form maps to its group's default form name
    ("Phony Form", "Counterfeit Form" or "Unremarkable Form"). "Authentic
    Form" and "Artisan Form" map to ``None``. Entries in the
    ``unremarkable_masterpiece`` group that do have a form keep it. Every
    other case, including species outside the list, yields ``None``.
    """
    name, form = pokemon.name, pokemon.form
    if name not in bad_tea_pokemon:
        return None

    if name in phony_authentic:
        if form is None:
            return "Phony Form"
        if form == "Authentic Form":
            return None

    if name in countefieit_atrisan:
        if form is None:
            return "Counterfeit Form"
        if form == "Artisan Form":
            return None

    if name in unremarkable_masterpiece:
        return "Unremarkable Form" if form is None else form

    return None
|
|
|
|
def determine_earliest_games(cache: CacheManager):
    """Fill in generation, encounter information and earliest game for every loaded Pokémon.

    For each entry in ``big_pokemon_list`` the form name to look up is
    adjusted for a few special species, the page data is fetched through
    ``cache``, and the result is stored on the entry. Progress is printed.
    Entries for which no location data or no earliest game can be found are
    reported rather than aborting the run.
    """
    for pokemon in big_pokemon_list:
        print(f"Processing {pokemon}")

        # Some species list their forms under different names on the page.
        form_to_find = pokemon.form
        if pokemon.name == "Minior" and pokemon.form == "Orange Core":
            form_to_find = None
        if pokemon.name == "Squawkabilly" and pokemon.form:
            form_to_find = pokemon.form.replace("Plumage", "").strip()
        if pokemon.name == "Alcremie":
            form_to_find = None
        if pokemon.name in bad_tea_pokemon:
            form_to_find = get_bad_tea_form(pokemon)

        pokemon.introduced_in_gen = get_intro_generation(pokemon.name, form_to_find, cache)

        # The lookup returns None when the page or its locations table is
        # missing; treat that as "no locations" instead of failing.
        encounter_data = get_locations_from_bulbapedia(pokemon.name, form_to_find, cache) or {}
        for game, locations in encounter_data.items():
            pokemon.encounter_information.append(EncounterInformation(game, locations))

        handle_unown(pokemon, encounter_data)
        handle_form_shift(pokemon, encounter_data)

        if pokemon.name == "Gimmighoul" and pokemon.form == "Roaming Form":
            encounter_information = EncounterInformation("Pokémon Go", ["Pokémon Go"])
            pokemon.encounter_information.append(encounter_information)

        pokemon.determine_earliest_game()
        if pokemon.earliest_game is None:
            print(f"Processed {pokemon}: no obtainable game found")
        else:
            print(f"Processed {pokemon}: {pokemon.earliest_game.game} ({pokemon.earliest_game.method})")
|
|
|
|
def get_base_form(evolution_chain:List[EvolutionStage]):
    """Return the name of the base (unevolved) Pokémon of an evolution chain.

    Stages are checked in order. A stage labelled "Unevolved" is the base; a
    baby stage points to the base through its ``next_stage``. A baby stage
    with no ``next_stage`` is skipped.

    Returns:
        The base Pokémon's name, or ``None`` for an empty chain or when no
        stage qualifies.
    """
    if not evolution_chain:
        return None

    for stage in evolution_chain:
        if stage.stage == "Unevolved":
            return stage.pokemon
        if stage.is_baby and stage.next_stage is not None:
            return stage.next_stage.pokemon

    return None
|
|
|
|
def adjust_for_evolution(cache: CacheManager):
    """Attach an evolution chain to every loaded Pokémon and print the resulting game and method."""
    for pokemon in big_pokemon_list:
        chain = get_evolution_data_from_bulbapedia(pokemon.name, pokemon.form, cache)
        pokemon.add_evolution_chain(chain)

        # The result is only reported here; it is not stored on the entry.
        game, method = pokemon.get_earliest_game_and_method()
        print(f"Adjusted {pokemon}: {game} ({method})")
|
|
|
|
def save_to_csv(filename='pokemon_earliest_games.csv'):
    """Write one CSV row per loaded Pokémon with its earliest game and locations.

    Columns: number, name, introduced_in_gen, earliest_game, obtain_method,
    encounter_locations. The locations are those recorded for the earliest
    game, joined with " | ". Pokémon without an earliest game get empty
    game, method and location cells instead of aborting the export.

    Args:
        filename: Path of the CSV file to (over)write.
    """
    fieldnames = ['number', 'name', 'introduced_in_gen', 'earliest_game', 'obtain_method', 'encounter_locations']

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for pokemon in big_pokemon_list:
            earliest = pokemon.earliest_game
            earliest_game = earliest.game if earliest is not None else None
            obtain_method = earliest.method if earliest is not None else None

            encounter_locations = []
            if earliest is not None:
                for encounter in pokemon.encounter_information:
                    if encounter.game == earliest_game:
                        encounter_locations.extend(encounter.locations)

            writer.writerow({
                'number': pokemon.number,
                # NOTE(review): a Pokémon without a form is written as
                # "Name (None)"; kept as is in case consumers rely on it.
                'name': f"{pokemon.name} ({pokemon.form})",
                'introduced_in_gen': pokemon.introduced_in_gen,
                'earliest_game': earliest_game,
                'obtain_method': obtain_method,
                'encounter_locations': ' | '.join((str(item) for item in encounter_locations))
            })
|
|
|
|
def handle_unknown_encounters(cache):
    """Print a notice for every loaded Pokémon that has no earliest game or no method.

    ``cache`` is accepted but not used.
    """
    for pokemon in big_pokemon_list:
        earliest = pokemon.earliest_game
        if earliest is None or earliest.method is None:
            print(f"Checking alternative sources for {pokemon.name}")
|
|
|
|
# Script entry point: load the list, scrape, adjust for evolutions and export.
if __name__ == "__main__":
    cache = CacheManager()
    try:
        pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000)

        create_pokemon_index(big_pokemon_list)

        determine_earliest_games(cache)
        adjust_for_evolution(cache)
        handle_unknown_encounters(cache)
        save_to_csv()
    finally:
        # Close the cache even when one of the steps above raises.
        cache.close()

    print(f"Earliest obtainable games and encounter locations determined for {len(pokemon_list)} Pokémon and saved to pokemon_earliest_games.csv")
|
|
|