You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
909 lines
33 KiB
909 lines
33 KiB
from __future__ import annotations
|
|
import csv
|
|
import requests
|
|
import time
|
|
import json
|
|
import os
|
|
import re
|
|
import sqlite3
|
|
from bs4 import BeautifulSoup, Tag, NavigableString
|
|
import copy
|
|
from typing import List, Optional
|
|
from fuzzywuzzy import fuzz
|
|
from fuzzywuzzy import process
|
|
from collections import defaultdict
|
|
|
|
import os
|
|
import sys
|
|
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from DataGatherers.cache_manager import CacheManager
|
|
import concurrent.futures
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from functools import lru_cache
|
|
|
|
# Every game (plus DLC and companion apps) that can be reported as the source
# of a Pokémon. The order matters: the list is scanned front to back and the
# first game with a usable encounter wins. Generations run chronologically
# and, inside a generation, the special/third versions are listed first.
all_games = [
    # Generation I
    "Yellow",
    "Red",
    "Blue",
    # Generation II
    "Crystal",
    "Gold",
    "Silver",
    # Generation III
    "Emerald",
    "FireRed",
    "LeafGreen",
    "Ruby",
    "Sapphire",
    # Generation IV
    "Platinum",
    "HeartGold",
    "SoulSilver",
    "Diamond",
    "Pearl",
    # Generation V
    "Black 2",
    "White 2",
    "Black",
    "White",
    # Generation VI
    "X",
    "Y",
    "Omega Ruby",
    "Alpha Sapphire",
    # Generation VII
    "Ultra Sun",
    "Ultra Moon",
    "Sun",
    "Moon",
    "Let's Go Pikachu",
    "Let's Go Eevee",
    # Generation VIII
    "Sword",
    "Shield",
    "Expansion Pass",
    "Brilliant Diamond",
    "Shining Pearl",
    "Legends: Arceus",
    # Generation IX (base games and DLC, including version-specific DLC labels)
    "Scarlet",
    "Violet",
    "The Teal Mask",
    "The Hidden Treasure of Area Zero",
    "The Hidden Treasure of Area Zero (Scarlet)",
    "The Hidden Treasure of Area Zero (Violet)",
    "The Teal Mask (Scarlet)",
    "The Teal Mask (Violet)",
    # Fallbacks, deliberately last so they only win when nothing else matches
    "Unknown",
    "Pokémon Home",
    "Pokémon Go",
]

# Every Pokemon object created by read_pokemon_list(), in file order.
big_pokemon_list = []

# Lower-cased species name -> list of Pokemon; rebuilt by create_pokemon_index().
pokemon_index = {}
|
|
|
|
def create_pokemon_index(pokemon_list):
    """Rebuild the module-level ``pokemon_index`` from ``pokemon_list``.

    Entries are grouped under their lower-cased ``name``; every form of a
    species therefore lands in the same list, in input order. The previous
    index is replaced, not merged.
    """
    global pokemon_index

    grouped = defaultdict(list)
    for entry in pokemon_list:
        key = entry.name.lower()
        grouped[key].append(entry)

    pokemon_index = grouped
|
|
|
|
def find_pokemon(name, form=None, threshold=80):
    """Look up a Pokemon in ``pokemon_index`` by name and, optionally, form.

    The name is first tried as an exact (case-insensitive) key. If that
    yields nothing usable, the index key with the highest ``fuzz.ratio``
    against the name is used, provided it scores at least ``threshold``.

    Without ``form`` the first entry stored under the chosen name is
    returned. With ``form`` the entry whose own form has the highest
    ``fuzz.ratio`` against it is returned, again only when the score reaches
    ``threshold``; entries without a form are never considered in that case.

    Returns:
        The matching Pokemon, or None when nothing matches well enough.
    """
    global pokemon_index

    def pick_form(options):
        # Best fuzzy form match among ``options``; None when below threshold.
        winner, top_score = None, 0
        for option in options:
            if not option.form:
                continue
            similarity = fuzz.ratio(form.lower(), option.form.lower())
            if similarity > top_score:
                winner, top_score = option, similarity
        if winner and top_score >= threshold:
            return winner
        return None

    key = name.lower()

    if key in pokemon_index:
        exact = pokemon_index[key]
        if not form:
            return exact[0] if exact else None
        hit = pick_form(exact)
        if hit is not None:
            return hit
        # No form was close enough: fall through to fuzzy name matching.

    # If no exact name match, try fuzzy matching on names
    closest_name, closest_score = None, 0
    for known_name in pokemon_index:
        similarity = fuzz.ratio(key, known_name)
        if similarity > closest_score:
            closest_name, closest_score = known_name, similarity

    if not closest_name or closest_score < threshold:
        return None

    fuzzy = pokemon_index[closest_name]
    if not form:
        return fuzzy[0]
    return pick_form(fuzzy)
|
|
|
|
def roman_to_int(s):
    """Convert an upper-case Roman numeral string to an integer.

    The numeral is read right to left: a symbol smaller than the one to its
    right is subtracted (the "I" in "IV"), anything else is added. An empty
    string gives 0. Raises KeyError for characters outside ``IVXLCDM``.
    """
    symbol_values = dict(zip("IVXLCDM", (1, 5, 10, 50, 100, 500, 1000)))

    result = 0
    right_neighbour = 0
    for symbol in reversed(s):
        value = symbol_values[symbol]
        result += value if value >= right_neighbour else -value
        right_neighbour = value

    return result
|
|
|
|
class Pokemon:
    """One Pokémon (optionally a specific form) plus its obtainability data.

    Encounter data is attached by appending EncounterInformation-like objects
    (anything with ``game``, ``method`` and ``locations``) to
    ``encounter_information``; ``determine_earliest_game`` then picks the
    first game of ``all_games`` in which the Pokémon can be obtained.
    """

    def __init__(self, name: str, number: int, form: Optional[str] = None):
        self.name = name
        self.number = number
        self.form = form
        self.stage: Optional[str] = None
        self.evolution_chain: Optional[List['EvolutionStage']] = []
        self.is_baby = False
        self.encounter_information: Optional[List['EncounterInformation']] = []
        self.earliest_game: Optional['EncounterInformation'] = None
        self.obtain_method: Optional[str] = None
        self.introduced_in_gen = None

    def get_earliest_game_and_method(self):
        """Return ``(game, method)`` describing how this Pokémon is first obtained.

        Only the first stage of ``evolution_chain`` is consulted:

        * a baby Pokémon reports that stage's earliest game with method "Breed";
        * a Pokémon that is itself that stage reports its own data;
        * any other Pokémon reports that stage's earliest game with method
          "Evolve".

        When the stage has no resolved ``pokemon_reference`` or that reference
        has no earliest game, the Pokémon's own earliest game is used instead
        of raising. ``(None, None)`` is returned when nothing is known.

        NOTE(review): because only the first stage is inspected, a mid-chain
        Pokémon catchable earlier than its base still reports the base's game.
        """
        if self.evolution_chain:
            reference = self.evolution_chain[0].pokemon_reference
            reference_game = reference.earliest_game if reference is not None else None
            if reference_game is not None:
                if self.is_baby:
                    return reference_game.game, "Breed"
                if reference is not self:
                    return reference_game.game, "Evolve"

        if self.earliest_game:
            return self.earliest_game.game, self.earliest_game.method
        return None, None

    def __str__(self):
        return f"{self.name}{' ' if self.form else ''}{self.form if self.form else ''} (#{self.number})"

    def add_evolution_chain(self, evolution_chain: List['EvolutionStage']):
        """Store the parsed evolution chain (may be None when none was found)."""
        self.evolution_chain = evolution_chain

    def add_stage(self, stage: str):
        """Record the evolution stage label and derive ``is_baby`` from it."""
        self.stage = stage
        self.is_baby = self.stage is not None and 'Baby' in self.stage

    @staticmethod
    def _location_text(location) -> str:
        """Return the searchable text of a location entry.

        Locations arrive either as plain strings or as dicts carrying the
        text under the "location" key; both shapes are accepted.
        """
        if isinstance(location, dict):
            return str(location.get("location", ""))
        return str(location)

    def update_encounter_information(self, exclude_events=True, exclude_home=True, exclude_go=True):
        """Classify every encounter by how the Pokémon is obtained there.

        Each encounter's ``method`` is reset to None and then set from its
        locations. Locations mentioning a non-catchable source (trades,
        transfers, and optionally events, Pokémon HOME and Pokémon GO) are
        ignored; of the remaining ones the last location decides the method:
        "Starter", "Gift", "Evolve", "Event" or "Catchable". An encounter
        whose locations are all ignored keeps ``method`` None.
        """
        if not self.encounter_information:
            return

        non_catchable_methods = ["trade", "global link", "poké transfer", "time capsule", "unobtainable"]
        if exclude_events:
            non_catchable_methods.append("event")
        if exclude_home:
            # Match both spellings; the accented one is what the game list uses.
            non_catchable_methods.extend(["pokemon home", "pokémon home"])
        if exclude_go:
            non_catchable_methods.append("pokémon go")

        for encounter in self.encounter_information:
            encounter.method = None
            for location in encounter.locations:
                text = self._location_text(location).lower()

                if any(marker in text for marker in non_catchable_methods):
                    continue

                if "first partner" in text:
                    encounter.method = "Starter"
                elif "received" in text:
                    encounter.method = "Gift"
                elif "evolve" in text:
                    encounter.method = "Evolve"
                elif "event" in text:
                    encounter.method = "Event"
                else:
                    encounter.method = "Catchable"

    def parse_encoutners_for_games(self):
        """Set ``earliest_game`` to the first game in ``all_games`` with a method.

        Only encounters whose ``method`` is set are considered; game names are
        compared case-insensitively. ``earliest_game`` is left untouched when
        no game qualifies.
        """
        game_methods = {}
        for encounter in self.encounter_information:
            if encounter.method:
                game_methods[encounter.game.lower()] = encounter

        for game in all_games:
            if game.lower() in game_methods:
                self.earliest_game = game_methods[game.lower()]
                return

    def determine_earliest_game(self):
        """Find the earliest game, relaxing the source filters step by step.

        The strictest filter is tried first; if nothing is found, events,
        then Pokémon HOME, then Pokémon GO are allowed one at a time (each
        retry relaxes only that one filter). ``earliest_game`` ends up None
        when no pass finds a game.
        """
        # Start clean so a value from an earlier call cannot survive a pass
        # that finds nothing.
        self.earliest_game = None
        if not self.encounter_information:
            return

        relaxations = (
            {},
            {"exclude_events": False},
            {"exclude_home": False},
            {"exclude_go": False},
        )
        for overrides in relaxations:
            self.update_encounter_information(**overrides)
            self.parse_encoutners_for_games()
            if self.earliest_game is not None:
                return

        self.earliest_game = None
|
|
|
|
class EvolutionStage:
    """One node of a parsed evolution chain.

    Nodes are linked through ``previous_stage`` / ``next_stage`` along a row
    and through ``branches`` for alternative evolutions. ``pokemon_reference``
    is the Pokemon found for this name and form, falling back to a lookup by
    name alone; it stays None when neither lookup succeeds.
    """

    def __init__(self, pokemon: str, method: Optional[str] = None, stage: Optional[str] = None, form: Optional[str] = None):
        self.pokemon = pokemon
        self.method = method
        self.next_stage: Optional[EvolutionStage] = None
        self.previous_stage: Optional[EvolutionStage] = None  # back link to the stage this one evolves from
        self.branches: List[EvolutionStage] = []
        self.stage = stage
        self.is_baby = self.stage is not None and 'Baby' in self.stage

        # Prefer the exact form; otherwise accept any entry with this name.
        reference = find_pokemon(pokemon, form)
        if reference is None:
            reference = find_pokemon(pokemon, None)
        self.pokemon_reference = reference

        self.form = form

    def __str__(self):
        form_part = self.form if self.form else ''
        method_part = self.method if self.method else 'Base'
        return f"{self.pokemon} {form_part} ({method_part})"
|
|
|
|
class EncounterInformation:
    """Where a Pokémon can be found in one game.

    ``method`` starts as "Unknown"; it is overwritten later when the
    locations are classified.
    """

    def __init__(self, game: str, locations: List[str]):
        # Game title this record belongs to.
        self.game = game
        # How the Pokémon is obtained in that game; not yet classified.
        self.method = "Unknown"
        # Location entries for that game, stored as given.
        self.locations = locations
|
|
|
|
def parse_evolution_chain(table: Tag, form: Optional[str] = None) -> List[EvolutionStage]:
    """Parse an evolution table into linked EvolutionStage objects.

    The first row of the table's direct ``tbody`` is the main chain; every
    other row is a branch. In each row a cell containing a nested table
    describes a Pokémon, while a cell without one holds the evolution method
    for the next Pokémon cell in that row.

    Args:
        table: The evolution ``<table>`` element.
        form: Accepted for interface compatibility; not used by the parser.

    Returns:
        The main-chain stages in order. Branch stages are reachable through
        ``branches`` of the main-chain stage they were attached to. An empty
        list is returned when the table has no direct ``tbody`` or no rows.
    """
    main_chain = []
    current_stage = None
    pending_method = None

    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if not rows:
        # An empty table body has no main row to read.
        return []
    main_row = rows[0]
    branch_rows = rows[1:]

    # Parse main evolution chain
    for td in main_row.find_all('td', recursive=False):
        if td.find('table'):
            # This TD contains Pokemon information
            pokemon_name = extract_pokemon_name(td)
            stage = extract_stage_form(td)
            evolution_form = extract_evolution_form(td, pokemon_name)
            new_stage = EvolutionStage(pokemon_name, pending_method, stage, evolution_form)
            pending_method = None
            if current_stage:
                current_stage.next_stage = new_stage
                new_stage.previous_stage = current_stage  # Set the back link
            current_stage = new_stage
            main_chain.append(current_stage)
        else:
            # This TD contains evolution method for the next Pokemon
            pending_method = extract_evolution_method(td)

    # Parse branching evolutions
    for row in branch_rows:
        branch_stage = None
        branch_method = None
        for td in row.find_all('td', recursive=False):
            if td.find('table'):
                pokemon_name = extract_pokemon_name(td)
                stage = extract_stage_form(td)
                evolution_form = extract_evolution_form(td, pokemon_name)
                new_stage = EvolutionStage(pokemon_name, branch_method, stage, evolution_form)
                branch_method = None
                if branch_stage:
                    branch_stage.next_stage = new_stage
                    new_stage.previous_stage = branch_stage  # Set the back link
                branch_stage = new_stage
                # Find which main chain Pokemon this branches from: a cell
                # with a rowspan whose name matches a main-chain stage.
                for main_stage in main_chain:
                    if td.get('rowspan') and main_stage.pokemon == pokemon_name:
                        main_stage.branches.append(branch_stage)
                        branch_stage.previous_stage = main_stage  # Set the back link to the main chain
                        break
            else:
                branch_method = extract_evolution_method(td)

    return main_chain
|
|
|
|
def find_name_tag(td: Tag) -> Optional[Tag]:
    """Return the anchor that names the Pokémon inside a Pokémon cell.

    A link with class ``selflink`` is preferred; otherwise the first link
    that has a ``title`` attribute and whose class is not ``image`` is used.
    Returns None when neither exists. The cell must contain a nested table.
    """
    table = td.find('table')
    return (
        table.find('a', class_='selflink')
        or table.find('a', title=True, class_=lambda x: x != 'image')
    )
|
|
|
|
def extract_pokemon_name(td: Tag) -> Optional[str]:
    """Return the stripped text of the cell's name link, or None without one."""
    anchor = find_name_tag(td)
    if not anchor:
        return None
    return anchor.get_text(strip=True)
|
|
|
|
def extract_evolution_method(td: Tag) -> str:
    """Return the cell's whitespace-stripped text, used as the evolution method."""
    method_text = td.get_text(strip=True)
    return method_text
|
|
|
|
def extract_stage_form(td: Tag) -> Optional[str]:
    """Return the text of the first ``<small>`` in the cell's nested table.

    The result is used as the stage label. Returns None when the nested
    table has no ``<small>`` element. The cell must contain a nested table.
    """
    small = td.find('table').find('small')
    return small.get_text(strip=True) if small else None
|
|
|
|
def extract_evolution_form(td: Tag, name: str) -> Optional[str]:
    """Return the form label shown next to the Pokémon's name, if any.

    The name link's parent is searched for ``<small>`` elements; only when
    more than one is present is the first taken as the form. ``name`` is
    accepted for interface compatibility and is not used.
    """
    anchor = find_name_tag(td)
    if not anchor:
        return None

    labels = anchor.parent.find_all('small')
    if len(labels) <= 1:
        return None
    return labels[0].get_text(strip=True)
|
|
|
|
def read_pokemon_list(filename, limit=50):
    """Load Pokémon from a CSV file into the module-level ``big_pokemon_list``.

    The CSV needs ``name`` and ``number`` columns. A trailing parenthesised
    part of the name ("Rotom (Heat)") is split off as the form. At most
    ``limit`` rows are read.

    Returns:
        ``big_pokemon_list`` itself, which also keeps entries added by
        earlier calls.
    """
    # Base name, then an optional "(form)" suffix at the end of the string.
    name_pattern = re.compile(r'(.*?)\s*(\(.*\))?$')

    with open(filename, 'r', newline='', encoding='utf-8') as csvfile:
        for index, row in enumerate(csv.DictReader(csvfile)):
            if index >= limit:
                break

            parsed = name_pattern.match(row['name'])
            if parsed:
                base_name, form = parsed.groups()
            else:
                base_name, form = row['name'], None

            row['base_name'] = base_name.strip()
            row['form'] = form.strip('() ') if form else None

            big_pokemon_list.append(Pokemon(row['base_name'], row['number'], row['form']))

    return big_pokemon_list
|
|
|
|
def get_pokemon_data_bulbapedia(pokemon_name, cache: CacheManager):
    """Return the Pokémon's Bulbapedia article as provided by ``cache.fetch_url``."""
    return cache.fetch_url(f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pokémon)")
|
|
|
|
def split_td_contents(td):
    """Split a cell's children into HTML fragments separated by ``<br>``.

    Whitespace-only text nodes are dropped, every other child is kept, and
    each run of children between ``<br>`` tags is joined into one string.
    Empty runs produce no fragment.
    """
    fragments = []
    pending = []

    for node in td.contents:
        if isinstance(node, NavigableString):
            if node.strip():
                pending.append(node)
            continue

        if node.name != 'br':
            pending.append(node)
            continue

        # A <br> closes the run collected so far.
        if pending:
            fragments.append(''.join(map(str, pending)))
            pending = []

    if pending:
        fragments.append(''.join(map(str, pending)))
    return fragments
|
|
|
|
def parse_form_information(html_content):
    """Extract the form labels attached to one location fragment.

    Returns a list of ``{"main_form": ..., "sub_form": ...}`` dicts:

    * With a ``<small>`` element, each bold label inside it is one form.
      "A/B Word" is expanded to "A Word" and "B Word"; "Main (Sub)" is split
      into main and sub form; labels containing "factor" are skipped.
    * Without a ``<small>`` element (seen for Gimmighoul), every bold
      heading whose text contains "form" and that is not inside a ``<sup>``
      counts as a form.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    small_tag = soup.find('small')
    forms = []

    if not small_tag:
        # ..... Gimmighoul: the forms are plain bold headings.
        for heading in soup.find_all('b'):
            if heading.parent.name == 'sup':
                continue
            label = heading.get_text(strip=True)
            if "form" not in label.lower():
                continue
            forms.append({"main_form": label, "sub_form": None})
        return forms

    # Form info is in bold inside a small tag.
    for bold_tag in small_tag.find_all('b'):
        # Remove surrounding parentheses
        form_text = bold_tag.get_text(strip=True).strip('()')

        if "/" in form_text:
            # "Red/Blue Flower" -> "Red Flower", "Blue Flower"
            last_word = form_text.split()[-1]
            stems = form_text.replace(last_word, "").strip().split('/')
            forms.extend(
                {"main_form": stem.strip() + " " + last_word, "sub_form": None}
                for stem in stems
            )
            continue

        # Split the text into main form and breed (if present)
        pieces = form_text.split('(')
        main_form = pieces[0].strip()

        # "Factor"s are not actual forms, they are properties of the Pokémon you can encounter.
        if main_form and "factor" in main_form.lower():
            continue

        breed = pieces[1].strip(')') if len(pieces) > 1 else None
        forms.append({"main_form": main_form, "sub_form": breed})

    return forms
|
|
|
|
def get_evolution_data_from_bulbapedia(pokemon_name, form, cache: CacheManager, gender: Optional[str] = None):
    """Fetch a Pokémon's page and parse its "Evolution data" table.

    Without ``form`` the first table after the section heading is used. With
    ``form`` the siblings following the heading are scanned for an ``h4``
    whose text contains the form name (with "Form"/"form" removed) and the
    table after it is used; the scan stops at the next ``h3``.

    ``gender`` is accepted for interface compatibility and is not used.

    Returns:
        The parsed chain (Eevee has its own parser), or None when the page,
        the section or the table cannot be found.
    """
    html = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not html:
        return None

    page = BeautifulSoup(html, 'html.parser')
    headline = page.find('span', id='Evolution_data')
    if not headline:
        return None

    if form:
        table = None
        wanted = form.replace('Form', '').replace('form', '').strip()
        for sibling in headline.parent.find_next_siblings():
            if sibling.name == 'h4' and wanted in sibling.get_text(strip=True):
                table = sibling.find_next('table')
                break
            if sibling.name == 'h3':
                # Reached the next section without finding the form.
                break
    else:
        table = headline.parent.find_next('table')

    if not table:
        return None

    if pokemon_name == "Eevee":
        return parse_eevee_evolution_chain(table)
    return parse_evolution_chain(table, form)
|
|
|
|
# The evolution table layout is a little odd:
# - the first TR contains a full evolution chain;
# - the other TRs contain branching evolution chains;
# - any TDs in the first TR with a rowspan are part of the main evolution chain;
# - any other TDs are part of the branching evolution chains;
# - a table in a TD is information about the current Pokémon in that evolution stage;
# - a TD without a table is information on how to trigger the next evolution.
|
|
|
|
def parse_pokemon_subtable(td):
    """Return ``(name, stage)`` for a Pokémon cell, or ``(None, None)``.

    Only cells containing a nested table hold Pokémon information.
    """
    if not td.find('table'):
        return None, None
    return extract_pokemon_name(td), extract_stage_form(td)
|
|
|
|
def parse_eevee_evolution_chain(table):
    """Parse Eevee's evolution table, which fans out to many evolutions.

    The parser reads three rows of the table's direct ``tbody``: row 1 holds
    Eevee, row 2 one evolution method per column and row 3 the evolved
    Pokémon, column-aligned with the methods.

    Returns:
        A one-element list with Eevee's stage, whose ``branches`` are the
        evolutions (each linked back through ``previous_stage``). An empty
        list is returned when the table does not have this shape.
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if len(rows) < 4:
        # Rows 1-3 are required; bail out instead of raising IndexError.
        return []
    eevee_row, method_row, eeveelutions_row = rows[1], rows[2], rows[3]

    eevee_td = eevee_row.find('td', recursive=False)
    if eevee_td is None:
        return []
    pokemon_name, stage = parse_pokemon_subtable(eevee_td)
    if pokemon_name is None:
        return []
    eevee_stage = EvolutionStage(pokemon_name, None, stage, None)

    methods = [
        extract_evolution_method(method)
        for method in method_row.find_all('td', recursive=False)
    ]

    eeveelutions = []
    for index, eeveelution in enumerate(eeveelutions_row.find_all('td', recursive=False)):
        pokemon_name, stage = parse_pokemon_subtable(eeveelution)
        if pokemon_name is None:
            # Not a Pokémon cell; keep the column index aligned with methods.
            continue
        method = methods[index] if index < len(methods) else None
        eeveelution_stage = EvolutionStage(pokemon_name, method, stage, None)
        eeveelution_stage.previous_stage = eevee_stage  # Set the back link to Eevee
        eeveelutions.append(eeveelution_stage)

    eevee_stage.branches = eeveelutions  # Set the branches directly, not as a nested list

    return [eevee_stage]
|
|
|
|
def get_intro_generation(pokemon_name, form, cache: CacheManager):
    """Return the first generation listed in the "Game locations" table.

    The table is walked top to bottom; the first nested row whose text
    contains "Generation <Roman numeral>" decides the result.

    Args:
        pokemon_name: Species name used to build the page URL.
        form: Accepted for interface compatibility; not used.
        cache: Cache used to fetch the page.

    Returns:
        The generation as an int, or None when the page, the section, the
        table or a generation label cannot be found.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')

    locations_section = soup.find('span', id='Game_locations')
    if not locations_section:
        return None

    locations_table = locations_section.find_next('table', class_='roundy')
    if not locations_table:
        return None

    generation_tbody = locations_table.find('tbody', recursive=False)
    if not generation_tbody:
        return None

    # Captures only the Roman numeral, so no further clean-up is needed.
    generation_pattern = re.compile(r"Generation\s+([IVXLCDM]+)")

    for generation_row in generation_tbody.find_all('tr', recursive=False):
        nested_td = generation_row.find('td', recursive=False)
        if not nested_td:
            continue
        nested_table = nested_td.find('table', recursive=False)
        if not nested_table:
            continue
        nested_tbody = nested_table.find('tbody', recursive=False)
        if not nested_tbody:
            continue

        for nested_row in nested_tbody.find_all('tr', recursive=False):
            match = generation_pattern.search(nested_row.get_text(strip=True))
            if match:
                return roman_to_int(match.group(1))

    return None
|
|
|
|
def compare_forms(a, b):
    """Return True when two form labels refer to the same form.

    None never matches anything. Otherwise the labels match when they are
    equal as given or equal after normalisation: lower-casing, dropping the
    words "forme"/"form", replacing "é" with "e", trimming whitespace and
    unifying a few known spelling variants.
    """
    if a is None or b is None:
        return False
    if a == b:
        return True

    spelling_fixes = (
        ("deputante", "debutante"),
        ("p'au", "pa'u"),
        ("blood moon", "bloodmoon"),
    )

    def canonical(label):
        cleaned = label.lower()
        # "forme" must go first, otherwise a stray "e" would be left behind.
        for filler in ("forme", "form"):
            cleaned = cleaned.replace(filler, "")
        cleaned = cleaned.replace("é", "e").strip()
        for variant, preferred in spelling_fixes:
            cleaned = cleaned.replace(variant, preferred)
        return cleaned

    return canonical(a) == canonical(b)
|
|
|
|
@lru_cache(maxsize=100)
def get_parsed_pokemon_page(pokemon_name, cache):
    """Return the Pokémon's page parsed with BeautifulSoup, or None.

    Results are memoised per ``(pokemon_name, cache)`` pair, so both
    arguments must be hashable and the returned tree is shared by callers.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None
    return BeautifulSoup(page_data, 'html.parser')
|
|
|
|
def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager, default_forms=None):
    """Collect, per game, the locations where a Pokémon (form) can be found.

    The locations table is taken from the first of the "Game locations",
    "In side games" and "In spin-off games" sections that has one. Rows with
    as many header cells as data cells are read pairwise; only games listed
    in ``all_games`` are kept. Each game's raw fragments are filtered by
    form on a thread pool, and matching event tables are appended last.

    Returns:
        A dict mapping game name to a list of ``{"location", "tag"}`` dicts,
        or None when the page or the locations table cannot be found.
    """
    page = get_parsed_pokemon_page(pokemon_name, cache)
    if not page:
        return None

    # Try different sections to find the locations table
    locations_table = None
    for header in ['Game locations', 'In side games', 'In spin-off games']:
        headline = page.find('span', id=header.replace(' ', '_'))
        if not headline:
            continue
        locations_table = headline.find_next('table', class_='roundy')
        if locations_table:
            break

    if not locations_table:
        print(f"Warning: Couldn't find locations table for {pokemon_name}")
        return None

    # Gather the raw HTML fragments per game
    fragments_by_game = {}
    for row in locations_table.select('tr'):
        game_cells = row.select('th')
        location_cells = row.select('td')
        if len(game_cells) != len(location_cells):
            continue

        for game_cell, location_cell in zip(game_cells, location_cells):
            title = game_cell.get_text(strip=True)
            if title not in all_games:
                continue
            fragments_by_game.setdefault(title, []).extend(split_td_contents(location_cell))

    # Process events
    events_section = page.find('span', id='In_events')
    event_tables = process_event_tables(events_section) if events_section else {}

    # Process game locations in parallel
    with ThreadPoolExecutor() as executor:
        pending = {}
        for title, fragments in fragments_by_game.items():
            job = executor.submit(process_game_locations, title, fragments, form, default_forms)
            pending[job] = title

        game_locations = {}
        for job in as_completed(pending):
            outcome = job.result()
            if outcome:
                game_locations[pending[job]] = outcome

    # Process event tables: plain species for the base form, otherwise any
    # variant heading that mentions the requested form.
    for variant, table in event_tables.items():
        if (variant == pokemon_name and form is None) or (form and form in variant):
            process_event_table(table, game_locations)

    return game_locations
|
|
|
|
def process_event_tables(events_section):
    """Map each event variant heading to the table that follows it.

    Starting after the "In events" heading, following siblings are scanned
    until the next ``h3``. Every ``h4`` names a variant; the next sibling
    table with class ``roundy`` is stored under the heading's text.

    Args:
        events_section: The element marking the section (may be None).

    Returns:
        A dict of variant heading text to table element; empty when nothing
        is found.
    """
    event_tables = {}
    if not events_section:
        return event_tables

    # The section marker is looked up as a <span>. When that span sits inside
    # a heading tag, the section content follows the heading, not the span,
    # so scan from the heading (the evolution lookup does the same).
    # NOTE(review): assumes the usual heading-wraps-headline page markup;
    # a span that is not inside a heading is scanned from as before.
    start = events_section
    parent = events_section.parent
    if parent is not None and parent.name in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
        start = parent

    next_element = start.find_next_sibling()
    while next_element and next_element.name != 'h3':
        if next_element.name == 'h4':
            variant = next_element.text.strip()
            table = next_element.find_next_sibling('table', class_='roundy')
            if table:
                event_tables[variant] = table
        next_element = next_element.find_next_sibling()

    return event_tables
|
|
|
|
def process_event_table(table, game_locations):
    """Append the rows of an event table to ``game_locations`` in place.

    The header row is skipped. For rows with at least three cells, the first
    cell is the game and the third the location; rows for games outside
    ``all_games`` are ignored. Entries are stored as
    ``{"location": "Event: <text>", "tag": <cell html>}``.
    """
    data_rows = table.find_all('tr')[1:]  # Skip header row
    for row in data_rows:
        cells = row.find_all('td')
        if len(cells) < 3:
            continue

        game = cells[0].text.strip()
        location = cells[2].text.strip()
        if game not in all_games:
            continue

        entry = {"location": f"Event: {location}", "tag": str(cells[2])}
        game_locations.setdefault(game, []).append(entry)
|
|
|
|
def process_game_locations(raw_game, raw_locations, form, default_forms):
    """Keep the location fragments of one game that apply to ``form``.

    When ``form`` is None, a fragment is kept if it names no form at all, or
    once for each named form that is a default form or one of the catch-all
    labels. When ``form`` is given, a fragment is kept once for each named
    form accepted by ``form_matches``; fragments naming no form are dropped.

    ``raw_game`` is accepted for interface compatibility and is not used.

    Returns:
        A list of ``{"location", "tag"}`` dicts, or None when nothing is kept.
    """
    catch_all_labels = ("All Forms", "Kantonian Form", "All Sizes")
    kept = []

    for fragment in raw_locations:
        named_forms = parse_form_information(fragment)

        if form is None:
            if not named_forms:
                kept.append({"location": fragment, "tag": fragment})
                continue

            for form_info in named_forms:
                label = form_info["main_form"]
                # Default forms count as "no form".
                if default_forms and label and label in default_forms:
                    label = None

                if label and label not in catch_all_labels:
                    continue

                kept.append({"location": fragment, "tag": fragment})
        elif named_forms:
            for form_info in named_forms:
                if form_matches(form_info, form, default_forms):
                    kept.append({"location": fragment, "tag": fragment})

    return kept or None
|
|
|
|
def form_matches(form_info, form, default_forms):
    """Decide whether a parsed form entry applies to the requested ``form``.

    Entries without a main form, or whose main form is one of
    ``default_forms``, never match. The catch-all labels always match.
    Otherwise the main form or the sub form must equal ``form`` according to
    ``compare_forms`` or reach a ``fuzz.partial_ratio`` of 80.
    """
    primary = form_info["main_form"]
    secondary = form_info["sub_form"]

    if primary is None:
        return False
    # A default form is treated as "no form".
    if default_forms and primary and primary in default_forms:
        return False
    if primary in ("All Forms", "Kantonian Form", "All Sizes"):
        return True

    primary_hit = compare_forms(form, primary) or fuzz.partial_ratio(form.lower(), primary.lower()) >= 80
    secondary_hit = compare_forms(form, secondary) or (secondary and fuzz.partial_ratio(form.lower(), secondary.lower()) >= 80)

    return primary_hit or secondary_hit
|
|
|
|
def record_location_info(raw_game, game_locations, raw_location, raw_text):
    """Append one location entry for ``raw_game`` to ``game_locations`` in place.

    The entry stores ``raw_text`` under "location" and the string form of
    ``raw_location`` under "tag"; the game's list is created on first use.
    """
    entry = {"location": raw_text, "tag": str(raw_location)}
    game_locations.setdefault(raw_game, []).append(entry)
|
|
|
|
def split_outside_brackets(str):
    """Split on commas, except commas followed by a ")" with no bracket in between.

    In practice this leaves commas inside a parenthesised group untouched.
    """
    # The lookahead rejects a comma when a closing bracket follows before
    # any other bracket does.
    comma_outside_brackets = re.compile(r',(?![^()]*\))')
    return comma_outside_brackets.split(str)
|
|
|
|
def handle_unown(pokemon, encounter_data):
    """Fix up the encounter information of an Unown form in place.

    Does nothing for other species or when no Unown is found by name alone.
    The "!" and "?" forms (with encounter data available) get their own
    encounters plus the generic Unown's encounters for every game listed in
    ``all_games`` after both HeartGold and SoulSilver. Every other form
    shares the generic Unown's encounter list.
    """
    if pokemon.name != "Unown":
        return

    generic_unown = find_pokemon(pokemon.name, None)
    if not generic_unown:
        return

    # The ! and ? forms were added in HeartGold and SoulSilver.
    is_late_form = pokemon.form in ("!", "?")
    if not (is_late_form and encounter_data):
        pokemon.encounter_information = generic_unown.encounter_information
        return

    for game_title in encounter_data:
        pokemon.encounter_information.append(
            EncounterInformation(game_title, encounter_data[game_title])
        )

    seen_heartgold = seen_soulsilver = False
    for game in all_games:
        if game == "HeartGold":
            seen_heartgold = True
            continue
        if game == "SoulSilver":
            seen_soulsilver = True
            continue
        if not (seen_heartgold and seen_soulsilver):
            continue

        # Borrow the generic Unown's first encounter for this later game.
        for known in generic_unown.encounter_information:
            if known.game == game:
                pokemon.encounter_information.append(known)
                break
|
|
|
|
# Species whose non-default forms reuse the encounter information of the
# entry found by name alone (see handle_form_shift).
list_of_shifting_form_pokemon = [
    "Deoxys", "Burmy", "Wormadam", "Rotom", "Shaymin", "Keldeo",
    "Furfrou", "Hoopa", "Pumpkaboo", "Gourgeist", "Zygarde", "Magearna",
    "Vivillon", "Minior", "Urshifu", "Oinkologne", "Basculegion", "Enamorus",
]
|
|
|
|
def handle_form_shift(pokemon, encounter_data):
    """Give a form of a form-shifting species the base entry's encounters.

    Applies only to species in ``list_of_shifting_form_pokemon`` that have a
    form set. The encounter list of the entry found by name alone is shared
    (not copied). ``encounter_data`` is not used.
    """
    is_shifting_form = (
        pokemon.name in list_of_shifting_form_pokemon
        and pokemon.form is not None
    )
    if not is_shifting_form:
        return

    base_entry = find_pokemon(pokemon.name, None)
    if base_entry:
        pokemon.encounter_information = base_entry.encounter_information
|
|
|
|
# Species whose two forms need special naming when looking up locations,
# grouped by the pair of form names they use (see get_bad_tea_form).
phony_authentic = ["Sinistea", "Polteageist"]      # Phony / Authentic
countefieit_atrisan = ["Poltchageist"]             # Counterfeit / Artisan
unremarkable_masterpiece = ["Sinistcha"]           # Unremarkable / Masterpiece
bad_tea_pokemon = [*phony_authentic, *countefieit_atrisan, *unremarkable_masterpiece]
|
|
|
|
def get_bad_tea_form(pokemon):
    """Return the form name to search for, for the tea-themed species.

    For species in ``bad_tea_pokemon`` an entry without a form maps to the
    group's first form name ("Phony Form", "Counterfeit Form",
    "Unremarkable Form"). "Authentic Form" and "Artisan Form" map to None.
    A species in ``unremarkable_masterpiece`` with a form keeps that form.
    Everything else yields None.
    """
    species, current_form = pokemon.name, pokemon.form

    if species not in bad_tea_pokemon:
        return None

    if species in phony_authentic:
        if current_form is None:
            return "Phony Form"
        if current_form == "Authentic Form":
            return None

    if species in countefieit_atrisan:
        if current_form is None:
            return "Counterfeit Form"
        if current_form == "Artisan Form":
            return None

    if species in unremarkable_masterpiece:
        return "Unremarkable Form" if current_form is None else current_form

    return None
|
|
|
|
def determine_earliest_games(cache: CacheManager):
    """Fill in generation, encounters and earliest game for every Pokémon.

    For each entry of ``big_pokemon_list`` the form name to search for is
    adjusted for a few species, the introduction generation and per-game
    locations are scraped, species-specific fix-ups are applied and the
    earliest game is determined. Progress is printed along the way.

    A Pokémon whose locations cannot be scraped is processed with no
    encounter data instead of aborting the whole run.
    """
    for pokemon in big_pokemon_list:
        print(f"Processing {pokemon}")

        # Work out which form label to look for on the page.
        form_to_find = pokemon.form
        if pokemon.name == "Minior" and pokemon.form == "Orange Core":
            form_to_find = None
        if pokemon.name == "Squawkabilly" and pokemon.form:
            form_to_find = pokemon.form.replace("Plumage", "").strip()
        if pokemon.name == "Alcremie":
            form_to_find = None
        if pokemon.name in bad_tea_pokemon:
            form_to_find = get_bad_tea_form(pokemon)

        pokemon.introduced_in_gen = get_intro_generation(pokemon.name, form_to_find, cache)

        # The scraper returns None when the page or table is missing.
        encounter_data = get_locations_from_bulbapedia(pokemon.name, form_to_find, cache) or {}
        for game, locations in encounter_data.items():
            pokemon.encounter_information.append(EncounterInformation(game, locations))

        handle_unown(pokemon, encounter_data)
        handle_form_shift(pokemon, encounter_data)
        if pokemon.name == "Gimmighoul" and pokemon.form == "Roaming Form":
            encounter_information = EncounterInformation("Pokémon Go", ["Pokémon Go"])
            pokemon.encounter_information.append(encounter_information)

        pokemon.determine_earliest_game()

        earliest = pokemon.earliest_game
        if earliest is not None:
            print(f"Processed {pokemon}: {earliest.game} ({earliest.method})")
        else:
            # No game found; later steps handle this case explicitly.
            print(f"Processed {pokemon}: no obtainable game found")
|
|
|
|
def get_base_form(evolution_chain: List[EvolutionStage]):
    """Return the name of the chain's base (non-baby, unevolved) Pokémon.

    The chain is scanned in order: a stage labelled "Unevolved" is the base;
    a baby stage points at its next stage as the base. A baby stage without
    a next stage is skipped rather than raising.

    Returns:
        The base Pokémon's name, or None for an empty/missing chain or when
        no stage qualifies.
    """
    if not evolution_chain:
        return None

    for stage in evolution_chain:
        if stage.stage == "Unevolved":
            return stage.pokemon
        if stage.is_baby and stage.next_stage is not None:
            return stage.next_stage.pokemon

    return None
|
|
|
|
def adjust_for_evolution(cache: CacheManager):
    """Attach evolution chains to every Pokémon and print the resulting source.

    For each entry of ``big_pokemon_list`` the evolution chain is scraped
    and stored, then the earliest game and method (taking the chain into
    account) are printed. Nothing is returned.
    """
    for entry in big_pokemon_list:
        chain = get_evolution_data_from_bulbapedia(entry.name, entry.form, cache)
        entry.add_evolution_chain(chain)

        game, method = entry.get_earliest_game_and_method()
        print(f"Adjusted {entry}: {game} ({method})")
|
|
|
|
def save_to_csv(filename='pokemon_earliest_games.csv'):
    """Write one CSV row per Pokémon in ``big_pokemon_list``.

    Columns: number, name, introduced_in_gen, earliest_game, obtain_method
    and encounter_locations (the locations of the earliest game, joined with
    " | "). A Pokémon without an earliest game is still written, with the
    game, method and locations left empty.

    Args:
        filename: Output path; the file is overwritten.
    """
    fieldnames = ['number', 'name', 'introduced_in_gen', 'earliest_game', 'obtain_method', 'encounter_locations']

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for pokemon in big_pokemon_list:
            earliest = pokemon.earliest_game

            encounter_locations = []
            if earliest is not None:
                for encounter in pokemon.encounter_information:
                    if encounter.game == earliest.game:
                        encounter_locations.extend(encounter.locations)

            writer.writerow({
                'number': pokemon.number,
                # NOTE(review): entries without a form are written as
                # "Name (None)"; kept as-is in case consumers rely on it.
                'name': f"{pokemon.name} ({pokemon.form})",
                'introduced_in_gen': pokemon.introduced_in_gen,
                # None is written as an empty field by the csv module.
                'earliest_game': earliest.game if earliest is not None else None,
                'obtain_method': earliest.method if earliest is not None else None,
                'encounter_locations': ' | '.join(str(item) for item in encounter_locations),
            })
|
|
|
|
def handle_unknown_encounters(cache):
    """Report every Pokémon that still has no earliest game or method.

    Currently this only prints a message per affected Pokémon; ``cache`` is
    accepted but not used.
    """
    for entry in big_pokemon_list:
        earliest = entry.earliest_game
        if earliest is None or earliest.method is None:
            print(f"Checking alternative sources for {entry.name}")
|
|
|
|
# Script entry point: load the Pokémon list, scrape and resolve the earliest
# game for each entry, then write the results to CSV.
if __name__ == "__main__":
    cache = CacheManager()
    try:
        pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000)

        create_pokemon_index(big_pokemon_list)

        determine_earliest_games(cache)
        adjust_for_evolution(cache)
        handle_unknown_encounters(cache)
        save_to_csv()
    finally:
        # Close the cache even when a stage fails part-way through.
        cache.close()

    print(f"Earliest obtainable games and encounter locations determined for {len(pokemon_list)} Pokémon and saved to pokemon_earliest_games.csv")
|
|
|
|
|