You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
947 lines
34 KiB
947 lines
34 KiB
from __future__ import annotations
|
|
import csv
|
|
import requests
|
|
import time
|
|
import json
|
|
import os
|
|
import re
|
|
import sqlite3
|
|
from bs4 import BeautifulSoup, Tag, NavigableString
|
|
import copy
|
|
from typing import List, Optional
|
|
from fuzzywuzzy import fuzz
|
|
from fuzzywuzzy import process
|
|
from collections import defaultdict
|
|
|
|
import os
|
|
import sys
|
|
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from DataGatherers.cache_manager import CacheManager
|
|
import concurrent.futures
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from functools import lru_cache
|
|
|
|
from DataGatherers.constants import all_games, regional_descriptors
|
|
|
|
# Every Pokemon object created by read_pokemon_list, in the order it was read.
big_pokemon_list = []
# Lower-cased Pokemon name -> list of Pokemon objects sharing that name.
# Rebound to a fresh mapping by create_pokemon_index.
pokemon_index = {}
|
|
|
|
def create_pokemon_index(pokemon_list):
    """Rebuild the module-level ``pokemon_index`` from *pokemon_list*.

    Every entry is filed under its lower-cased ``name``. Entries that share a
    name end up in the same list, in input order. The global is rebound to the
    newly built mapping; nothing is returned.
    """
    global pokemon_index
    by_name = defaultdict(list)
    for entry in pokemon_list:
        key = entry.name.lower()
        by_name[key].append(entry)
    pokemon_index = by_name
|
|
|
|
def _best_form_match(candidates, form, threshold):
    """Return the candidate whose form is closest to *form*, or None.

    Candidates without a form are skipped. Closeness is ``fuzz.ratio`` on the
    lower-cased strings; the best score must reach *threshold*.
    """
    wanted = form.lower()
    best_match = None
    best_score = 0
    for pokemon in candidates:
        if not pokemon.form:
            continue
        score = fuzz.ratio(wanted, pokemon.form.lower())
        if score > best_score:
            best_score = score
            best_match = pokemon
    if best_match and best_score >= threshold:
        return best_match
    return None


def find_pokemon(name, form=None, threshold=80):
    """Look up a Pokemon in ``pokemon_index`` by name and optional form.

    Args:
        name: Pokemon name (case-insensitive). ``None`` yields ``None``.
        form: Optional form label. When falsy, the first Pokemon registered
            under the matched name is returned.
        threshold: Minimum ``fuzz.ratio`` score (0-100) for a fuzzy name or
            form match to be accepted.

    Returns:
        The matching Pokemon object, or ``None`` when nothing qualifies.
    """
    # Callers may pass a name that could not be extracted (None).
    if name is None:
        return None

    key = name.lower()
    if key in pokemon_index:
        candidates = pokemon_index[key]
        if not form:
            return candidates[0] if candidates else None
        match = _best_form_match(candidates, form, threshold)
        if match:
            return match

    # If no exact name match (or no acceptable form), try fuzzy matching on names.
    best_name = None
    best_name_score = 0
    for known_name in pokemon_index:
        score = fuzz.ratio(key, known_name)
        if score > best_name_score:
            best_name_score = score
            best_name = known_name

    if best_name and best_name_score >= threshold:
        candidates = pokemon_index[best_name]
        if not form:
            return candidates[0] if candidates else None
        return _best_form_match(candidates, form, threshold)

    return None
|
|
|
|
def roman_to_int(s):
    """Convert a Roman numeral written with the upper-case letters IVXLCDM to an int.

    A symbol is subtracted when it is strictly smaller than the symbol to its
    right, otherwise it is added. Any other character raises ``KeyError``.
    An empty string gives 0.
    """
    symbol_values = {'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100, 'D': 500, 'M': 1000}
    digits = [symbol_values[symbol] for symbol in s]

    result = 0
    for position, digit in enumerate(digits):
        # The value to the right decides the sign; the last symbol is always added.
        following = digits[position + 1] if position + 1 < len(digits) else 0
        if digit >= following:
            result += digit
        else:
            result -= digit
    return result
|
|
|
|
class Pokemon:
    """One Pokemon entry (name plus optional form) and how it can be obtained.

    ``encounter_information`` holds one encounter object per game; each has a
    ``game``, a ``method`` and a ``locations`` list. ``earliest_game`` is the
    encounter chosen by ``determine_earliest_game``.
    """

    def __init__(self, name: str, number: int, form: Optional[str] = None):
        self.name = name
        self.number = number
        self.form = form
        self.stage: Optional[str] = None
        self.evolution_chain: Optional[List['EvolutionStage']] = []
        self.is_baby = False
        self.encounter_information: Optional[List['EncounterInformation']] = []
        self.earliest_game: Optional['EncounterInformation'] = None
        self.obtain_method: Optional[str] = None
        self.introduced_in_gen = None

    def get_earliest_game_and_method(self):
        """Return ``(game, method)`` describing how this Pokemon is first obtained.

        Only the first stage of the evolution chain is consulted. A baby
        Pokemon reports that stage's earliest game with method "Breed". A
        Pokemon that is not the first stage reports it with method "Evolve".
        Otherwise, or when the referenced data is missing, the Pokemon's own
        ``earliest_game`` is used. ``(None, None)`` means nothing is known.
        """
        if self.evolution_chain:
            # NOTE(review): the original loop returned on its first iteration
            # in every branch, so only the first stage ever mattered.
            reference = self.evolution_chain[0].pokemon_reference
            if self.is_baby or reference is not self:
                # Fall back to our own data rather than crash when the first
                # stage could not be resolved or has no earliest game yet.
                if reference is not None and reference.earliest_game is not None:
                    return reference.earliest_game.game, ("Breed" if self.is_baby else "Evolve")

        if self.earliest_game:
            return self.earliest_game.game, self.earliest_game.method
        return None, None

    def __str__(self):
        label = f"{self.name} {self.form}" if self.form else f"{self.name}"
        return f"{label} (#{self.number})"

    def add_evolution_chain(self, evolution_chain: List['EvolutionStage']):
        """Store the parsed evolution chain for this Pokemon."""
        self.evolution_chain = evolution_chain

    def add_stage(self, stage: str):
        """Record the evolution stage label; a label containing 'Baby' marks a baby."""
        self.stage = stage
        self.is_baby = self.stage is not None and 'Baby' in self.stage

    @staticmethod
    def _location_text(location) -> str:
        """Return the lower-cased text of one location entry.

        Entries are plain strings or ``{"location": ..., "tag": ...}`` dicts,
        the shape built by ``process_game_locations`` in this module.
        """
        if isinstance(location, dict):
            location = location.get("location", "")
        return str(location).lower()

    def update_encounter_information(self, exclude_events=True, exclude_home=True, exclude_go=True):
        """Recompute ``method`` for every encounter from its location texts.

        Locations mentioning a non-catchable keyword are ignored. The last
        remaining location decides the method ("Starter", "Gift", "Evolve",
        "Event" or "Catchable"). An encounter with no usable location keeps
        ``method = None``.

        Args:
            exclude_events: Ignore locations mentioning "event".
            exclude_home: Ignore locations mentioning Pokémon HOME.
            exclude_go: Ignore locations mentioning Pokémon GO.
        """
        if not self.encounter_information:
            return

        non_catchable_methods = ["trade", "global link", "poké transfer", "time capsule", "unobtainable"]
        if exclude_events:
            non_catchable_methods.append("event")
        if exclude_home:
            # Location text spells it with the accent, so match both spellings.
            non_catchable_methods.extend(["pokemon home", "pokémon home"])
        if exclude_go:
            non_catchable_methods.append("pokémon go")

        for encounter in self.encounter_information:
            encounter.method = None
            for location in encounter.locations:
                text = self._location_text(location)
                if any(keyword in text for keyword in non_catchable_methods):
                    continue

                if "first partner" in text:
                    encounter.method = "Starter"
                elif "received" in text:
                    encounter.method = "Gift"
                elif "evolve" in text:
                    encounter.method = "Evolve"
                elif "event" in text:
                    encounter.method = "Event"
                else:
                    encounter.method = "Catchable"

    def parse_encoutners_for_games(self):
        """Set ``earliest_game`` to the first game in ``all_games`` with a usable encounter.

        Encounters whose ``method`` is falsy are ignored. ``earliest_game`` is
        left untouched when no game qualifies.
        """
        game_methods = {}
        for encounter in self.encounter_information:
            if encounter.method:
                game_methods[encounter.game.lower()] = encounter

        for game in all_games:
            if game.lower() in game_methods:
                self.earliest_game = game_methods[game.lower()]
                return

    def determine_earliest_game(self):
        """Pick ``earliest_game``, relaxing one exclusion at a time until a game is found.

        Order: strict, then events allowed, then HOME allowed, then GO allowed.
        Each relaxed pass keeps the other exclusions in place.
        """
        if not self.encounter_information:
            self.earliest_game = None
            return

        relaxations = (
            {},
            {"exclude_events": False},
            {"exclude_home": False},
            {"exclude_go": False},
        )
        for overrides in relaxations:
            self.update_encounter_information(**overrides)
            self.parse_encoutners_for_games()
            if self.earliest_game is not None:
                return
|
|
|
|
class EvolutionStage:
    """One node of an evolution chain.

    Attributes set here: the Pokemon name, the method that leads to this
    stage, links to the next/previous stage, branch stages, the stage label
    and the resolved ``pokemon_reference`` (looked up via ``find_pokemon``).
    """

    def __init__(self, pokemon: str, method: Optional[str] = None, stage: Optional[str] = None, form: Optional[str] = None):
        self.pokemon = pokemon
        self.method = method
        self.next_stage: Optional[EvolutionStage] = None
        self.previous_stage: Optional[EvolutionStage] = None  # back link
        self.branches: List[EvolutionStage] = []
        self.stage = stage
        self.is_baby = self.stage is not None and 'Baby' in self.stage
        # Try the specific form first, then fall back to a name-only lookup.
        self.pokemon_reference = find_pokemon(pokemon, form)
        if self.pokemon_reference is None:
            self.pokemon_reference = find_pokemon(pokemon, None)
        self.form = form

    def __str__(self):
        # Only insert the separating space when there is a form to show.
        form_part = f" {self.form}" if self.form else ""
        return f"{self.pokemon}{form_part} ({self.method if self.method else 'Base'})"
|
|
|
|
class EncounterInformation:
    """The location entries recorded for a Pokemon in one game.

    ``method`` starts as the placeholder "Unknown" and is overwritten later
    by ``Pokemon.update_encounter_information``.
    """

    def __init__(self, game: str, locations: List[str]):
        self.game, self.locations = game, locations
        self.method = "Unknown"
|
|
|
|
def _stage_from_cell(td, method):
    """Build an EvolutionStage from a cell that contains a Pokemon sub-table."""
    pokemon_name = extract_pokemon_name(td)
    stage = extract_stage_form(td)
    evolution_form = extract_evolution_form(td, pokemon_name)
    return EvolutionStage(pokemon_name, method, stage, evolution_form)


def parse_evolution_chain(table: "Tag", form: Optional[str] = None) -> "List[EvolutionStage]":
    """Parse an evolution table into a list of linked main-chain stages.

    The first row holds the main chain; later rows hold branches. A cell with
    a nested table describes a Pokemon. Any other cell holds the evolution
    method for the Pokemon that follows it in the same row.

    Args:
        table: The evolution table element.
        form: Accepted for interface compatibility; not used by the parser.

    Returns:
        The main-chain stages in order, with ``next_stage``/``previous_stage``
        links and ``branches`` filled in. Empty when the table has no direct
        ``tbody`` or no rows.
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if not rows:
        return []
    main_row, branch_rows = rows[0], rows[1:]

    # Parse main evolution chain
    main_chain = []
    current_stage = None
    pending_method = None
    for td in main_row.find_all('td', recursive=False):
        if not td.find('table'):
            # This cell holds the evolution method for the next Pokemon.
            pending_method = extract_evolution_method(td)
            continue
        new_stage = _stage_from_cell(td, pending_method)
        pending_method = None
        if current_stage:
            current_stage.next_stage = new_stage
            new_stage.previous_stage = current_stage
        current_stage = new_stage
        main_chain.append(current_stage)

    # Parse branching evolutions
    for row in branch_rows:
        branch_stage = None
        branch_method = None
        for td in row.find_all('td', recursive=False):
            if not td.find('table'):
                branch_method = extract_evolution_method(td)
                continue
            new_stage = _stage_from_cell(td, branch_method)
            branch_method = None
            if branch_stage:
                branch_stage.next_stage = new_stage
                new_stage.previous_stage = branch_stage
            branch_stage = new_stage

            # Only cells carrying a rowspan are attached to the main chain,
            # to the first main stage that has the same Pokemon name.
            if td.get('rowspan'):
                for main_stage in main_chain:
                    if main_stage.pokemon == branch_stage.pokemon:
                        main_stage.branches.append(branch_stage)
                        branch_stage.previous_stage = main_stage
                        break

    return main_chain
|
|
|
|
def find_name_tag(td: "Tag") -> "Optional[Tag]":
    """Return the anchor that names the Pokemon inside *td*'s nested table.

    A ``selflink`` anchor is preferred. Otherwise the first anchor that has a
    ``title`` and whose class is not ``image`` is returned (or ``None``).
    """
    sub_table = td.find('table')
    return (
        sub_table.find('a', class_='selflink')
        or sub_table.find('a', title=True, class_=lambda x: x != 'image')
    )
|
|
|
|
def extract_pokemon_name(td: "Tag") -> Optional[str]:
    """Return the stripped text of the name anchor in *td*, or None if there is none."""
    link = find_name_tag(td)
    return link.get_text(strip=True) if link else None
|
|
|
|
def extract_evolution_method(td: "Tag") -> str:
    """Return the whitespace-stripped text of a method cell."""
    method_text = td.get_text(strip=True)
    return method_text
|
|
|
|
def extract_stage_form(td: "Tag") -> Optional[str]:
    """Return the text of the first ``<small>`` in *td*'s nested table, or None."""
    label = td.find('table').find('small')
    if not label:
        return None
    return label.get_text(strip=True)
|
|
|
|
def extract_evolution_form(td: "Tag", name: str) -> Optional[str]:
    """Return the form label shown next to the Pokemon name, if any.

    The first ``<small>`` under the name anchor's parent is returned, but
    only when that parent contains more than one ``<small>``. *name* is not
    used.
    """
    anchor = find_name_tag(td)
    if not anchor:
        return None
    smalls = anchor.parent.find_all('small')
    return smalls[0].get_text(strip=True) if len(smalls) > 1 else None
|
|
|
|
def read_pokemon_list(filename, limit=50):
    """Load Pokemon from a CSV file into the module-level ``big_pokemon_list``.

    The CSV must have ``name`` and ``number`` columns. A trailing
    parenthesised part of ``name`` (e.g. ``"Meowth (Alolan Form)"``) becomes
    the form; the rest becomes the base name.

    Args:
        filename: Path of the CSV file (read as UTF-8).
        limit: Maximum number of data rows to read.

    Returns:
        ``big_pokemon_list`` itself, so rows from earlier calls are included.
    """
    # Base name, optional whitespace, optional "(...)" suffix.
    name_pattern = re.compile(r'(.*?)\s*(\(.*\))?$')
    with open(filename, 'r', newline='', encoding='utf-8') as csvfile:
        for index, row in enumerate(csv.DictReader(csvfile)):
            if index >= limit:
                break
            match = name_pattern.match(row['name'])
            base_name, form = match.groups() if match else (row['name'], None)
            form = form.strip('() ') if form else None
            big_pokemon_list.append(Pokemon(base_name.strip(), row['number'], form))

    return big_pokemon_list
|
|
|
|
def get_pokemon_data_bulbapedia(pokemon_name, cache: "CacheManager"):
    """Fetch the Bulbapedia article for *pokemon_name* via ``cache.fetch_url``.

    Returns whatever ``fetch_url`` returns for the article URL.
    """
    page_url = f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pokémon)"
    return cache.fetch_url(page_url)
|
|
|
|
def split_td_contents(td):
    """Split the children of *td* into HTML strings, one per ``<br>``-separated group.

    Whitespace-only text nodes are dropped. Every other node is kept and
    serialised with ``str``. Empty groups are not emitted.
    """
    groups = []
    pending = []

    def flush():
        # Emit the nodes collected so far as one string, then start a new group.
        if pending:
            groups.append(''.join(str(item) for item in pending))
            pending.clear()

    for node in td.contents:
        if isinstance(node, NavigableString):
            if node.strip():
                pending.append(node)
        elif node.name == 'br':
            flush()
        else:
            pending.append(node)

    flush()
    return groups
|
|
|
|
def parse_form_information(html_content):
    """Extract form descriptors from one location snippet.

    Returns a list of dicts with ``main_form`` and ``sub_form`` keys, plus
    ``region`` when the main form contains one of ``regional_descriptors``.
    Bold text inside the first ``<small>`` is used when there is one.
    Otherwise bold headings that mention "form" are used.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    def with_region(info):
        # Tag the entry with the first regional descriptor found in its main form.
        lowered = info["main_form"].lower()
        for region in regional_descriptors:
            if region in lowered:
                info["region"] = region
                break
        return info

    #TODO: This wont work for lines that have several small blocks in one line.
    #TODO: Adjust this to handle more than one small block, see Basculin for example
    small_tag = soup.find('small')

    forms = []
    # Form info is in bold inside a small tag.
    if small_tag:
        for bold_tag in small_tag.find_all('b'):
            # Remove surrounding parentheses.
            form_text = bold_tag.get_text(strip=True).strip('()')

            if "/" in form_text:
                # "A/B Word" style: the last word applies to every alternative.
                last_word = form_text.split()[-1]
                stem = form_text.replace(last_word, "").strip()
                for part in stem.split('/'):
                    forms.append({
                        "main_form": part.strip() + " " + last_word,
                        "sub_form": None
                    })
                continue

            # Split the text into main form and breed (if present).
            pieces = form_text.split('(')
            main_form = pieces[0].strip()

            # "Factor"s are properties of an encounter, not actual forms.
            if main_form and "factor" in main_form.lower():
                continue

            breed = pieces[1].strip(')') if len(pieces) > 1 else None
            forms.append(with_region({
                "main_form": main_form,
                "sub_form": breed
            }))
    else:  # e.g. Gimmighoul
        for heading in soup.find_all('b'):
            if heading.parent.name == 'sup':
                continue
            heading_text = heading.get_text(strip=True)
            if "form" not in heading_text.lower():
                continue
            forms.append(with_region({
                "main_form": heading_text,
                "sub_form": None
            }))

    return forms
|
|
|
|
def get_evolution_data_from_bulbapedia(pokemon_name, form, cache: "CacheManager", gender: Optional[str] = None):
    """Fetch and parse the evolution chain shown on a Pokemon's Bulbapedia page.

    Without a form, the first table after the "Evolution_data" heading is
    used. With a form, the table after the ``h4`` whose text contains the
    form (with "Form"/"form" removed) is used, searching only up to the next
    ``h3``. *gender* is not used.

    Returns:
        The parsed chain, or ``None`` when the page, section or table is missing.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')
    evolution_section = soup.find('span', id='Evolution_data')
    if not evolution_section:
        return None

    if not form:
        evolution_table = evolution_section.parent.find_next('table')
    else:
        evolution_table = None
        form_keyword = form.replace('Form', '').replace('form', '').strip()
        for sibling in evolution_section.parent.find_next_siblings():
            if sibling.name == 'h4' and form_keyword in sibling.get_text(strip=True):
                evolution_table = sibling.find_next('table')
                break
            if sibling.name == 'h3':
                # Reached the next section without finding the form.
                break

    if not evolution_table:
        return None

    # Eevee's table has its own layout and its own parser.
    if pokemon_name == "Eevee":
        return parse_eevee_evolution_chain(evolution_table)
    return parse_evolution_chain(evolution_table, form)
|
|
|
|
# This is going to be a little odd.
|
|
# the first TR contains a full evolution chain
|
|
# other TRs contain branching evolution chains
|
|
# any TDs in the first TR with a rowspan are part of the main evolution chain
|
|
# any other TDs are part of the branching evolution chains
|
|
# a table in a TD is information about the current Pokémon in that evolution stage
|
|
# a TD without a table is information on how to trigger the next evolution
|
|
|
|
def parse_pokemon_subtable(td):
    """Return ``(pokemon_name, stage)`` for a cell holding a Pokemon sub-table.

    Cells without a nested table yield ``(None, None)``.
    """
    if not td.find('table'):
        return None, None
    return extract_pokemon_name(td), extract_stage_form(td)
|
|
|
|
def parse_eevee_evolution_chain(table):
    """Parse Eevee's evolution table into a single stage with one branch per evolution.

    The table is read positionally: row 1 holds Eevee, row 2 the evolution
    methods and row 3 the evolutions. Methods and evolutions are paired by
    cell index.

    Returns:
        ``[eevee_stage]`` with ``branches`` filled in, or ``[]`` when the
        table does not have the expected tbody, rows or Eevee cell.
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if len(rows) < 4:
        # Not the layout this parser understands.
        return []
    eevee_row, method_row, eeveelutions_row = rows[1], rows[2], rows[3]

    eevee_td = eevee_row.find('td', recursive=False)
    if eevee_td is None:
        return []
    pokemon_name, stage = parse_pokemon_subtable(eevee_td)
    eevee_stage = EvolutionStage(pokemon_name, None, stage, None)

    methods = [
        extract_evolution_method(method_td)
        for method_td in method_row.find_all('td', recursive=False)
    ]

    eeveelutions = []
    for index, cell in enumerate(eeveelutions_row.find_all('td', recursive=False)):
        pokemon_name, stage = parse_pokemon_subtable(cell)
        if pokemon_name is None:
            # Not a Pokemon cell; keep the index so later cells stay aligned.
            continue
        method = methods[index] if index < len(methods) else None
        eeveelution_stage = EvolutionStage(pokemon_name, method, stage, None)
        eeveelution_stage.previous_stage = eevee_stage  # back link to Eevee
        eeveelutions.append(eeveelution_stage)

    eevee_stage.branches = eeveelutions  # a flat list, not a nested one

    return [eevee_stage]
|
|
|
|
def get_intro_generation(pokemon_name, form, cache: "CacheManager"):
    """Return the first generation number listed in the Pokemon's game-locations table.

    The first nested row whose text matches "Generation <Roman numeral>"
    decides the result. *form* is not used.

    Returns:
        The generation as an int, or ``None`` when the page, section, table
        or a generation heading cannot be found.
    """
    page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not page_data:
        return None

    soup = BeautifulSoup(page_data, 'html.parser')

    locations_section = soup.find('span', id='Game_locations')
    if not locations_section:
        return None

    locations_table = locations_section.find_next('table', class_='roundy')
    if not locations_table:
        return None

    generation_tbody = locations_table.find('tbody', recursive=False)
    if not generation_tbody:
        return None

    generation_pattern = re.compile(r"Generation\s+([IVXLCDM]+)")
    for generation_row in generation_tbody.find_all('tr', recursive=False):
        nested_td = generation_row.find('td', recursive=False)
        if not nested_td:
            continue
        nested_table = nested_td.find('table', recursive=False)
        if not nested_table:
            continue
        nested_tbody = nested_table.find('tbody', recursive=False)
        if not nested_tbody:
            continue

        for nested_row in nested_tbody.find_all('tr', recursive=False):
            match = generation_pattern.search(nested_row.get_text(strip=True))
            if match:
                # Group 1 is just the Roman numeral.
                return roman_to_int(match.group(1))

    return None
|
|
|
|
def compare_forms(a, b):
    """Return True when two form labels denote the same form.

    ``None`` never matches. Otherwise the labels match when they are equal,
    or equal after lower-casing, dropping "forme"/"form", replacing "é" with
    "e", stripping, and applying a few known spelling fixes.
    """
    if a is None or b is None:
        return False
    if a == b:
        return True

    def normalise(label):
        text = label.lower()
        for old, new in (("forme", ""), ("form", ""), ("é", "e")):
            text = text.replace(old, new)
        text = text.strip()
        # Known spelling differences between the data sources.
        for old, new in (("deputante", "debutante"), ("p'au", "pa'u"), ("blood moon", "bloodmoon")):
            text = text.replace(old, new)
        return text

    return normalise(a) == normalise(b)
|
|
|
|
@lru_cache(maxsize=100)
def get_parsed_pokemon_page(pokemon_name, cache):
    """Return the parsed Bulbapedia page for *pokemon_name*, or None if it has no data.

    Results are memoised per ``(pokemon_name, cache)`` pair, up to 100 entries.
    """
    html = get_pokemon_data_bulbapedia(pokemon_name, cache)
    if not html:
        return None
    return BeautifulSoup(html, 'html.parser')
|
|
|
|
def get_locations_from_bulbapedia(pokemon_name, form, cache: "CacheManager", default_forms=None):
    """Collect per-game location entries for a Pokemon (and optional form).

    The locations table is found under the first matching section heading.
    Its nested generation/game/location tables are walked, and the raw
    location snippets of every game listed in ``all_games`` are filtered by
    ``process_game_locations``. Matching event tables are merged in last.

    Returns:
        A dict mapping game name to a list of ``{"location", "tag"}`` dicts,
        or ``None`` when the page or its locations table cannot be found.
    """
    soup = get_parsed_pokemon_page(pokemon_name, cache)
    if not soup:
        return None

    # Try different section headings to find the locations table.
    locations_table = None
    possible_headers = ['Game locations', 'In side games', 'In spin-off games']
    for header in possible_headers:
        span = soup.find('span', id=header.replace(' ', '_'))
        if span:
            locations_table = span.find_next('table', class_='roundy')
            if locations_table:
                break

    if not locations_table:
        print(f"Warning: Couldn't find locations table for {pokemon_name}")
        return None

    raw_game_locations = {}

    generation_tbody = locations_table.find('tbody', recursive=False)
    for generation_row in generation_tbody.find_all('tr', recursive=False):
        generation_td = generation_row.find('td', recursive=False)
        if not generation_td:
            continue
        generation_table = generation_td.find('table', recursive=False)
        if not generation_table:
            continue
        generation_inner_tbody = generation_table.find('tbody', recursive=False)

        for nested_row in generation_inner_tbody.find_all('tr', recursive=False):
            # Rows mentioning "Generation" are headings, not game rows.
            if 'Generation' in nested_row.get_text(strip=True):
                continue

            games_td = nested_row.find('td', recursive=False)
            if not games_td:
                continue
            games_table = games_td.find('table', recursive=False)
            if not games_table:
                continue
            games_tbody = games_table.find('tbody', recursive=False)

            for game_row in games_tbody.find_all('tr', recursive=False):
                for game_header in game_row.find_all('th'):
                    raw_game = game_header.get_text(strip=True)
                    if raw_game not in all_games:
                        continue
                    locations_td = game_row.find('td', recursive=False)
                    if not locations_td:
                        continue
                    locations_inner_table = locations_td.find('table', recursive=False)
                    if not locations_inner_table:
                        continue
                    locations_tbody = locations_inner_table.find('tbody', recursive=False)
                    for location_cell in locations_tbody.find_all('td'):
                        for group in split_td_contents(location_cell):
                            raw_game_locations.setdefault(raw_game, []).append(group)

    # Process events
    events_section = soup.find('span', id='In_events')
    event_tables = process_event_tables(events_section) if events_section else {}

    # Process game locations through the executor (a single worker).
    with ThreadPoolExecutor(max_workers=1) as executor:
        futures = {}
        for raw_game, raw_locations in raw_game_locations.items():
            future = executor.submit(process_game_locations, raw_game, raw_locations, form, default_forms)
            futures[future] = raw_game

        game_locations = {}
        for future in as_completed(futures):
            result = future.result()
            if result:
                game_locations[futures[future]] = result

    # Merge event tables whose variant heading matches this Pokemon/form.
    for variant in event_tables:
        if (variant == pokemon_name and form is None) or (form and form in variant):
            process_event_table(event_tables[variant], game_locations)

    return game_locations
|
|
|
|
def process_event_tables(events_section):
    """Map each ``h4`` variant heading after *events_section* to its ``roundy`` table.

    Siblings are walked until an ``h3`` or the end is reached. Headings
    without a following ``roundy`` table are skipped. A falsy section gives
    an empty dict.
    """
    tables_by_variant = {}
    if not events_section:
        return tables_by_variant

    element = events_section.find_next_sibling()
    while element and element.name != 'h3':
        if element.name == 'h4':
            variant = element.text.strip()
            variant_table = element.find_next_sibling('table', class_='roundy')
            if variant_table:
                tables_by_variant[variant] = variant_table
        element = element.find_next_sibling()
    return tables_by_variant
|
|
|
|
def process_event_table(table, game_locations):
    """Append event locations from *table* to *game_locations* (mutated in place).

    The first row is treated as the header. For rows with at least three
    cells, cell 0 is the game and cell 2 the location. Only games listed in
    ``all_games`` are recorded.
    """
    data_rows = table.find_all('tr')[1:]  # skip header row
    for row in data_rows:
        cells = row.find_all('td')
        if len(cells) < 3:
            continue
        game = cells[0].text.strip()
        location = cells[2].text.strip()
        if game not in all_games:
            continue
        entry = {"location": f"Event: {location}", "tag": str(cells[2])}
        game_locations.setdefault(game, []).append(entry)
|
|
|
|
def process_game_locations(raw_game, raw_locations, form, default_forms):
    """Filter one game's raw location snippets down to those relevant for *form*.

    With ``form=None`` a snippet is kept when it names no forms, or once per
    named form that is a default form or one of the catch-all labels. With a
    form, a snippet is kept once per named form accepted by ``form_matches``.
    *raw_game* is not used.

    Returns:
        A list of ``{"location", "tag"}`` dicts, or ``None`` when empty.
    """
    catch_all_forms = ("All Forms", "Kantonian Form", "All Sizes")
    kept = []

    for raw_location in raw_locations:
        forms = parse_form_information(raw_location)

        if form is None:
            if not forms:
                kept.append({"location": raw_location, "tag": raw_location})
                continue
            for form_info in forms:
                main_form = form_info["main_form"]
                # A default form counts as "no form".
                if default_forms and main_form and main_form in default_forms:
                    main_form = None
                if main_form and main_form not in catch_all_forms:
                    continue
                kept.append({"location": raw_location, "tag": raw_location})
        else:
            for form_info in forms:
                if form_matches(form_info, form, default_forms):
                    kept.append({"location": raw_location, "tag": raw_location})

    return kept if kept else None
|
|
|
|
def form_matches(form_info, form, default_forms):
    """Decide whether a parsed form descriptor applies to the requested *form*.

    Default forms never match. "All Forms"/"All Sizes" always match, and so
    does "Kantonian Form" without a region. Otherwise the main form, sub form
    and finally the region are compared with ``compare_forms`` or a
    ``fuzz.partial_ratio`` of at least 95.
    """
    main_form = form_info["main_form"]
    sub_form = form_info["sub_form"]
    region = form_info.get('region')

    # A default form is treated as "no form", which never matches.
    if default_forms and main_form and main_form in default_forms:
        return False
    if main_form is None:
        return False

    if main_form in ["All Forms", "All Sizes"]:
        return True
    if region is None and main_form in ["Kantonian Form"]:
        return True

    main_form_match = compare_forms(form, main_form) or fuzz.partial_ratio(form.lower(), main_form.lower()) >= 95
    sub_form_match = compare_forms(form, sub_form) or (sub_form and fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 95)

    # Only when neither name matched does the region get a say.
    if region and not (main_form_match or sub_form_match):
        return compare_forms(form, region) or fuzz.partial_ratio(form.lower(), region.lower()) >= 95

    return main_form_match or sub_form_match
|
|
|
|
def record_location_info(raw_game, game_locations, raw_location, raw_text):
    """Append a ``{"location", "tag"}`` entry for *raw_game* to *game_locations*.

    The game's list is created on first use. ``tag`` is ``str(raw_location)``.
    """
    entry = {"location": raw_text, "tag": str(raw_location)}
    game_locations.setdefault(raw_game, []).append(entry)
|
|
|
|
def split_outside_brackets(str):
    """Split *str* on commas that are not inside parentheses.

    A comma is kept when a ")" follows it with no "(" or ")" in between.
    """
    comma_outside_brackets = re.compile(r',(?![^()]*\))')
    return comma_outside_brackets.split(str)
|
|
|
|
def handle_unown(pokemon, encounter_data):
    """Give an Unown form the encounter data of the first indexed Unown.

    Nothing happens for other Pokemon or when no Unown is indexed. The "!"
    and "?" forms (added in HeartGold and SoulSilver) get their own
    *encounter_data* plus the reference Unown's encounters for games listed
    after both HeartGold and SoulSilver in ``all_games``. Every other form
    shares the reference Unown's encounter list.
    """
    if pokemon.name != "Unown":
        return

    one_form_unown = find_pokemon(pokemon.name, None)
    if not one_form_unown:
        return

    if pokemon.form not in ("!", "?") or not encounter_data:
        pokemon.encounter_information = one_form_unown.encounter_information
        return

    for game_name in encounter_data:
        pokemon.encounter_information.append(
            EncounterInformation(game_name, encounter_data[game_name])
        )

    passed_heartgold = False
    passed_soulsilver = False
    for game in all_games:
        if game == "HeartGold":
            passed_heartgold = True
            continue
        if game == "SoulSilver":
            passed_soulsilver = True
            continue
        if not (passed_heartgold and passed_soulsilver):
            continue
        # Copy the reference Unown's first encounter for this later game.
        for encounter in one_form_unown.encounter_information:
            if game == encounter.game:
                pokemon.encounter_information.append(encounter)
                break
|
|
|
|
# Names for which handle_form_shift copies the form-less entry's encounter
# data onto every entry that has a form.
list_of_shifting_form_pokemon = [
    "Deoxys", "Burmy", "Wormadam", "Rotom", "Shaymin", "Keldeo",
    "Furfrou", "Hoopa", "Pumpkaboo", "Gourgeist", "Zygarde", "Magearna",
    "Vivillon", "Minior", "Urshifu", "Oinkologne", "Basculegion", "Enamorus",
]
|
|
|
|
def handle_form_shift(pokemon, encounter_data):
    """Share the form-less Pokemon's encounter list with a form-shifting form.

    Applies only to names in ``list_of_shifting_form_pokemon`` that have a
    form. *encounter_data* is not used.
    """
    if pokemon.name not in list_of_shifting_form_pokemon or pokemon.form is None:
        return

    normal_form_pokemon = find_pokemon(pokemon.name, None)
    if normal_form_pokemon:
        pokemon.encounter_information = normal_form_pokemon.encounter_information
|
|
|
|
# Pokemon whose form to search for is chosen by get_bad_tea_form.
phony_authentic = ["Sinistea", "Polteageist"]
countefieit_atrisan = ["Poltchageist"]
unremarkable_masterpiece = ["Sinistcha"]
bad_tea_pokemon = [*phony_authentic, *countefieit_atrisan, *unremarkable_masterpiece]
|
|
|
|
def get_bad_tea_form(pokemon):
    """Return the form label to search for, for the tea-themed Pokemon.

    A form-less entry maps to "Phony Form", "Counterfeit Form" or
    "Unremarkable Form" depending on its name group. Names in
    ``unremarkable_masterpiece`` keep an explicit form. Every other case
    yields ``None``.
    """
    name, form = pokemon.name, pokemon.form
    if name not in bad_tea_pokemon:
        return None

    if name in phony_authentic:
        if form is None:
            return "Phony Form"
        if form == "Authentic Form":
            return None

    if name in countefieit_atrisan:
        if form is None:
            return "Counterfeit Form"
        if form == "Artisan Form":
            return None

    if name in unremarkable_masterpiece:
        return "Unremarkable Form" if form is None else form

    return None
|
|
|
|
def determine_earliest_games(cache: "CacheManager"):
    """Fill in generation, encounter data and earliest game for every loaded Pokemon.

    For each entry in ``big_pokemon_list`` the form to look up is adjusted
    for a few special cases, the Bulbapedia data is fetched through *cache*,
    and ``Pokemon.determine_earliest_game`` is run. Progress is printed.
    """
    for pokemon in big_pokemon_list:
        print(f"Processing {pokemon}")

        # Special cases where the searched form differs from the stored one.
        form_to_find = pokemon.form
        if pokemon.name == "Minior" and pokemon.form == "Orange Core":
            form_to_find = None
        if pokemon.name == "Squawkabilly" and pokemon.form:
            form_to_find = pokemon.form.replace("Plumage", "").strip()
        if pokemon.name == "Alcremie":
            form_to_find = None
        if pokemon.name in bad_tea_pokemon:
            form_to_find = get_bad_tea_form(pokemon)

        pokemon.introduced_in_gen = get_intro_generation(pokemon.name, form_to_find, cache)

        # The lookup returns None when the page or table is missing.
        encounter_data = get_locations_from_bulbapedia(pokemon.name, form_to_find, cache) or {}
        for game, locations in encounter_data.items():
            pokemon.encounter_information.append(EncounterInformation(game, locations))

        handle_unown(pokemon, encounter_data)
        handle_form_shift(pokemon, encounter_data)

        if pokemon.name == "Gimmighoul" and pokemon.form == "Roaming Form":
            encounter_information = EncounterInformation("Pokémon Go", ["Pokémon Go"])
            pokemon.encounter_information.append(encounter_information)

        pokemon.determine_earliest_game()

        earliest = pokemon.earliest_game
        if earliest is None:
            # determine_earliest_game leaves None when nothing is obtainable.
            print(f"Processed {pokemon}: no obtainable game found")
        else:
            print(f"Processed {pokemon}: {earliest.game} ({earliest.method})")
|
|
|
|
def get_base_form(evolution_chain: "List[EvolutionStage]"):
    """Return the name of the base Pokemon of a chain, or None.

    The first stage labelled "Unevolved" wins. A baby stage met before that
    yields the name of the stage after it. An empty or missing chain gives
    ``None``.
    """
    for stage in evolution_chain or ():
        if stage.stage == "Unevolved":
            return stage.pokemon
        if stage.is_baby:
            return stage.next_stage.pokemon
    return None
|
|
|
|
def adjust_for_evolution(cache: "CacheManager"):
    """Attach an evolution chain to every loaded Pokemon and print its earliest game and method."""
    for pokemon in big_pokemon_list:
        pokemon.add_evolution_chain(
            get_evolution_data_from_bulbapedia(pokemon.name, pokemon.form, cache)
        )
        game, method = pokemon.get_earliest_game_and_method()
        print(f"Adjusted {pokemon}: {game} ({method})")
|
|
|
|
def save_to_csv(filename='pokemon_earliest_games.csv'):
    """Write one CSV row per loaded Pokemon with its earliest game and locations.

    Columns: number, name, introduced_in_gen, earliest_game, obtain_method,
    encounter_locations (entries of the earliest game joined with " | ").
    A Pokemon without an earliest game gets empty game, method and location
    cells. A Pokemon without a form is written by name only.
    """
    fieldnames = ['number', 'name', 'introduced_in_gen', 'earliest_game', 'obtain_method', 'encounter_locations']
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for pokemon in big_pokemon_list:
            earliest = pokemon.earliest_game

            encounter_locations = []
            if earliest is not None:
                for encounter in pokemon.encounter_information:
                    if encounter.game == earliest.game:
                        encounter_locations.extend(encounter.locations)

            writer.writerow({
                'number': pokemon.number,
                # Avoid writing "Name (None)" for form-less entries.
                'name': f"{pokemon.name} ({pokemon.form})" if pokemon.form else pokemon.name,
                'introduced_in_gen': pokemon.introduced_in_gen,
                'earliest_game': earliest.game if earliest is not None else None,
                'obtain_method': earliest.method if earliest is not None else None,
                'encounter_locations': ' | '.join(str(item) for item in encounter_locations),
            })
|
|
|
|
def handle_unknown_encounters(cache):
    """Print a notice for every loaded Pokemon that has no earliest game or no method.

    *cache* is not used.
    """
    for pokemon in big_pokemon_list:
        earliest = pokemon.earliest_game
        if earliest is None or earliest.method is None:
            print(f"Checking alternative sources for {pokemon.name}")
|
|
|
|
# Update the main function
|
|
if __name__ == "__main__":
    # Script entry point: load the list, index it, resolve earliest games,
    # apply evolution data, report gaps and write the CSV.
    cache = CacheManager()
    try:
        pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000)

        create_pokemon_index(big_pokemon_list)

        determine_earliest_games(cache)
        adjust_for_evolution(cache)
        handle_unknown_encounters(cache)
        save_to_csv()
    finally:
        # Close the cache even when a step above fails.
        cache.close()

    print(f"Earliest obtainable games and encounter locations determined for {len(pokemon_list)} Pokémon and saved to pokemon_earliest_games.csv")
|
|
|
|
|