Browse Source

- More work on the parser, need to work on Rotom forms

master
Quildra 1 year ago
parent
commit
2a518b843b
  1. 161
      Utilities/DetermineOriginGame.py
  2. 1221
      pokemon_earliest_games.csv

161
Utilities/DetermineOriginGame.py

@ -9,6 +9,9 @@ import sqlite3
from bs4 import BeautifulSoup, Tag, NavigableString from bs4 import BeautifulSoup, Tag, NavigableString
import copy import copy
from typing import List, Optional from typing import List, Optional
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from collections import defaultdict
# Initialize the database connection # Initialize the database connection
@ -69,6 +72,61 @@ def update_cache(key, value):
save_cached_data() save_cached_data()
time.sleep(1) time.sleep(1)
# Module-level name index read by find_pokemon(). It stays None until the
# __main__ block assigns the result of create_pokemon_index(big_pokemon_list).
pokemon_index = None
def create_pokemon_index(pokemon_list):
    """Group Pokémon objects by their lower-cased name.

    Args:
        pokemon_list: Iterable of objects exposing a string ``name``
            attribute.

    Returns:
        A ``defaultdict(list)`` mapping each lower-cased name to the objects
        carrying that name, in the order they appear in ``pokemon_list``.
    """
    name_index = defaultdict(list)
    for pokemon in pokemon_list:
        # Keys are lower-cased so lookups can be case-insensitive.
        name_index[pokemon.name.lower()].append(pokemon)
    return name_index
def _best_form_match(candidates, form, threshold):
    """Return the candidate whose form is most similar to ``form``.

    Candidates without a form are ignored. Similarity is ``fuzz.ratio`` on
    the lower-cased strings; on ties the earliest candidate wins. Returns
    ``None`` when no candidate reaches ``threshold``.
    """
    wanted = form.lower()
    best_match = None
    best_score = 0
    for pokemon in candidates:
        if not pokemon.form:
            continue
        score = fuzz.ratio(wanted, pokemon.form.lower())
        if score > best_score:
            best_score = score
            best_match = pokemon
    if best_match and best_score >= threshold:
        return best_match
    return None


def find_pokemon(name, form=None, threshold=80):
    """Look up a Pokémon in the module-level ``pokemon_index``.

    The lookup is case-insensitive. An exact name hit is tried first; if
    that yields nothing, the indexed name with the highest ``fuzz.ratio``
    against ``name`` is used, provided it scores at least ``threshold``.

    Args:
        name: Pokémon name to search for.
        form: Optional form label. When falsy, the first entry indexed under
            the matched name is returned. Otherwise the entry whose form
            best fuzzy-matches ``form`` is returned.
        threshold: Minimum ``fuzz.ratio`` score accepted for both the name
            and the form comparison.

    Returns:
        The matching object from the index, or ``None`` if nothing matches.
    """
    name = name.lower()

    if name in pokemon_index:
        candidates = pokemon_index[name]
        if not form:
            return candidates[0] if candidates else None
        match = _best_form_match(candidates, form, threshold)
        if match:
            return match
        # No form was close enough: fall through to the fuzzy name search.

    # If no exact name match, try fuzzy matching on names
    best_name_match = None
    best_name_score = 0
    for pokemon_name in pokemon_index:
        score = fuzz.ratio(name, pokemon_name)
        if score > best_name_score:
            best_name_score = score
            best_name_match = pokemon_name

    if best_name_match and best_name_score >= threshold:
        candidates = pokemon_index[best_name_match]
        if not form:
            return candidates[0]
        return _best_form_match(candidates, form, threshold)

    return None
class Pokemon: class Pokemon:
def __init__(self, name: str, number: int, form: Optional[str] = None): def __init__(self, name: str, number: int, form: Optional[str] = None):
self.name = name self.name = name
@ -85,16 +143,18 @@ class Pokemon:
if self.evolution_chain: if self.evolution_chain:
for stage in self.evolution_chain: for stage in self.evolution_chain:
if self.is_baby: if self.is_baby:
return stage.pokemon.earliest_game.game, "Breed" return stage.pokemon_reference.earliest_game.game, "Breed"
else: else:
return stage.pokemon.earliest_game.game, "Evolve" if stage.pokemon_reference == self:
return self.earliest_game.game, self.earliest_game.method
return stage.pokemon_reference.earliest_game.game, "Evolve"
if self.earliest_game: if self.earliest_game:
return self.earliest_game.game, self.earliest_game.method return self.earliest_game.game, self.earliest_game.method
return None, None return None, None
def __str__(self): def __str__(self):
return f"{self.name} {self.form if self.form else ''} (#{self.number})" return f"{self.name}{' ' if self.form else ''}{self.form if self.form else ''} (#{self.number})"
def add_evolution_chain(self, evolution_chain: List['EvolutionStage']): def add_evolution_chain(self, evolution_chain: List['EvolutionStage']):
self.evolution_chain = evolution_chain self.evolution_chain = evolution_chain
@ -103,11 +163,14 @@ class Pokemon:
self.stage = stage self.stage = stage
self.is_baby = self.stage is not None and 'Baby' in self.stage self.is_baby = self.stage is not None and 'Baby' in self.stage
def update_encounter_information(self): def update_encounter_information(self, exclude_events=True):
if not self.encounter_information: if not self.encounter_information:
return return
non_catchable_methods = ["trade", "event", "global link", "poké transfer", "time capsule", "unobtainable", "pokémon home"] non_catchable_methods = ["trade", "global link", "poké transfer", "time capsule", "unobtainable", "pokémon home"]
if exclude_events:
non_catchable_methods.append("event")
for encounter in self.encounter_information: for encounter in self.encounter_information:
for location in encounter.locations: for location in encounter.locations:
@ -127,6 +190,8 @@ class Pokemon:
encounter.method = "Gift" encounter.method = "Gift"
elif "evolve" in location.lower(): elif "evolve" in location.lower():
encounter.method = "Evolve" encounter.method = "Evolve"
elif "event" in location.lower():
encounter.method = "Event"
else: else:
encounter.method = "Catchable" encounter.method = "Catchable"
@ -137,6 +202,18 @@ class Pokemon:
self.update_encounter_information() self.update_encounter_information()
game_methods = {}
for encounter in self.encounter_information:
if encounter.method:
game_methods[encounter.game.lower()] = encounter
for game in all_games:
if game.lower() in game_methods:
self.earliest_game = game_methods[game.lower()]
return
self.update_encounter_information(exclude_events=False)
game_methods = {} game_methods = {}
for encounter in self.encounter_information: for encounter in self.encounter_information:
if encounter.method: if encounter.method:
@ -157,8 +234,11 @@ class EvolutionStage:
self.branches: List[EvolutionStage] = [] self.branches: List[EvolutionStage] = []
self.stage = stage self.stage = stage
self.is_baby = self.stage is not None and 'Baby' in self.stage self.is_baby = self.stage is not None and 'Baby' in self.stage
self.pokemon_reference = find_pokemon(pokemon, form)
self.form = form self.form = form
def __str__(self): def __str__(self):
return f"{self.pokemon} {self.form if self.form else ''} ({self.method if self.method else 'Base'})" return f"{self.pokemon} {self.form if self.form else ''} ({self.method if self.method else 'Base'})"
@ -222,10 +302,11 @@ def parse_evolution_chain(table: Tag, form: Optional[str] = None) -> List[Evolut
def extract_pokemon_name(td: Tag) -> str: def extract_pokemon_name(td: Tag) -> str:
# Extract Pokemon name from the table within the TD # Extract Pokemon name from the table within the TD
name_tag = td.find('table').find('a', class_='selflink') table = td.find('table')
name_tag = table.find('a', class_='selflink')
if name_tag: if name_tag:
return name_tag.get_text(strip=True) return name_tag.get_text(strip=True)
name_tag = td.find('table').find('a', title=True) name_tag = table.find('a', title=True, class_=lambda x: x != 'image')
return name_tag.get_text(strip=True) return name_tag.get_text(strip=True)
def extract_evolution_method(td: Tag) -> str: def extract_evolution_method(td: Tag) -> str:
@ -260,6 +341,7 @@ def read_pokemon_list(filename, limit=50):
new_pokemon = Pokemon(row['base_name'], row['number'], row['form']) new_pokemon = Pokemon(row['base_name'], row['number'], row['form'])
big_pokemon_list.append(new_pokemon) big_pokemon_list.append(new_pokemon)
return pokemon_list return pokemon_list
def sanitize_name_and_form(name, form): def sanitize_name_and_form(name, form):
@ -470,9 +552,9 @@ def get_evolution_data_from_bulbapedia(pokemon_name, form, cache):
evolution_table = None evolution_table = None
if form: if form:
form = form.replace('Form', '').replace('form', '').strip() form_without_form = form.replace('Form', '').replace('form', '').strip()
for tag in evolution_section.parent.find_next_siblings(): for tag in evolution_section.parent.find_next_siblings():
if tag.name == 'h4' and form in tag.get_text(strip=True): if tag.name == 'h4' and form_without_form in tag.get_text(strip=True):
evolution_table = tag.find_next('table') evolution_table = tag.find_next('table')
break break
if tag.name == 'h3': if tag.name == 'h3':
@ -589,7 +671,13 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache):
else: else:
for raw_location in raw_locations: for raw_location in raw_locations:
main_form, sub_form = parse_form_information(str(raw_location)) main_form, sub_form = parse_form_information(str(raw_location))
if main_form == form: if not main_form:
continue
main_form_match = fuzz.partial_ratio(form.lower(), main_form.lower()) >= 80
sub_form_match = False if not sub_form else fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 80
if main_form_match or sub_form_match:
locations = raw_location.get_text().split(',') locations = raw_location.get_text().split(',')
for location in locations: for location in locations:
if raw_game not in game_locations: if raw_game not in game_locations:
@ -652,6 +740,48 @@ def get_earliest_game(encounter_data, pokemon_name, form):
return "Unknown", "Unknown" return "Unknown", "Unknown"
def handle_unown(pokemon, encounter_data):
    """Fill in encounter information for Unown forms.

    Does nothing unless ``pokemon.name`` is ``"Unown"`` and
    ``find_pokemon("Unown", None)`` returns an entry.

    For the ``!`` and ``?`` forms (when ``encounter_data`` is non-empty),
    an ``EncounterInformation`` is appended for every key of
    ``encounter_data``. Then, for each game that follows both
    ``"HeartGold"`` and ``"SoulSilver"`` in ``all_games``, the reference
    Unown's first encounter for that game is appended as well.

    Every other case reuses the reference Unown's ``encounter_information``
    list. It is the same list object, not a copy.

    Args:
        pokemon: Object with ``name``, ``form`` and ``encounter_information``.
        encounter_data: Mapping whose keys and values are passed to
            ``EncounterInformation(key, value)``.
    """
    if pokemon.name != "Unown":
        return

    one_form_unown = find_pokemon(pokemon.name, None)
    if not one_form_unown:
        return

    # The ! and ? forms were added in HeartGold and SoulSilver.
    if pokemon.form in ("!", "?") and encounter_data:
        for encounter in encounter_data:
            encounter_information = EncounterInformation(encounter, encounter_data[encounter])
            pokemon.encounter_information.append(encounter_information)

        found_heartgold = False
        found_soulsilver = False
        for game in all_games:
            if game == "HeartGold":
                found_heartgold = True
                continue
            elif game == "SoulSilver":
                found_soulsilver = True
                continue
            # Skip every game listed before both HeartGold and SoulSilver
            # have been seen in all_games.
            if not found_heartgold or not found_soulsilver:
                continue
            for encounter in one_form_unown.encounter_information:
                if game == encounter.game:
                    pokemon.encounter_information.append(encounter)
                    break
    else:
        # NOTE(review): the original text had lost its indentation; this
        # `else` is assumed to pair with the "!"/"?" form check above rather
        # than with one of the `for` loops. Confirm against the repository.
        pokemon.encounter_information = one_form_unown.encounter_information
def handle_deoxys(pokemon, encounter_data):
    """Give Deoxys forms the encounter information of the reference Deoxys.

    Does nothing unless ``pokemon.name`` is ``"Deoxys"`` and
    ``find_pokemon("Deoxys", None)`` returns an entry. When ``pokemon`` has
    a form, its ``encounter_information`` is rebound to the reference
    entry's list. It is the same list object, not a copy.

    Args:
        pokemon: Object with ``name``, ``form`` and ``encounter_information``.
        encounter_data: Unused; kept so the signature matches
            ``handle_unown``.
    """
    if pokemon.name != "Deoxys":
        return

    # With form=None, find_pokemon returns the first entry indexed under the
    # name; presumably that is the normal form. Verify against the CSV order.
    normal_form_deoxys = find_pokemon(pokemon.name, None)
    if not normal_form_deoxys:
        return

    if pokemon.form:
        pokemon.encounter_information = normal_form_deoxys.encounter_information
def determine_earliest_games(pokemon_list, cache): def determine_earliest_games(pokemon_list, cache):
for pokemon in big_pokemon_list: for pokemon in big_pokemon_list:
print(f"Processing {pokemon}") print(f"Processing {pokemon}")
@ -659,6 +789,8 @@ def determine_earliest_games(pokemon_list, cache):
for encounter in encounter_data: for encounter in encounter_data:
encounter_information = EncounterInformation(encounter, encounter_data[encounter]) encounter_information = EncounterInformation(encounter, encounter_data[encounter])
pokemon.encounter_information.append(encounter_information) pokemon.encounter_information.append(encounter_information)
handle_unown(pokemon, encounter_data)
handle_deoxys(pokemon, encounter_data)
pokemon.determine_earliest_game() pokemon.determine_earliest_game()
print(f"Processed {pokemon}: {pokemon.earliest_game.game} ({pokemon.earliest_game.method})") print(f"Processed {pokemon}: {pokemon.earliest_game.game} ({pokemon.earliest_game.method})")
@ -736,9 +868,9 @@ def get_base_form(evolution_chain:List[EvolutionStage]):
def adjust_for_evolution(pokemon_list, cache): def adjust_for_evolution(pokemon_list, cache):
for pokemon in big_pokemon_list: for pokemon in big_pokemon_list:
evolution_chain = get_evolution_data_from_bulbapedia(pokemon.name, pokemon.form, cache) evolution_chain = get_evolution_data_from_bulbapedia(pokemon.name, pokemon.form, cache)
pokemon.add_evolution_data(evolution_chain) pokemon.add_evolution_chain(evolution_chain)
game, method = pokemon.get_earliest_game_and_method() game, method = pokemon.get_earliest_game_and_method()
print(f"Adjusted {pokemon.name} (#{pokemon.number}): {game} ({method})") print(f"Adjusted {pokemon}: {game} ({method})")
pokemon_dict = {f"{pokemon['base_name']}_{pokemon['form']}".lower(): pokemon for pokemon in pokemon_list} pokemon_dict = {f"{pokemon['base_name']}_{pokemon['form']}".lower(): pokemon for pokemon in pokemon_list}
@ -909,7 +1041,10 @@ def handle_unknown_encounters(pokemon_list, cache):
if __name__ == "__main__": if __name__ == "__main__":
get_cached_data() get_cached_data()
pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=200) pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000)
pokemon_index = create_pokemon_index(big_pokemon_list)
pokemon_list_with_games = determine_earliest_games(pokemon_list, cache) pokemon_list_with_games = determine_earliest_games(pokemon_list, cache)
pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache) pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache)
pokemon_list_with_locations = add_encounter_locations(pokemon_list_adjusted, cache) pokemon_list_with_locations = add_encounter_locations(pokemon_list_adjusted, cache)

1221
pokemon_earliest_games.csv

File diff suppressed because it is too large
Loading…
Cancel
Save