Browse Source

- More work on the parser, need to work on Rotom forms

master
Quildra 1 year ago
parent
commit
2a518b843b
  1. 161
      Utilities/DetermineOriginGame.py
  2. 1221
      pokemon_earliest_games.csv

161
Utilities/DetermineOriginGame.py

@ -9,6 +9,9 @@ import sqlite3
from bs4 import BeautifulSoup, Tag, NavigableString
import copy
from typing import List, Optional
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from collections import defaultdict
# Initialize the database connection
@ -69,6 +72,61 @@ def update_cache(key, value):
save_cached_data()
time.sleep(1)
# Module-level lookup table read by find_pokemon(): lower-cased Pokemon name
# -> list of Pokemon objects, as produced by create_pokemon_index().
# It stays None until the __main__ block assigns the built index, so
# find_pokemon() must not be called before that assignment.
pokemon_index = None
def create_pokemon_index(pokemon_list):
    """Group Pokemon objects by their lower-cased name.

    Args:
        pokemon_list: Iterable of objects exposing a string ``name`` attribute.

    Returns:
        A ``defaultdict(list)`` mapping each lower-cased name to the objects
        carrying that name, in the order they appear in ``pokemon_list``.
    """
    by_name = defaultdict(list)
    for entry in pokemon_list:
        key = entry.name.lower()
        by_name[key].append(entry)
    return by_name
def _best_form_match(candidates, form, threshold):
    """Return the candidate whose form best fuzzy-matches ``form``, or None.

    Candidates without a form are ignored. The winner must score strictly
    above 0 and at least ``threshold`` on ``fuzz.ratio``; on equal scores the
    earliest candidate wins.
    """
    wanted = form.lower()
    best_match = None
    best_score = 0
    for candidate in candidates:
        if not candidate.form:
            continue
        score = fuzz.ratio(wanted, candidate.form.lower())
        if score > best_score:
            best_score = score
            best_match = candidate
    if best_match is not None and best_score >= threshold:
        return best_match
    return None


def _select_candidate(candidates, form, threshold):
    """Pick one Pokemon from a same-name bucket, honouring an optional form."""
    if not candidates:
        # The index is a defaultdict, so an empty bucket can exist; treat it
        # as "not found" instead of indexing into it.
        return None
    if not form:
        return candidates[0]
    return _best_form_match(candidates, form, threshold)


def find_pokemon(name, form=None, threshold=80):
    """Look up a Pokemon in the module-level ``pokemon_index``.

    The name is matched case-insensitively, first exactly and then, if there
    is no exact entry, by picking the index key with the highest
    ``fuzz.ratio`` score (which must reach ``threshold``).

    Args:
        name: Pokemon name to search for.
        form: Optional form name. When falsy, the first Pokemon stored under
            the matched name is returned. Otherwise the stored Pokemon whose
            form best fuzzy-matches ``form`` is returned, provided its score
            reaches ``threshold``.
        threshold: Minimum ``fuzz.ratio`` score accepted for both the fuzzy
            name match and the form match.

    Returns:
        The matching Pokemon object, or None when nothing qualifies.
    """
    name = name.lower()
    if name in pokemon_index:
        # An exact hit is final: a fuzzy scan would only pick this same key
        # again (it is the key identical to ``name``) and repeat the same
        # form comparison, so skip it.
        return _select_candidate(pokemon_index[name], form, threshold)

    # If no exact name match, try fuzzy matching on names
    best_name_match = None
    best_name_score = 0
    for pokemon_name in pokemon_index:
        score = fuzz.ratio(name, pokemon_name)
        if score > best_name_score:
            best_name_score = score
            best_name_match = pokemon_name

    if best_name_match is not None and best_name_score >= threshold:
        return _select_candidate(pokemon_index[best_name_match], form, threshold)

    return None
class Pokemon:
def __init__(self, name: str, number: int, form: Optional[str] = None):
self.name = name
@ -85,16 +143,18 @@ class Pokemon:
if self.evolution_chain:
for stage in self.evolution_chain:
if self.is_baby:
return stage.pokemon.earliest_game.game, "Breed"
return stage.pokemon_reference.earliest_game.game, "Breed"
else:
return stage.pokemon.earliest_game.game, "Evolve"
if stage.pokemon_reference == self:
return self.earliest_game.game, self.earliest_game.method
return stage.pokemon_reference.earliest_game.game, "Evolve"
if self.earliest_game:
return self.earliest_game.game, self.earliest_game.method
return None, None
def __str__(self):
return f"{self.name} {self.form if self.form else ''} (#{self.number})"
return f"{self.name}{' ' if self.form else ''}{self.form if self.form else ''} (#{self.number})"
def add_evolution_chain(self, evolution_chain: List['EvolutionStage']):
self.evolution_chain = evolution_chain
@ -103,11 +163,14 @@ class Pokemon:
self.stage = stage
self.is_baby = self.stage is not None and 'Baby' in self.stage
def update_encounter_information(self):
def update_encounter_information(self, exclude_events=True):
if not self.encounter_information:
return
non_catchable_methods = ["trade", "event", "global link", "poké transfer", "time capsule", "unobtainable", "pokémon home"]
non_catchable_methods = ["trade", "global link", "poké transfer", "time capsule", "unobtainable", "pokémon home"]
if exclude_events:
non_catchable_methods.append("event")
for encounter in self.encounter_information:
for location in encounter.locations:
@ -127,6 +190,8 @@ class Pokemon:
encounter.method = "Gift"
elif "evolve" in location.lower():
encounter.method = "Evolve"
elif "event" in location.lower():
encounter.method = "Event"
else:
encounter.method = "Catchable"
@ -137,6 +202,18 @@ class Pokemon:
self.update_encounter_information()
game_methods = {}
for encounter in self.encounter_information:
if encounter.method:
game_methods[encounter.game.lower()] = encounter
for game in all_games:
if game.lower() in game_methods:
self.earliest_game = game_methods[game.lower()]
return
self.update_encounter_information(exclude_events=False)
game_methods = {}
for encounter in self.encounter_information:
if encounter.method:
@ -157,8 +234,11 @@ class EvolutionStage:
self.branches: List[EvolutionStage] = []
self.stage = stage
self.is_baby = self.stage is not None and 'Baby' in self.stage
self.pokemon_reference = find_pokemon(pokemon, form)
self.form = form
def __str__(self):
return f"{self.pokemon} {self.form if self.form else ''} ({self.method if self.method else 'Base'})"
@ -222,10 +302,11 @@ def parse_evolution_chain(table: Tag, form: Optional[str] = None) -> List[Evolut
def extract_pokemon_name(td: Tag) -> str:
# Extract Pokemon name from the table within the TD
name_tag = td.find('table').find('a', class_='selflink')
table = td.find('table')
name_tag = table.find('a', class_='selflink')
if name_tag:
return name_tag.get_text(strip=True)
name_tag = td.find('table').find('a', title=True)
name_tag = table.find('a', title=True, class_=lambda x: x != 'image')
return name_tag.get_text(strip=True)
def extract_evolution_method(td: Tag) -> str:
@ -260,6 +341,7 @@ def read_pokemon_list(filename, limit=50):
new_pokemon = Pokemon(row['base_name'], row['number'], row['form'])
big_pokemon_list.append(new_pokemon)
return pokemon_list
def sanitize_name_and_form(name, form):
@ -470,9 +552,9 @@ def get_evolution_data_from_bulbapedia(pokemon_name, form, cache):
evolution_table = None
if form:
form = form.replace('Form', '').replace('form', '').strip()
form_without_form = form.replace('Form', '').replace('form', '').strip()
for tag in evolution_section.parent.find_next_siblings():
if tag.name == 'h4' and form in tag.get_text(strip=True):
if tag.name == 'h4' and form_without_form in tag.get_text(strip=True):
evolution_table = tag.find_next('table')
break
if tag.name == 'h3':
@ -589,7 +671,13 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache):
else:
for raw_location in raw_locations:
main_form, sub_form = parse_form_information(str(raw_location))
if main_form == form:
if not main_form:
continue
main_form_match = fuzz.partial_ratio(form.lower(), main_form.lower()) >= 80
sub_form_match = False if not sub_form else fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 80
if main_form_match or sub_form_match:
locations = raw_location.get_text().split(',')
for location in locations:
if raw_game not in game_locations:
@ -652,6 +740,48 @@ def get_earliest_game(encounter_data, pokemon_name, form):
return "Unknown", "Unknown"
def handle_unown(pokemon, encounter_data):
    """Derive encounter information for an Unown entry from the base Unown.

    Does nothing unless ``pokemon.name`` is "Unown" and ``find_pokemon`` can
    resolve a form-less Unown entry.

    For the "!" and "?" forms with non-empty ``encounter_data``, the form's
    own encounters are appended, followed by the base entry's encounter for
    each game listed in ``all_games`` after both "HeartGold" and "SoulSilver".
    Every other case receives a copy of the base entry's encounter list.

    Args:
        pokemon: Pokemon object to update in place.
        encounter_data: Mapping of game -> raw encounter data for this form
            (may be empty or None).
    """
    if pokemon.name != "Unown":
        return

    one_form_unown = find_pokemon(pokemon.name, None)
    if not one_form_unown:
        return

    if pokemon.form not in ("!", "?") or not encounter_data:
        # Shallow copy so the two Pokemon do not share one mutable list;
        # appending to one must not silently change the other.
        pokemon.encounter_information = list(one_form_unown.encounter_information)
        return

    # The ! and ? forms were added in HeartGold and SoulSilver.
    # NOTE(review): determine_earliest_games appears to append these same
    # EncounterInformation entries before calling this function -- confirm
    # whether this loop produces duplicates there.
    for encounter in encounter_data:
        encounter_information = EncounterInformation(encounter, encounter_data[encounter])
        pokemon.encounter_information.append(encounter_information)

    found_heartgold = False
    found_soulsilver = False
    for game in all_games:
        if game == "HeartGold":
            found_heartgold = True
            continue
        if game == "SoulSilver":
            found_soulsilver = True
            continue
        # Only games listed after both HeartGold and SoulSilver inherit the
        # base entry's encounters.
        if not (found_heartgold and found_soulsilver):
            continue
        # First base encounter recorded for this game, if any.
        inherited = next(
            (entry for entry in one_form_unown.encounter_information if entry.game == game),
            None,
        )
        if inherited is not None:
            pokemon.encounter_information.append(inherited)
def handle_deoxys(pokemon, encounter_data):
    """Give a Deoxys form the encounter information of the base Deoxys entry.

    Does nothing unless ``pokemon.name`` is "Deoxys", ``pokemon.form`` is set,
    and ``find_pokemon`` can resolve a form-less Deoxys entry.

    Args:
        pokemon: Pokemon object to update in place.
        encounter_data: Unused; kept so the signature matches handle_unown.
    """
    if pokemon.name != "Deoxys":
        return

    normal_form_deoxys = find_pokemon(pokemon.name, None)
    if not normal_form_deoxys:
        return

    if pokemon.form:
        # Shallow copy so the form and the base entry do not share one
        # mutable list; appending to one must not silently change the other.
        pokemon.encounter_information = list(normal_form_deoxys.encounter_information)
def determine_earliest_games(pokemon_list, cache):
for pokemon in big_pokemon_list:
print(f"Processing {pokemon}")
@ -659,6 +789,8 @@ def determine_earliest_games(pokemon_list, cache):
for encounter in encounter_data:
encounter_information = EncounterInformation(encounter, encounter_data[encounter])
pokemon.encounter_information.append(encounter_information)
handle_unown(pokemon, encounter_data)
handle_deoxys(pokemon, encounter_data)
pokemon.determine_earliest_game()
print(f"Processed {pokemon}: {pokemon.earliest_game.game} ({pokemon.earliest_game.method})")
@ -736,9 +868,9 @@ def get_base_form(evolution_chain:List[EvolutionStage]):
def adjust_for_evolution(pokemon_list, cache):
for pokemon in big_pokemon_list:
evolution_chain = get_evolution_data_from_bulbapedia(pokemon.name, pokemon.form, cache)
pokemon.add_evolution_data(evolution_chain)
pokemon.add_evolution_chain(evolution_chain)
game, method = pokemon.get_earliest_game_and_method()
print(f"Adjusted {pokemon.name} (#{pokemon.number}): {game} ({method})")
print(f"Adjusted {pokemon}: {game} ({method})")
pokemon_dict = {f"{pokemon['base_name']}_{pokemon['form']}".lower(): pokemon for pokemon in pokemon_list}
@ -909,7 +1041,10 @@ def handle_unknown_encounters(pokemon_list, cache):
if __name__ == "__main__":
get_cached_data()
pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=200)
pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000)
pokemon_index = create_pokemon_index(big_pokemon_list)
pokemon_list_with_games = determine_earliest_games(pokemon_list, cache)
pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache)
pokemon_list_with_locations = add_encounter_locations(pokemon_list_adjusted, cache)

1221
pokemon_earliest_games.csv

File diff suppressed because it is too large
Loading…
Cancel
Save