|
|
@ -9,6 +9,9 @@ import sqlite3 |
|
|
from bs4 import BeautifulSoup, Tag, NavigableString |
|
|
from bs4 import BeautifulSoup, Tag, NavigableString |
|
|
import copy |
|
|
import copy |
|
|
from typing import List, Optional |
|
|
from typing import List, Optional |
|
|
|
|
|
from fuzzywuzzy import fuzz |
|
|
|
|
|
from fuzzywuzzy import process |
|
|
|
|
|
from collections import defaultdict |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Initialize the database connection |
|
|
# Initialize the database connection |
|
|
@ -69,6 +72,61 @@ def update_cache(key, value): |
|
|
save_cached_data() |
|
|
save_cached_data() |
|
|
time.sleep(1) |
|
|
time.sleep(1) |
|
|
|
|
|
|
|
|
|
|
|
pokemon_index = None |
|
|
|
|
|
|
|
|
|
|
|
def create_pokemon_index(pokemon_list):
    """Group Pokemon objects by their lower-cased name.

    Args:
        pokemon_list: Iterable of objects exposing a string ``name``
            attribute.

    Returns:
        A ``defaultdict(list)`` mapping each lower-cased name to the objects
        carrying that name, in the order they appear in ``pokemon_list``.
    """
    index = defaultdict(list)
    for entry in pokemon_list:
        # Lower-case the key so lookups can be case-insensitive.
        bucket = index[entry.name.lower()]
        bucket.append(entry)
    return index
|
|
|
|
|
|
|
|
|
|
|
def _best_form_match(candidates, form, threshold):
    """Return the candidate whose form is most similar to ``form``, or None.

    Candidates without a form are ignored. Similarity is ``fuzz.ratio`` on the
    lower-cased strings; on equal scores the earliest candidate wins, and the
    winner must score at least ``threshold``.
    """
    wanted = form.lower()
    best_match = None
    best_score = 0
    for candidate in candidates:
        if not candidate.form:
            continue
        score = fuzz.ratio(wanted, candidate.form.lower())
        # Strict ">" keeps the first candidate on ties and never accepts 0.
        if score > best_score:
            best_score = score
            best_match = candidate
    if best_match is not None and best_score >= threshold:
        return best_match
    return None


def find_pokemon(name, form=None, threshold=80):
    """Look up a Pokemon in the module-level ``pokemon_index``.

    The name is matched case-insensitively. If it is not an exact key of the
    index, the key with the highest ``fuzz.ratio`` is used instead, provided
    that score reaches ``threshold``. Among the objects stored under the
    chosen key, the first one is returned when ``form`` is empty; otherwise
    the object with the most similar form is returned, again subject to
    ``threshold``.

    Args:
        name: Pokemon name to search for.
        form: Optional form name used to pick among same-named entries.
        threshold: Minimum ``fuzz.ratio`` score (0-100) for a fuzzy name or
            form match to be accepted.

    Returns:
        The matching object, or ``None`` when nothing qualifies, when ``name``
        is ``None``, or when the index has not been built yet or is empty.
    """
    # The index starts out as None until it is built; treat that (and an
    # empty index) as "nothing can be found" instead of raising TypeError.
    if name is None or not pokemon_index:
        return None

    name = name.lower()
    if name in pokemon_index:
        matched_name = name
    else:
        # If no exact name match, try fuzzy matching on names
        matched_name = None
        best_name_score = 0
        for known_name in pokemon_index:
            score = fuzz.ratio(name, known_name)
            if score > best_name_score:
                best_name_score = score
                matched_name = known_name
        if matched_name is None or best_name_score < threshold:
            return None

    candidates = pokemon_index[matched_name]
    if not form:
        # Guard against an empty bucket on both the exact and fuzzy paths.
        return candidates[0] if candidates else None

    # A failed form match on an exact name is final: a fuzzy rescan of the
    # names would select the same key and the same candidates again.
    return _best_form_match(candidates, form, threshold)
|
|
|
|
|
|
|
|
class Pokemon: |
|
|
class Pokemon: |
|
|
def __init__(self, name: str, number: int, form: Optional[str] = None): |
|
|
def __init__(self, name: str, number: int, form: Optional[str] = None): |
|
|
self.name = name |
|
|
self.name = name |
|
|
@ -85,16 +143,18 @@ class Pokemon: |
|
|
if self.evolution_chain: |
|
|
if self.evolution_chain: |
|
|
for stage in self.evolution_chain: |
|
|
for stage in self.evolution_chain: |
|
|
if self.is_baby: |
|
|
if self.is_baby: |
|
|
return stage.pokemon.earliest_game.game, "Breed" |
|
|
return stage.pokemon_reference.earliest_game.game, "Breed" |
|
|
else: |
|
|
else: |
|
|
return stage.pokemon.earliest_game.game, "Evolve" |
|
|
if stage.pokemon_reference == self: |
|
|
|
|
|
return self.earliest_game.game, self.earliest_game.method |
|
|
|
|
|
return stage.pokemon_reference.earliest_game.game, "Evolve" |
|
|
|
|
|
|
|
|
if self.earliest_game: |
|
|
if self.earliest_game: |
|
|
return self.earliest_game.game, self.earliest_game.method |
|
|
return self.earliest_game.game, self.earliest_game.method |
|
|
return None, None |
|
|
return None, None |
|
|
|
|
|
|
|
|
def __str__(self): |
|
|
def __str__(self): |
|
|
return f"{self.name} {self.form if self.form else ''} (#{self.number})" |
|
|
return f"{self.name}{' ' if self.form else ''}{self.form if self.form else ''} (#{self.number})" |
|
|
|
|
|
|
|
|
def add_evolution_chain(self, evolution_chain: List['EvolutionStage']): |
|
|
def add_evolution_chain(self, evolution_chain: List['EvolutionStage']): |
|
|
self.evolution_chain = evolution_chain |
|
|
self.evolution_chain = evolution_chain |
|
|
@ -103,11 +163,14 @@ class Pokemon: |
|
|
self.stage = stage |
|
|
self.stage = stage |
|
|
self.is_baby = self.stage is not None and 'Baby' in self.stage |
|
|
self.is_baby = self.stage is not None and 'Baby' in self.stage |
|
|
|
|
|
|
|
|
def update_encounter_information(self): |
|
|
def update_encounter_information(self, exclude_events=True): |
|
|
if not self.encounter_information: |
|
|
if not self.encounter_information: |
|
|
return |
|
|
return |
|
|
|
|
|
|
|
|
non_catchable_methods = ["trade", "event", "global link", "poké transfer", "time capsule", "unobtainable", "pokémon home"] |
|
|
non_catchable_methods = ["trade", "global link", "poké transfer", "time capsule", "unobtainable", "pokémon home"] |
|
|
|
|
|
|
|
|
|
|
|
if exclude_events: |
|
|
|
|
|
non_catchable_methods.append("event") |
|
|
|
|
|
|
|
|
for encounter in self.encounter_information: |
|
|
for encounter in self.encounter_information: |
|
|
for location in encounter.locations: |
|
|
for location in encounter.locations: |
|
|
@ -127,6 +190,8 @@ class Pokemon: |
|
|
encounter.method = "Gift" |
|
|
encounter.method = "Gift" |
|
|
elif "evolve" in location.lower(): |
|
|
elif "evolve" in location.lower(): |
|
|
encounter.method = "Evolve" |
|
|
encounter.method = "Evolve" |
|
|
|
|
|
elif "event" in location.lower(): |
|
|
|
|
|
encounter.method = "Event" |
|
|
else: |
|
|
else: |
|
|
encounter.method = "Catchable" |
|
|
encounter.method = "Catchable" |
|
|
|
|
|
|
|
|
@ -137,6 +202,18 @@ class Pokemon: |
|
|
|
|
|
|
|
|
self.update_encounter_information() |
|
|
self.update_encounter_information() |
|
|
|
|
|
|
|
|
|
|
|
game_methods = {} |
|
|
|
|
|
for encounter in self.encounter_information: |
|
|
|
|
|
if encounter.method: |
|
|
|
|
|
game_methods[encounter.game.lower()] = encounter |
|
|
|
|
|
|
|
|
|
|
|
for game in all_games: |
|
|
|
|
|
if game.lower() in game_methods: |
|
|
|
|
|
self.earliest_game = game_methods[game.lower()] |
|
|
|
|
|
return |
|
|
|
|
|
|
|
|
|
|
|
self.update_encounter_information(exclude_events=False) |
|
|
|
|
|
|
|
|
game_methods = {} |
|
|
game_methods = {} |
|
|
for encounter in self.encounter_information: |
|
|
for encounter in self.encounter_information: |
|
|
if encounter.method: |
|
|
if encounter.method: |
|
|
@ -157,8 +234,11 @@ class EvolutionStage: |
|
|
self.branches: List[EvolutionStage] = [] |
|
|
self.branches: List[EvolutionStage] = [] |
|
|
self.stage = stage |
|
|
self.stage = stage |
|
|
self.is_baby = self.stage is not None and 'Baby' in self.stage |
|
|
self.is_baby = self.stage is not None and 'Baby' in self.stage |
|
|
|
|
|
self.pokemon_reference = find_pokemon(pokemon, form) |
|
|
self.form = form |
|
|
self.form = form |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __str__(self): |
|
|
def __str__(self): |
|
|
return f"{self.pokemon} {self.form if self.form else ''} ({self.method if self.method else 'Base'})" |
|
|
return f"{self.pokemon} {self.form if self.form else ''} ({self.method if self.method else 'Base'})" |
|
|
|
|
|
|
|
|
@ -222,10 +302,11 @@ def parse_evolution_chain(table: Tag, form: Optional[str] = None) -> List[Evolut |
|
|
|
|
|
|
|
|
def extract_pokemon_name(td: Tag) -> str: |
|
|
def extract_pokemon_name(td: Tag) -> str: |
|
|
# Extract Pokemon name from the table within the TD |
|
|
# Extract Pokemon name from the table within the TD |
|
|
name_tag = td.find('table').find('a', class_='selflink') |
|
|
table = td.find('table') |
|
|
|
|
|
name_tag = table.find('a', class_='selflink') |
|
|
if name_tag: |
|
|
if name_tag: |
|
|
return name_tag.get_text(strip=True) |
|
|
return name_tag.get_text(strip=True) |
|
|
name_tag = td.find('table').find('a', title=True) |
|
|
name_tag = table.find('a', title=True, class_=lambda x: x != 'image') |
|
|
return name_tag.get_text(strip=True) |
|
|
return name_tag.get_text(strip=True) |
|
|
|
|
|
|
|
|
def extract_evolution_method(td: Tag) -> str: |
|
|
def extract_evolution_method(td: Tag) -> str: |
|
|
@ -260,6 +341,7 @@ def read_pokemon_list(filename, limit=50): |
|
|
|
|
|
|
|
|
new_pokemon = Pokemon(row['base_name'], row['number'], row['form']) |
|
|
new_pokemon = Pokemon(row['base_name'], row['number'], row['form']) |
|
|
big_pokemon_list.append(new_pokemon) |
|
|
big_pokemon_list.append(new_pokemon) |
|
|
|
|
|
|
|
|
return pokemon_list |
|
|
return pokemon_list |
|
|
|
|
|
|
|
|
def sanitize_name_and_form(name, form): |
|
|
def sanitize_name_and_form(name, form): |
|
|
@ -470,9 +552,9 @@ def get_evolution_data_from_bulbapedia(pokemon_name, form, cache): |
|
|
|
|
|
|
|
|
evolution_table = None |
|
|
evolution_table = None |
|
|
if form: |
|
|
if form: |
|
|
form = form.replace('Form', '').replace('form', '').strip() |
|
|
form_without_form = form.replace('Form', '').replace('form', '').strip() |
|
|
for tag in evolution_section.parent.find_next_siblings(): |
|
|
for tag in evolution_section.parent.find_next_siblings(): |
|
|
if tag.name == 'h4' and form in tag.get_text(strip=True): |
|
|
if tag.name == 'h4' and form_without_form in tag.get_text(strip=True): |
|
|
evolution_table = tag.find_next('table') |
|
|
evolution_table = tag.find_next('table') |
|
|
break |
|
|
break |
|
|
if tag.name == 'h3': |
|
|
if tag.name == 'h3': |
|
|
@ -589,7 +671,13 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache): |
|
|
else: |
|
|
else: |
|
|
for raw_location in raw_locations: |
|
|
for raw_location in raw_locations: |
|
|
main_form, sub_form = parse_form_information(str(raw_location)) |
|
|
main_form, sub_form = parse_form_information(str(raw_location)) |
|
|
if main_form == form: |
|
|
if not main_form: |
|
|
|
|
|
continue |
|
|
|
|
|
|
|
|
|
|
|
main_form_match = fuzz.partial_ratio(form.lower(), main_form.lower()) >= 80 |
|
|
|
|
|
sub_form_match = False if not sub_form else fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 80 |
|
|
|
|
|
|
|
|
|
|
|
if main_form_match or sub_form_match: |
|
|
locations = raw_location.get_text().split(',') |
|
|
locations = raw_location.get_text().split(',') |
|
|
for location in locations: |
|
|
for location in locations: |
|
|
if raw_game not in game_locations: |
|
|
if raw_game not in game_locations: |
|
|
@ -652,6 +740,48 @@ def get_earliest_game(encounter_data, pokemon_name, form): |
|
|
|
|
|
|
|
|
return "Unknown", "Unknown" |
|
|
return "Unknown", "Unknown" |
|
|
|
|
|
|
|
|
|
|
|
def handle_unown(pokemon, encounter_data):
    """Derive encounter information for an Unown entry from the form-less lookup.

    Does nothing unless ``pokemon.name`` is exactly "Unown" and
    ``find_pokemon(pokemon.name, None)`` returns an entry.

    For the "!" and "?" forms with non-empty ``encounter_data``, the form's
    own data is appended, followed by the looked-up entry's encounter for
    every game listed in ``all_games`` after both "HeartGold" and
    "SoulSilver" have been passed. Every other case receives a copy of the
    looked-up entry's encounter list.

    Args:
        pokemon: Object with ``name``, ``form`` and a mutable
            ``encounter_information`` list.
        encounter_data: Mapping whose keys and values are passed to
            ``EncounterInformation(key, value)``; may be empty or None.
    """
    if pokemon.name != "Unown":
        return

    one_form_unown = find_pokemon(pokemon.name, None)
    if not one_form_unown:
        return

    # The ! and ? forms were added in HeartGold and SoulSilver.
    if pokemon.form in ("!", "?") and encounter_data:
        # NOTE(review): the visible caller appears to append these same
        # entries right before calling this function - confirm whether the
        # resulting duplicates are intended.
        for game in encounter_data:
            pokemon.encounter_information.append(
                EncounterInformation(game, encounter_data[game])
            )

        seen_heartgold = False
        seen_soulsilver = False
        for game in all_games:
            if game == "HeartGold":
                seen_heartgold = True
                continue
            if game == "SoulSilver":
                seen_soulsilver = True
                continue
            # Only games listed after both HeartGold and SoulSilver inherit
            # from the looked-up entry.
            if not (seen_heartgold and seen_soulsilver):
                continue
            inherited = next(
                (
                    encounter
                    for encounter in one_form_unown.encounter_information
                    if game == encounter.game
                ),
                None,
            )
            if inherited is not None:
                pokemon.encounter_information.append(inherited)
    else:
        # Shallow copy so the two Pokemon do not share one mutable list.
        pokemon.encounter_information = list(one_form_unown.encounter_information)
|
|
|
|
|
|
|
|
|
|
|
def handle_deoxys(pokemon, encounter_data):
    """Give a Deoxys form the encounter information of the form-less lookup.

    Acts only when ``pokemon.name`` is exactly "Deoxys" and ``pokemon.form``
    is non-empty. The source entry is whatever
    ``find_pokemon(pokemon.name, None)`` returns.
    NOTE(review): presumably that is the normal form - confirm against the
    order of the Pokemon list.

    Args:
        pokemon: Object with ``name``, ``form`` and ``encounter_information``.
        encounter_data: Unused; kept so the signature parallels
            ``handle_unown``.
    """
    # Check the form first: without one there is nothing to replace, so the
    # index lookup can be skipped.
    if pokemon.name != "Deoxys" or not pokemon.form:
        return

    normal_form_deoxys = find_pokemon(pokemon.name, None)
    if not normal_form_deoxys:
        return

    # Shallow copy so the two Pokemon do not share one mutable list.
    pokemon.encounter_information = list(normal_form_deoxys.encounter_information)
|
|
|
|
|
|
|
|
def determine_earliest_games(pokemon_list, cache): |
|
|
def determine_earliest_games(pokemon_list, cache): |
|
|
for pokemon in big_pokemon_list: |
|
|
for pokemon in big_pokemon_list: |
|
|
print(f"Processing {pokemon}") |
|
|
print(f"Processing {pokemon}") |
|
|
@ -659,6 +789,8 @@ def determine_earliest_games(pokemon_list, cache): |
|
|
for encounter in encounter_data: |
|
|
for encounter in encounter_data: |
|
|
encounter_information = EncounterInformation(encounter, encounter_data[encounter]) |
|
|
encounter_information = EncounterInformation(encounter, encounter_data[encounter]) |
|
|
pokemon.encounter_information.append(encounter_information) |
|
|
pokemon.encounter_information.append(encounter_information) |
|
|
|
|
|
handle_unown(pokemon, encounter_data) |
|
|
|
|
|
handle_deoxys(pokemon, encounter_data) |
|
|
pokemon.determine_earliest_game() |
|
|
pokemon.determine_earliest_game() |
|
|
print(f"Processed {pokemon}: {pokemon.earliest_game.game} ({pokemon.earliest_game.method})") |
|
|
print(f"Processed {pokemon}: {pokemon.earliest_game.game} ({pokemon.earliest_game.method})") |
|
|
|
|
|
|
|
|
@ -736,9 +868,9 @@ def get_base_form(evolution_chain:List[EvolutionStage]): |
|
|
def adjust_for_evolution(pokemon_list, cache): |
|
|
def adjust_for_evolution(pokemon_list, cache): |
|
|
for pokemon in big_pokemon_list: |
|
|
for pokemon in big_pokemon_list: |
|
|
evolution_chain = get_evolution_data_from_bulbapedia(pokemon.name, pokemon.form, cache) |
|
|
evolution_chain = get_evolution_data_from_bulbapedia(pokemon.name, pokemon.form, cache) |
|
|
pokemon.add_evolution_data(evolution_chain) |
|
|
pokemon.add_evolution_chain(evolution_chain) |
|
|
game, method = pokemon.get_earliest_game_and_method() |
|
|
game, method = pokemon.get_earliest_game_and_method() |
|
|
print(f"Adjusted {pokemon.name} (#{pokemon.number}): {game} ({method})") |
|
|
print(f"Adjusted {pokemon}: {game} ({method})") |
|
|
|
|
|
|
|
|
pokemon_dict = {f"{pokemon['base_name']}_{pokemon['form']}".lower(): pokemon for pokemon in pokemon_list} |
|
|
pokemon_dict = {f"{pokemon['base_name']}_{pokemon['form']}".lower(): pokemon for pokemon in pokemon_list} |
|
|
|
|
|
|
|
|
@ -909,7 +1041,10 @@ def handle_unknown_encounters(pokemon_list, cache): |
|
|
if __name__ == "__main__": |
|
|
if __name__ == "__main__": |
|
|
get_cached_data() |
|
|
get_cached_data() |
|
|
|
|
|
|
|
|
pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=200) |
|
|
pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000) |
|
|
|
|
|
|
|
|
|
|
|
pokemon_index = create_pokemon_index(big_pokemon_list) |
|
|
|
|
|
|
|
|
pokemon_list_with_games = determine_earliest_games(pokemon_list, cache) |
|
|
pokemon_list_with_games = determine_earliest_games(pokemon_list, cache) |
|
|
pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache) |
|
|
pokemon_list_adjusted = adjust_for_evolution(pokemon_list_with_games, cache) |
|
|
pokemon_list_with_locations = add_encounter_locations(pokemon_list_adjusted, cache) |
|
|
pokemon_list_with_locations = add_encounter_locations(pokemon_list_adjusted, cache) |
|
|
|