diff --git a/DataGatherers/DetermineOriginGame.py b/DataGatherers/DetermineOriginGame.py index 62ceb52..03f5abd 100644 --- a/DataGatherers/DetermineOriginGame.py +++ b/DataGatherers/DetermineOriginGame.py @@ -13,7 +13,7 @@ from fuzzywuzzy import fuzz from fuzzywuzzy import process from collections import defaultdict -from DataGatherers.cache_manager import CacheManager +from cache_manager import CacheManager # List of all main series Pokémon games in chronological order, with special games first in each generation all_games = [ @@ -555,7 +555,6 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager): continue random_nested_tbody = random_nested_table.find('tbody', recursive=False) random_nested_rows = random_nested_tbody.find_all('tr', recursive=False) - intro_gen = None for nested_row in random_nested_rows: if 'Generation' in nested_row.get_text(strip=True): @@ -609,9 +608,12 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager): if form is None: for raw_location in raw_locations: raw_text = raw_location.get_text() - raw_text = raw_text.replace("and", ",") + raw_text = raw_text.replace(" and ", ",") locations = raw_text.split(',') for location in locations: + location = location.strip() + if location == "": + continue if raw_game not in game_locations: game_locations[raw_game] = [] game_locations[raw_game].append(location.strip()) @@ -628,8 +630,11 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager): sub_form_match = False if not sub_form else fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 80 if main_form_match or sub_form_match: - locations = raw_location.get_text().replace('and', ',').replace('#', '').split(',') + locations = raw_location.get_text().replace(' and ', ',').replace('#', '').split(',') for location in locations: + location = location.strip() + if location == "": + continue if raw_game not in game_locations: game_locations[raw_game] = [] 
game_locations[raw_game].append(location.strip())
diff --git a/DataGatherers/update_location_information.py b/DataGatherers/update_location_information.py
new file mode 100644
index 0000000..42779da
--- /dev/null
+++ b/DataGatherers/update_location_information.py
@@ -0,0 +1,81 @@
+"""Print Bulbapedia encounter locations for every stored Pokemon form."""
+import sqlite3
+from contextlib import closing
+
+from cache_manager import CacheManager
+from DetermineOriginGame import get_locations_from_bulbapedia
+
+# Acquisition notes that are skipped when they match a location exactly.
+EXCLUDED_ENCOUNTERS = frozenset({"Trade", "Time Capsule", "Unobtainable"})
+# A location containing any of these substrings is skipped as well.
+EXCLUDED_LOCATION_MARKERS = ("Evolve", "TradeVersion")
+
+
+def create_encounters_table():
+    """Open ``pokemon_forms.db`` and return the connection.
+
+    The CREATE TABLE statement below is still a commented-out draft, so no
+    table is created yet. The caller is responsible for closing the
+    returned connection.
+    """
+    conn = sqlite3.connect('pokemon_forms.db')
+    #cursor = conn.cursor()
+    #cursor.execute('''
+    #CREATE TABLE IF NOT EXISTS encounters (
+    #    pfic TEXT,
+    #
+    #)
+    #''')
+    #conn.commit()
+    return conn
+
+
+def _normalize_form(name, form):
+    """Return the form to look up, or None when the base form applies.
+
+    The species name is stripped from the form name. Gendered forms (any
+    form containing "male", which also matches "female") and forms that
+    end up empty are mapped to None so that the lookup treats them as the
+    base form instead of trying to match an empty form name.
+    """
+    if not form:
+        return None
+    if name in form:
+        form = form.replace(name, "").strip()
+    if "male" in form.lower():
+        return None
+    return form or None
+
+
+def _is_relevant_location(location):
+    """Return False for entries excluded by exact match or by marker."""
+    if location in EXCLUDED_ENCOUNTERS:
+        return False
+    return not any(marker in location for marker in EXCLUDED_LOCATION_MARKERS)
+
+
+def main():
+    """Print the filtered encounter locations of every distinct form."""
+    cache = CacheManager()
+
+    # The rows are fetched eagerly, so the connection can be closed before
+    # the per-form lookups start; closing() also covers the error path.
+    with closing(create_encounters_table()) as conn:
+        cursor = conn.cursor()
+        cursor.execute('SELECT DISTINCT name, form_name FROM pokemon_forms')
+        pokemon_forms = cursor.fetchall()
+
+    for name, form in pokemon_forms:
+        print(f"Processing {name} {form if form else ''}")
+        form = _normalize_form(name, form)
+
+        encounter_data = get_locations_from_bulbapedia(name, form, cache)
+        for encounter, locations in encounter_data.items():
+            print(f"Found in {encounter}:")
+            for location in locations:
+                if _is_relevant_location(location):
+                    print(f" {location}")
+
+
+if __name__ == "__main__":
+    main()