From 13f3e1c6a65c4f54f47f603948473108c7bd7204 Mon Sep 17 00:00:00 2001 From: Dan Date: Thu, 17 Oct 2024 15:44:42 +0100 Subject: [PATCH] - Fixes for the more annoying pokemon --- DataGatherers/DefaultForms.json | 2 - DataGatherers/DetermineOriginGame.py | 121 ++++++++++++------- DataGatherers/update_location_information.py | 114 ++++++++++------- 3 files changed, 147 insertions(+), 90 deletions(-) diff --git a/DataGatherers/DefaultForms.json b/DataGatherers/DefaultForms.json index e09ba7d..756f7bd 100644 --- a/DataGatherers/DefaultForms.json +++ b/DataGatherers/DefaultForms.json @@ -20,7 +20,6 @@ "Full Belly Mode", "Zero Form", "Curly Form", - "Chest Form", "Apex Build", "Ultimate Mode", "Teal Mask", @@ -42,7 +41,6 @@ "Amped Form", "Vanilla Cream Strawberry Sweet", "Single Strike Style", - "Family of Three", "Green Plumage", "Two-Segment Form", "Standard Form" diff --git a/DataGatherers/DetermineOriginGame.py b/DataGatherers/DetermineOriginGame.py index 00f9d1a..9ed8033 100644 --- a/DataGatherers/DetermineOriginGame.py +++ b/DataGatherers/DetermineOriginGame.py @@ -374,11 +374,13 @@ def split_td_contents(td): current_group = [] for content in td.contents: - if isinstance(content, Tag) and content.name == 'br': + if isinstance(content, Tag) and (content.name == 'br' or content.name == 'p'): if current_group: groups.append(BeautifulSoup('', 'html.parser').new_tag('div')) for item in current_group: groups[-1].append(copy.copy(item)) + if content.name == 'p': + groups[-1].append(copy.copy(content)) current_group = [] else: current_group.append(content) @@ -394,13 +396,28 @@ def parse_form_information(html_content): soup = BeautifulSoup(html_content, 'html.parser') small_tag = soup.find('small') + forms = [] # Form info is in bold inside a small tag. if small_tag: - bold_tag = small_tag.find('b') - if bold_tag: + bold_tags = small_tag.find_all('b') + for bold_tag in bold_tags: form_text = bold_tag.get_text(strip=True) + # Remove parentheses form_text = form_text.strip('()') + + if "/" in form_text: + last_word = form_text.split()[-1] + form_text = form_text.replace(last_word, "").strip() + parts = form_text.split('/') + for part in parts: + main_form = part.strip() + " " + last_word + info = { + "main_form": main_form, + "sub_form": None + } + forms.append(info) + continue # Split the text into main form and breed (if present) parts = form_text.split('(') @@ -408,13 +425,28 @@ def parse_form_information(html_content): # "Factor"s are not actual forms, they are properties of the pokemon you can encoutner. if main_form and "factor" in main_form.lower(): - return None, None + continue breed = parts[1].strip(')') if len(parts) > 1 else None - return main_form, breed - - return None, None + info = { + "main_form": main_form, + "sub_form": breed + } + + forms.append(info) + else: #..... Gimmighoul + headings = soup.find_all('b') + if len(headings) > 0: + for heading in headings: + main_form = heading.get_text(strip=True) + info = { + "main_form": main_form, + "sub_form": None + } + forms.append(info) + + return forms def get_evolution_data_from_bulbapedia(pokemon_name, form, cache: CacheManager, gender: Optional[str] = None): page_data = get_pokemon_data_bulbapedia(pokemon_name, cache) @@ -640,48 +672,44 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager, defau if form is None: for raw_location in raw_locations: raw_text = raw_location.get_text() - main_form, sub_form = parse_form_information(str(raw_location)) + forms = parse_form_information(str(raw_location)) + if len(forms) > 0: + for form_info in forms: + main_form = form_info["main_form"] - if default_forms and main_form and main_form in default_forms: - main_form = None + if default_forms and main_form and main_form in default_forms: + main_form = None - if main_form and (main_form != "All Forms" and main_form != "Kantonian Form"): - continue + if main_form and (main_form != "All Forms" and main_form != "Kantonian Form" and main_form != "All Sizes"): + continue - if raw_game not in game_locations: - game_locations[raw_game] = [] - info = {} - info["location"] = raw_text - info["tag"] = str(raw_location) - game_locations[raw_game].append(info) + record_location_info(raw_game, game_locations, raw_location, raw_text) + else: + record_location_info(raw_game, game_locations, raw_location, raw_text) else: for raw_location in raw_locations: - main_form, sub_form = parse_form_information(str(raw_location)) - if not main_form: - continue + forms = parse_form_information(str(raw_location)) + for form_info in forms: + main_form = form_info["main_form"] + sub_form = form_info["sub_form"] - if main_form == "Kantonian Form": - continue + if not main_form: + continue - if main_form == "All Forms": - main_form = form + if main_form == "All Forms" or main_form == "Kantonian Form" or main_form == "All Sizes": + main_form = form - main_form_match = compare_forms(form, main_form) - if not main_form_match: - main_form_match = fuzz.partial_ratio(form.lower(), main_form.lower()) >= 80 + main_form_match = compare_forms(form, main_form) + if not main_form_match: + main_form_match = fuzz.partial_ratio(form.lower(), main_form.lower()) >= 80 - sub_form_match = compare_forms(form, sub_form) - if not sub_form_match: - sub_form_match = False if not sub_form else fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 80 + sub_form_match = compare_forms(form, sub_form) + if not sub_form_match: + sub_form_match = False if not sub_form else fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 80 - if main_form_match or sub_form_match: - raw_text = raw_location.get_text() - if raw_game not in game_locations: - game_locations[raw_game] = [] - info = {} - info["location"] = raw_text - info["tag"] = str(raw_location) - game_locations[raw_game].append(info) + if main_form_match or sub_form_match: + raw_text = raw_location.get_text() + record_location_info(raw_game, game_locations, raw_location, raw_text) # For Later for variant in event_tables: @@ -693,15 +721,18 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager, defau games_string = entries[0].find('a').get('title') for game in all_games: if game in games_string: - if game not in game_locations: - game_locations[game] = [] - info = {} - info["location"] = "Event" - info["tag"] = None - game_locations[game].append(info) + record_location_info(game, game_locations, None, "Event") return game_locations +def record_location_info(raw_game, game_locations, raw_location, raw_text): + if raw_game not in game_locations: + game_locations[raw_game] = [] + info = {} + info["location"] = raw_text + info["tag"] = str(raw_location) + game_locations[raw_game].append(info) + def split_outside_brackets(str): return re.split(r',(?![^()]*\))', str) diff --git a/DataGatherers/update_location_information.py b/DataGatherers/update_location_information.py index 27adae7..81da479 100644 --- a/DataGatherers/update_location_information.py +++ b/DataGatherers/update_location_information.py @@ -207,31 +207,63 @@ def extract_additional_information(s): else: return full_text, details +def build_query_string(table_name, criteria): + conditions = [] + values = [] + query = f"SELECT * FROM {table_name} WHERE " + + for column, value in criteria.items(): + if value is None: + conditions.append(f"({column} IS NULL OR {column} = '')") + elif value == "": + conditions.append(f"{column} = ''") + else: + conditions.append(f"{column} = ?") + values.append(value) + + return query + " AND ".join(conditions), values + def save_encounter(conn, pfic, game, location, days, times, dual_slot, static_encounter, static_encounter_count, extra_text, stars, rods, fishing, starter): cursor = conn.cursor() - - # Convert lists to strings for comparison, except days and times + extra_text_str = ' '.join(extra_text) if extra_text else None stars_str = ','.join(sorted(stars)) if stars else None rods_str = ','.join(sorted(rods)) if rods else None + insert_query = ''' + INSERT INTO encounters + (pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars, rods, fishing, starter) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ''' + + criteria = { + "pfic": pfic, + "game": game, + "location": location, + "day": None, + "time": None, + "dual_slot": dual_slot, + "static_encounter": static_encounter, + "static_encounter_count": static_encounter_count, + "extra_text": extra_text_str, + "stars": stars_str, + "rods": rods_str, + "fishing": fishing, + "starter": starter + } + if len(days) > 0: for day in days: + criteria["day"] = day + criteria["time"] = None + query, values = build_query_string("encounters", criteria) # Check if an identical record already exists - cursor.execute(''' - SELECT COUNT(*) FROM encounters - WHERE pfic = ? AND game = ? AND location = ? AND day = ? AND time IS NULL - AND dual_slot = ? AND static_encounter = ? AND static_encounter_count = ? - AND extra_text = ? AND stars = ? AND rods = ? AND fishing = ? AND starter = ? - ''', (pfic, game, location, day, dual_slot, static_encounter, - static_encounter_count, extra_text_str, stars_str, rods_str, fishing, starter)) - - if cursor.fetchone()[0] == 0: - cursor.execute(''' - INSERT INTO encounters - (pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars, rods, fishing, starter) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - ''', (pfic, game, location, day, None, dual_slot, static_encounter_count, + cursor.execute(query, values) + + encounter = cursor.fetchone() + + if encounter == None or encounter[0] == 0: + cursor.execute(insert_query, (pfic, game, location, day, None, dual_slot, static_encounter_count, static_encounter, extra_text_str, stars_str, rods_str, fishing, starter)) print(f"New encounter added for {pfic} in {game} at {location} on {day}") else: @@ -239,42 +271,32 @@ def save_encounter(conn, pfic, game, location, days, times, dual_slot, static_en elif len(times) > 0: for time in times: + criteria["day"] = None + criteria["time"] = time + query, values = build_query_string("encounters", criteria) # Check if an identical record already exists - cursor.execute(''' - SELECT COUNT(*) FROM encounters - WHERE pfic = ? AND game = ? AND location = ? AND day IS NULL AND time = ? - AND dual_slot = ? AND static_encounter = ? AND static_encounter_count = ? - AND extra_text = ? AND stars = ? AND rods = ? AND fishing = ? AND starter = ? - ''', (pfic, game, location, time, dual_slot, static_encounter, - static_encounter_count, extra_text_str, stars_str, rods_str, fishing, starter)) + cursor.execute(query, values) - if cursor.fetchone()[0] == 0: - cursor.execute(''' - INSERT INTO encounters - (pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars, rods, fishing, starter) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - ''', (pfic, game, location, None, time, dual_slot, static_encounter_count, + encounter = cursor.fetchone() + + if encounter == None or encounter[0] == 0: + cursor.execute(insert_query, (pfic, game, location, None, time, dual_slot, static_encounter_count, static_encounter, extra_text_str, stars_str, rods_str, fishing, starter)) print(f"New encounter added for {pfic} in {game} at {location} at {time}") else: print(f"Identical encounter already exists for {pfic} in {game} at {location} at {time}") else: + criteria["day"] = None + criteria["time"] = None + query, values = build_query_string("encounters", criteria) # Check if an identical record already exists - cursor.execute(''' - SELECT COUNT(*) FROM encounters - WHERE pfic = ? AND game = ? AND location = ? AND day IS NULL AND time IS NULL - AND dual_slot = ? AND static_encounter = ? AND static_encounter_count = ? - AND extra_text = ? AND stars = ? AND rods = ? AND fishing = ? AND starter = ? - ''', (pfic, game, location, dual_slot, static_encounter, - static_encounter_count, extra_text_str, stars_str, rods_str, fishing, starter)) - thing = cursor.fetchone() - if thing[0] == 0: - cursor.execute(''' - INSERT INTO encounters - (pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars, rods, fishing, starter) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - ''', (pfic, game, location, None, None, dual_slot, static_encounter_count, + cursor.execute(query, values) + + encounter = cursor.fetchone() + + if encounter == None or encounter[0] == 0: + cursor.execute(insert_query, (pfic, game, location, None, None, dual_slot, static_encounter_count, static_encounter, extra_text_str, stars_str, rods_str, fishing, starter)) print(f"New encounter added for {pfic} in {game} at {location}") else: @@ -315,6 +337,9 @@ def process_pokemon_for_location_data(pfic, name, form, national_dex, default_fo if name == "Alcremie": form = None + if name == "Minior": + form = None + if form and form.lower() == "female": form = None @@ -366,6 +391,9 @@ def process_pokemon_for_location_data(pfic, name, form, national_dex, default_fo if remaining != "": remaining_locations = remaining.replace(" and ", ",").split(",") for remaining_location in remaining_locations: + if remaining_location.strip() == "": + continue + save_encounter(conn, pfic, encounter, remaining_location.strip(), details["days"], details["times"], details["dual_slot"], details["static_encounter"], details["static_encounter_count"], details["extra_text"], details["stars"], details["Rods"], details["Fishing"], details["starter"] ) if __name__ == "__main__":