diff --git a/DataGatherers/DefaultForms.json b/DataGatherers/DefaultForms.json index 36898ce..1f829b7 100644 --- a/DataGatherers/DefaultForms.json +++ b/DataGatherers/DefaultForms.json @@ -5,7 +5,6 @@ "Altered Forme", "Land Forme", "Standard Mode", - "Galarian Standard Mode", "Ordinary Forme", "Aria Forme", "Natural Form", diff --git a/DataGatherers/DetermineOriginGame.py b/DataGatherers/DetermineOriginGame.py index 8c0ce49..53d84eb 100644 --- a/DataGatherers/DetermineOriginGame.py +++ b/DataGatherers/DetermineOriginGame.py @@ -23,23 +23,7 @@ import concurrent.futures from concurrent.futures import ThreadPoolExecutor, as_completed from functools import lru_cache -# List of all main series Pokémon games in chronological order, with special games first in each generation -all_games = [ - "Yellow", "Red", "Blue", - "Crystal", "Gold", "Silver", - "Emerald", "FireRed", "LeafGreen", "Ruby", "Sapphire", - "Platinum", "HeartGold", "SoulSilver", "Diamond", "Pearl", - "Black 2", "White 2", "Black", "White", - "X", "Y", "Omega Ruby", "Alpha Sapphire", - "Ultra Sun", "Ultra Moon", "Sun", "Moon", "Let's Go Pikachu", "Let's Go Eevee", - "Sword", "Shield", "Expansion Pass", - "Brilliant Diamond", "Shining Pearl", - "Legends: Arceus", - "Scarlet", "Violet", "The Teal Mask", "The Hidden Treasure of Area Zero", "The Hidden Treasure of Area Zero (Scarlet)", "The Hidden Treasure of Area Zero (Violet)", "The Teal Mask (Scarlet)", "The Teal Mask (Violet)", - "Unknown", - "Pokémon Home", - "Pokémon Go", -] +from DataGatherers.constants import all_games, regional_descriptors big_pokemon_list = [] pokemon_index = {} @@ -391,6 +375,9 @@ def split_td_contents(td): def parse_form_information(html_content): soup = BeautifulSoup(html_content, 'html.parser') + + #TODO: This won't work for lines that have several small blocks in one line. 
+ #TODO: Adjust this to handle more than one small block, see Basculin for example small_tag = soup.find('small') forms = [] @@ -431,6 +418,11 @@ def parse_form_information(html_content): "sub_form": breed } + for region in regional_descriptors: + if region in main_form.lower(): + info["region"] = region + break + forms.append(info) else: #..... Gimmighoul headings = soup.find_all('b') @@ -445,6 +437,12 @@ def parse_form_information(html_content): "main_form": main_form, "sub_form": None } + + for region in regional_descriptors: + if region in main_form.lower(): + info["region"] = region + break + forms.append(info) return forms @@ -474,8 +472,6 @@ def get_evolution_data_from_bulbapedia(pokemon_name, form, cache: CacheManager, if not evolution_table: return None - eeveelutions = ["eevee", "vaporeon", "jolteon", "flareon", "espeon", "umbreon", "leafeon", "glaceon", "sylveon"] - if pokemon_name == "Eevee": evolution_chain = parse_eevee_evolution_chain(evolution_table) else: @@ -734,6 +730,10 @@ def process_game_locations(raw_game, raw_locations, form, default_forms): def form_matches(form_info, form, default_forms): main_form = form_info["main_form"] sub_form = form_info["sub_form"] + try: + region = form_info['region'] if 'region' in form_info else None + except KeyError: + region = None if default_forms and main_form and main_form in default_forms: main_form = None @@ -741,11 +741,18 @@ def form_matches(form_info, form, default_forms): if main_form is None: return False - if main_form in ["All Forms", "Kantonian Form", "All Sizes"]: + if main_form in ["All Forms", "All Sizes"]: return True + + if region == None and main_form in ["Kantonian Form"]: + return True + + main_form_match = compare_forms(form, main_form) or fuzz.partial_ratio(form.lower(), main_form.lower()) >= 95 + sub_form_match = compare_forms(form, sub_form) or (sub_form and fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 95) - main_form_match = compare_forms(form, main_form) or 
fuzz.partial_ratio(form.lower(), main_form.lower()) >= 80 - sub_form_match = compare_forms(form, sub_form) or (sub_form and fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 80) + if not main_form_match and not sub_form_match and region: + region_match = compare_forms(form, region) or fuzz.partial_ratio(form.lower(), region.lower()) >= 95 + return region_match return main_form_match or sub_form_match diff --git a/DataGatherers/constants.py b/DataGatherers/constants.py new file mode 100644 index 0000000..34017d6 --- /dev/null +++ b/DataGatherers/constants.py @@ -0,0 +1,19 @@ +regional_descriptors = ["kantonian", "johtonian", "hoennian", "sinnohan", "unovan", "kalosian", "alolan", "galarian", "hisuian", "paldean"] + +# List of all main series Pokémon games in chronological order, with special games first in each generation +all_games = [ + "Yellow", "Red", "Blue", + "Crystal", "Gold", "Silver", + "Emerald", "FireRed", "LeafGreen", "Ruby", "Sapphire", + "Platinum", "HeartGold", "SoulSilver", "Diamond", "Pearl", + "Black 2", "White 2", "Black", "White", + "X", "Y", "Omega Ruby", "Alpha Sapphire", + "Ultra Sun", "Ultra Moon", "Sun", "Moon", "Let's Go Pikachu", "Let's Go Eevee", + "Sword", "Shield", "Expansion Pass", + "Brilliant Diamond", "Shining Pearl", + "Legends: Arceus", + "Scarlet", "Violet", "The Teal Mask", "The Hidden Treasure of Area Zero", "The Hidden Treasure of Area Zero (Scarlet)", "The Hidden Treasure of Area Zero (Violet)", "The Teal Mask (Scarlet)", "The Teal Mask (Violet)", + "Unknown", + "Pokémon Home", + "Pokémon Go", +] \ No newline at end of file diff --git a/pokemon_forms.db b/pokemon_forms.db index bc76ec7..00162c5 100644 Binary files a/pokemon_forms.db and b/pokemon_forms.db differ