from __future__ import annotations import csv import requests import time import json import os import re import sqlite3 from bs4 import BeautifulSoup, Tag, NavigableString import copy from typing import List, Optional from fuzzywuzzy import fuzz from fuzzywuzzy import process from collections import defaultdict import os import sys sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from DataGatherers.cache_manager import CacheManager import concurrent.futures from concurrent.futures import ThreadPoolExecutor, as_completed from functools import lru_cache from pattern.en import singularize from DataGatherers.constants import all_games, regional_descriptors big_pokemon_list = [] pokemon_index = {} def create_pokemon_index(pokemon_list): global pokemon_index name_index = defaultdict(list) for pokemon in pokemon_list: name_index[pokemon.name.lower()].append(pokemon) pokemon_index = name_index def find_pokemon(name, form=None, threshold=80): global pokemon_index name = name.lower() if name in pokemon_index: candidates = pokemon_index[name] if not form: return candidates[0] if candidates else None best_match = None best_score = 0 for pokemon in candidates: if pokemon.form: score = fuzz.ratio(form.lower(), pokemon.form.lower()) if score > best_score: best_score = score best_match = pokemon if best_match and best_score >= threshold: return best_match # If no exact name match, try fuzzy matching on names best_name_match = None best_name_score = 0 for pokemon_name in pokemon_index: score = fuzz.ratio(name, pokemon_name) if score > best_name_score: best_name_score = score best_name_match = pokemon_name if best_name_match and best_name_score >= threshold: candidates = pokemon_index[best_name_match] if not form: return candidates[0] best_match = None best_score = 0 for pokemon in candidates: if pokemon.form: score = fuzz.ratio(form.lower(), pokemon.form.lower()) if score > best_score: best_score = score best_match = pokemon if best_match and best_score >= threshold: return best_match return None def roman_to_int(s): roman_values = { 'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100, 'D': 500, 'M': 1000 } total = 0 prev_value = 0 for char in reversed(s): current_value = roman_values[char] if current_value >= prev_value: total += current_value else: total -= current_value prev_value = current_value return total class Pokemon: def __init__(self, name: str, number: int, form: Optional[str] = None): self.name = name self.number = number self.form = form self.stage: Optional[str] = None self.evolution_chain: Optional[List['EvolutionStage']] = [] self.is_baby = False self.encounter_information: Optional[List['EncounterInformation']] = [] self.earliest_game: Optional['EncounterInformation'] = None self.obtain_method: Optional[str] = None self.introduced_in_gen = None def get_earliest_game_and_method(self): if self.evolution_chain: for stage in self.evolution_chain: if self.is_baby: return stage.pokemon_reference.earliest_game.game, "Breed" else: if stage.pokemon_reference == self: return self.earliest_game.game, self.earliest_game.method return stage.pokemon_reference.earliest_game.game, "Evolve" if self.earliest_game: return self.earliest_game.game, self.earliest_game.method return None, None def __str__(self): return f"{self.name}{' ' if self.form else ''}{self.form if self.form else ''} (#{self.number})" def add_evolution_chain(self, evolution_chain: List['EvolutionStage']): self.evolution_chain = evolution_chain def add_stage(self, stage: str): self.stage = stage self.is_baby = self.stage is not None and 'Baby' in self.stage def update_encounter_information(self, exclude_events=True, exclude_home=True, exclude_go=True): if not self.encounter_information: return non_catchable_methods = ["trade", "global link", "poké transfer", "time capsule", "unobtainable"] if exclude_events: non_catchable_methods.append("event") if exclude_home: non_catchable_methods.append("pokemon home") if exclude_go: non_catchable_methods.append("pokémon go") for encounter in self.encounter_information: encounter.method = None for location in encounter.locations: skip_location = False for non_catchable in non_catchable_methods: if non_catchable in location.lower(): skip_location = True break if skip_location: continue if "first partner" in location.lower(): encounter.method = "Starter" elif "received" in location.lower(): encounter.method = "Gift" elif "evolve" in location.lower(): encounter.method = "Evolve" elif "event" in location.lower(): encounter.method = "Event" else: encounter.method = "Catchable" def parse_encoutners_for_games(self): game_methods = {} for encounter in self.encounter_information: if encounter.method: game_methods[encounter.game.lower()] = encounter for game in all_games: if game.lower() in game_methods: self.earliest_game = game_methods[game.lower()] return def determine_earliest_game(self): if not self.encounter_information: self.earliest_game = None return self.update_encounter_information() self.parse_encoutners_for_games() if self.earliest_game != None: return self.update_encounter_information(exclude_events=False) self.parse_encoutners_for_games() if self.earliest_game != None: return self.update_encounter_information(exclude_home=False) self.parse_encoutners_for_games() if self.earliest_game != None: return self.update_encounter_information(exclude_go=False) self.parse_encoutners_for_games() if self.earliest_game != None: return self.earliest_game = None class EvolutionStage: def __init__(self, pokemon: str, method: Optional[str] = None, stage: Optional[str] = None, form: Optional[str] = None): self.pokemon = pokemon self.method = method self.next_stage: Optional[EvolutionStage] = None self.previous_stage: Optional[EvolutionStage] = None # New attribute self.branches: List[EvolutionStage] = [] self.stage = stage self.is_baby = self.stage is not None and 'Baby' in self.stage self.pokemon_reference = find_pokemon(pokemon, form) if self.pokemon_reference == None: self.pokemon_reference = find_pokemon(pokemon, None) self.form = form def __str__(self): return f"{self.pokemon} {self.form if self.form else ''} ({self.method if self.method else 'Base'})" class EncounterInformation: def __init__(self, game: str, locations: List[str]): self.game = game self.method = "Unknown" self.locations = locations def parse_evolution_chain(table: Tag, form: Optional[str] = None) -> List[EvolutionStage]: main_chain = [] current_stage = None pending_method = None tbody = table.find('tbody', recursive=False) if not tbody: return [] rows = tbody.find_all('tr', recursive=False) main_row = rows[0] branch_rows = rows[1:] # Parse main evolution chain for td in main_row.find_all('td', recursive=False): if td.find('table'): # This TD contains Pokemon information pokemon_name = extract_pokemon_name(td) stage = extract_stage_form(td) evolution_form = extract_evolution_form(td, pokemon_name) new_stage = EvolutionStage(pokemon_name, pending_method, stage, evolution_form) pending_method = None if current_stage: current_stage.next_stage = new_stage new_stage.previous_stage = current_stage # Set the back link current_stage = new_stage main_chain.append(current_stage) else: # This TD contains evolution method for the next Pokemon pending_method = extract_evolution_method(td) # Parse branching evolutions for row in branch_rows: branch_stage = None branch_method = None for td in row.find_all('td', recursive=False): if td.find('table'): pokemon_name = extract_pokemon_name(td) stage = extract_stage_form(td) evolution_form = extract_evolution_form(td, pokemon_name) new_stage = EvolutionStage(pokemon_name, branch_method, stage, evolution_form) branch_method = None if branch_stage: branch_stage.next_stage = new_stage new_stage.previous_stage = branch_stage # Set the back link branch_stage = new_stage # Find which main chain Pokemon this branches from for main_stage in main_chain: if td.get('rowspan') and main_stage.pokemon == pokemon_name: main_stage.branches.append(branch_stage) branch_stage.previous_stage = main_stage # Set the back link to the main chain break else: branch_method = extract_evolution_method(td) return main_chain def find_name_tag(td: Tag) -> Optional[Tag]: table = td.find('table') name_tag = table.find('a', class_='selflink') if name_tag: return name_tag name_tag = table.find('a', title=True, class_=lambda x: x != 'image') return name_tag def extract_pokemon_name(td: Tag) -> Optional[str]: name_tag = find_name_tag(td) if name_tag: return name_tag.get_text(strip=True) return None def extract_evolution_method(td: Tag) -> str: # Extract evolution method from the TD return td.get_text(strip=True) def extract_stage_form(td: Tag) -> Optional[str]: stage_tag = td.find('table').find('small') if stage_tag: return stage_tag.get_text(strip=True) return None def extract_evolution_form(td: Tag, name: str) -> Optional[str]: name_tag = find_name_tag(td) if name_tag: name_row = name_tag.parent small_tags = name_row.find_all('small') if len(small_tags) > 1: return small_tags[0].get_text(strip=True) return None def read_pokemon_list(filename, limit=50): pokemon_list = [] with open(filename, 'r', newline='', encoding='utf-8') as csvfile: reader = csv.DictReader(csvfile) for i, row in enumerate(reader): if i >= limit: break # Split the name into base name and form match = re.match(r'(.*?)\s*(\(.*\))?$', row['name']) base_name, form = match.groups() if match else (row['name'], None) row['base_name'] = base_name.strip() row['form'] = form.strip('() ') if form else None pokemon_list.append(row) new_pokemon = Pokemon(row['base_name'], row['number'], row['form']) big_pokemon_list.append(new_pokemon) return big_pokemon_list def get_pokemon_data_bulbapedia(pokemon_name, cache: CacheManager): url = f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pokémon)" return cache.fetch_url(url) def split_td_contents(td): groups = [] current_group = [] for content in td.contents: if isinstance(content, NavigableString): text = content.strip() if text: current_group.append(content) elif content.name == 'br': if current_group: groups.append(''.join(str(item) for item in current_group)) current_group = [] else: current_group.append(content) if current_group: groups.append(''.join(str(item) for item in current_group)) return groups def parse_form_information(html_content): soup = BeautifulSoup(html_content, 'html.parser') #TODO: This wont work for lines that have several small blocks in one line. #TODO: Adjust this to handle more than one small block, see Basculin for example small_tag = soup.find('small') forms = [] # Form info is in bold inside a small tag. if small_tag: bold_tags = small_tag.find_all('b') for bold_tag in bold_tags: form_text = bold_tag.get_text(strip=True) # Remove parentheses form_text = form_text.strip('()') if "/" in form_text: last_word = singularize(form_text.split()[-1]) form_text = form_text.replace(last_word, "").strip() parts = form_text.split('/') for part in parts: main_form = part.strip() + " " + last_word info = { "main_form": main_form, "sub_form": None } forms.append(info) continue # Split the text into main form and breed (if present) parts = form_text.split('(') main_form = parts[0].strip() # "Factor"s are not actual forms, they are properties of the pokemon you can encoutner. if main_form and "factor" in main_form.lower(): continue breed = parts[1].strip(')') if len(parts) > 1 else None info = { "main_form": main_form, "sub_form": breed } for region in regional_descriptors: if region in main_form.lower(): info["region"] = region break forms.append(info) else: #..... Gimmighoul headings = soup.find_all('b') if len(headings) > 0: for heading in headings: if heading.parent.name == 'sup': continue if "form" not in heading.get_text(strip=True).lower(): continue main_form = heading.get_text(strip=True) info = { "main_form": main_form, "sub_form": None } for region in regional_descriptors: if region in main_form.lower(): info["region"] = region break forms.append(info) return forms def get_evolution_data_from_bulbapedia(pokemon_name, form, cache: CacheManager, gender: Optional[str] = None): page_data = get_pokemon_data_bulbapedia(pokemon_name, cache) if not page_data: return None soup = BeautifulSoup(page_data, 'html.parser') evolution_section = soup.find('span', id='Evolution_data') if not evolution_section: return None evolution_table = None if form: form_without_form = form.replace('Form', '').replace('form', '').strip() for tag in evolution_section.parent.find_next_siblings(): if tag.name == 'h4' and form_without_form in tag.get_text(strip=True): evolution_table = tag.find_next('table') break if tag.name == 'h3': break else: evolution_table = evolution_section.parent.find_next('table') if not evolution_table: return None if pokemon_name == "Eevee": evolution_chain = parse_eevee_evolution_chain(evolution_table) else: evolution_chain = parse_evolution_chain(evolution_table, form) return evolution_chain # This is going to be a little odd. # the first TR contains a full evolution chain # other TRs contain branching evolution chains # any TDs in the first TR with a rowspan are part of the main evolution chain # any other TDS are part of the branching evolution chains # a table in a TD is information about the current Pokémon in that evolution stage # a TD without a table is information on how to trigger the next evolution def parse_pokemon_subtable(td): if td.find('table'): # This TD contains Pokemon information pokemon_name = extract_pokemon_name(td) stage = extract_stage_form(td) return pokemon_name, stage return None, None def parse_eevee_evolution_chain(table): tbody = table.find('tbody', recursive=False) if not tbody: return [] rows = tbody.find_all('tr', recursive=False) eevee_row = rows[1] method_row = rows[2] eeveelutions_row = rows[3] eevee_td = eevee_row.find('td', recursive=False) pokemon_name, stage = parse_pokemon_subtable(eevee_td) eevee_stage = EvolutionStage(pokemon_name, None, stage, None) methods = [] for method in method_row.find_all('td', recursive=False): methods.append(extract_evolution_method(method)) eeveelutions = [] index = 0 for eeveelution in eeveelutions_row.find_all('td', recursive=False): pokemon_name, stage = parse_pokemon_subtable(eeveelution) eeveelution_stage = EvolutionStage(pokemon_name, methods[index], stage, None) eeveelution_stage.previous_stage = eevee_stage # Set the back link to Eevee eeveelutions.append(eeveelution_stage) index += 1 eevee_stage.branches = eeveelutions # Set the branches directly, not as a nested list return [eevee_stage] def get_intro_generation(pokemon_name, form, cache: CacheManager): page_data = get_pokemon_data_bulbapedia(pokemon_name, cache) if not page_data: return None soup = BeautifulSoup(page_data, 'html.parser') locations_section = soup.find('span', id='Game_locations') if not locations_section: return None locations_table = locations_section.find_next('table', class_='roundy') if not locations_table: return None generation_tbody = locations_table.find('tbody', recursive=False) generation_rows = generation_tbody.find_all('tr', recursive=False) for generation_row in generation_rows: random_nested_td = generation_row.find('td', recursive=False) if not random_nested_td: continue random_nested_table = random_nested_td.find('table', recursive=False) if not random_nested_table: continue random_nested_tbody = random_nested_table.find('tbody', recursive=False) random_nested_rows = random_nested_tbody.find_all('tr', recursive=False) for nested_row in random_nested_rows: test_text = None pattern = r"Generation\s+([IVXLCDM]+)" match = re.search(pattern, nested_row.get_text(strip=True)) if match: test_text = match.group(1) # This returns just the Roman numeral if test_text: return roman_to_int(test_text.replace("Generation ", "").strip()) return None def compare_forms(a, b): if a == None or b == None: return False if a == b: return True temp_a = a.lower().replace("forme", "").replace("form", "").replace("é", "e").strip() temp_b = b.lower().replace("forme", "").replace("form", "").replace("é", "e").strip() temp_a = temp_a.replace("deputante", "debutante").replace("p'au", "pa'u").replace("blood moon", "bloodmoon") temp_b = temp_b.replace("deputante", "debutante").replace("p'au", "pa'u").replace("blood moon", "bloodmoon") if temp_a == temp_b: return True return False @lru_cache(maxsize=100) def get_parsed_pokemon_page(pokemon_name, cache): page_data = get_pokemon_data_bulbapedia(pokemon_name, cache) return BeautifulSoup(page_data, 'html.parser') if page_data else None def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager, default_forms=None): soup = get_parsed_pokemon_page(pokemon_name, cache) if not soup: return None # Try different methods to find the locations table locations_table = None possible_headers = ['Game locations', 'In side games', 'In spin-off games'] for header in possible_headers: span = soup.find('span', id=header.replace(' ', '_')) if span: locations_table = span.find_next('table', class_='roundy') if locations_table: break if not locations_table: print(f"Warning: Couldn't find locations table for {pokemon_name}") return None raw_game_locations = {} generation_tbody = locations_table.find('tbody', recursive=False) generation_rows = generation_tbody.find_all('tr', recursive=False) for generation_row in generation_rows: random_nested_td = generation_row.find('td', recursive=False) if not random_nested_td: continue random_nested_table = random_nested_td.find('table', recursive=False) if not random_nested_table: continue random_nested_tbody = random_nested_table.find('tbody', recursive=False) random_nested_rows = random_nested_tbody.find_all('tr', recursive=False) for nested_row in random_nested_rows: if 'Generation' in nested_row.get_text(strip=True): continue games_container_td = nested_row.find('td', recursive=False) if not games_container_td: continue games_container_table = games_container_td.find('table', recursive=False) if not games_container_table: continue games_container_tbody = games_container_table.find('tbody', recursive=False) games_container_rows = games_container_tbody.find_all('tr', recursive=False) for games_container_row in games_container_rows: games = games_container_row.find_all('th') for game in games: raw_game = game.get_text(strip=True) if raw_game not in all_games: continue locations_container_td = games_container_row.find('td', recursive=False) if not locations_container_td: continue locations_container_table = locations_container_td.find('table', recursive=False) if not locations_container_table: continue locations_container_tbody = locations_container_table.find('tbody', recursive=False) locations = locations_container_tbody.find_all('td') for location in locations: groups = split_td_contents(location) for group in groups: if raw_game not in raw_game_locations: raw_game_locations[raw_game] = [] raw_game_locations[raw_game].append(group) # Process events events_section = soup.find('span', id='In_events') event_tables = process_event_tables(events_section) if events_section else {} # Process game locations in parallel with ThreadPoolExecutor(max_workers=1) as executor: futures = {executor.submit(process_game_locations, raw_game, raw_locations, form, default_forms): raw_game for raw_game, raw_locations in raw_game_locations.items()} game_locations = {} for future in as_completed(futures): raw_game = futures[future] result = future.result() if result: game_locations[raw_game] = result # Process event tables for variant in event_tables: if (variant == pokemon_name and form is None) or (form and form in variant): process_event_table(event_tables[variant], game_locations) return game_locations def process_event_tables(events_section): event_tables = {} if events_section: next_element = events_section.parent.find_next_sibling() while next_element and next_element.name != 'h3': if next_element.name == 'h5': variant = next_element.text.strip() table = next_element.find_next_sibling('table', class_='roundy') if table: event_tables[variant] = table next_element = next_element.find_next_sibling() return event_tables def _process_event_table(table, game_locations): for row in table.find_all('tr')[1:]: # Skip header row cells = row.find_all('td') if len(cells) >= 3: game = cells[0].text.strip() location = cells[2].text.strip() if game in all_games: if game not in game_locations: game_locations[game] = [] game_locations[game].append({"location": f"Event: {location}", "tag": str(cells[2])}) def process_event_table(table, game_locations): for row in table.find_all('tr')[1:]: # Skip header row cells = row.find_all('td') if len(cells) >= 6: # Ensure all required columns are present # Extract game names as a list game_links = cells[0].find_all('a') individual_games = [] for link in game_links: # Replace specific known prefixes game_name = link['title'].replace("Pokémon ", "").replace("Versions", "").replace(" Version", "").replace(" (Japanese)", "") # Split on " and ", which is used for combined games parsed_names = game_name.split(" and ") # Add the parsed names to the list individual_games.extend(parsed_names) # Print extracted game names for debugging print(f"Extracted game names from row: {individual_games}") # Filter games to include only those in all_games matching_games = [] for game in individual_games: if any(game.strip().lower() == g.lower() for g in all_games): matching_games.append(game) # Print matching games for debugging print(f"Matching games after filtering: {matching_games}") if matching_games: location = cells[2].text.strip() distribution_period = cells[5].text.strip() for game in matching_games: if game not in game_locations: game_locations[game] = [] game_locations[game].append({ "location": f"Event: {location}", "tag": str(cells[2]) }) def process_game_locations(raw_game, raw_locations, form, default_forms): locations = [] for raw_location in raw_locations: raw_text = raw_location forms = parse_form_information(raw_location) if form is None: if len(forms) > 0: for form_info in forms: main_form = form_info["main_form"] if default_forms and main_form and main_form in default_forms: main_form = None if main_form and (main_form != "All Forms" and main_form != "Kantonian Form" and main_form != "All Sizes"): continue locations.append({"location": raw_text, "tag": raw_location}) else: locations.append({"location": raw_text, "tag": raw_location}) elif len(forms) > 0: for form_info in forms: if form_matches(form_info, form, default_forms): locations.append({"location": raw_text, "tag": raw_location}) else: form_info = {"main_form": None, "sub_form": None, "region": None} if form_matches(form_info, form, default_forms): locations.append({"location": raw_text, "tag": raw_location}) return locations if locations else None def form_matches(form_info, form, default_forms): main_form = form_info["main_form"] sub_form = form_info["sub_form"] try: region = form_info['region'] if 'region' in form_info else None except KeyError: region = None if default_forms and main_form and main_form in default_forms: main_form = None if form.lower() in ["spring form", "summer form", "autumn form", "winter form"] and main_form == None: return True if main_form is None: return False if main_form in ["All Forms", "All Sizes"]: return True if region == None and main_form in ["Kantonian Form"]: return True main_form_match = compare_forms(form, main_form) or fuzz.partial_ratio(form.lower(), main_form.lower()) >= 95 sub_form_match = compare_forms(form, sub_form) or (sub_form and fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 95) if not main_form_match and not sub_form_match and region: region_match = compare_forms(form, region) or fuzz.partial_ratio(form.lower(), region.lower()) >= 95 return region_match return main_form_match or sub_form_match def record_location_info(raw_game, game_locations, raw_location, raw_text): if raw_game not in game_locations: game_locations[raw_game] = [] info = {} info["location"] = raw_text info["tag"] = str(raw_location) game_locations[raw_game].append(info) def split_outside_brackets(str): return re.split(r',(?![^()]*\))', str) def handle_unown(pokemon, encounter_data): if not pokemon.name == "Unown": return one_form_unown = find_pokemon(pokemon.name, None) if not one_form_unown: return # The ! and ? forms were added in HeartGold and SoulSilver. if (pokemon.form == "!" or pokemon.form == "?") and encounter_data: for encounter in encounter_data: encounter_information = EncounterInformation(encounter, encounter_data[encounter]) pokemon.encounter_information.append(encounter_information) found_heartgold = False found_soulsilver = False for game in all_games: if game == "HeartGold": found_heartgold = True continue elif game == "SoulSilver": found_soulsilver = True continue if not found_heartgold or not found_soulsilver: continue for encounter in one_form_unown.encounter_information: if game == encounter.game: pokemon.encounter_information.append(encounter) break else: pokemon.encounter_information = one_form_unown.encounter_information list_of_shifting_form_pokemon = [ "Deoxys", "Burmy", "Wormadam", "Rotom", "Shaymin", "Keldeo", "Furfrou", "Hoopa", "Pumpkaboo", "Gourgeist", "Zygarde", "Magearna", "Vivillon", "Minior", "Urshifu", "Oinkologne", "Basculegion", "Enamorus", ] def handle_form_shift(pokemon, encounter_data): if not pokemon.name in list_of_shifting_form_pokemon: return if pokemon.form is None: return normal_form_pokemon = find_pokemon(pokemon.name, None) if not normal_form_pokemon: return pokemon.encounter_information = normal_form_pokemon.encounter_information phony_authentic = ["Sinistea", "Polteageist"] countefieit_atrisan = ["Poltchageist"] unremarkable_masterpiece = ["Sinistcha"] bad_tea_pokemon = phony_authentic + countefieit_atrisan + unremarkable_masterpiece def get_bad_tea_form(pokemon): if not pokemon.name in bad_tea_pokemon: return if pokemon.name in phony_authentic: if pokemon.form == None: return "Phony Form" if pokemon.form == "Authentic Form": return None if pokemon.name in countefieit_atrisan: if pokemon.form == None: return "Counterfeit Form" if pokemon.form == "Artisan Form": return None if pokemon.name in unremarkable_masterpiece: if pokemon.form == None: return "Unremarkable Form" else: return pokemon.form def determine_earliest_games(cache: CacheManager): for pokemon in big_pokemon_list: print(f"Processing {pokemon}") form_to_find = pokemon.form if pokemon.name == "Minior" and pokemon.form == "Orange Core": form_to_find = None if pokemon.name == "Squawkabilly" and pokemon.form: form_to_find = pokemon.form.replace("Plumage", "").strip() if pokemon.name == "Alcremie": form_to_find = None if pokemon.name in bad_tea_pokemon: form_to_find = get_bad_tea_form(pokemon) pokemon.introduced_in_gen = get_intro_generation(pokemon.name, form_to_find, cache) encounter_data = get_locations_from_bulbapedia(pokemon.name, form_to_find, cache) for encounter in encounter_data: encounter_information = EncounterInformation(encounter, encounter_data[encounter]) pokemon.encounter_information.append(encounter_information) handle_unown(pokemon, encounter_data) handle_form_shift(pokemon, encounter_data) if pokemon.name == "Gimmighoul" and pokemon.form == "Roaming Form": encounter_information = EncounterInformation("Pokémon Go", ["Pokémon Go"]) pokemon.encounter_information.append(encounter_information) pokemon.determine_earliest_game() print(f"Processed {pokemon}: {pokemon.earliest_game.game} ({pokemon.earliest_game.method})") def get_base_form(evolution_chain:List[EvolutionStage]): if not evolution_chain: return None for stage in evolution_chain: if stage.stage == "Unevolved": return stage.pokemon if stage.is_baby: return stage.next_stage.pokemon return None def adjust_for_evolution(cache: CacheManager): for pokemon in big_pokemon_list: evolution_chain = get_evolution_data_from_bulbapedia(pokemon.name, pokemon.form, cache) pokemon.add_evolution_chain(evolution_chain) game, method = pokemon.get_earliest_game_and_method() print(f"Adjusted {pokemon}: {game} ({method})") def save_to_csv(filename='pokemon_earliest_games.csv'): with open(filename, 'w', newline='', encoding='utf-8') as csvfile: fieldnames = ['number', 'name', 'introduced_in_gen', 'earliest_game', 'obtain_method', 'encounter_locations'] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() for pokemon in big_pokemon_list: encounter_locations = [] for encounter in pokemon.encounter_information: if encounter.game == pokemon.earliest_game.game: encounter_locations.extend(encounter.locations) writer.writerow({ 'number': pokemon.number, 'name': f"{pokemon.name} ({pokemon.form})", 'introduced_in_gen': pokemon.introduced_in_gen, 'earliest_game': pokemon.earliest_game.game, 'obtain_method': pokemon.earliest_game.method, 'encounter_locations': ' | '.join((str(item) for item in encounter_locations)) }) def handle_unknown_encounters(cache): for pokemon in big_pokemon_list: if pokemon.earliest_game == None or pokemon.earliest_game.method == None: print(f"Checking alternative sources for {pokemon.name}") # Update the main function if __name__ == "__main__": cache = CacheManager() pokemon_list = read_pokemon_list('pokemon_home_list.csv', limit=3000) create_pokemon_index(big_pokemon_list) determine_earliest_games(cache) adjust_for_evolution(cache) handle_unknown_encounters(cache) save_to_csv() cache.close() print(f"Earliest obtainable games and encounter locations determined for {len(pokemon_list)} Pokémon and saved to pokemon_earliest_games.csv")