"""Background worker that gathers Pokémon encounter data from Bulbapedia pages.

For every Pokémon form returned by the database, the worker downloads (through
the project cache) the Pokémon's Bulbapedia page, parses the game-location and
event tables, and turns every location entry into an encounter record. The
collected records are emitted through ``GatherEncountersWorkerSignals.finished``.
"""

import re

from PyQt6.QtCore import QObject, pyqtSignal, QRunnable
from bs4 import BeautifulSoup, NavigableString
from pattern.en import singularize
from fuzzywuzzy import fuzz

from cache import cache
from db import db

from utility.data import default_forms, regional_descriptors, days, times, rods
from utility.functions import is_mainline_game, compare_pokemon_forms, find_match_in_string_array, extract_bracketed_text
from utility.pokemon_word_ninja import PokemonWordNinja


class GatherEncountersWorkerSignals(QObject):
    """Signals emitted by :class:`GatherEncountersWorker`."""

    # Emitted with the list of encounter dicts once gathering has completed.
    finished = pyqtSignal(list)


class GatherEncountersWorker(QRunnable):
    """Runnable that scrapes encounter locations for every known Pokémon form."""

    def __init__(self):
        super().__init__()
        self.signals = GatherEncountersWorkerSignals()
        # Set copy of ``default_forms`` for O(1) membership tests.
        self.default_forms_set = set(default_forms)
        self.splitter = PokemonWordNinja()
        # Lower-case fragments; any free-text location containing one of them
        # is skipped instead of being stored as an encounter.
        self.encounters_to_ignore = [
            "trade",
            "time capsule",
            "unobtainable",
            "tradeversion",
            "poké transfer",
            "friend safari",
            "unavailable",
            "pokémon home",
            "union circle",
            "pokémon bank",
            "pal park",
            "transfer from dream radar",
            "global link event",
            "pokémon channel",
            "pokémon colosseum bonus disc"
        ]
        # Accumulates every encounter dict produced by the save_* methods.
        self.encounters = []

    def run(self):
        """Gather all encounters and emit them through ``signals.finished``.

        NOTE(review): when gathering raises, the error is only printed and
        ``finished`` is never emitted — confirm the receiver copes with that.
        """
        try:
            gathered_data = self.gather_encounter_data()
            self.signals.finished.emit(gathered_data)
        except Exception as e:
            print(f"Error gathering Pokémon forms: {e}")

    # ------------------------------------------------------------------
    # Top-level gathering
    # ------------------------------------------------------------------

    def gather_encounter_data(self):
        """Collect encounters for every Pokémon form known to the database.

        Returns:
            ``self.encounters`` — the list the ``save_*`` methods append to.
        """
        all_pokemon_forms = db.get_list_of_pokemon_forms()

        for form_entry in all_pokemon_forms:
            pfic = form_entry["pfic"]
            name, form = self._normalize_name_and_form(form_entry["name"], form_entry["form_name"])

            print(f'Processing {name}')
            self.splitter.add_custom_word(name)

            encounter_data = self.get_locations_from_bulbapedia(name, form)
            if encounter_data is None:
                continue

            for game, locations in encounter_data.items():
                # An empty list for one game must not hide the remaining games
                # (the previous ``break`` skipped all of them).
                if not locations:
                    continue

                for location in locations:
                    if not location:
                        continue
                    self._record_location(pfic, game, location, form_entry["form_name"])

        return self.encounters

    def _normalize_name_and_form(self, name, form):
        """Return ``(name, form)`` adjusted to the labels used when searching the page.

        The Pokémon name and gender prefixes are removed from the form name,
        default forms collapse to ``None`` and a few species get fixed
        overrides.
        """
        if form and name in form:
            form = form.replace(name, "").strip()

        if form and form.startswith("Female"):
            form = form.replace("Female", "").strip()

        if form and form.startswith("Male"):
            form = form.replace("Male", "").strip()

        if form and form in self.default_forms_set:
            form = None

        # Only the "!" and "?" Unown are searched as distinct forms.
        if name == "Unown" and form not in ("!", "?"):
            form = None

        if name == "Tauros" and form == "Combat Breed":
            form = "Paldean Form"

        if name in ("Alcremie", "Minior"):
            form = None

        if name.lower() == "ho-oh":
            name = "Ho-Oh"

        if form == "":
            form = None

        return name, form

    def _record_location(self, pfic, game, location, form_name):
        """Classify one location entry (evolve / event / regular) and save it."""
        location_html = location["location"].strip().lower()
        location_text = BeautifulSoup(location_html, 'html.parser').get_text().lower()

        if "evolve" in location_text:
            remaining, details = self.extract_additional_information(location["tag"])
            evolve_info = self.extract_evolve_information(remaining, form_name)
            if evolve_info:
                self.save_evolve_encounter(pfic, game, details["days"], details["times"], evolve_info["evolve_from"])
            return

        if "event" in location_text:
            self.save_event_encounter(pfic, game)
            return

        remaining, details = self.extract_additional_information(location["tag"])
        routes, remaining = self.extract_routes(remaining)

        for route in routes:
            self._save_with_details(pfic, game, f"Route {route}", details)

        if remaining == "":
            return

        for remaining_location in remaining.replace(" and ", ",").split(","):
            candidate = remaining_location.strip()
            if candidate == "":
                continue

            lowered = remaining_location.lower()
            if any(ignore in lowered for ignore in self.encounters_to_ignore):
                continue

            self._save_with_details(pfic, game, candidate, details)

    def _save_with_details(self, pfic, game, location, details):
        """Call :meth:`save_encounter` with the fields of a ``details`` dict."""
        self.save_encounter(
            pfic,
            game,
            location,
            details["days"],
            details["times"],
            details["dual_slot"],
            details["static_encounter"],
            details["static_encounter_count"],
            details["extra_text"],
            details["stars"],
            details["Rods"],
            details["Fishing"],
            details["starter"]
        )

    # ------------------------------------------------------------------
    # Page scraping
    # ------------------------------------------------------------------

    def get_locations_from_bulbapedia(self, pokemon_name, form, force_refresh = False):
        """Return ``{game: [location dict, ...]}`` for one Pokémon form.

        Args:
            pokemon_name: Name used to build the Bulbapedia page URL.
            form: Form label to filter locations by, or ``None`` for the
                default form.
            force_refresh: When true, the cached parsed result is purged first.

        Returns:
            The mapping of game name to location dicts (``"location"`` and
            ``"tag"`` keys), or ``None`` when the page or its locations table
            cannot be obtained.
        """
        url = f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pokémon)"
        cache_key = f'locations_{url}_data_{form}'

        if force_refresh:
            cache.purge(cache_key)

        # Consult the parsed-result cache before fetching the page at all.
        cached_entry = cache.get(cache_key)
        if cached_entry is not None:
            return cached_entry

        page_data = cache.fetch_url(url)
        if not page_data:
            return None

        soup = BeautifulSoup(page_data, 'html.parser')

        locations_table = self._find_locations_table(soup)
        if not locations_table:
            print(f"Warning: Couldn't find locations table for {pokemon_name}")
            return None

        raw_game_locations = self._collect_raw_game_locations(locations_table)

        # Process events
        events_section = soup.find('span', id='In_events')
        event_tables = self.process_event_tables(events_section) if events_section else {}

        game_locations = {}
        for raw_game, raw_locations in raw_game_locations.items():
            encounters = self.process_game_locations(raw_game, raw_locations, form)
            if encounters:
                game_locations[raw_game] = encounters

        # Process event tables
        for variant in event_tables:
            if (variant == pokemon_name and form is None) or (form and form in variant):
                self.process_event_table(event_tables[variant], game_locations)

        cache.set(cache_key, game_locations)
        return game_locations

    @staticmethod
    def _find_locations_table(soup):
        """Return the first 'roundy' table following a known locations heading, or ``None``."""
        possible_headers = ['Game locations', 'In side games', 'In spin-off games']
        for header in possible_headers:
            span = soup.find('span', id=header.replace(' ', '_'))
            if span:
                locations_table = span.find_next('table', class_='roundy')
                if locations_table:
                    return locations_table
        return None

    @staticmethod
    def _nested_rows(row):
        """Return the direct ``<tr>`` rows of the table nested in *row*'s first direct ``<td>``.

        An empty list is returned when any level (td / table / tbody) is missing.
        """
        body = GatherEncountersWorker._nested_tbody(row)
        return body.find_all('tr', recursive=False) if body else []

    @staticmethod
    def _nested_tbody(row):
        """Return the ``<tbody>`` of the table nested in *row*'s first direct ``<td>``, or ``None``."""
        cell = row.find('td', recursive=False)
        if not cell:
            return None
        table = cell.find('table', recursive=False)
        if not table:
            return None
        return table.find('tbody', recursive=False)

    def _collect_raw_game_locations(self, locations_table):
        """Walk the nested generation/game tables and return ``{game: [html group, ...]}``."""
        raw_game_locations = {}

        generation_tbody = locations_table.find('tbody', recursive=False)
        if not generation_tbody:
            return raw_game_locations

        for generation_row in generation_tbody.find_all('tr', recursive=False):
            for nested_row in self._nested_rows(generation_row):
                # Rows carrying a "Generation ..." label are headers, not data.
                if 'Generation' in nested_row.get_text(strip=True):
                    continue

                for games_row in self._nested_rows(nested_row):
                    locations_tbody = self._nested_tbody(games_row)
                    if not locations_tbody:
                        continue

                    # The location groups are shared by every game header in the row.
                    groups = [
                        group
                        for cell in locations_tbody.find_all('td')
                        for group in self.split_td_contents(cell)
                    ]
                    if not groups:
                        continue

                    for game in games_row.find_all('th'):
                        raw_game = game.get_text(strip=True)
                        if is_mainline_game(raw_game) is None:
                            continue
                        raw_game_locations.setdefault(raw_game, []).extend(groups)

        return raw_game_locations

    def split_td_contents(self, td):
        """Split the children of a ``<td>`` into HTML strings separated by ``<br>`` tags.

        Whitespace-only text nodes are dropped; every other node is kept and
        serialised with ``str()``.
        """
        groups = []
        current_group = []
        for content in td.contents:
            if isinstance(content, NavigableString):
                if content.strip():
                    current_group.append(content)
            elif content.name == 'br':
                if current_group:
                    groups.append(''.join(str(item) for item in current_group))
                    current_group = []
            else:
                current_group.append(content)
        if current_group:
            groups.append(''.join(str(item) for item in current_group))
        return groups

    def process_game_locations(self, raw_game, raw_locations, form):
        """Filter the raw location groups of one game down to those matching *form*.

        Args:
            raw_game: Game name (currently unused by the filtering itself).
            raw_locations: HTML strings produced by :meth:`split_td_contents`.
            form: Form label to match, or ``None`` for the default form.

        Returns:
            A list of ``{"location": html, "tag": html}`` dicts, or ``None``
            when nothing matched.
        """
        locations = []
        # Form labels that apply to the default form as well.
        generic_forms = ("All Forms", "Kantonian Form", "All Sizes")

        for raw_location in raw_locations:
            entry = {"location": raw_location, "tag": raw_location}
            forms = self.parse_form_information(raw_location)

            if form is None:
                if not forms:
                    locations.append(entry)
                    continue

                for form_info in forms:
                    main_form = form_info["main_form"]
                    if default_forms and main_form and main_form in self.default_forms_set:
                        main_form = None
                    if main_form and main_form not in generic_forms:
                        continue
                    locations.append({"location": raw_location, "tag": raw_location})
            elif forms:
                for form_info in forms:
                    if self.form_matches(form_info, form, default_forms):
                        locations.append({"location": raw_location, "tag": raw_location})
            else:
                form_info = {"main_form": None, "sub_form": None, "region": None}
                if self.form_matches(form_info, form, default_forms):
                    locations.append(entry)

        return locations if locations else None

    def process_event_tables(self, events_section):
        """Map each ``<h5>`` variant heading of the events section to its 'roundy' table.

        Siblings following the parent of *events_section* are scanned until
        the next ``<h3>``.
        """
        event_tables = {}
        if events_section:
            next_element = events_section.parent.find_next_sibling()
            while next_element and next_element.name != 'h3':
                if next_element.name == 'h5':
                    variant = next_element.text.strip()
                    table = next_element.find_next_sibling('table', class_='roundy')
                    if table:
                        event_tables[variant] = table
                next_element = next_element.find_next_sibling()
        return event_tables

    # ------------------------------------------------------------------
    # Form parsing / matching
    # ------------------------------------------------------------------

    @staticmethod
    def _with_region(info):
        """Add a ``"region"`` key to *info* when its main form contains a regional descriptor."""
        lowered = info["main_form"].lower()
        for region in regional_descriptors:
            if region in lowered:
                info["region"] = region
                break
        return info

    def parse_form_information(self, html_content):
        """Extract the form labels attached to one location entry.

        Returns:
            A list of dicts with ``"main_form"`` and ``"sub_form"`` keys and,
            when a regional descriptor is found, a ``"region"`` key.
        """
        soup = BeautifulSoup(html_content, 'html.parser')

        #TODO: This wont work for lines that have several small blocks in one line.
        #TODO: Adjust this to handle more than one small block, see Basculin for example
        small_tag = soup.find('small')

        forms = []
        # Form info is in bold inside a small tag.
        if small_tag:
            for bold_tag in small_tag.find_all('b'):
                # Remove parentheses
                form_text = bold_tag.get_text(strip=True).strip('()')

                if "/" in form_text:
                    # "A/B Word" lists several forms sharing the trailing word.
                    last_word = form_text.split()[-1]
                    form_text = form_text.replace(last_word, "").strip()
                    for part in form_text.split('/'):
                        main_form = part.strip() + " " + singularize(last_word)
                        forms.append({"main_form": main_form, "sub_form": None})
                    continue

                # Split the text into main form and breed (if present)
                parts = form_text.split('(')
                main_form = parts[0].strip()

                # "Factor"s are not actual forms, they are properties of the pokemon you can encounter.
                if main_form and "factor" in main_form.lower():
                    continue

                breed = parts[1].strip(')') if len(parts) > 1 else None
                forms.append(self._with_region({"main_form": main_form, "sub_form": breed}))
        else:
            # No <small> block: fall back to bold headings mentioning "form"
            # (needed for pages such as Gimmighoul).
            for heading in soup.find_all('b'):
                if heading.parent.name == 'sup':
                    continue
                main_form = heading.get_text(strip=True)
                if "form" not in main_form.lower():
                    continue
                forms.append(self._with_region({"main_form": main_form, "sub_form": None}))

        return forms

    def form_matches(self, form_info, form, default_forms):
        """Return whether a parsed form description applies to *form*.

        Args:
            form_info: Dict with ``"main_form"``, ``"sub_form"`` and optionally
                ``"region"``.
            form: Non-empty form label being searched for.
            default_forms: Collection of labels treated as "no specific form".
        """
        main_form = form_info["main_form"]
        sub_form = form_info["sub_form"]
        region = form_info.get("region")

        if default_forms and main_form and main_form in default_forms:
            main_form = None

        form_lower = form.lower()

        if main_form is None:
            # Seasonal forms share the locations of entries without a form label.
            return form_lower in ["spring form", "summer form", "autumn form", "winter form"]

        if main_form in ["All Forms", "All Sizes"]:
            return True

        if region is None and main_form in ["Kantonian Form"]:
            return True

        main_form_match = compare_pokemon_forms(form, main_form) or fuzz.partial_ratio(form_lower, main_form.lower()) >= 95
        sub_form_match = compare_pokemon_forms(form, sub_form) or (sub_form and fuzz.partial_ratio(form_lower, sub_form.lower()) >= 95)

        if not main_form_match and not sub_form_match and region:
            return bool(compare_pokemon_forms(form, region) or fuzz.partial_ratio(form_lower, region.lower()) >= 95)

        return bool(main_form_match or sub_form_match)

    # ------------------------------------------------------------------
    # Text extraction
    # ------------------------------------------------------------------

    def extract_routes(self, s):
        """Pull the first "Route(s) N, M and K" list out of *s*.

        Returns:
            ``(numbers, remaining)`` where ``numbers`` is the list of route
            numbers as strings and ``remaining`` is *s* without the matched
            part. ``([], s)`` when no route list is present.
        """
        # Find all route numbers, including those after "and" or separated by commas
        route_pattern = r'Routes?\s+((?:\d+(?:,?\s+(?:and\s+)?)?)+)'
        route_match = re.search(route_pattern, s, re.IGNORECASE)
        if not route_match:
            return [], s

        # Extract all numbers from the matched group
        numbers = re.findall(r'\d+', route_match.group(1))
        # Remove the extracted part from the original string
        remaining = s[:route_match.start()] + s[route_match.end():].lstrip(', ')
        return numbers, remaining

    @staticmethod
    def _remove_ignoring_case(source, fragment):
        """Return *source* with every case-insensitive occurrence of the literal *fragment* removed."""
        return re.sub(re.escape(fragment), '', source, flags=re.IGNORECASE)

    def extract_additional_information(self, s):
        """Separate annotations (days, times, rods, stars, ...) from a location entry.

        Args:
            s: HTML string of one location entry, or ``None``.

        Returns:
            ``(text, details)`` where ``text`` is the entry's plain text with
            the recognised superscript and bracketed annotations removed, and
            ``details`` is a dict with the keys ``days``, ``times``,
            ``dual_slot``, ``static_encounter_count``, ``static_encounter``,
            ``starter``, ``extra_text``, ``stars``, ``Fishing`` and ``Rods``.
        """
        details = {
            "days": [],
            "times": [],
            "dual_slot": None,
            "static_encounter_count": 0,
            "static_encounter": False,
            "starter": False,
            "extra_text": [],
            "stars": [],
            "Fishing": False,
            "Rods": [],
        }

        if s is None:
            return "", details

        soup = BeautifulSoup(s, 'html.parser')
        full_text = soup.get_text()
        # Fragments to strip from ``full_text`` once all details are extracted.
        sup_text = []

        if "first partner" in full_text.lower():
            details["starter"] = True

        for sup_tag in soup.find_all('sup'):
            text = sup_tag.get_text(strip=True)

            if find_match_in_string_array(text, days):
                details["days"].append(text)
                sup_text.append(text)

            if find_match_in_string_array(text, times):
                details["times"].append(text)
                sup_text.append(text)

        bracket_text = extract_bracketed_text(full_text)

        for text in bracket_text:
            # ``text`` is whittled down step by step; whatever survives every
            # recognised annotation is kept as extra text. Detection always
            # looks at the complete lower-cased bracket content.
            text = text.strip()
            text_lower = text.lower()

            game = is_mainline_game(text_lower)
            if game is not None:
                details["dual_slot"] = game["Name"]
                text = self._remove_ignoring_case(text, game["Name"])

            match = find_match_in_string_array(text_lower, days)
            if match:
                details["days"].append(match)
                text = self._remove_ignoring_case(text, match)

            match = find_match_in_string_array(text_lower, times)
            if match:
                details["times"].append(match)
                text = self._remove_ignoring_case(text, match)

            if "only one" in text_lower:
                details["static_encounter_count"] = 1
                details["static_encounter"] = True
                text = self._remove_ignoring_case(text, 'only one')
            elif "only two" in text_lower:
                details["static_encounter_count"] = 2
                details["static_encounter"] = True
                text = self._remove_ignoring_case(text, 'only two')

            if "rod" in text_lower:
                match = find_match_in_string_array(text_lower, rods)
                if match:
                    details["Fishing"] = True
                    details["Rods"].append(match)
                    text = self._remove_ignoring_case(text, match)

            if "★" in text:
                for part in re.findall(r'\d★,*', text):
                    details["stars"].append(part.replace(',', '').strip())
                text = re.sub(r'\d★,*', '', text)

            leftover = text.strip()
            if leftover != "":
                details["extra_text"].append(leftover)
                sup_text.append(leftover)

        # Remove the complete bracket contents first: stripping a left-over
        # fragment beforehand could stop the full bracket text from matching.
        if bracket_text:
            for text in bracket_text:
                full_text = full_text.replace(text, "")
            full_text = full_text.replace('(', "").replace(')', "")

        for text in sup_text:
            full_text = full_text.replace(text, "")

        return full_text.strip(), details

    @staticmethod
    def _set_evolve_from(details, results, wanted_form):
        """Store the pfic of the first result whose form matches; return whether one did."""
        for result in results:
            if compare_pokemon_forms(result["form_name"], wanted_form):
                details["evolve_from"] = result["pfic"]
                return True
        return False

    def extract_evolve_information(self, s: str, search_form):
        """Resolve the Pokémon named in an "Evolve <name>" entry to a pfic.

        Args:
            s: Plain text of the location entry.
            search_form: Form name of the Pokémon whose page is being parsed.

        Returns:
            ``{"evolve_from": pfic}`` when a source Pokémon is found,
            otherwise an empty dict.
        """
        details = {}
        if not s:
            return details

        # Whitespace splitting tolerates the leading blank left behind when
        # "Evolve" is followed by a space.
        parts = s.replace("Evolve", "").split()
        if not parts:
            return details

        target_pokemon = parts[0]

        form = None
        if "♀" in target_pokemon:
            target_pokemon = target_pokemon.replace("♀", "").strip()
            form = "Female"

        if "♂" in target_pokemon:
            target_pokemon = target_pokemon.replace("♂", "").strip()
            form = "Male"

        results = db.get_pokemon_details_by_name(target_pokemon)
        if not results:
            return details

        # 1) form taken from the gender symbol (or no form at all)
        if self._set_evolve_from(details, results, form):
            return details

        # 2) the form of the Pokémon being processed
        if self._set_evolve_from(details, results, search_form if search_form != form else None):
            return details

        # 3) gender implied by the form of the Pokémon being processed
        if search_form:
            lowered = search_form.lower()
            if "female" in lowered:
                form = "Female"
            elif "male" in lowered:
                form = "Male"

            if form:
                self._set_evolve_from(details, results, form)

        return details

    # ------------------------------------------------------------------
    # Saving
    # ------------------------------------------------------------------

    def _append_per_day_or_time(self, encounter, days, times):
        """Append one independent copy of *encounter* per day, else per time, else once.

        Each appended record gets its own ``"data"`` dict; appending the same
        dict repeatedly would leave every record showing the last day/time.
        """
        if days:
            variants = [(day, None) for day in days]
        elif times:
            variants = [(None, time) for time in times]
        else:
            variants = [(None, None)]

        for day, time in variants:
            data = {**encounter["data"], "day": day, "time": time}
            self.encounters.append({**encounter, "data": data})

    def save_evolve_encounter(self, pfic, game, days, times, from_pfic):
        """Record that *pfic* is obtained in *game* by evolving *from_pfic*."""
        game_id = db.get_game_id_by_name(game)

        encounter = {
            "pfic": pfic,
            "game_id": game_id,
            "type": "evolve",
            "data": {
                "day": None,
                "time": None,
                "from_pfic": from_pfic,
            }
        }

        self._append_per_day_or_time(encounter, days, times)

    def save_event_encounter(self, pfic, game):
        """Record that *pfic* is obtained in *game* through an event."""
        game_id = db.get_game_id_by_name(game)

        encounter = {
            "pfic": pfic,
            "game_id": game_id,
            "type": "event"
        }

        self.encounters.append(encounter)

    def save_encounter(self, pfic, game, location, days, times, dual_slot, static_encounter, static_encounter_count, extra_text, stars, rods, fishing, starter):
        """Record a location encounter, one record per day (or per time) when given.

        The encounter type is ``"static"`` when *static_encounter* is set,
        else ``"starter"`` when *starter* is set, else ``"random"``.
        """
        game_id = db.get_game_id_by_name(game)

        extra_text_str = ' '.join(extra_text) if extra_text else None
        stars_str = ','.join(sorted(stars)) if stars else None
        rods_str = ','.join(sorted(rods)) if rods else None

        encounter_type = "random"
        if starter:
            encounter_type = "starter"
        if static_encounter:
            encounter_type = "static"

        encounter = {
            "pfic": pfic,
            "game_id": game_id,
            "type": encounter_type,
            "data": {
                "location": location,
                "day": None,
                "time": None,
                "dual_slot": dual_slot,
                "extra_text": extra_text_str,
                "stars": stars_str,
                "rods": rods_str,
                "fishing": fishing
            }
        }

        if static_encounter:
            encounter["data"]["static_encounter_count"] = static_encounter_count

        self._append_per_day_or_time(encounter, days, times)

    def process_event_table(self, table, game_locations):
        """Add the event distributions listed in *table* to *game_locations* in place.

        Rows with fewer than six cells are ignored. Every mainline game linked
        in the first cell receives an ``"Event: <location>"`` entry built from
        the third cell.
        """
        for row in table.find_all('tr')[1:]:  # Skip header row
            cells = row.find_all('td')
            if len(cells) < 6:  # Ensure all required columns are present
                continue

            # Extract game names as a list
            individual_games = []
            for link in cells[0].find_all('a'):
                title = link.get('title')
                if not title:
                    continue
                # Replace specific known prefixes
                game_name = title.replace("Pokémon ", "").replace("Versions", "").replace(" Version", "").replace(" (Japanese)", "")
                # Split on " and ", which is used for combined games; strip the
                # blanks the removals above can leave behind.
                individual_games.extend(part.strip() for part in game_name.split(" and "))

            # Print extracted game names for debugging
            print(f"Extracted game names from row: {individual_games}")

            # Keep only the mainline games
            matching_games = [game for game in individual_games if is_mainline_game(game)]

            # Print matching games for debugging
            print(f"Matching games after filtering: {matching_games}")

            if not matching_games:
                continue

            location = cells[2].text.strip()
            for game in matching_games:
                game_locations.setdefault(game, []).append({
                    "location": f"Event: {location}",
                    "tag": str(cells[2])
                })