You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

721 lines
28 KiB

from PyQt6.QtCore import QObject, pyqtSignal, QRunnable
from bs4 import BeautifulSoup, NavigableString
from pattern.en import singularize
from fuzzywuzzy import fuzz
import re
from cache import cache
from db import db
from utility.data import default_forms, regional_descriptors, days, times, rods
from utility.functions import is_mainline_game, compare_pokemon_forms, find_match_in_string_array, extract_bracketed_text
from utility.pokemon_word_ninja import PokemonWordNinja
class GatherEncountersWorkerSignals(QObject):
    """Signals exposed by ``GatherEncountersWorker``.

    Holds a single signal, ``finished``, declared with a ``list`` payload.
    """

    # Declared with ``list`` as its only argument type.
    finished = pyqtSignal(list)
class GatherEncountersWorker(QRunnable):
def __init__(self):
    """Create the signal holder, lookup helpers and the result buffer."""
    super().__init__()
    self.signals = GatherEncountersWorkerSignals()

    # Set copy of the default form names, for membership tests.
    self.default_forms_set = set(default_forms)
    self.splitter = PokemonWordNinja()

    # Lower-case fragments; gather_encounter_data skips any leftover
    # location whose lower-cased text contains one of them.
    self.encounters_to_ignore = [
        "trade", "time capsule", "unobtainable", "tradeversion",
        "poké transfer", "friend safari", "unavailable", "pokémon home",
        "union circle", "pokémon bank", "pal park",
        "transfer from dream radar", "global link event",
        "pokémon channel", "pokémon colosseum bonus disc"
    ]

    # Every record produced by the save_* methods is appended here.
    self.encounters = []
def run(self):
    """Gather the encounter data and emit it through ``signals.finished``.

    Any exception raised while gathering or emitting is caught and reported
    with ``print``; in that case nothing further is emitted.
    """
    try:
        encounters = self.gather_encounter_data()
        self.signals.finished.emit(encounters)
    except Exception as error:
        print(f"Error gathering Pokémon forms: {error}")
def gather_encounter_data(self):
    """Collect encounter records for every Pokémon form in the database.

    For each entry returned by ``db.get_list_of_pokemon_forms()`` the form
    name is reduced to the label used for matching, the locations are looked
    up with ``get_locations_from_bulbapedia`` and each location is stored
    through ``save_evolve_encounter``, ``save_event_encounter`` or
    ``save_encounter`` depending on its text.

    Returns:
        list: ``self.encounters``, the list the ``save_*`` methods append to.
    """
    for form_entry in db.get_list_of_pokemon_forms():
        form = form_entry["form_name"]
        name = form_entry["name"]
        pfic = form_entry["pfic"]
        print(f'Processing {name}')
        self.splitter.add_custom_word(name)

        # Reduce the stored form name to the label used when matching.
        if form and name in form:
            form = form.replace(name, "").strip()
        if form and form.startswith("Female"):
            form = form.replace("Female", "").strip()
        if form and form.startswith("Male"):
            form = form.replace("Male", "").strip()
        if form and form in default_forms:
            form = None

        # Per-species special cases.
        if name == "Unown" and form not in ("!", "?"):
            form = None
        if name == "Tauros" and form == "Combat Breed":
            form = "Paldean Form"
        if name in ("Alcremie", "Minior"):
            form = None
        if name.lower() == "ho-oh":
            name = "Ho-Oh"
        if form == "":
            form = None

        encounter_data = self.get_locations_from_bulbapedia(name, form)
        if encounter_data is None:
            continue

        for encounter, locations in encounter_data.items():
            # A game with no locations contributes nothing on its own; it
            # must not stop the remaining games from being processed.
            for location in locations:
                if location == "":
                    continue

                test_location = location["location"].strip().lower()
                test_location_text = BeautifulSoup(test_location, 'html.parser').get_text().lower()

                if "evolve" in test_location_text:
                    remaining, details = self.extract_additional_information(location["tag"])
                    evolve_info = self.extract_evolve_information(remaining, form_entry["form_name"])
                    if evolve_info:
                        self.save_evolve_encounter(pfic, encounter, details["days"], details["times"], evolve_info["evolve_from"])
                elif "event" in test_location_text:
                    self.save_event_encounter(pfic, encounter)
                else:
                    remaining, details = self.extract_additional_information(location["tag"])
                    routes, remaining = self.extract_routes(remaining)

                    # Qualifiers shared by every record saved for this entry,
                    # in save_encounter's positional order.
                    shared_args = (
                        details["days"],
                        details["times"],
                        details["dual_slot"],
                        details["static_encounter"],
                        details["static_encounter_count"],
                        details["extra_text"],
                        details["stars"],
                        details["Rods"],
                        details["Fishing"],
                        details["starter"],
                    )

                    for route in routes:
                        self.save_encounter(pfic, encounter, f"Route {route}", *shared_args)

                    if remaining != "":
                        for remaining_location in remaining.replace(" and ", ",").split(","):
                            place = remaining_location.strip()
                            if place == "":
                                continue
                            lowered = remaining_location.lower()
                            if any(ignore in lowered for ignore in self.encounters_to_ignore):
                                continue
                            self.save_encounter(pfic, encounter, place, *shared_args)

    return self.encounters
def get_locations_from_bulbapedia(self, pokemon_name, form, force_refresh = False):
    """Return the per-game location entries for one Pokémon form.

    The Pokémon's Bulbapedia page is fetched through ``cache.fetch_url``. The
    nested locations table found under the first matching section heading is
    walked down to the individual location cells, the cells are filtered by
    ``form`` via ``process_game_locations``, and matching event tables are
    merged in with ``process_event_table``. The result is stored in the
    cache under a key built from the URL and the form.

    Args:
        pokemon_name: Name used to build the page URL.
        form: Form label to filter by, or ``None``.
        force_refresh: When true, the cached result for this page and form
            is purged before the cache is consulted.

    Returns:
        dict | None: Mapping of game name to a list of
        ``{"location": ..., "tag": ...}`` entries, or ``None`` when the page
        could not be fetched or no locations table was found.
    """
    url = f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pokémon)"
    page_data = cache.fetch_url(url)
    if not page_data:
        return None

    cache_key = f'locations_{url}_data_{form}'
    if force_refresh:
        cache.purge(cache_key)
    cached_entry = cache.get(cache_key)
    if cached_entry is not None:
        return cached_entry

    soup = BeautifulSoup(page_data, 'html.parser')
    if not soup:
        return None

    def table_body(table):
        # html.parser does not synthesise <tbody>; when the markup has none,
        # the rows are direct children of the table itself.
        return table.find('tbody', recursive=False) or table

    def direct_rows(table):
        return table_body(table).find_all('tr', recursive=False)

    def nested_table(row):
        # The table nested directly inside the row's first direct <td>.
        cell = row.find('td', recursive=False)
        if cell is None:
            return None
        return cell.find('table', recursive=False)

    # Use the first section heading that is followed by a 'roundy' table.
    locations_table = None
    possible_headers = ['Game locations', 'In side games', 'In spin-off games']
    for header in possible_headers:
        span = soup.find('span', id=header.replace(' ', '_'))
        if span:
            locations_table = span.find_next('table', class_='roundy')
            if locations_table:
                break
    if not locations_table:
        print(f"Warning: Couldn't find locations table for {pokemon_name}")
        return None

    raw_game_locations = {}
    for generation_row in direct_rows(locations_table):
        generation_table = nested_table(generation_row)
        if generation_table is None:
            continue
        for nested_row in direct_rows(generation_table):
            if 'Generation' in nested_row.get_text(strip=True):
                continue
            games_table = nested_table(nested_row)
            if games_table is None:
                continue
            for games_row in direct_rows(games_table):
                # Every <th> in the row names a game sharing the row's cells.
                for game in games_row.find_all('th'):
                    raw_game = game.get_text(strip=True)
                    if is_mainline_game(raw_game) is None:
                        continue
                    locations_container = nested_table(games_row)
                    if locations_container is None:
                        continue
                    for location in table_body(locations_container).find_all('td'):
                        for group in self.split_td_contents(location):
                            raw_game_locations.setdefault(raw_game, []).append(group)

    # Process events
    events_section = soup.find('span', id='In_events')
    event_tables = self.process_event_tables(events_section) if events_section else {}

    game_locations = {}
    for raw_game, raw_locations in raw_game_locations.items():
        encounters = self.process_game_locations(raw_game, raw_locations, form)
        if encounters and len(encounters) > 0:
            game_locations[raw_game] = encounters

    # Process event tables
    for variant in event_tables:
        if (variant == pokemon_name and form is None) or (form and form in variant):
            self.process_event_table(event_tables[variant], game_locations)

    cache.set(cache_key, game_locations)
    return game_locations
def split_td_contents(self, td):
    """Split a cell's children into HTML fragments at each ``<br>`` tag.

    Whitespace-only text nodes are dropped; every other child is kept and
    the children between two breaks are joined via ``str()``.

    Args:
        td: Element exposing its children through ``contents``.

    Returns:
        list[str]: One string per non-empty run of children.
    """
    fragments = []
    pending = []

    def flush():
        # Close the current run, if it holds anything.
        if pending:
            fragments.append(''.join(str(node) for node in pending))
            pending.clear()

    for node in td.contents:
        if isinstance(node, NavigableString):
            if node.strip():
                pending.append(node)
        elif node.name == 'br':
            flush()
        else:
            pending.append(node)

    flush()
    return fragments
def process_game_locations(self, raw_game, raw_locations, form):
    """Keep the raw location fragments that apply to ``form``.

    Each fragment is parsed with ``parse_form_information``. When ``form``
    is ``None`` a fragment is kept if it names no form, or once for every
    named form that is a default form or one of the catch-all labels.
    Otherwise it is kept once for every named form accepted by
    ``form_matches``; a fragment naming no form is tested as an entry with
    no form at all.

    Args:
        raw_game: Game the fragments belong to (not used in the body).
        raw_locations: Iterable of HTML fragment strings.
        form: Form label to filter by, or ``None``.

    Returns:
        list | None: ``{"location": ..., "tag": ...}`` dicts, or ``None``
        when nothing was kept.
    """
    catch_all_labels = ("All Forms", "Kantonian Form", "All Sizes")
    kept = []

    for raw_location in raw_locations:
        parsed_forms = self.parse_form_information(raw_location)

        if form is not None:
            if len(parsed_forms) > 0:
                candidates = parsed_forms
            else:
                candidates = [{"main_form": None, "sub_form": None, "region": None}]
            for form_info in candidates:
                if self.form_matches(form_info, form, default_forms):
                    kept.append({"location": raw_location, "tag": raw_location})
            continue

        if len(parsed_forms) == 0:
            kept.append({"location": raw_location, "tag": raw_location})
            continue

        for form_info in parsed_forms:
            main_form = form_info["main_form"]
            if default_forms and main_form and main_form in self.default_forms_set:
                main_form = None
            if main_form and main_form not in catch_all_labels:
                continue
            kept.append({"location": raw_location, "tag": raw_location})

    return kept if kept else None
def process_event_tables(self, events_section):
    """Map each ``h5`` heading after the events heading to its table.

    Starting from the sibling after ``events_section.parent``, siblings are
    visited until an ``h3`` (or the end) is reached. For each ``h5`` the
    next sibling table with class ``roundy`` is recorded under the heading's
    stripped text.

    Args:
        events_section: Element inside the events heading, or a falsy value.

    Returns:
        dict: Heading text -> table element; empty when nothing is found.
    """
    tables_by_variant = {}
    if not events_section:
        return tables_by_variant

    node = events_section.parent.find_next_sibling()
    while node and node.name != 'h3':
        if node.name == 'h5':
            variant_table = node.find_next_sibling('table', class_='roundy')
            if variant_table:
                tables_by_variant[node.text.strip()] = variant_table
        node = node.find_next_sibling()
    return tables_by_variant
def parse_form_information(self, html_content):
    """Extract the form labels mentioned in one location fragment.

    When the fragment has a ``<small>`` tag, each ``<b>`` inside the first
    one is read as a form label: a label containing ``/`` is expanded into
    one entry per alternative, sharing the singularised last word; any other
    label is split into a main form and an optional bracketed sub form.
    Without a ``<small>`` tag, every ``<b>`` whose text contains "form" and
    whose parent is not ``<sup>`` is used as a main form.

    Args:
        html_content: HTML fragment string.

    Returns:
        list[dict]: Entries with ``main_form`` and ``sub_form``; a ``region``
        key is present only when a regional descriptor occurs in the main
        form (never for ``/``-expanded entries).
    """
    def with_region(info):
        # Adds "region" only when a descriptor occurs in the main form name.
        for region in regional_descriptors:
            if region in info["main_form"].lower():
                info["region"] = region
                break
        return info

    soup = BeautifulSoup(html_content, 'html.parser')
    #TODO: This wont work for lines that have several small blocks in one line.
    #TODO: Adjust this to handle more than one small block, see Basculin for example
    small_tag = soup.find('small')
    forms = []

    if not small_tag:
        # No <small> block: fall back to bold headings (e.g. Gimmighoul).
        for heading in soup.find_all('b'):
            if heading.parent.name == 'sup':
                continue
            heading_text = heading.get_text(strip=True)
            if "form" not in heading_text.lower():
                continue
            forms.append(with_region({"main_form": heading_text, "sub_form": None}))
        return forms

    # Form info is in bold inside a small tag.
    for bold_tag in small_tag.find_all('b'):
        # Remove the surrounding parentheses.
        form_text = bold_tag.get_text(strip=True).strip('()')

        if "/" in form_text:
            last_word = form_text.split()[-1]
            alternatives = form_text.replace(last_word, "").strip().split('/')
            forms.extend(
                {
                    "main_form": alternative.strip() + " " + singularize(last_word),
                    "sub_form": None
                }
                for alternative in alternatives
            )
            continue

        # Split the text into main form and breed (if present).
        pieces = form_text.split('(')
        main_form = pieces[0].strip()
        # "Factor"s are properties of the Pokémon you can encounter,
        # not actual forms.
        if main_form and "factor" in main_form.lower():
            continue
        breed = pieces[1].strip(')') if len(pieces) > 1 else None
        forms.append(with_region({"main_form": main_form, "sub_form": breed}))

    return forms
def form_matches(self, form_info, form, default_forms):
    """Decide whether a parsed form entry applies to the wanted ``form``.

    Args:
        form_info: Dict with ``main_form`` and ``sub_form`` and, optionally,
            ``region``.
        form: Form label being searched for.
        default_forms: Collection of form names treated as "no form".

    Returns:
        ``True`` for seasonal forms against an entry with no main form, for
        the catch-all labels, and for "Kantonian Form" without a region;
        ``False`` when ``form`` is set but the entry has no main form;
        otherwise the outcome of comparing ``form`` with the main form, the
        sub form and, when neither matches and a region is present, the
        region.
    """
    main_form = form_info["main_form"]
    sub_form = form_info["sub_form"]
    region = form_info.get('region')

    if default_forms and main_form and main_form in default_forms:
        main_form = None

    is_seasonal = form.lower() in ("spring form", "summer form", "autumn form", "winter form")
    if is_seasonal and main_form is None:
        return True
    if form and main_form is None:
        return False
    if main_form in ("All Forms", "All Sizes"):
        return True
    if region is None and main_form == "Kantonian Form":
        return True

    main_form_match = (
        compare_pokemon_forms(form, main_form)
        or fuzz.partial_ratio(form.lower(), main_form.lower()) >= 95
    )
    sub_form_match = (
        compare_pokemon_forms(form, sub_form)
        or (sub_form and fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 95)
    )

    if region and not (main_form_match or sub_form_match):
        return (
            compare_pokemon_forms(form, region)
            or fuzz.partial_ratio(form.lower(), region.lower()) >= 95
        )
    return main_form_match or sub_form_match
def extract_routes(self, s):
    """Pull the first "Route(s) N, M and K" run out of ``s``.

    Args:
        s: Text to search (case-insensitive).

    Returns:
        tuple: ``(numbers, remaining)``. ``numbers`` is the list of digit
        strings in the matched run and ``remaining`` is ``s`` with that run
        removed and leading commas/spaces stripped from what followed it.
        When nothing matches the result is ``([], s)``.
    """
    # "Route"/"Routes", then numbers separated by commas, spaces or "and".
    found = re.search(r'Routes?\s?((?:\d+(?:,?\s+(?:and\s+)?)?)+)', s, re.IGNORECASE)
    if found is None:
        return [], s

    before = s[:found.start()]
    after = s[found.end():].lstrip(', ')
    return re.findall(r'\d+', found.group(1)), before + after
def extract_additional_information(self, s):
    """Separate a location fragment's qualifiers from its location text.

    ``s`` is parsed as HTML. Qualifiers are read from ``<sup>`` tags and
    from the passages returned by ``extract_bracketed_text``, recorded in a
    details dict, and then removed from the fragment's plain text.

    Args:
        s: HTML fragment for one location entry, or ``None``.

    Returns:
        tuple: ``(remaining_text, details)``. ``details`` always has the
        keys ``days``, ``times``, ``dual_slot``, ``static_encounter_count``,
        ``static_encounter``, ``starter``, ``extra_text``, ``stars``,
        ``Fishing`` and ``Rods``. ``remaining_text`` is ``""`` when ``s`` is
        ``None``; it is stripped only when bracketed text was found.
    """
    # Defaults for every key callers read.
    details = {}
    details["days"] = []
    details["times"] = []
    details["dual_slot"] = None
    details["static_encounter_count"] = 0
    details["static_encounter"] = False
    details["starter"] = False
    details["extra_text"] = []
    details["stars"] = []
    details["Fishing"] = False
    details["Rods"] = []
    if s is None:
        return "", details
    soup = BeautifulSoup(s, 'html.parser')
    full_text = soup.get_text()
    sup_tags = soup.find_all('sup')
    # Text pieces to strip from full_text once all qualifiers are read.
    sup_text = []
    if "first partner" in full_text.lower():
        details["starter"] = True
    # Superscripts: the whole superscript text is stored when it matches a
    # day or a time (it is stored under both if it matches both).
    for sup_tag in sup_tags:
        text = sup_tag.get_text(strip=True)
        if find_match_in_string_array(text, days):
            details["days"].append(text)
            sup_text.append(text)
        if find_match_in_string_array(text, times):
            details["times"].append(text)
            sup_text.append(text)
    bracket_text = extract_bracketed_text(full_text)
    for text in bracket_text:
        text = text.strip()
        text_lower = text.lower()
        # NOTE(review): every re.sub below starts again from text_lower, so
        # `text` only reflects the last substitution that ran - confirm
        # whether the removals were meant to accumulate.
        # NOTE(review): the matched values are used as regex patterns
        # without escaping - confirm none can contain regex metacharacters.
        game = is_mainline_game(text_lower)
        if game != None:
            details["dual_slot"] = game["Name"]
            text = re.sub(game["Name"], '', text_lower, flags=re.IGNORECASE)
        match = find_match_in_string_array(text_lower, days)
        if match:
            details["days"].append(match)
            text = re.sub(match, '', text_lower, flags=re.IGNORECASE)
        match = find_match_in_string_array(text_lower, times)
        if match:
            details["times"].append(match)
            text = re.sub(match, '', text_lower, flags=re.IGNORECASE)
        # "only one" / "only two" mark a static encounter and its count.
        if "only one" in text_lower:
            details["static_encounter_count"] = 1
            details["static_encounter"] = True
            text = re.sub(r'only one', '', text_lower, flags=re.IGNORECASE)
        elif "only two" in text_lower:
            details["static_encounter_count"] = 2
            details["static_encounter"] = True
            text = re.sub(r'only two', '', text_lower, flags=re.IGNORECASE)
        if "rod" in text_lower:
            match = find_match_in_string_array(text_lower, rods)
            if match:
                details["Fishing"] = True
                details["Rods"].append(match)
                text = re.sub(match, '', text_lower, flags=re.IGNORECASE)
        # NOTE(review): the literal here is empty, so this test is always
        # true; presumably a star glyph was intended - confirm.
        if "" in text:
            # Collect "<digit>★" ratings and drop them from the text.
            star_parts = re.findall(r'\d★,*', text)
            for part in star_parts:
                details["stars"].append(part.replace(',', '').strip())
            text = re.sub(r'\d★,*', '', text)
        # Whatever is left of the bracketed passage is kept as free text.
        if text.strip() != "":
            details["extra_text"].append(text.strip())
            sup_text.append(text.strip())
    # Remove the collected pieces first, then the bracketed passages.
    if len(sup_text) > 0:
        for text in sup_text:
            full_text = full_text.replace(text, "")
    if len(bracket_text) > 0:
        for text in bracket_text:
            full_text = full_text.replace(text, "")
        full_text = full_text.replace('(', "").replace(')', "")
        return full_text.strip(), details
    else:
        # No bracketed text: full_text is returned without stripping.
        return full_text, details
def extract_evolve_information(self, s: str, search_form):
    """Resolve which Pokémon an "Evolve ..." location entry evolves from.

    The word "Evolve" is removed from ``s`` and the first remaining token is
    taken as the source Pokémon's name. A ♀ or ♂ marker on that token is
    removed and read as the "Female" / "Male" form. The name is looked up
    with ``db.get_pokemon_details_by_name`` and the first result whose form
    matches is chosen, trying in order: the gender read from the token,
    ``search_form``, and a gender read from ``search_form`` itself.

    Args:
        s: Location text with the qualifiers already removed; may be
            ``None`` or empty.
        search_form: Form name of the Pokémon being processed, or ``None``.

    Returns:
        dict: ``{"evolve_from": pfic}`` when a source was found, otherwise
        an empty dict.
    """
    details = {}
    if s is None or s == "":
        return details

    # split() without an argument ignores leading/repeated whitespace, so a
    # space left behind by removing "Evolve" cannot yield an empty name.
    parts = s.replace("Evolve", "").split()
    if not parts:
        return details

    target_pokemon = parts[0]
    form = None
    # The gender marker is part of the token; strip it and remember the form.
    if "♀" in target_pokemon:
        target_pokemon = target_pokemon.replace("♀", "").strip()
        form = "Female"
    if "♂" in target_pokemon:
        target_pokemon = target_pokemon.replace("♂", "").strip()
        form = "Male"

    results = db.get_pokemon_details_by_name(target_pokemon)
    if not results:
        return details

    def assign_first_match(wanted_form):
        # Record the first result whose form matches; report whether one did.
        for result in results:
            if compare_pokemon_forms(result["form_name"], wanted_form):
                details["evolve_from"] = result["pfic"]
                return True
        return False

    if assign_first_match(form):
        return details
    if assign_first_match(search_form if search_form != form else None):
        return details

    if search_form:
        lowered = search_form.lower()
        # "female" is tested first because it also contains "male".
        if "female" in lowered:
            form = "Female"
        elif "male" in lowered:
            form = "Male"
        if form:
            assign_first_match(form)
    return details
def save_evolve_encounter(self, pfic, game, days, times, from_pfic):
    """Append one "evolve" encounter record per day or time.

    One record is appended per entry in ``days``; if ``days`` is empty, one
    per entry in ``times``; if both are empty, a single record with neither.
    Each record is a separate dict, so records for different days or times
    do not share state.

    Args:
        pfic: Identifier of the Pokémon form the encounter belongs to.
        game: Game name, resolved with ``db.get_game_id_by_name``.
        days: Day qualifiers (takes precedence over ``times``).
        times: Time qualifiers.
        from_pfic: Identifier of the form this Pokémon evolves from.
    """
    game_id = db.get_game_id_by_name(game)

    if days:
        schedule = [(day, None) for day in days]
    elif times:
        schedule = [(None, time_of_day) for time_of_day in times]
    else:
        schedule = [(None, None)]

    for slot_day, slot_time in schedule:
        # Build a fresh record each time; appending one shared dict would
        # leave every entry showing the last day/time.
        self.encounters.append({
            "pfic": pfic,
            "game_id": game_id,
            "type": "evolve",
            "data": {
                "day": slot_day,
                "time": slot_time,
                "from_pfic": from_pfic,
            }
        })
def save_event_encounter(self, pfic, game):
    """Append an "event" encounter record for ``pfic`` in ``game``.

    Args:
        pfic: Identifier of the Pokémon form the encounter belongs to.
        game: Game name, resolved with ``db.get_game_id_by_name``.
    """
    event_record = {
        "pfic": pfic,
        "game_id": db.get_game_id_by_name(game),
        "type": "event"
    }
    self.encounters.append(event_record)
def save_encounter(self, pfic, game, location, days, times, dual_slot, static_encounter, static_encounter_count, extra_text, stars, rods, fishing, starter):
    """Append one location encounter record per day or time.

    One record is appended per entry in ``days``; if ``days`` is empty, one
    per entry in ``times``; if both are empty, a single record with neither.
    Each record is a separate dict, so records for different days or times
    do not share state.

    Args:
        pfic: Identifier of the Pokémon form the encounter belongs to.
        game: Game name, resolved with ``db.get_game_id_by_name``.
        location: Location name stored in the record.
        days: Day qualifiers (takes precedence over ``times``).
        times: Time qualifiers.
        dual_slot: Value stored under ``dual_slot``.
        static_encounter: When true the record type is "static" and
            ``static_encounter_count`` is stored as well.
        static_encounter_count: Count stored for static encounters.
        extra_text: Strings joined with spaces, or empty for ``None``.
        stars: Strings sorted and joined with commas, or empty for ``None``.
        rods: Strings sorted and joined with commas, or empty for ``None``.
        fishing: Value stored under ``fishing``.
        starter: When true (and not static) the record type is "starter".
    """
    game_id = db.get_game_id_by_name(game)
    extra_text_str = ' '.join(extra_text) if extra_text else None
    stars_str = ','.join(sorted(stars)) if stars else None
    rods_str = ','.join(sorted(rods)) if rods else None

    # "static" wins over "starter", which wins over the default "random".
    encounter_type = "random"
    if starter:
        encounter_type = "starter"
    if static_encounter:
        encounter_type = "static"

    base_data = {
        "location": location,
        "day": None,
        "time": None,
        "dual_slot": dual_slot,
        "extra_text": extra_text_str,
        "stars": stars_str,
        "rods": rods_str,
        "fishing": fishing
    }
    if static_encounter:
        base_data["static_encounter_count"] = static_encounter_count

    if days:
        schedule = [(day, None) for day in days]
    elif times:
        schedule = [(None, time_of_day) for time_of_day in times]
    else:
        schedule = [(None, None)]

    for slot_day, slot_time in schedule:
        # Copy the data per record; appending one shared dict would leave
        # every entry showing the last day/time.
        self.encounters.append({
            "pfic": pfic,
            "game_id": game_id,
            "type": encounter_type,
            "data": {**base_data, "day": slot_day, "time": slot_time}
        })
def process_event_tables(self, events_section):
    """Map each ``h5`` heading after the events heading to its table.

    Starting from the sibling after ``events_section.parent``, siblings are
    visited until an ``h3`` (or the end) is reached. For each ``h5`` the
    next sibling table with class ``roundy`` is recorded under the heading's
    stripped text.

    NOTE(review): a method with this name and the same statements is already
    defined earlier in this class; this later definition is the one Python
    binds.

    Args:
        events_section: Element inside the events heading, or a falsy value.

    Returns:
        dict: Heading text -> table element; empty when nothing is found.
    """
    tables_by_variant = {}
    if not events_section:
        return tables_by_variant

    node = events_section.parent.find_next_sibling()
    while node and node.name != 'h3':
        if node.name == 'h5':
            variant_table = node.find_next_sibling('table', class_='roundy')
            if variant_table:
                tables_by_variant[node.text.strip()] = variant_table
        node = node.find_next_sibling()
    return tables_by_variant
def process_event_table(self, table, game_locations):
    """Add the event distributions listed in ``table`` to ``game_locations``.

    Every row after the header that has at least six cells is read: game
    names come from the ``title`` of the links in the first cell, and the
    third cell supplies the location. For each name accepted by
    ``is_mainline_game`` an ``"Event: <location>"`` entry is appended to
    that game's list.

    Args:
        table: Event table element.
        game_locations: Dict of game name -> list of location entries;
            modified in place.
    """
    for row in table.find_all('tr')[1:]:  # Skip header row
        cells = row.find_all('td')
        if len(cells) < 6:  # Ensure all required columns are present
            continue

        individual_games = []
        for link in cells[0].find_all('a'):
            title = link.get('title')
            if not title:
                # A link without a title carries no game name.
                continue
            # Replace specific known prefixes and suffixes.
            game_name = title.replace("Pokémon ", "").replace("Versions", "").replace(" Version", "").replace(" (Japanese)", "")
            # " and " joins combined games; strip each part so that e.g.
            # "Red and Blue Versions" yields "Blue" rather than "Blue ".
            for part in game_name.split(" and "):
                part = part.strip()
                if part:
                    individual_games.append(part)
        # Print extracted game names for debugging
        print(f"Extracted game names from row: {individual_games}")

        # Keep only the names recognised as mainline games.
        matching_games = [game for game in individual_games if is_mainline_game(game)]
        # Print matching games for debugging
        print(f"Matching games after filtering: {matching_games}")
        if not matching_games:
            continue

        location = cells[2].text.strip()
        for game in matching_games:
            game_locations.setdefault(game, []).append({
                "location": f"Event: {location}",
                "tag": str(cells[2])
            })