|
|
|
@ -1,16 +1,19 @@ |
|
|
|
from typing import Optional |
|
|
|
from PyQt6.QtCore import QObject, pyqtSignal, QRunnable |
|
|
|
from bs4 import BeautifulSoup |
|
|
|
from bs4 import BeautifulSoup, Tag |
|
|
|
from cache import cache |
|
|
|
from db import db |
|
|
|
|
|
|
|
from utility.functions import get_form_name, get_display_name |
|
|
|
|
|
|
|
class GatherEvolutionsWorkerSignals(QObject):
    """Qt signal container instantiated by the evolution-gathering worker."""

    # Declared with a single ``list`` argument.
    finished = pyqtSignal(list)
|
|
|
|
|
|
|
class GatherHEvolutions(QRunnable): |
|
|
|
class GatherEvolutions(QRunnable): |
|
|
|
def __init__(self):
    """Create the worker's signal object and set the wiki base URL."""
    super().__init__()
    self.signals = GatherEvolutionsWorkerSignals()
    # A Serebii URL used to be assigned here and then overwritten by the
    # very next statement; only the effective (Bulbapedia) value is kept.
    self.base_url = "https://bulbapedia.bulbagarden.net/wiki/"
|
|
|
|
|
|
|
def run(self): |
|
|
|
try: |
|
|
|
@ -24,6 +27,210 @@ class GatherHEvolutions(QRunnable): |
|
|
|
evolutions = [] |
|
|
|
|
|
|
|
for pokemon_form in all_pokemon_forms: |
|
|
|
pass |
|
|
|
print(f"Processing {get_display_name(pokemon_form)}'s evolutions") |
|
|
|
url = f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_form["name"]}_(Pokémon)" |
|
|
|
page_data = cache.fetch_url(url) |
|
|
|
if not page_data: |
|
|
|
continue |
|
|
|
soup = BeautifulSoup(page_data, 'html.parser') |
|
|
|
evolution_section = soup.find('span', id='Evolution_data') |
|
|
|
if not evolution_section: |
|
|
|
continue |
|
|
|
evolution_table = None |
|
|
|
form = get_form_name(pokemon_form, not pokemon_form["gender_relevant"]) |
|
|
|
pokemon_name = pokemon_form["name"] |
|
|
|
evolution_table = evolution_section.parent.find_next('table') |
|
|
|
if form: |
|
|
|
form_without_form = form.replace('Form', '').replace('form', '').strip() |
|
|
|
for tag in evolution_section.parent.find_next_siblings(): |
|
|
|
if tag.name == 'h4' and form_without_form in tag.get_text(strip=True): |
|
|
|
evolution_table = tag.find_next('table') |
|
|
|
break |
|
|
|
if tag.name == 'h3': |
|
|
|
break |
|
|
|
if not evolution_table: |
|
|
|
continue |
|
|
|
|
|
|
|
if pokemon_name == "Eevee": |
|
|
|
evolution_chain = self.parse_eevee_evolution_chain(evolution_table, pokemon_form) |
|
|
|
evolutions.append(evolution_chain) |
|
|
|
else: |
|
|
|
evolution_chain = self.parse_evolution_chain(evolution_table, pokemon_form) |
|
|
|
evolutions.append(evolution_chain) |
|
|
|
|
|
|
|
return evolutions |
|
|
|
|
|
|
|
def parse_evolution_chain(self, table, pokemon_form, force_refresh=False):
    """Parse an evolution table into a list of linked stage dictionaries.

    The first ``<tr>`` of the table's direct ``<tbody>`` is read as the main
    chain; every following ``<tr>`` is read as a branch.  Within a row, a
    ``<td>`` that contains a nested ``<table>`` describes a Pokemon, and any
    other ``<td>`` holds the evolution method for the next Pokemon cell.

    Args:
        table: The evolution table element (must offer ``find``).
        pokemon_form: Mapping that provides at least the ``"pfic"`` key,
            which is used for the cache key and copied into every stage.
        force_refresh: When true, the cache record is purged before lookup.

    Returns:
        The cached value when ``cache.get`` yields something other than
        ``None``; otherwise the list of main-chain stage dicts (also stored
        in the cache).  An empty list is returned, without caching, when the
        table has no direct ``<tbody>`` or the ``<tbody>`` has no rows.
    """
    # Single quotes inside the replacement field keep this valid on Python
    # versions older than 3.12 while producing the identical key.
    cache_record_name = f"evo_{pokemon_form['pfic']}"
    if force_refresh:
        cache.purge(cache_record_name)

    cached_entry = cache.get(cache_record_name)
    if cached_entry is not None:
        return cached_entry

    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if not rows:
        # Nothing to parse; previously this raised IndexError on rows[0].
        return []

    def build_stage(td, method):
        """Build one stage dict from a Pokemon cell and its pending method."""
        name = self.extract_pokemon_name(td)
        return {
            "pokemon": name,
            "method": method,
            "stage": self.extract_stage_form(td),
            "form": self.extract_evolution_form(td, name),
            "next_stage": None,
            "previous_stage": None,
            "branches": [],
            "pfic": pokemon_form["pfic"],
        }

    # Parse main evolution chain
    main_chain = []
    current_stage = None
    pending_method = None
    for td in rows[0].find_all('td', recursive=False):
        if not td.find('table'):
            # This TD contains the evolution method for the next Pokemon
            pending_method = self.extract_evolution_method(td)
            continue

        new_stage = build_stage(td, pending_method)
        pending_method = None
        if current_stage:
            current_stage["next_stage"] = new_stage
            new_stage["previous_stage"] = current_stage  # back link
        current_stage = new_stage
        main_chain.append(current_stage)

    # Parse branching evolutions
    for row in rows[1:]:
        branch_stage = None
        branch_method = None
        for td in row.find_all('td', recursive=False):
            if not td.find('table'):
                branch_method = self.extract_evolution_method(td)
                continue

            new_stage = build_stage(td, branch_method)
            branch_method = None
            if branch_stage:
                branch_stage["next_stage"] = new_stage
                new_stage["previous_stage"] = branch_stage  # back link
            branch_stage = new_stage

            # Only cells carrying a rowspan are attached to the main chain.
            if not td.get('rowspan'):
                continue
            for main_stage in main_chain:
                # Stages are plain dicts, so the name must be read by key;
                # attribute access (main_stage.pokemon) raised AttributeError.
                if main_stage["pokemon"] == branch_stage["pokemon"]:
                    main_stage["branches"].append(branch_stage)
                    # Re-point the back link at the main-chain stage.
                    branch_stage["previous_stage"] = main_stage
                    break

    cache.set(cache_record_name, main_chain)
    return main_chain
|
|
|
|
|
|
|
def extract_pokemon_name(self, td: Tag) -> Optional[str]:
    """Return the stripped text of the cell's name link, or ``None`` when
    ``find_name_tag`` yields nothing."""
    link = self.find_name_tag(td)
    return link.get_text(strip=True) if link else None
|
|
|
|
|
|
|
def find_name_tag(self, td: Tag) -> Optional[Tag]:
    """Locate the anchor that names the Pokemon inside a cell.

    Looks in the first ``<table>`` under ``td``.  An ``<a>`` with class
    ``selflink`` is preferred; otherwise the first ``<a>`` that has a
    ``title`` attribute and whose class is not ``image`` is used.

    Returns:
        The matching anchor, or ``None`` when the cell has no nested table
        or no anchor matches.
    """
    table = td.find('table')
    if table is None:
        # No nested table: previously this fell through to None.find(...)
        # and raised AttributeError.
        return None

    selflink = table.find('a', class_='selflink')
    if selflink:
        return selflink

    return table.find('a', title=True, class_=lambda x: x != 'image')
|
|
|
|
|
|
|
def extract_stage_form(self, td: Tag) -> Optional[str]:
    """Return the stripped text of the first ``<small>`` in the cell's table.

    Returns:
        The text of the first ``<small>`` found under the cell's first
        nested ``<table>``, or ``None`` when either element is missing.
    """
    table = td.find('table')
    if table is None:
        # Guard the chained lookup; a cell without a nested table used to
        # raise AttributeError here.
        return None

    stage_tag = table.find('small')
    if stage_tag:
        return stage_tag.get_text(strip=True)
    return None
|
|
|
|
|
|
|
def extract_evolution_form(self, td: Tag, name: str) -> Optional[str]:
    """Return the text of the first ``<small>`` next to the name link.

    The ``<small>`` elements are searched under the name link's parent, and
    a value is returned only when more than one is present.  ``name`` is
    accepted but not consulted.
    """
    link = self.find_name_tag(td)
    if not link:
        return None

    smalls = link.parent.find_all('small')
    if len(smalls) <= 1:
        return None
    return smalls[0].get_text(strip=True)
|
|
|
|
|
|
|
def extract_evolution_method(self, td: Tag) -> str:
    """Return the cell's text with surrounding whitespace stripped; callers
    store it as the evolution method."""
    method_text = td.get_text(strip=True)
    return method_text
|
|
|
|
|
|
|
def parse_eevee_evolution_chain(self, table, pokemon_form):
    """Parse the Eevee-style evolution table into a single branching stage.

    The layout read here uses the rows at index 1, 2 and 3 of the table's
    direct ``<tbody>``: the base Pokemon cell, one method cell per
    evolution, and one Pokemon cell per evolution.

    Args:
        table: The evolution table element (must offer ``find``).
        pokemon_form: Mapping providing ``"pfic"``, copied into every stage.

    Returns:
        A one-element list holding the base stage, whose ``"branches"``
        lists one stage per evolution (each linked back through
        ``"previous_stage"``).  An empty list is returned when the table
        lacks a direct ``<tbody>``, has fewer than four rows, or the base
        row has no ``<td>``.
    """
    tbody = table.find('tbody', recursive=False)
    if not tbody:
        return []

    rows = tbody.find_all('tr', recursive=False)
    if len(rows) < 4:
        # Rows 1-3 are required below; indexing used to raise IndexError.
        return []
    eevee_row, method_row, eeveelutions_row = rows[1:4]

    eevee_td = eevee_row.find('td', recursive=False)
    if eevee_td is None:
        return []

    def make_stage(name, stage, method, previous):
        """Build one stage dict in the shape used by the other parsers."""
        return {
            "pokemon": name,
            "method": method,
            "stage": stage,
            "form": None,
            "next_stage": None,
            "previous_stage": previous,
            "branches": [],
            "pfic": pokemon_form["pfic"],
        }

    pokemon_name, stage = self.parse_pokemon_subtable(eevee_td)
    eevee_stage = make_stage(pokemon_name, stage, None, None)

    methods = [
        self.extract_evolution_method(cell)
        for cell in method_row.find_all('td', recursive=False)
    ]

    eeveelutions = []
    cells = eeveelutions_row.find_all('td', recursive=False)
    for index, cell in enumerate(cells):
        name, cell_stage = self.parse_pokemon_subtable(cell)
        # Tolerate a method row that is shorter than the evolution row.
        method = methods[index] if index < len(methods) else None
        # Every evolution links straight back to the base stage.
        eeveelutions.append(make_stage(name, cell_stage, method, eevee_stage))

    # Set the branches directly, not as a nested list.
    eevee_stage["branches"] = eeveelutions

    # The stray ``return evolutions`` that used to sit here referenced an
    # undefined name (NameError) and shadowed the real return below.
    return [eevee_stage]
|
|
|
|
|
|
|
def parse_pokemon_subtable(self, td):
    """Return ``(name, stage)`` for a cell that holds a nested table.

    Cells without a nested ``<table>`` yield ``(None, None)``.
    """
    if not td.find('table'):
        return None, None

    # This TD contains Pokemon information
    name = self.extract_pokemon_name(td)
    stage_label = self.extract_stage_form(td)
    return name, stage_label