Browse Source

- WIP reworking form parsing to be more consistent

feature-new-db-implementation
Dan 1 year ago
parent
commit
e5f109949a
  1. 1
      ui/main_window_controller.py
  2. 4
      ui/workers/gather_evolutions_worker.py
  3. 2
      ui/workers/gather_home_storage_status_worker.py
  4. 4
      ui/workers/gather_pokemon_forms_worker.py
  5. 16
      utility/data.py
  6. 54
      utility/pokemon_word_ninja.py

1
ui/main_window_controller.py

@ -106,6 +106,7 @@ class MainWindowController:
db.add_pokemon_form(pokemon["pfic"], pokemon["name"], pokemon["form_name"], pokemon["national_dex"], pokemon["generation"], pokemon["sprite_url"], pokemon["gender_relevant"]) db.add_pokemon_form(pokemon["pfic"], pokemon["name"], pokemon["form_name"], pokemon["national_dex"], pokemon["generation"], pokemon["sprite_url"], pokemon["gender_relevant"])
self.pokemon_data_cache = data self.pokemon_data_cache = data
self.view.update_pokemon_forms(data) self.view.update_pokemon_forms(data)
self.apply_filters()
db.save_changes() db.save_changes()

4
ui/workers/gather_evolutions_worker.py

@ -28,7 +28,7 @@ class GatherEvolutions(QRunnable):
except Exception as e: except Exception as e:
print(f"Error gathering Pokémon home storage status: {e}") print(f"Error gathering Pokémon home storage status: {e}")
def gather_evolution_data(self, force_refresh = False): def gather_evolution_data(self, force_refresh = True):
all_pokemon_forms = db.get_list_of_pokemon_forms() all_pokemon_forms = db.get_list_of_pokemon_forms()
evolutions = {} evolutions = {}
@ -121,7 +121,7 @@ class GatherEvolutions(QRunnable):
evolutions[composite_key] = (evolution_info) evolutions[composite_key] = (evolution_info)
self.traverse_and_store(next_stage, evolutions, gender) self.traverse_and_store(next_stage, evolutions, gender)
def parse_evolution_chain(self, table, pokemon_form, force_refresh = False): def parse_evolution_chain(self, table, pokemon_form, force_refresh = True):
cache_record_name = f"evo_{pokemon_form['pfic']}" cache_record_name = f"evo_{pokemon_form['pfic']}"
if force_refresh: if force_refresh:
cache.purge(cache_record_name) cache.purge(cache_record_name)

2
ui/workers/gather_home_storage_status_worker.py

@ -104,7 +104,7 @@ class GatherHomeStorageStatus(QRunnable):
if cached_entry is not None: if cached_entry is not None:
return cached_entry return cached_entry
url = f"{self.base_url}{region}pokemon.shtml" url = f"{self.base_url}{region.lower()}pokemon.shtml"
response = cache.fetch_url(url) response = cache.fetch_url(url)
if not response: if not response:
return [] return []

4
ui/workers/gather_pokemon_forms_worker.py

@ -4,6 +4,7 @@ import re
from cache import cache from cache import cache
from utility.functions import get_generation_from_national_dex, sanitise_pokemon_name_for_url, remove_accents, compare_pokemon_forms, find_game_generation, format_pokemon_id from utility.functions import get_generation_from_national_dex, sanitise_pokemon_name_for_url, remove_accents, compare_pokemon_forms, find_game_generation, format_pokemon_id
from utility.pokemon_word_ninja import PokemonWordNinja
class GatherPokemonFormsWorkerSignals(QObject): class GatherPokemonFormsWorkerSignals(QObject):
finished = pyqtSignal(list) finished = pyqtSignal(list)
@ -12,6 +13,7 @@ class GatherPokemonFormsWorker(QRunnable):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.signals = GatherPokemonFormsWorkerSignals() self.signals = GatherPokemonFormsWorkerSignals()
self.splitter = PokemonWordNinja()
def run(self): def run(self):
try: try:
@ -56,6 +58,7 @@ class GatherPokemonFormsWorker(QRunnable):
for small in smalls: for small in smalls:
form_name += small.get_text(strip=True) + " " form_name += small.get_text(strip=True) + " "
form_name = form_name.strip() form_name = form_name.strip()
form_name = self.splitter.split(form_name)
return form_name return form_name
return "None" return "None"
@ -63,6 +66,7 @@ class GatherPokemonFormsWorker(QRunnable):
found_forms = [] found_forms = []
generation = get_generation_from_national_dex(national_dex_number) generation = get_generation_from_national_dex(national_dex_number)
pokemon_name = pokemon_soup.get_text(strip=True) pokemon_name = pokemon_soup.get_text(strip=True)
self.splitter.add_custom_word(pokemon_name)
print(f"Processing {pokemon_name}") print(f"Processing {pokemon_name}")
url_name = sanitise_pokemon_name_for_url(pokemon_name) url_name = sanitise_pokemon_name_for_url(pokemon_name)

16
utility/data.py

@ -10,8 +10,8 @@ pokemon_generations = {
9: {"min": 906, "max": 1025}, 9: {"min": 906, "max": 1025},
} }
regions = ["kanto", "johto", "hoenn", "sinnoh", "unova", "kalos", "alola", "galar", "paldea", "hisui", "unknown"] regions = ["Kanto", "Johto", "Hoenn", "Sinnoh", "Unova", "Kalos", "Alola", "Galar", "Paldea", "Hisui", "Unknown"]
regional_descriptors = ["kantonian", "johtonian", "hoennian", "sinnohan", "unovan", "kalosian", "alolan", "galarian", "hisuian", "paldean"] regional_descriptors = ["Kantonian", "Johtonian", "Hoennian", "Sinnohan", "Unovan", "Kalosian", "Alolan", "Galarian", "Hisuian", "Paldean"]
yellow = { yellow = {
"Name": "Yellow", "Name": "Yellow",
@ -308,4 +308,14 @@ non_evolution_forms = [
"Mega", "Mega",
"Dynamax", "Dynamax",
"Gigantamax" "Gigantamax"
] ]
# Proper nouns used as the default custom-word list of PokemonWordNinja:
# a handful of fixed names plus every entry of ``regions`` and
# ``regional_descriptors``.
POKEMON_PROPER_NOUNS = {
    "Augurite",
    "Electirizer",
    "Magmarizer",
    "Gigantamax",
    *regions,
    *regional_descriptors,
}

54
utility/pokemon_word_ninja.py

@ -0,0 +1,54 @@
import re
from typing import List, Optional

import wordninja

from utility.data import POKEMON_PROPER_NOUNS
class PokemonWordNinja:
    """Split run-together Pokémon text into space-separated words.

    Registered custom words (``POKEMON_PROPER_NOUNS`` plus anything supplied
    by the caller) are matched case-insensitively and always emitted with
    the spelling they were registered with. Every other fragment of the
    input is delegated to ``wordninja.split``.

    Attributes:
        custom_words: Registered words, longest first, without duplicates.
        placeholder_map: Maps each placeholder token to its custom word.
        word_to_placeholder_map: Maps each custom word to its placeholder.
    """

    def __init__(self, custom_word_list: Optional[List[str]] = None):
        """Register the default proper nouns plus ``custom_word_list``.

        Args:
            custom_word_list: Extra words to protect from splitting, or
                ``None`` to use only ``POKEMON_PROPER_NOUNS``.
        """
        self.custom_words = []
        self.placeholder_map = {}
        self.word_to_placeholder_map = {}

        initial_words = set(POKEMON_PROPER_NOUNS)
        if custom_word_list:
            initial_words.update(custom_word_list)
        # Sorted iteration keeps placeholder numbering reproducible.
        for word in sorted(initial_words):
            self._register(word)
        self._sort_custom_words()

    def _register(self, word: str) -> bool:
        """Record ``word`` and its placeholder; return False if skipped."""
        # An empty word would compile to a pattern that matches at every
        # position, and re-adding a known word would only grow the list.
        if not word or not word.strip():
            return False
        if word in self.word_to_placeholder_map:
            return False
        # A running counter cannot collide, unlike hash(word).
        placeholder = f"__PLACEHOLDER_{len(self.placeholder_map)}__"
        self.placeholder_map[placeholder] = word
        self.word_to_placeholder_map[word] = placeholder
        self.custom_words.append(word)
        return True

    def _sort_custom_words(self) -> None:
        """Order custom words longest first so longer names win matches."""
        self.custom_words.sort(key=lambda w: (-len(w), w))

    def add_custom_word(self, word: str):
        """Register one more word to keep intact when splitting.

        Empty or whitespace-only words and words that are already
        registered are ignored.
        """
        if self._register(word):
            self._sort_custom_words()

    def split(self, text: str) -> str:
        """Return ``text`` with its words separated by single spaces.

        Args:
            text: Possibly run-together text such as a scraped form name.

        Returns:
            The space-joined words. Custom words appear with their
            registered spelling; an empty or whitespace-only input yields
            an empty string.
        """
        if not text:
            return ""

        working_text = text
        words = self.custom_words
        if words:
            # One alternation, one pass: a custom word can never match
            # inside a placeholder inserted for another word. Each word has
            # its own group, so ``lastindex`` identifies the word that
            # matched regardless of the casing found in the input. The
            # ``re`` module caches compiled patterns, so an unchanged word
            # list does not recompile.
            pattern = re.compile(
                "|".join(f"({re.escape(word)})" for word in words),
                re.IGNORECASE,
            )

            def to_placeholder(match):
                word = words[match.lastindex - 1]
                # Pad with spaces so the placeholder becomes its own token
                # even when the word was glued to neighbouring text.
                return f" {self.word_to_placeholder_map[word]} "

            working_text = pattern.sub(to_placeholder, text)

        parts = []
        for part in working_text.split():
            custom_word = self.placeholder_map.get(part)
            if custom_word is not None:
                # Restore the registered spelling of the custom word.
                parts.append(custom_word)
            else:
                parts.extend(wordninja.split(part))
        return " ".join(parts)
Loading…
Cancel
Save