diff --git a/DBEditor/DBEditor.py b/DBEditor/DBEditor.py index e208d14..615b35d 100644 --- a/DBEditor/DBEditor.py +++ b/DBEditor/DBEditor.py @@ -128,8 +128,8 @@ class DBEditor(QMainWindow): self.logger.addHandler(ui_handler) # Add handlers to the logger - self.logger.addHandler(console_handler) - self.logger.addHandler(file_handler) + #self.logger.addHandler(console_handler) + #self.logger.addHandler(file_handler) def load_and_apply_patches(self): try: diff --git a/DBEditor/db_controller.py b/DBEditor/db_controller.py index 68f8bac..746c807 100644 --- a/DBEditor/db_controller.py +++ b/DBEditor/db_controller.py @@ -628,3 +628,19 @@ class DBController: conn.close() self.executor.shutdown() + def build_query_string(self, table_name, criteria): + conditions = [] + values = [] + query = f"SELECT * FROM {table_name} WHERE " + + for column, value in criteria.items(): + if value is None: + conditions.append(f"({column} IS NULL OR {column} = '')") + elif value == "": + conditions.append(f"{column} = ''") + else: + conditions.append(f"{column} = ?") + values.append(value) + + return query + " AND ".join(conditions), values + diff --git a/DataGatherers/DetermineOriginGame.py b/DataGatherers/DetermineOriginGame.py index 7c65583..8c0ce49 100644 --- a/DataGatherers/DetermineOriginGame.py +++ b/DataGatherers/DetermineOriginGame.py @@ -613,26 +613,57 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager, defau raw_game_locations = {} - # Process game locations - for row in locations_table.select('tr'): - games = row.select('th') - locations = row.select('td') - - if len(games) != len(locations): + generation_tbody = locations_table.find('tbody', recursive=False) + generation_rows = generation_tbody.find_all('tr', recursive=False) + for generation_row in generation_rows: + random_nested_td = generation_row.find('td', recursive=False) + if not random_nested_td: continue + random_nested_table = random_nested_td.find('table', recursive=False) + if not random_nested_table: + continue + random_nested_tbody = random_nested_table.find('tbody', recursive=False) + random_nested_rows = random_nested_tbody.find_all('tr', recursive=False) - for game, location in zip(games, locations): - raw_game = game.get_text(strip=True) - if raw_game in all_games: - groups = split_td_contents(location) - raw_game_locations.setdefault(raw_game, []).extend(groups) + for nested_row in random_nested_rows: + if 'Generation' in nested_row.get_text(strip=True): + continue + + games_container_td = nested_row.find('td', recursive=False) + if not games_container_td: + continue + games_container_table = games_container_td.find('table', recursive=False) + if not games_container_table: + continue + games_container_tbody = games_container_table.find('tbody', recursive=False) + games_container_rows = games_container_tbody.find_all('tr', recursive=False) + for games_container_row in games_container_rows: + games = games_container_row.find_all('th') + for game in games: + raw_game = game.get_text(strip=True) + if raw_game not in all_games: + continue + locations_container_td = games_container_row.find('td', recursive=False) + if not locations_container_td: + continue + locations_container_table = locations_container_td.find('table', recursive=False) + if not locations_container_table: + continue + locations_container_tbody = locations_container_table.find('tbody', recursive=False) + locations = locations_container_tbody.find_all('td') + for location in locations: + groups = split_td_contents(location) + for group in groups: + if raw_game not in raw_game_locations: + raw_game_locations[raw_game] = [] + raw_game_locations[raw_game].append(group) # Process events events_section = soup.find('span', id='In_events') event_tables = process_event_tables(events_section) if events_section else {} # Process game locations in parallel - with ThreadPoolExecutor() as executor: + with ThreadPoolExecutor(max_workers=1) as executor: futures = {executor.submit(process_game_locations, raw_game, raw_locations, form, default_forms): raw_game for raw_game, raw_locations in raw_game_locations.items()} diff --git a/DataGatherers/Update_evolution_information.py b/DataGatherers/Update_evolution_information.py index d6ea4d0..3bde7eb 100644 --- a/DataGatherers/Update_evolution_information.py +++ b/DataGatherers/Update_evolution_information.py @@ -7,6 +7,9 @@ import re import sys import os import logging +from queue import Queue +from threading import Thread +import threading sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -56,12 +59,23 @@ def sanitize_for_logging(text: str) -> str: return text def insert_evolution_info(db_controller, evolution_info: EvolutionInfo): - db_controller.execute_query_with_commit(''' - INSERT OR REPLACE INTO evolution_chains - (from_pfic, to_pfic, method) - VALUES (?, ?, ?) - ''', (evolution_info.from_pfic, evolution_info.to_pfic, evolution_info.method)) - logger.info(sanitize_for_logging(f"Adding a link from {evolution_info.from_pfic} to {evolution_info.to_pfic}, via {evolution_info.method}")) + criteria = { + 'from_pfic': evolution_info.from_pfic, + 'to_pfic': evolution_info.to_pfic, + 'method': evolution_info.method + } + + query, values = db_controller.build_query_string('evolution_chains', criteria) + + result = db_controller.execute_query(query, values) + + if not result: + db_controller.execute_query_with_commit(''' + INSERT OR REPLACE INTO evolution_chains + (from_pfic, to_pfic, method) + VALUES (?, ?, ?) + ''', (evolution_info.from_pfic, evolution_info.to_pfic, evolution_info.method)) + logger.info(sanitize_for_logging(f"Adding a link from {evolution_info.from_pfic} to {evolution_info.to_pfic}, via {evolution_info.method}")) def strip_pokemon_name(pokemon_name: str, form_name: str) -> str: """Remove the Pokémon's name from the form name if present.""" @@ -78,6 +92,10 @@ def fuzzy_match_form(form1: str, form2: str, threshold: int = 80) -> bool: return form1 == form2 return fuzz.ratio(form1.lower(), form2.lower()) >= threshold +def parse_pfic(pfic): + parts = pfic.split('-') + return tuple(int(part) if part.isdigit() else part for part in parts) + def get_pokemon_form_by_name(db_controller, name: str, form: Optional[str] = None, threshold: int = 80, gender: Optional[str] = None) -> Optional[str]: results = db_controller.execute_query('SELECT PFIC, name, form_name FROM pokemon_forms WHERE name = ?', (name,)) @@ -85,6 +103,8 @@ def get_pokemon_form_by_name(db_controller, name: str, form: Optional[str] = Non if not results: return None + results.sort(key=lambda x: parse_pfic(x[0])) + if form is None and gender is None: if len(results) > 1: if results[0][2] == None: @@ -144,45 +164,87 @@ def update_pokemon_baby_status(db_controller, from_pfic, is_baby_form): WHERE PFIC = ? ''', (is_baby_form, from_pfic)) +def process_single_pokemon(name, form, db_controller, cache, progress_callback): + if progress_callback: + progress_callback(f"Processing {name} {form if form else ''}") + + if form and name in form: + form = form.replace(name, "").strip() + + gender = None + if form and "male" in form.lower(): + gender = form + form = None + + evolution_chain = get_evolution_data_from_bulbapedia(name, form, cache, gender) + if evolution_chain: + if name == "Tauros": # Bulbapedia has a weird formatting for Tauros. + for stage in evolution_chain: + if stage.form: + stage.form = stage.form.replace("Paldean Form(", "").replace(")", "").strip() + process_evolution_chain(db_controller, evolution_chain, cache, gender) def update_evolution_chains(cache, progress_callback=None): db_controller = DBController('pokemon_forms.db', max_connections=20) try: - db_controller.execute_query_with_commit('BEGIN') + #db_controller.execute_query_with_commit('BEGIN') pokemon_forms = db_controller.execute_query(''' SELECT pf.name, pf.form_name FROM pokemon_forms pf ORDER BY pf.national_dex, pf.form_name ''') + # Create a queue and workers + num_workers = 1 # Adjust based on your needs + task_queue = Queue() + workers = [] + + for _ in range(num_workers): + worker_thread = Thread(target=worker, + args=(task_queue, db_controller, cache, progress_callback)) + worker_thread.daemon = True + worker_thread.start() + workers.append(worker_thread) + for name, form in pokemon_forms: - if progress_callback: - progress_callback(f"Processing {name} {form if form else ''}") + task_queue.put((name, form)) + + #db_controller.execute_query_with_commit('COMMIT') - if form and name in form: - form = form.replace(name, "").strip() + # Add poison pills to stop workers + for _ in range(num_workers): + task_queue.put(None) + + task_queue.join() + + for worker_thread in workers: + worker_thread.join() - gender = None - if form and "male" in form.lower(): - gender = form - form = None - - evolution_chain = get_evolution_data_from_bulbapedia(name, form, cache, gender) - if evolution_chain: - if name == "Tauros": # Bulbapedia has a weird formatting for Tauros. - for stage in evolution_chain: - if stage.form: - stage.form = stage.form.replace("Paldean Form(", "").replace(")", "").strip() - process_evolution_chain(db_controller, evolution_chain, cache, gender) - db_controller.execute_query_with_commit('COMMIT') except Exception as e: # Rollback in case of error - db_controller.execute_query_with_commit('ROLLBACK') + #db_controller.execute_query_with_commit('ROLLBACK') logger.error(f"Error updating evolution chains: {str(e)}") raise finally: db_controller.close() +def worker(queue: Queue, db_controller: DBController, + cache: CacheManager, progress_callback=None): + while True: + try: + # Get task from queue + task = queue.get() + if task is None: # Poison pill to stop worker + break + + name, form = task + process_single_pokemon(name, form, db_controller, cache, progress_callback) + + except Exception as e: + logger.error(f"Error processing pokemon: {e}") + finally: + queue.task_done() + if __name__ == "__main__": cache = CacheManager() update_evolution_chains(cache) diff --git a/DataGatherers/cache_manager.py b/DataGatherers/cache_manager.py index 731aaac..67966fd 100644 --- a/DataGatherers/cache_manager.py +++ b/DataGatherers/cache_manager.py @@ -117,7 +117,7 @@ class CacheManager: 'content': content, 'timestamp': time.time() }) - time.sleep(1) + time.sleep(0.25) return content return None diff --git a/DataGatherers/pokemondb_scraper.py b/DataGatherers/pokemondb_scraper.py index 36f3e68..b05770a 100644 --- a/DataGatherers/pokemondb_scraper.py +++ b/DataGatherers/pokemondb_scraper.py @@ -8,10 +8,14 @@ import sys import logging import re import unicodedata +from queue import Queue +from threading import Thread +import threading sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from DataGatherers.cache_manager import CacheManager +from db_controller import DBController logger = logging.getLogger('ui_feedback') @@ -56,9 +60,8 @@ def initialize_db(): create_pokemon_db() create_pokemon_storage_db() -def insert_pokemon_form(conn, pokemon_form): - cursor = conn.cursor() - cursor.execute(''' +def insert_pokemon_form(db_controller, pokemon_form): + db_controller.execute_query_with_commit(''' INSERT OR REPLACE INTO pokemon_forms (PFIC, name, form_name, national_dex, generation) VALUES (?, ?, ?, ?, ?) @@ -69,28 +72,28 @@ def insert_pokemon_form(conn, pokemon_form): pokemon_form.national_dex, pokemon_form.generation )) - conn.commit() -def insert_pokemon_storage(conn, pfic: str, storable_in_home: bool): - cursor = conn.cursor() - cursor.execute(''' +def insert_pokemon_storage(db_controller, pfic: str, storable_in_home: bool): + db_controller.execute_query_with_commit(''' INSERT OR REPLACE INTO pokemon_storage (PFIC, storable_in_home) VALUES (?, ?) ''', (pfic, storable_in_home)) - conn.commit() class PokemonDatabase: def __init__(self): self.pokemon: Dict[str, List[PokemonForm]] = {} + self._lock = threading.Lock() def add_pokemon(self, national_dex: int, name: str, region_code: int, form_index: int, gender_code: int, form_name: Optional[str], sprite_url: str): + pokemon_id = format_pokemon_id(national_dex, region_code, form_index, gender_code) pokemon_form = PokemonForm(id=pokemon_id, name=name, form_name=form_name, sprite_url=sprite_url, national_dex=national_dex, generation=region_code) - if national_dex not in self.pokemon: - self.pokemon[national_dex] = [] - self.pokemon[national_dex].append(pokemon_form) + with self._lock: + if national_dex not in self.pokemon: + self.pokemon[national_dex] = [] + self.pokemon[national_dex].append(pokemon_form) def get_pokemon(self, national_dex: Optional[int] = None, region_code: Optional[int] = None, form_index: Optional[int] = None, gender_code: Optional[int] = None) -> List[PokemonForm]: @@ -156,17 +159,170 @@ def download_image(url, filename): with open(filename, 'wb') as f: f.write(response.content) +def worker(queue: Queue, db: PokemonDatabase, pokemon_generations: dict, db_controller: DBController, + cache: CacheManager, progress_callback=None): + while True: + try: + # Get task from queue + task = queue.get() + if task is None: # Poison pill to stop worker + break + + index, mon = task + process_single_pokemon(index + 1, mon, db, pokemon_generations, db_controller, cache, progress_callback) + + except Exception as e: + logger.error(f"Error processing pokemon: {e}") + finally: + queue.task_done() + +def process_single_pokemon(national_dex_index, mon, db, pokemon_generations, db_controller, cache, progress_callback): + generation = 1 + for gen in pokemon_generations: + if pokemon_generations[gen]["min"] <= national_dex_index <= pokemon_generations[gen]["max"]: + generation = gen + break + + pokemon_name = mon.get_text(strip=True) + logger.info(pokemon_name) + if progress_callback: + progress_callback(f"Processing {pokemon_name}") + + pokemon_url_name = pokemon_name.replace("♀", "-f").replace("♂", "-m").replace("'", "").replace(".", "").replace('é', 'e').replace(':', '') + pokemon_url_name = pokemon_url_name.replace(" ", "-") + + sprites_page_data = get_pokemon_sprites_page_data(cache, pokemon_url_name) + if not sprites_page_data: + return + sprites_soup = BeautifulSoup(sprites_page_data, 'html.parser') + + generation_8_header = sprites_soup.find('h2', string='Generation 8') + if not generation_8_header: + return + generation_8_table = generation_8_header.find_next('table') + if not generation_8_table: + return + + generation_8_tbody = generation_8_table.find('tbody') + if not generation_8_tbody: + return + + generation_8_rows = generation_8_tbody.find_all('tr') + + for row in generation_8_rows: + row_text = row.get_text(strip=True) + if 'Home' in row_text: + sprites = row.find_all('span', class_='sprites-table-card') + if not sprites: + continue + form = 0 + for sprite in sprites: + sprite_img = sprite.find('img') + sprite_url = "missing" + if sprite_img: + sprite_url = sprite_img.get('src') + + if "shiny" in sprite_url: + continue + + form_name = "None" + if sprite.find('small'): + smalls = sprite.find_all('small') + form_name = "" + for small in smalls: + form_name += small.get_text(strip=True) + " " + form_name = form_name.strip() + logger.info(f'{sprite_url}, {form_name}') + if form_name != "None": + form += 1 + gender = 0 + if form_name.startswith("Male"): + form -= 1 + gender = 1 + elif form_name.startswith("Female"): + form -= 1 + gender = 2 + + dex_page_data = get_pokemon_dex_page(cache, pokemon_url_name) + if dex_page_data: + dex_soup = BeautifulSoup(dex_page_data, 'html.parser') + + #Find a heading that has the pokemon name in it + dex_header = dex_soup.find('h1', string=pokemon_name) + if dex_header: + #The next
tag contains the generation number, in the format "{pokemon name} is a {type}(/{2nd_type}) type Pokémon introduced in Generation {generation number}." + generation_tag = dex_header.find_next('p') + dex_text = generation_tag.get_text() + pattern = r'^(.+?) is a (\w+)(?:/(\w+))? type Pokémon introduced in Generation (\d+)\.$' + match = re.match(pattern, dex_text) + if match: + name, type1, type2, gen = match.groups() + generation = int(gen) + + if form_name != "None": + next_tag = generation_tag.find_next('p') + if next_tag: + extra_text = next_tag.get_text() + extra_text = remove_accents(extra_text) + form_pattern = r'a(?:n)? (\w+) Form(?:,)? introduced in (?:the )?([\w\s:]+)(?:\/([\w\s:]+))?' + update_pattern = r'a(?:n)? (\w+) form(?:,)? available in the latest update to ([\w\s:]+)(?:& ([\w\s:]+))?' + multiple_forms_pattern = r'has (?:\w+) new (\w+) Form(?:s)?(?:,)? available in (?:the )?([\w\s:]+)(?:& ([\w\s:]+))?' + expansion_pass_pattern = r'a(?:n)? (\w+) form(?:,)? introduced in the Crown Tundra Expansion Pass to ([\w\s:]+)(?:& ([\w\s:]+))?' + patterns = [form_pattern, update_pattern, multiple_forms_pattern, expansion_pass_pattern] + test_form = form_name.replace(pokemon_name, "").replace("Male", "").replace("Female", "").strip() + if pokemon_name == "Tauros" and (form_name == "Aqua Breed" or form_name == "Blaze Breed" or form_name == "Combat Breed"): + test_form = "Paldean" + for pattern in patterns: + matches = re.findall(pattern, extra_text, re.IGNORECASE) + generation_found = False + for i, (regional, game1, game2) in enumerate(matches, 1): + if compare_forms(test_form, regional): + target_game = game1.replace("Pokemon", "").strip() + result = db_controller.execute_query(''' + SELECT g.generation + FROM games g + LEFT JOIN alternate_game_names agn ON g.id = agn.game_id + WHERE g.name = ? OR agn.alternate_name = ? + LIMIT 1 + ''', (target_game, target_game)) + if result: + generation = result[0][0] + generation_found = True + break + if generation_found: + break + + pokemon_form = PokemonForm( + id=format_pokemon_id(national_dex_index, generation, form, gender), + name=pokemon_name, + form_name=form_name if form_name != "None" else None, + sprite_url=sprite_url, + national_dex=national_dex_index, + generation=generation + ) + db.add_pokemon( + national_dex_index, + pokemon_name, + generation, + form, + gender, + form_name if form_name != "None" else None, + sprite_url + ) + insert_pokemon_form(db_controller, pokemon_form) + + storable_in_home = not any(keyword in form_name.lower() for keyword in ['mega', 'gigantamax']) if form_name else True + insert_pokemon_storage(db_controller, pokemon_form.id, storable_in_home) + def retrieve_all_pokemon_forms(cache: CacheManager, progress_callback=None): db = PokemonDatabase() - pokemon_db_conn = create_pokemon_db() - create_pokemon_storage_db() + db_controller = DBController('pokemon_forms.db', max_connections=20) page_data = get_pokemon_sprites_page(cache) if not page_data: return None soup = BeautifulSoup(page_data, 'html.parser') - pokemon = soup.find_all('a', class_='infocard') pokemon_generations = { @@ -181,148 +337,30 @@ def retrieve_all_pokemon_forms(cache: CacheManager, progress_callback=None): 9: {"min": 906, "max": 1025}, } - national_dex_index = 1 - for mon in pokemon: - generation = 1 - for gen in pokemon_generations: - if pokemon_generations[gen]["min"] <= national_dex_index <= pokemon_generations[gen]["max"]: - generation = gen - break - - pokemon_name = mon.get_text(strip=True) - logger.info(pokemon_name) - if progress_callback: - progress_callback(f"Processing {pokemon_name}") - - pokemon_url_name = pokemon_name.replace("♀", "-f").replace("♂", "-m").replace("'", "").replace(".", "").replace('é', 'e').replace(':', '') - pokemon_url_name = pokemon_url_name.replace(" ", "-") - - sprites_page_data = get_pokemon_sprites_page_data(cache, pokemon_url_name) - if not sprites_page_data: - return None - sprites_soup = BeautifulSoup(sprites_page_data, 'html.parser') - - generation_8_header = sprites_soup.find('h2', string='Generation 8') - if not generation_8_header: - continue - generation_8_table = generation_8_header.find_next('table') - if not generation_8_table: - continue - - generation_8_tbody = generation_8_table.find('tbody') - if not generation_8_tbody: - continue - - generation_8_rows = generation_8_tbody.find_all('tr') + # Create a queue and workers + num_workers = 1 # Adjust based on your needs + task_queue = Queue() + workers = [] + + for _ in range(num_workers): + worker_thread = Thread(target=worker, + args=(task_queue, db, pokemon_generations, + db_controller, cache, progress_callback)) + worker_thread.daemon = True + worker_thread.start() + workers.append(worker_thread) - for row in generation_8_rows: - row_text = row.get_text(strip=True) - if 'Home' in row_text: - sprites = row.find_all('span', class_='sprites-table-card') - if not sprites: - continue - form = 0 - for sprite in sprites: - sprite_img = sprite.find('img') - sprite_url = "missing" - if sprite_img: - sprite_url = sprite_img.get('src') - - if "shiny" in sprite_url: - continue - - form_name = "None" - if sprite.find('small'): - smalls = sprite.find_all('small') - form_name = "" - for small in smalls: - form_name += small.get_text(strip=True) + " " - form_name = form_name.strip() - logger.info(f'{sprite_url}, {form_name}') - if form_name != "None": - form += 1 - gender = 0 - if form_name.startswith("Male"): - form -= 1 - gender = 1 - elif form_name.startswith("Female"): - form -= 1 - gender = 2 - - dex_page_data = get_pokemon_dex_page(cache, pokemon_name.replace("'", "").replace(".", "-").replace(" ", "")) - if dex_page_data: - dex_soup = BeautifulSoup(dex_page_data, 'html.parser') - - #Find a heading that has the pokemon name in it - dex_header = dex_soup.find('h1', string=pokemon_name) - if dex_header: - #The next
tag contains the generation number, in the format "{pokemon name} is a {type}(/{2nd_type}) type Pokémon introduced in Generation {generation number}." - generation_tag = dex_header.find_next('p') - dex_text = generation_tag.get_text() - pattern = r'^(.+?) is a (\w+)(?:/(\w+))? type Pokémon introduced in Generation (\d+)\.$' - match = re.match(pattern, dex_text) - if match: - name, type1, type2, gen = match.groups() - generation = int(gen) - - if form_name != "None": - next_tag = generation_tag.find_next('p') - if next_tag: - extra_text = next_tag.get_text() - extra_text = remove_accents(extra_text) - form_pattern = r'a(?:n)? (\w+) Form(?:,)? introduced in (?:the )?([\w\s:]+)(?:\/([\w\s:]+))?' - update_pattern = r'a(?:n)? (\w+) form(?:,)? available in the latest update to ([\w\s:]+)(?:& ([\w\s:]+))?' - multiple_forms_pattern = r'has (?:\w+) new (\w+) Form(?:s)?(?:,)? available in (?:the )?([\w\s:]+)(?:& ([\w\s:]+))?' - expansion_pass_pattern = r'a(?:n)? (\w+) form(?:,)? introduced in the Crown Tundra Expansion Pass to ([\w\s:]+)(?:& ([\w\s:]+))?' - patterns = [form_pattern, update_pattern, multiple_forms_pattern, expansion_pass_pattern] - test_form = form_name.replace(pokemon_name, "").replace("Male", "").replace("Female", "").strip() - if pokemon_name == "Tauros" and (form_name == "Aqua Breed" or form_name == "Blaze Breed" or form_name == "Combat Breed"): - test_form = "Paldean" - for pattern in patterns: - matches = re.findall(pattern, extra_text, re.IGNORECASE) - generation_found = False - for i, (regional, game1, game2) in enumerate(matches, 1): - if compare_forms(test_form, regional): - target_game = game1.replace("Pokemon", "").strip() - cursor = pokemon_db_conn.cursor() - cursor.execute(''' - SELECT g.generation - FROM games g - LEFT JOIN alternate_game_names agn ON g.id = agn.game_id - WHERE g.name = ? OR agn.alternate_name = ? - LIMIT 1 - ''', (target_game, target_game)) - result = cursor.fetchone() - if result: - generation = result[0] - generation_found = True - break - if generation_found: - break - - pokemon_form = PokemonForm( - id=format_pokemon_id(national_dex_index, generation, form, gender), - name=pokemon_name, - form_name=form_name if form_name != "None" else None, - sprite_url=sprite_url, - national_dex=national_dex_index, - generation=generation - ) - db.add_pokemon( - national_dex_index, - pokemon_name, - generation, - form, - gender, - form_name if form_name != "None" else None, - sprite_url - ) - insert_pokemon_form(pokemon_db_conn, pokemon_form) - - storable_in_home = not any(keyword in form_name.lower() for keyword in ['mega', 'gigantamax']) if form_name else True - insert_pokemon_storage(pokemon_db_conn, pokemon_form.id, storable_in_home) - - national_dex_index += 1 + for index, mon in enumerate(pokemon): + task_queue.put((index, mon)) + + # Add poison pills to stop workers + for _ in range(num_workers): + task_queue.put(None) + + task_queue.join() + + for worker_thread in workers: + worker_thread.join() logger.info(f"Total Pokémon forms: {sum(len(forms) for forms in db.pokemon.values())}") logger.info(f"Pokémon with multiple forms: {sum(1 for forms in db.pokemon.values() if len(forms) > 1)}") @@ -339,7 +377,7 @@ def retrieve_all_pokemon_forms(cache: CacheManager, progress_callback=None): download_image(form.sprite_url, filename) logger.info(f"Downloaded image for {form.id}") - pokemon_db_conn.close() + db_controller.close() if __name__ == "__main__": cache = CacheManager() diff --git a/DataGatherers/update_location_information.py b/DataGatherers/update_location_information.py index cd6ae1b..141bfed 100644 --- a/DataGatherers/update_location_information.py +++ b/DataGatherers/update_location_information.py @@ -443,11 +443,13 @@ def process_pokemon_for_location_data(pfic, name, form, national_dex, default_fo for location in encounter_data[encounter]: if location == "": continue - test_location = location["location"].strip().lower() + test_location = location["location"].strip().lower() + + test_location_text = BeautifulSoup(test_location, 'html.parser').get_text().lower() ignore_location = False for ignore in encounters_to_ignore: - if ignore in test_location: + if ignore in test_location_text: ignore_location = True break @@ -458,7 +460,7 @@ def process_pokemon_for_location_data(pfic, name, form, national_dex, default_fo logger.info(f"Found in {encounter}:") print_encounter = False - if "evolve" in test_location: + if "evolve" in test_location_text: remaining, details = extract_additional_information(location["tag"]) evolve_info = extract_evolve_information(remaining, db_controller) @@ -544,7 +546,7 @@ def update_location_information(cache, progress_callback=None): except Exception as exc: logger.error(f'Error processing {name} {form}: {exc}') - with ThreadPoolExecutor(max_workers=10) as executor: + with ThreadPoolExecutor(max_workers=1) as executor: futures = [executor.submit(process_single_pokemon, form_data) for form_data in pokemon_forms] for future in as_completed(futures):