Browse Source

- Fixes for the more annoying pokemon

master
Dan 1 year ago
parent
commit
13f3e1c6a6
  1. 2
      DataGatherers/DefaultForms.json
  2. 121
      DataGatherers/DetermineOriginGame.py
  3. 114
      DataGatherers/update_location_information.py

2
DataGatherers/DefaultForms.json

@ -20,7 +20,6 @@
"Full Belly Mode", "Full Belly Mode",
"Zero Form", "Zero Form",
"Curly Form", "Curly Form",
"Chest Form",
"Apex Build", "Apex Build",
"Ultimate Mode", "Ultimate Mode",
"Teal Mask", "Teal Mask",
@ -42,7 +41,6 @@
"Amped Form", "Amped Form",
"Vanilla Cream Strawberry Sweet", "Vanilla Cream Strawberry Sweet",
"Single Strike Style", "Single Strike Style",
"Family of Three",
"Green Plumage", "Green Plumage",
"Two-Segment Form", "Two-Segment Form",
"Standard Form" "Standard Form"

121
DataGatherers/DetermineOriginGame.py

@ -374,11 +374,13 @@ def split_td_contents(td):
current_group = [] current_group = []
for content in td.contents: for content in td.contents:
if isinstance(content, Tag) and content.name == 'br': if isinstance(content, Tag) and (content.name == 'br' or content.name == 'p'):
if current_group: if current_group:
groups.append(BeautifulSoup('', 'html.parser').new_tag('div')) groups.append(BeautifulSoup('', 'html.parser').new_tag('div'))
for item in current_group: for item in current_group:
groups[-1].append(copy.copy(item)) groups[-1].append(copy.copy(item))
if content.name == 'p':
groups[-1].append(copy.copy(content))
current_group = [] current_group = []
else: else:
current_group.append(content) current_group.append(content)
@ -394,13 +396,28 @@ def parse_form_information(html_content):
soup = BeautifulSoup(html_content, 'html.parser') soup = BeautifulSoup(html_content, 'html.parser')
small_tag = soup.find('small') small_tag = soup.find('small')
forms = []
# Form info is in bold inside a small tag. # Form info is in bold inside a small tag.
if small_tag: if small_tag:
bold_tag = small_tag.find('b') bold_tags = small_tag.find_all('b')
if bold_tag: for bold_tag in bold_tags:
form_text = bold_tag.get_text(strip=True) form_text = bold_tag.get_text(strip=True)
# Remove parentheses # Remove parentheses
form_text = form_text.strip('()') form_text = form_text.strip('()')
if "/" in form_text:
last_word = form_text.split()[-1]
form_text = form_text.replace(last_word, "").strip()
parts = form_text.split('/')
for part in parts:
main_form = part.strip() + " " + last_word
info = {
"main_form": main_form,
"sub_form": None
}
forms.append(info)
continue
# Split the text into main form and breed (if present) # Split the text into main form and breed (if present)
parts = form_text.split('(') parts = form_text.split('(')
@ -408,13 +425,28 @@ def parse_form_information(html_content):
# "Factor"s are not actual forms; they are properties of the pokemon you can encounter. # "Factor"s are not actual forms; they are properties of the pokemon you can encounter.
if main_form and "factor" in main_form.lower(): if main_form and "factor" in main_form.lower():
return None, None continue
breed = parts[1].strip(')') if len(parts) > 1 else None breed = parts[1].strip(')') if len(parts) > 1 else None
return main_form, breed info = {
"main_form": main_form,
return None, None "sub_form": breed
}
forms.append(info)
else: #..... Gimmighoul
headings = soup.find_all('b')
if len(headings) > 0:
for heading in headings:
main_form = heading.get_text(strip=True)
info = {
"main_form": main_form,
"sub_form": None
}
forms.append(info)
return forms
def get_evolution_data_from_bulbapedia(pokemon_name, form, cache: CacheManager, gender: Optional[str] = None): def get_evolution_data_from_bulbapedia(pokemon_name, form, cache: CacheManager, gender: Optional[str] = None):
page_data = get_pokemon_data_bulbapedia(pokemon_name, cache) page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
@ -640,48 +672,44 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager, defau
if form is None: if form is None:
for raw_location in raw_locations: for raw_location in raw_locations:
raw_text = raw_location.get_text() raw_text = raw_location.get_text()
main_form, sub_form = parse_form_information(str(raw_location)) forms = parse_form_information(str(raw_location))
if len(forms) > 0:
for form_info in forms:
main_form = form_info["main_form"]
if default_forms and main_form and main_form in default_forms: if default_forms and main_form and main_form in default_forms:
main_form = None main_form = None
if main_form and (main_form != "All Forms" and main_form != "Kantonian Form"): if main_form and (main_form != "All Forms" and main_form != "Kantonian Form" and main_form != "All Sizes"):
continue continue
if raw_game not in game_locations: record_location_info(raw_game, game_locations, raw_location, raw_text)
game_locations[raw_game] = [] else:
info = {} record_location_info(raw_game, game_locations, raw_location, raw_text)
info["location"] = raw_text
info["tag"] = str(raw_location)
game_locations[raw_game].append(info)
else: else:
for raw_location in raw_locations: for raw_location in raw_locations:
main_form, sub_form = parse_form_information(str(raw_location)) forms = parse_form_information(str(raw_location))
if not main_form: for form_info in forms:
continue main_form = form_info["main_form"]
sub_form = form_info["sub_form"]
if main_form == "Kantonian Form": if not main_form:
continue continue
if main_form == "All Forms": if main_form == "All Forms" or main_form == "Kantonian Form" or main_form == "All Sizes":
main_form = form main_form = form
main_form_match = compare_forms(form, main_form) main_form_match = compare_forms(form, main_form)
if not main_form_match: if not main_form_match:
main_form_match = fuzz.partial_ratio(form.lower(), main_form.lower()) >= 80 main_form_match = fuzz.partial_ratio(form.lower(), main_form.lower()) >= 80
sub_form_match = compare_forms(form, sub_form) sub_form_match = compare_forms(form, sub_form)
if not sub_form_match: if not sub_form_match:
sub_form_match = False if not sub_form else fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 80 sub_form_match = False if not sub_form else fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 80
if main_form_match or sub_form_match: if main_form_match or sub_form_match:
raw_text = raw_location.get_text() raw_text = raw_location.get_text()
if raw_game not in game_locations: record_location_info(raw_game, game_locations, raw_location, raw_text)
game_locations[raw_game] = []
info = {}
info["location"] = raw_text
info["tag"] = str(raw_location)
game_locations[raw_game].append(info)
# For Later # For Later
for variant in event_tables: for variant in event_tables:
@ -693,15 +721,18 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager, defau
games_string = entries[0].find('a').get('title') games_string = entries[0].find('a').get('title')
for game in all_games: for game in all_games:
if game in games_string: if game in games_string:
if game not in game_locations: record_location_info(game, game_locations, None, "Event")
game_locations[game] = []
info = {}
info["location"] = "Event"
info["tag"] = None
game_locations[game].append(info)
return game_locations return game_locations
def record_location_info(raw_game, game_locations, raw_location, raw_text):
    """Append one location entry for ``raw_game`` to ``game_locations``.

    Args:
        raw_game: Game name used as the key in ``game_locations``.
        game_locations: Dict mapping a game name to a list of location
            entries. It is mutated in place; the list for ``raw_game`` is
            created on first use.
        raw_location: Source object for the location (stored as its string
            form), or ``None`` when there is no tag, e.g. for "Event"
            entries.
        raw_text: Location text stored under the ``"location"`` key.
    """
    # Keep a missing tag as a real None. str(None) would store the literal
    # text "None", which consumers cannot tell apart from an actual tag.
    tag = None if raw_location is None else str(raw_location)
    game_locations.setdefault(raw_game, []).append(
        {"location": raw_text, "tag": tag}
    )
def split_outside_brackets(str): def split_outside_brackets(str):
return re.split(r',(?![^()]*\))', str) return re.split(r',(?![^()]*\))', str)

114
DataGatherers/update_location_information.py

@ -207,31 +207,63 @@ def extract_additional_information(s):
else: else:
return full_text, details return full_text, details
def build_query_string(table_name, criteria):
    """Build a parameterized ``SELECT *`` query that matches every criterion.

    Args:
        table_name: Name of the table to query. It is interpolated directly
            into the SQL text, so it must come from trusted code and never
            from external input.
        criteria: Mapping of column name to required value. Column names are
            interpolated directly as well (same trust requirement). A value
            of ``None`` matches rows where the column is NULL or the empty
            string, ``""`` matches only the empty string, and any other
            value is bound through a ``?`` placeholder.

    Returns:
        A ``(query, values)`` tuple: the SQL text and the list of values to
        bind to its ``?`` placeholders, in placeholder order. When
        ``criteria`` is empty the query has no WHERE clause.
    """
    conditions = []
    values = []

    for column, value in criteria.items():
        if value is None:
            # Treat NULL and the empty string as the same "no value" state.
            conditions.append(f"({column} IS NULL OR {column} = '')")
        elif value == "":
            conditions.append(f"{column} = ''")
        else:
            conditions.append(f"{column} = ?")
            values.append(value)

    query = f"SELECT * FROM {table_name}"
    # Only emit WHERE when there is something to filter on; a dangling
    # "WHERE " with no conditions is a syntax error.
    if conditions:
        query += " WHERE " + " AND ".join(conditions)
    return query, values
def save_encounter(conn, pfic, game, location, days, times, dual_slot, static_encounter, static_encounter_count, extra_text, stars, rods, fishing, starter): def save_encounter(conn, pfic, game, location, days, times, dual_slot, static_encounter, static_encounter_count, extra_text, stars, rods, fishing, starter):
cursor = conn.cursor() cursor = conn.cursor()
# Convert lists to strings for comparison, except days and times
extra_text_str = ' '.join(extra_text) if extra_text else None extra_text_str = ' '.join(extra_text) if extra_text else None
stars_str = ','.join(sorted(stars)) if stars else None stars_str = ','.join(sorted(stars)) if stars else None
rods_str = ','.join(sorted(rods)) if rods else None rods_str = ','.join(sorted(rods)) if rods else None
insert_query = '''
INSERT INTO encounters
(pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars, rods, fishing, starter)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
'''
criteria = {
"pfic": pfic,
"game": game,
"location": location,
"day": None,
"time": None,
"dual_slot": dual_slot,
"static_encounter": static_encounter,
"static_encounter_count": static_encounter_count,
"extra_text": extra_text_str,
"stars": stars_str,
"rods": rods_str,
"fishing": fishing,
"starter": starter
}
if len(days) > 0: if len(days) > 0:
for day in days: for day in days:
criteria["day"] = day
criteria["time"] = None
query, values = build_query_string("encounters", criteria)
# Check if an identical record already exists # Check if an identical record already exists
cursor.execute(''' cursor.execute(query, values)
SELECT COUNT(*) FROM encounters
WHERE pfic = ? AND game = ? AND location = ? AND day = ? AND time IS NULL encounter = cursor.fetchone()
AND dual_slot = ? AND static_encounter = ? AND static_encounter_count = ?
AND extra_text = ? AND stars = ? AND rods = ? AND fishing = ? AND starter = ? if encounter == None or encounter[0] == 0:
''', (pfic, game, location, day, dual_slot, static_encounter, cursor.execute(insert_query, (pfic, game, location, day, None, dual_slot, static_encounter_count,
static_encounter_count, extra_text_str, stars_str, rods_str, fishing, starter))
if cursor.fetchone()[0] == 0:
cursor.execute('''
INSERT INTO encounters
(pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars, rods, fishing, starter)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (pfic, game, location, day, None, dual_slot, static_encounter_count,
static_encounter, extra_text_str, stars_str, rods_str, fishing, starter)) static_encounter, extra_text_str, stars_str, rods_str, fishing, starter))
print(f"New encounter added for {pfic} in {game} at {location} on {day}") print(f"New encounter added for {pfic} in {game} at {location} on {day}")
else: else:
@ -239,42 +271,32 @@ def save_encounter(conn, pfic, game, location, days, times, dual_slot, static_en
elif len(times) > 0: elif len(times) > 0:
for time in times: for time in times:
criteria["day"] = None
criteria["time"] = time
query, values = build_query_string("encounters", criteria)
# Check if an identical record already exists # Check if an identical record already exists
cursor.execute(''' cursor.execute(query, values)
SELECT COUNT(*) FROM encounters
WHERE pfic = ? AND game = ? AND location = ? AND day IS NULL AND time = ?
AND dual_slot = ? AND static_encounter = ? AND static_encounter_count = ?
AND extra_text = ? AND stars = ? AND rods = ? AND fishing = ? AND starter = ?
''', (pfic, game, location, time, dual_slot, static_encounter,
static_encounter_count, extra_text_str, stars_str, rods_str, fishing, starter))
if cursor.fetchone()[0] == 0: encounter = cursor.fetchone()
cursor.execute('''
INSERT INTO encounters if encounter == None or encounter[0] == 0:
(pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars, rods, fishing, starter) cursor.execute(insert_query, (pfic, game, location, None, time, dual_slot, static_encounter_count,
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (pfic, game, location, None, time, dual_slot, static_encounter_count,
static_encounter, extra_text_str, stars_str, rods_str, fishing, starter)) static_encounter, extra_text_str, stars_str, rods_str, fishing, starter))
print(f"New encounter added for {pfic} in {game} at {location} at {time}") print(f"New encounter added for {pfic} in {game} at {location} at {time}")
else: else:
print(f"Identical encounter already exists for {pfic} in {game} at {location} at {time}") print(f"Identical encounter already exists for {pfic} in {game} at {location} at {time}")
else: else:
criteria["day"] = None
criteria["time"] = None
query, values = build_query_string("encounters", criteria)
# Check if an identical record already exists # Check if an identical record already exists
cursor.execute(''' cursor.execute(query, values)
SELECT COUNT(*) FROM encounters
WHERE pfic = ? AND game = ? AND location = ? AND day IS NULL AND time IS NULL encounter = cursor.fetchone()
AND dual_slot = ? AND static_encounter = ? AND static_encounter_count = ?
AND extra_text = ? AND stars = ? AND rods = ? AND fishing = ? AND starter = ? if encounter == None or encounter[0] == 0:
''', (pfic, game, location, dual_slot, static_encounter, cursor.execute(insert_query, (pfic, game, location, None, None, dual_slot, static_encounter_count,
static_encounter_count, extra_text_str, stars_str, rods_str, fishing, starter))
thing = cursor.fetchone()
if thing[0] == 0:
cursor.execute('''
INSERT INTO encounters
(pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars, rods, fishing, starter)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (pfic, game, location, None, None, dual_slot, static_encounter_count,
static_encounter, extra_text_str, stars_str, rods_str, fishing, starter)) static_encounter, extra_text_str, stars_str, rods_str, fishing, starter))
print(f"New encounter added for {pfic} in {game} at {location}") print(f"New encounter added for {pfic} in {game} at {location}")
else: else:
@ -315,6 +337,9 @@ def process_pokemon_for_location_data(pfic, name, form, national_dex, default_fo
if name == "Alcremie": if name == "Alcremie":
form = None form = None
if name == "Minior":
form = None
if form and form.lower() == "female": if form and form.lower() == "female":
form = None form = None
@ -366,6 +391,9 @@ def process_pokemon_for_location_data(pfic, name, form, national_dex, default_fo
if remaining != "": if remaining != "":
remaining_locations = remaining.replace(" and ", ",").split(",") remaining_locations = remaining.replace(" and ", ",").split(",")
for remaining_location in remaining_locations: for remaining_location in remaining_locations:
if remaining_location.strip() == "":
continue
save_encounter(conn, pfic, encounter, remaining_location.strip(), details["days"], details["times"], details["dual_slot"], details["static_encounter"], details["static_encounter_count"], details["extra_text"], details["stars"], details["Rods"], details["Fishing"], details["starter"] ) save_encounter(conn, pfic, encounter, remaining_location.strip(), details["days"], details["times"], details["dual_slot"], details["static_encounter"], details["static_encounter_count"], details["extra_text"], details["stars"], details["Rods"], details["Fishing"], details["starter"] )
if __name__ == "__main__": if __name__ == "__main__":

Loading…
Cancel
Save