Browse Source

- Fixes for the more annoying pokemon

master
Dan 1 year ago
parent
commit
13f3e1c6a6
  1. 2
      DataGatherers/DefaultForms.json
  2. 121
      DataGatherers/DetermineOriginGame.py
  3. 114
      DataGatherers/update_location_information.py

2
DataGatherers/DefaultForms.json

@ -20,7 +20,6 @@
"Full Belly Mode",
"Zero Form",
"Curly Form",
"Chest Form",
"Apex Build",
"Ultimate Mode",
"Teal Mask",
@ -42,7 +41,6 @@
"Amped Form",
"Vanilla Cream Strawberry Sweet",
"Single Strike Style",
"Family of Three",
"Green Plumage",
"Two-Segment Form",
"Standard Form"

121
DataGatherers/DetermineOriginGame.py

@ -374,11 +374,13 @@ def split_td_contents(td):
current_group = []
for content in td.contents:
if isinstance(content, Tag) and content.name == 'br':
if isinstance(content, Tag) and (content.name == 'br' or content.name == 'p'):
if current_group:
groups.append(BeautifulSoup('', 'html.parser').new_tag('div'))
for item in current_group:
groups[-1].append(copy.copy(item))
if content.name == 'p':
groups[-1].append(copy.copy(content))
current_group = []
else:
current_group.append(content)
@ -394,13 +396,28 @@ def parse_form_information(html_content):
soup = BeautifulSoup(html_content, 'html.parser')
small_tag = soup.find('small')
forms = []
# Form info is in bold inside a small tag.
if small_tag:
bold_tag = small_tag.find('b')
if bold_tag:
bold_tags = small_tag.find_all('b')
for bold_tag in bold_tags:
form_text = bold_tag.get_text(strip=True)
# Remove parentheses
form_text = form_text.strip('()')
if "/" in form_text:
last_word = form_text.split()[-1]
form_text = form_text.replace(last_word, "").strip()
parts = form_text.split('/')
for part in parts:
main_form = part.strip() + " " + last_word
info = {
"main_form": main_form,
"sub_form": None
}
forms.append(info)
continue
# Split the text into main form and breed (if present)
parts = form_text.split('(')
@ -408,13 +425,28 @@ def parse_form_information(html_content):
# "Factor"s are not actual forms, they are properties of the pokemon you can encounter.
if main_form and "factor" in main_form.lower():
return None, None
continue
breed = parts[1].strip(')') if len(parts) > 1 else None
return main_form, breed
return None, None
info = {
"main_form": main_form,
"sub_form": breed
}
forms.append(info)
else: #..... Gimmighoul
headings = soup.find_all('b')
if len(headings) > 0:
for heading in headings:
main_form = heading.get_text(strip=True)
info = {
"main_form": main_form,
"sub_form": None
}
forms.append(info)
return forms
def get_evolution_data_from_bulbapedia(pokemon_name, form, cache: CacheManager, gender: Optional[str] = None):
page_data = get_pokemon_data_bulbapedia(pokemon_name, cache)
@ -640,48 +672,44 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager, defau
if form is None:
for raw_location in raw_locations:
raw_text = raw_location.get_text()
main_form, sub_form = parse_form_information(str(raw_location))
forms = parse_form_information(str(raw_location))
if len(forms) > 0:
for form_info in forms:
main_form = form_info["main_form"]
if default_forms and main_form and main_form in default_forms:
main_form = None
if default_forms and main_form and main_form in default_forms:
main_form = None
if main_form and (main_form != "All Forms" and main_form != "Kantonian Form"):
continue
if main_form and (main_form != "All Forms" and main_form != "Kantonian Form" and main_form != "All Sizes"):
continue
if raw_game not in game_locations:
game_locations[raw_game] = []
info = {}
info["location"] = raw_text
info["tag"] = str(raw_location)
game_locations[raw_game].append(info)
record_location_info(raw_game, game_locations, raw_location, raw_text)
else:
record_location_info(raw_game, game_locations, raw_location, raw_text)
else:
for raw_location in raw_locations:
main_form, sub_form = parse_form_information(str(raw_location))
if not main_form:
continue
forms = parse_form_information(str(raw_location))
for form_info in forms:
main_form = form_info["main_form"]
sub_form = form_info["sub_form"]
if main_form == "Kantonian Form":
continue
if not main_form:
continue
if main_form == "All Forms":
main_form = form
if main_form == "All Forms" or main_form == "Kantonian Form" or main_form == "All Sizes":
main_form = form
main_form_match = compare_forms(form, main_form)
if not main_form_match:
main_form_match = fuzz.partial_ratio(form.lower(), main_form.lower()) >= 80
main_form_match = compare_forms(form, main_form)
if not main_form_match:
main_form_match = fuzz.partial_ratio(form.lower(), main_form.lower()) >= 80
sub_form_match = compare_forms(form, sub_form)
if not sub_form_match:
sub_form_match = False if not sub_form else fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 80
sub_form_match = compare_forms(form, sub_form)
if not sub_form_match:
sub_form_match = False if not sub_form else fuzz.partial_ratio(form.lower(), sub_form.lower()) >= 80
if main_form_match or sub_form_match:
raw_text = raw_location.get_text()
if raw_game not in game_locations:
game_locations[raw_game] = []
info = {}
info["location"] = raw_text
info["tag"] = str(raw_location)
game_locations[raw_game].append(info)
if main_form_match or sub_form_match:
raw_text = raw_location.get_text()
record_location_info(raw_game, game_locations, raw_location, raw_text)
# For Later
for variant in event_tables:
@ -693,15 +721,18 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager, defau
games_string = entries[0].find('a').get('title')
for game in all_games:
if game in games_string:
if game not in game_locations:
game_locations[game] = []
info = {}
info["location"] = "Event"
info["tag"] = None
game_locations[game].append(info)
record_location_info(game, game_locations, None, "Event")
return game_locations
def record_location_info(raw_game, game_locations, raw_location, raw_text):
    """Append one location entry for ``raw_game`` to ``game_locations``.

    Args:
        raw_game: Key in ``game_locations`` the entry is filed under.
        game_locations: Dict mapping a game to its list of entries; it is
            mutated in place, and the list is created on first use.
        raw_location: Object the entry came from; its ``str()`` form is
            stored under ``"tag"``. May be ``None`` when there is no
            source object (the event-table caller passes ``None``).
        raw_text: Text stored under ``"location"``.
    """
    # Keep a missing source as a real None instead of the literal string
    # "None", so consumers can still test the tag for absence.
    tag = None if raw_location is None else str(raw_location)
    game_locations.setdefault(raw_game, []).append(
        {"location": raw_text, "tag": tag}
    )
def split_outside_brackets(str):
    """Split the input on commas that are not inside a bracketed run.

    A comma is treated as a separator unless the text following it reaches
    a ")" before meeting any other parenthesis.
    """
    comma_outside_brackets = re.compile(r',(?![^()]*\))')
    return comma_outside_brackets.split(str)

114
DataGatherers/update_location_information.py

@ -207,31 +207,63 @@ def extract_additional_information(s):
else:
return full_text, details
def build_query_string(table_name, criteria):
    """Build a parameterized ``SELECT *`` statement matching ``criteria``.

    Args:
        table_name: Table to select from. It is interpolated directly into
            the SQL text, so it must come from trusted code, not user input.
        criteria: Mapping of column name to required value. Column names
            are likewise interpolated directly and must be trusted.
            ``None`` matches rows where the column is NULL or the empty
            string, ``""`` matches only the empty string, and any other
            value is bound through a ``?`` placeholder.

    Returns:
        A ``(query, values)`` tuple: the SQL text and the list of values
        to bind to its placeholders, in order.
    """
    conditions = []
    values = []

    for column, value in criteria.items():
        if value is None:
            conditions.append(f"({column} IS NULL OR {column} = '')")
        elif value == "":
            conditions.append(f"{column} = ''")
        else:
            conditions.append(f"{column} = ?")
            values.append(value)

    query = f"SELECT * FROM {table_name}"
    # Only emit WHERE when there is something to filter on; a dangling
    # "WHERE " with no conditions is a syntax error.
    if conditions:
        query += " WHERE " + " AND ".join(conditions)
    return query, values
def save_encounter(conn, pfic, game, location, days, times, dual_slot, static_encounter, static_encounter_count, extra_text, stars, rods, fishing, starter):
cursor = conn.cursor()
# Convert lists to strings for comparison, except days and times
extra_text_str = ' '.join(extra_text) if extra_text else None
stars_str = ','.join(sorted(stars)) if stars else None
rods_str = ','.join(sorted(rods)) if rods else None
insert_query = '''
INSERT INTO encounters
(pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars, rods, fishing, starter)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
'''
criteria = {
"pfic": pfic,
"game": game,
"location": location,
"day": None,
"time": None,
"dual_slot": dual_slot,
"static_encounter": static_encounter,
"static_encounter_count": static_encounter_count,
"extra_text": extra_text_str,
"stars": stars_str,
"rods": rods_str,
"fishing": fishing,
"starter": starter
}
if len(days) > 0:
for day in days:
criteria["day"] = day
criteria["time"] = None
query, values = build_query_string("encounters", criteria)
# Check if an identical record already exists
cursor.execute('''
SELECT COUNT(*) FROM encounters
WHERE pfic = ? AND game = ? AND location = ? AND day = ? AND time IS NULL
AND dual_slot = ? AND static_encounter = ? AND static_encounter_count = ?
AND extra_text = ? AND stars = ? AND rods = ? AND fishing = ? AND starter = ?
''', (pfic, game, location, day, dual_slot, static_encounter,
static_encounter_count, extra_text_str, stars_str, rods_str, fishing, starter))
if cursor.fetchone()[0] == 0:
cursor.execute('''
INSERT INTO encounters
(pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars, rods, fishing, starter)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (pfic, game, location, day, None, dual_slot, static_encounter_count,
cursor.execute(query, values)
encounter = cursor.fetchone()
if encounter == None or encounter[0] == 0:
cursor.execute(insert_query, (pfic, game, location, day, None, dual_slot, static_encounter_count,
static_encounter, extra_text_str, stars_str, rods_str, fishing, starter))
print(f"New encounter added for {pfic} in {game} at {location} on {day}")
else:
@ -239,42 +271,32 @@ def save_encounter(conn, pfic, game, location, days, times, dual_slot, static_en
elif len(times) > 0:
for time in times:
criteria["day"] = None
criteria["time"] = time
query, values = build_query_string("encounters", criteria)
# Check if an identical record already exists
cursor.execute('''
SELECT COUNT(*) FROM encounters
WHERE pfic = ? AND game = ? AND location = ? AND day IS NULL AND time = ?
AND dual_slot = ? AND static_encounter = ? AND static_encounter_count = ?
AND extra_text = ? AND stars = ? AND rods = ? AND fishing = ? AND starter = ?
''', (pfic, game, location, time, dual_slot, static_encounter,
static_encounter_count, extra_text_str, stars_str, rods_str, fishing, starter))
cursor.execute(query, values)
if cursor.fetchone()[0] == 0:
cursor.execute('''
INSERT INTO encounters
(pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars, rods, fishing, starter)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (pfic, game, location, None, time, dual_slot, static_encounter_count,
encounter = cursor.fetchone()
if encounter == None or encounter[0] == 0:
cursor.execute(insert_query, (pfic, game, location, None, time, dual_slot, static_encounter_count,
static_encounter, extra_text_str, stars_str, rods_str, fishing, starter))
print(f"New encounter added for {pfic} in {game} at {location} at {time}")
else:
print(f"Identical encounter already exists for {pfic} in {game} at {location} at {time}")
else:
criteria["day"] = None
criteria["time"] = None
query, values = build_query_string("encounters", criteria)
# Check if an identical record already exists
cursor.execute('''
SELECT COUNT(*) FROM encounters
WHERE pfic = ? AND game = ? AND location = ? AND day IS NULL AND time IS NULL
AND dual_slot = ? AND static_encounter = ? AND static_encounter_count = ?
AND extra_text = ? AND stars = ? AND rods = ? AND fishing = ? AND starter = ?
''', (pfic, game, location, dual_slot, static_encounter,
static_encounter_count, extra_text_str, stars_str, rods_str, fishing, starter))
thing = cursor.fetchone()
if thing[0] == 0:
cursor.execute('''
INSERT INTO encounters
(pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars, rods, fishing, starter)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (pfic, game, location, None, None, dual_slot, static_encounter_count,
cursor.execute(query, values)
encounter = cursor.fetchone()
if encounter == None or encounter[0] == 0:
cursor.execute(insert_query, (pfic, game, location, None, None, dual_slot, static_encounter_count,
static_encounter, extra_text_str, stars_str, rods_str, fishing, starter))
print(f"New encounter added for {pfic} in {game} at {location}")
else:
@ -315,6 +337,9 @@ def process_pokemon_for_location_data(pfic, name, form, national_dex, default_fo
if name == "Alcremie":
form = None
if name == "Minior":
form = None
if form and form.lower() == "female":
form = None
@ -366,6 +391,9 @@ def process_pokemon_for_location_data(pfic, name, form, national_dex, default_fo
if remaining != "":
remaining_locations = remaining.replace(" and ", ",").split(",")
for remaining_location in remaining_locations:
if remaining_location.strip() == "":
continue
save_encounter(conn, pfic, encounter, remaining_location.strip(), details["days"], details["times"], details["dual_slot"], details["static_encounter"], details["static_encounter_count"], details["extra_text"], details["stars"], details["Rods"], details["Fishing"], details["starter"] )
if __name__ == "__main__":

Loading…
Cancel
Save