Browse Source

location parsing progress

master
Dan 1 year ago
parent
commit
242ddc6184
  1. 5
      DataGatherers/DetermineOriginGame.py
  2. 1
      DataGatherers/cache_manager.py
  3. 184
      DataGatherers/update_location_information.py

5
DataGatherers/DetermineOriginGame.py

@ -658,7 +658,10 @@ def get_locations_from_bulbapedia(pokemon_name, form, cache: CacheManager):
if game in games_string:
if game not in game_locations:
game_locations[game] = []
game_locations[game].append("Event")
info = {}
info["location"] = "Event"
info["tag"] = None
game_locations[game].append(info)
return game_locations

1
DataGatherers/cache_manager.py

@ -54,6 +54,7 @@ class CacheManager:
'content': content,
'timestamp': time.time()
})
time.sleep(1)
return content
return None

184
DataGatherers/update_location_information.py

@ -3,17 +3,31 @@ from cache_manager import CacheManager
from DetermineOriginGame import get_locations_from_bulbapedia
from bs4 import BeautifulSoup, Tag
import re
import time
import unicodedata
def create_encounters_table():
    """Open ``pokemon_forms.db`` and ensure the ``encounters`` table exists.

    The table is created with ``CREATE TABLE IF NOT EXISTS``, so calling
    this against a database that already has the table leaves it untouched.

    Returns:
        sqlite3.Connection: the open connection. The caller owns it and is
        responsible for closing it.

    Raises:
        sqlite3.Error: if the table cannot be created; the connection is
        closed before the error propagates.
    """
    conn = sqlite3.connect('pokemon_forms.db')
    try:
        cursor = conn.cursor()
        # The primary key allows a single row per (pfic, game, location).
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS encounters (
                pfic TEXT,
                game TEXT,
                location TEXT,
                day TEXT,
                time TEXT,
                dual_slot TEXT,
                static_encounter_count INTEGER,
                static_encounter BOOLEAN,
                only_two BOOLEAN,
                extra_text TEXT,
                stars TEXT,
                fishing BOOLEAN,
                fishing_rod_needed TEXT,
                PRIMARY KEY (pfic, game, location)
            )
        ''')
        conn.commit()
    except sqlite3.Error:
        # Do not leak the handle when setup fails.
        conn.close()
        raise
    return conn
def extract_routes(s):
@ -34,6 +48,7 @@ def extract_routes(s):
days = ["Mo", "Tu", "We", "Th", "Fr", "Sa", "Su"]
times = ["Morning", "Day", "Night"]
rods = ["Old Rod", "Good Rod", "Super Rod"]
all_games = [
"Yellow", "Red", "Blue",
@ -53,36 +68,62 @@ all_games = [
]
def find_match(search_string, string_array):
    """Return the first item of *string_array* equal to *search_string*.

    The comparison ignores case and requires the whole string to match, so
    a short token such as "Mo" does not match "Morning".

    Args:
        search_string: the text to look for.
        string_array: iterable of candidate strings.

    Returns:
        The matching item exactly as it appears in *string_array*, or
        ``None`` when no item matches.
    """
    wanted = search_string.lower()
    return next((item for item in string_array if item.lower() == wanted), None)
def find_all_matches_from_array(string, array):
    """Return every item of *array* that equals *string*, ignoring case.

    Args:
        string: the text to compare against.
        array: iterable of candidate strings.

    Returns:
        list: the matching items, in their original order and spelling;
        empty when nothing matches.
    """
    wanted = string.lower()
    return [item for item in array if item.lower() == wanted]
# This pattern matches text within parentheses, including nested parentheses
pattern = r'\((?:[^()]*|\([^()]*\))*\)'
in_brackets = re.compile(pattern)
def extract_bracketed_text(string, timeout=1):
    """Return the text inside each top-level pair of parentheses.

    The string is scanned once, left to right, tracking the positions of
    currently open parentheses. Nested groups stay verbatim inside their
    enclosing group, so ``"a (b (c)) (d)"`` yields ``["b (c)", "d"]``.

    Args:
        string: the text to scan.
        timeout: accepted for backward compatibility with existing
            callers; the scan never consults it.

    Returns:
        list[str]: the contents of each top-level group, without the outer
        parentheses. For unbalanced input, a stray ``)`` is reported and
        ignored, and every unclosed ``(`` contributes the text from just
        after it to the end of the string.
    """
    results = []
    open_positions = []

    for i, char in enumerate(string):
        if char == '(':
            open_positions.append(i)
        elif char == ')':
            if not open_positions:
                print(f"Warning: Unmatched closing parenthesis at position {i}")
                continue
            start = open_positions.pop()
            # Only the outermost group is recorded; an emptied stack means
            # the parenthesis just popped was the one that opened it.
            if not open_positions:
                results.append(string[start + 1:i])

    # Handle any remaining unclosed brackets
    if open_positions:
        print(f"Warning: {len(open_positions)} unmatched opening parentheses")
        results.extend(string[pos + 1:] for pos in open_positions)

    return results
def extract_additional_information(s):
details = {}
details["days"] = []
details["times"] = []
details["dual_slot"] = None
details["only_one"] = False
details["static_encounter_count"] = 0
details["static_encounter"] = False
details["only_two"] = False
details["extra_text"] = None
details["stars"] = None
details["extra_text"] = []
details["stars"] = []
details["Fishing"] = False
details["Rods"] = []
if s is None:
return "", details
soup = BeautifulSoup(s, 'html.parser')
full_text = soup.get_text()
sup_tags = soup.find_all('sup')
sup_text = None
for sup_tag in sup_tags:
sup_text = sup_tag.get_text(strip=True)
@ -93,38 +134,79 @@ def extract_additional_information(s):
if find_match(sup_text, times):
details["times"].append(sup_text)
bracket_text = extract_bracketed_text(full_text)
if len(bracket_text) > 0:
if bracket_text[0] in all_games:
details["dual_slot"] = bracket_text[0]
bracket_text = extract_bracketed_text(full_text, 2)
if "Only One" in bracket_text:
details["only_one"] = True
details["static_encounter"] = True
for text in bracket_text:
text = text.strip()
text_lower = text.lower()
if text_lower in all_games:
details["dual_slot"] = text
if "Only Two" in bracket_text:
details["only_two"] = True
if "only one" in text_lower:
details["static_encounter_count"] = 1
details["static_encounter"] = True
text = re.sub(r'only one', '', text_lower, flags=re.IGNORECASE).strip()
elif "only two" in text_lower:
details["static_encounter_count"] = 2
details["static_encounter"] = True
text = re.sub(r'only two', '', text_lower, flags=re.IGNORECASE).strip()
#elif "rod" in text_lower:
# details["static_encounter_count"] = 2
# details["static_encounter"] = True
# text = re.sub(r'only two', '', text_lower, flags=re.IGNORECASE).strip()
if "" in bracket_text:
details["stars"] = bracket_text
if "" in text:
star_parts = re.findall(r'\d★,*', text)
for part in star_parts:
details["stars"].append(part.replace(',', '').strip())
text = re.sub(r'\d★,*', '', text).strip()
details["extra_text"] = " ".join(bracket_text)
if text:
details["extra_text"].append(text)
if sup_text:
return full_text.replace(sup_text, ""), details
else:
return full_text, details
def save_encounter(conn, pfic, game, location, days, times, dual_slot, static_encounter, static_encounter_count, extra_text, stars):
    """Insert or replace the encounter rows for one Pokémon at one location.

    One row is written per entry in *days*; if *days* is empty, one row per
    entry in *times*; if both are empty, a single row with neither set.
    When both are non-empty only *days* is used and *times* is ignored.

    Args:
        conn: open SQLite connection holding the ``encounters`` table.
        pfic: form identifier stored in the ``pfic`` column.
        game: game name stored in the ``game`` column.
        location: location name stored in the ``location`` column.
        days: list of day labels.
        times: list of time-of-day labels.
        dual_slot: value for the ``dual_slot`` column (may be ``None``).
        static_encounter: value for the ``static_encounter`` column.
        static_encounter_count: value for ``static_encounter_count``.
        extra_text: list of strings, stored joined by single spaces.
        stars: list of strings, stored joined by commas.
    """
    # NOTE(review): the CREATE TABLE in this file keys the table on
    # (pfic, game, location), so with INSERT OR REPLACE each row written
    # here for a further day/time replaces the previous one -- confirm
    # whether day/time are meant to be part of the key.
    if days:
        slots = [(day, None) for day in days]
    elif times:
        slots = [(None, time_of_day) for time_of_day in times]
    else:
        slots = [(None, None)]

    extra = ' '.join(extra_text)
    star_text = ','.join(stars)

    cursor = conn.cursor()
    cursor.executemany('''
        INSERT OR REPLACE INTO encounters
        (pfic, game, location, day, time, dual_slot, static_encounter_count, static_encounter, extra_text, stars)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', [
        (pfic, game, location, day, time_of_day, dual_slot,
         static_encounter_count, static_encounter, extra, star_text)
        for day, time_of_day in slots
    ])
    conn.commit()
if __name__ == "__main__":
cache = CacheManager()
conn = create_encounters_table()
cursor = conn.cursor()
cursor.execute('SELECT DISTINCT name, form_name FROM pokemon_forms')
cursor.execute('''
SELECT pf.PFIC, pf.name, pf.form_name, pf.national_dex
FROM pokemon_forms pf
ORDER BY pf.national_dex, pf.form_name
''')
pokemon_forms = cursor.fetchall()
for name, form in pokemon_forms:
for pfic, name, form, national_dex in pokemon_forms:
print(f"Processing {name} {form if form else ''}")
if form and name in form:
@ -138,6 +220,9 @@ if __name__ == "__main__":
encounters_to_ignore = ["trade", "time capsule", "unobtainable", "evolve", "tradeversion", "poké transfer", "friend safari"]
encounter_data = get_locations_from_bulbapedia(name, form, cache)
if encounter_data == None:
continue
for encounter in encounter_data:
if len(encounter_data[encounter]) == 0:
continue
@ -162,8 +247,21 @@ if __name__ == "__main__":
print(f"Found in {encounter}:")
print_encounter = False
routes, remaining = extract_routes(location["location"].strip())
remaining, details = extract_additional_information(location["tag"])
routes, remaining = extract_routes(remaining)
print(f"Routes: {routes}")
remaining_locations, details = extract_additional_information(location["tag"])
print(f"Remaining: {remaining_locations}")
print(f"Remaining: {remaining.strip()}")
print(f"Details: {details}")
if len(details["days"]) > 0 and len(details["times"]) > 0:
print("Stupid Data")
for route in routes:
route_name = f"Route {route}"
save_encounter(conn, pfic, encounter, route_name, details["days"], details["times"], details["dual_slot"], details["static_encounter"], details["static_encounter_count"], details["extra_text"], details["stars"])
if remaining != "":
remaining_locations = remaining.replace(" and ", ",").split(",")
for remaining_location in remaining_locations:
save_encounter(conn, pfic, encounter, remaining_location.strip(), details["days"], details["times"], details["dual_slot"], details["static_encounter"], details["static_encounter_count"], details["extra_text"], details["stars"])
conn.close()

Loading…
Cancel
Save