import requests
from bs4 import BeautifulSoup
import csv
import os
import time
import re

# Default network timeout (seconds) so a stalled server cannot hang the script.
REQUEST_TIMEOUT = 10


def scrape_serebii_region_pokemon(url):
    """Scrape one Serebii Pokémon HOME region page.

    Parameters
    ----------
    url : str
        Full URL of a region page (e.g. .../kantopokemon.shtml).

    Returns
    -------
    list[dict]
        One dict per depositable Pokémon with keys 'number', 'name',
        'image_url'. Always a list — empty if the page has no dextable —
        so callers can safely ``extend`` with the result.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()  # fail loudly on 404/500 instead of parsing error HTML
    soup = BeautifulSoup(response.content, 'html.parser')

    pokemon_list = []

    # Find the main table containing Pokémon data.
    table = soup.find('table', class_='dextable')
    if table is None:
        # BUGFIX: the original returned None here, which crashed the caller's
        # list.extend(). Return an empty list instead.
        return pokemon_list

    # Skip the header row and the game intro row.
    for row in table.find_all('tr')[2:]:
        cells = row.find_all('td')
        if len(cells) <= 5:
            # Not enough cells to carry depositability info: with only 5 cells
            # the Pokémon is not depositable in any game — skip it.
            continue

        number = cells[0].text.strip().lstrip('#')
        name = cells[2].text.strip()

        # Get the sprite URL; guard against rows that lack an <img> tag
        # (the original raised TypeError on None['src']).
        img_tag = cells[1].find('img')
        if img_tag is None or not img_tag.get('src'):
            continue
        full_img_url = f"https://www.serebii.net{img_tag['src']}"

        pokemon_list.append({
            'number': number,
            'name': name,
            'image_url': full_img_url,
        })

    return pokemon_list


def download_image(url, filename):
    """Download *url* to *filename*.

    Best-effort by design: a non-200 response is silently skipped so one
    broken sprite does not abort the whole batch.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    if response.status_code == 200:
        with open(filename, 'wb') as f:
            f.write(response.content)


def sanitize_filename(filename):
    """Return *filename* with filesystem-hostile characters replaced.

    Known symbols become readable words (e.g. '?' -> 'questionmark');
    a final regex pass strips anything invalid that slipped through.
    """
    # Dictionary of symbol replacements.
    symbol_replacements = {
        '?': 'questionmark',
        '*': 'asterisk',
        ':': 'colon',
        '/': 'slash',
        '\\': 'backslash',
        '|': 'pipe',
        '<': 'lessthan',
        '>': 'greaterthan',
        '"': 'quote',
        ' ': '_',
    }

    # Replace symbols with their word equivalents.
    for symbol, word in symbol_replacements.items():
        filename = filename.replace(symbol, word)

    # Safety net: remove any remaining invalid characters.
    return re.sub(r'[<>:"/\\|?*]', '', filename)


def scrape_all_regions():
    """Scrape every region page and return the combined Pokémon list."""
    base_url = "https://www.serebii.net/pokemonhome/"
    regions = ["kanto", "johto", "hoenn", "sinnoh", "unova", "kalos",
               "alola", "galar", "paldea", "hisui", "unknown"]

    all_pokemon = []
    for region in regions:
        url = f"{base_url}{region}pokemon.shtml"
        region_pokemon = scrape_serebii_region_pokemon(url)
        all_pokemon.extend(region_pokemon)
        print(f"Scraped {len(region_pokemon)} Pokémon from {region.capitalize()} region")
        time.sleep(1)  # Be nice to the server.

    return all_pokemon


def save_to_csv(pokemon_list, filename='pokemon_home_list.csv'):
    """Write number/name columns of *pokemon_list* to *filename* as CSV."""
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['number', 'name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for pokemon in pokemon_list:
            writer.writerow({k: pokemon[k] for k in fieldnames})


if __name__ == "__main__":
    all_pokemon = scrape_all_regions()
    save_to_csv(all_pokemon)
    print(f"Scraped a total of {len(all_pokemon)} Pokémon and saved to pokemon_home_list.csv")

    # Create 'images' directory if it doesn't exist (exist_ok avoids the
    # check-then-create race of the original).
    os.makedirs('images', exist_ok=True)

    # Download sprite images, skipping ones already on disk.
    for pokemon in all_pokemon:
        sanitized_name = sanitize_filename(pokemon['name'])
        filename = f"images/{pokemon['number']}_{sanitized_name}.png"
        if os.path.exists(filename):
            print(f"Image for {pokemon['name']} already exists, skipping download")
        else:
            download_image(pokemon['image_url'], filename)
            print(f"Downloaded image for {pokemon['name']}")
            time.sleep(0.5)  # Be nice to the server.

    print("All images downloaded successfully.")