import requests
from bs4 import BeautifulSoup
import csv
import os
import time
import re

# Default network timeout (seconds) so a stalled server cannot hang the script.
REQUEST_TIMEOUT = 10


def scrape_serebii_region_pokemon(url):
    """Scrape one Serebii Pokémon HOME region page.

    Parameters
    ----------
    url : str
        Full URL of a region page (e.g. .../kantopokemon.shtml).

    Returns
    -------
    list[dict]
        One dict per depositable Pokémon with keys 'number', 'name',
        'image_url'. Always a list — empty if the page has no dextable —
        so callers can safely ``extend`` with the result.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()  # fail loudly on 404/500 instead of parsing error HTML
    soup = BeautifulSoup(response.content, 'html.parser')

    pokemon_list = []

    # Find the main table containing Pokémon data.
    table = soup.find('table', class_='dextable')
    if table is None:
        # BUGFIX: the original returned None here, which crashed the caller's
        # list.extend(). Return an empty list instead.
        return pokemon_list

    # Skip the header row and the game intro row.
    for row in table.find_all('tr')[2:]:
        cells = row.find_all('td')
        if len(cells) <= 5:
            # Not enough cells to carry depositability info: with only 5 cells
            # the Pokémon is not depositable in any game — skip it.
            continue

        number = cells[0].text.strip().lstrip('#')
        name = cells[2].text.strip()

        # Get the sprite URL; guard against rows that lack an <img> tag
        # (the original raised TypeError on None['src']).
        img_tag = cells[1].find('img')
        if img_tag is None or not img_tag.get('src'):
            continue
        full_img_url = f"https://www.serebii.net{img_tag['src']}"

        pokemon_list.append({
            'number': number,
            'name': name,
            'image_url': full_img_url,
        })

    return pokemon_list


def download_image(url, filename):
    """Download *url* to *filename*.

    Best-effort by design: a non-200 response is silently skipped so one
    broken sprite does not abort the whole batch.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    if response.status_code == 200:
        with open(filename, 'wb') as f:
            f.write(response.content)


def sanitize_filename(filename):
    """Return *filename* with filesystem-hostile characters replaced.

    Known symbols become readable words (e.g. '?' -> 'questionmark');
    a final regex pass strips anything invalid that slipped through.
    """
    # Dictionary of symbol replacements.
    symbol_replacements = {
        '?': 'questionmark',
        '*': 'asterisk',
        ':': 'colon',
        '/': 'slash',
        '\\': 'backslash',
        '|': 'pipe',
        '<': 'lessthan',
        '>': 'greaterthan',
        '"': 'quote',
        ' ': '_',
    }

    # Replace symbols with their word equivalents.
    for symbol, word in symbol_replacements.items():
        filename = filename.replace(symbol, word)

    # Safety net: remove any remaining invalid characters.
    return re.sub(r'[<>:"/\\|?*]', '', filename)


def scrape_all_regions():
    """Scrape every region page and return the combined Pokémon list."""
    base_url = "https://www.serebii.net/pokemonhome/"
    regions = ["kanto", "johto", "hoenn", "sinnoh", "unova", "kalos",
               "alola", "galar", "paldea", "hisui", "unknown"]

    all_pokemon = []
    for region in regions:
        url = f"{base_url}{region}pokemon.shtml"
        region_pokemon = scrape_serebii_region_pokemon(url)
        all_pokemon.extend(region_pokemon)
        print(f"Scraped {len(region_pokemon)} Pokémon from {region.capitalize()} region")
        time.sleep(1)  # Be nice to the server.

    return all_pokemon


def save_to_csv(pokemon_list, filename='pokemon_home_list.csv'):
    """Write number/name columns of *pokemon_list* to *filename* as CSV."""
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['number', 'name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for pokemon in pokemon_list:
            writer.writerow({k: pokemon[k] for k in fieldnames})


if __name__ == "__main__":
    all_pokemon = scrape_all_regions()
    save_to_csv(all_pokemon)
    print(f"Scraped a total of {len(all_pokemon)} Pokémon and saved to pokemon_home_list.csv")

    # Create 'images' directory if it doesn't exist (exist_ok avoids the
    # check-then-create race of the original).
    os.makedirs('images', exist_ok=True)

    # Download sprite images, skipping ones already on disk.
    for pokemon in all_pokemon:
        sanitized_name = sanitize_filename(pokemon['name'])
        filename = f"images/{pokemon['number']}_{sanitized_name}.png"
        if os.path.exists(filename):
            print(f"Image for {pokemon['name']} already exists, skipping download")
        else:
            download_image(pokemon['image_url'], filename)
            print(f"Downloaded image for {pokemon['name']}")
            time.sleep(0.5)  # Be nice to the server.

    print("All images downloaded successfully.")