from config import input_files_dir, country_codes
import os
import requests
from bs4 import BeautifulSoup

def list_igra_files(base_url, input_files_dir):
    """
    List the IGRA archive files whose names start with a country code.

    Args:
        base_url (str): URL of the IGRA archive directory listing.
        input_files_dir (str): Despite its name, this is the country-code
            prefix to match (e.g., 'US'); callers pass the country code in
            this position. The parameter name is kept so that existing
            keyword callers continue to work.

    Returns:
        list: URLs (base URL + href) of every link in the listing whose
            href starts with the given prefix.

    Raises:
        ConnectionError: If the listing request does not return HTTP 200.
    """
    # Bind the prefix from the argument. The body used to read a name called
    # ``country_code`` that was not a parameter, so it only worked when a
    # module-level global of that name happened to exist.
    country_code = input_files_dir

    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(base_url, timeout=60)
    if response.status_code != 200:
        raise ConnectionError(f"Failed to access {base_url}. Status code: {response.status_code}")

    # Hrefs in the listing are relative names, so the base must end with '/'
    # for plain concatenation to produce a valid URL.
    url_prefix = base_url if base_url.endswith('/') else base_url + '/'

    soup = BeautifulSoup(response.text, 'html.parser')
    files = []
    for link in soup.find_all('a'):
        # Anchors without an href yield None; treat them as non-matching.
        href = link.get('href') or ''
        if href.startswith(country_code):
            files.append(url_prefix + href)
    return files


def download_igra_files(files, input_files_dir):
    """
    Download IGRA files into a local folder, skipping files already present.

    Each file is first written to ``<name>.part`` and only renamed to its
    final name after the whole body has been received, so an interrupted
    download never leaves a truncated file that a later run would skip as
    "already exists".

    Args:
        files (list): List of file URLs to download.
        input_files_dir (str): Folder where the files will be saved; it is
            created if it does not exist.

    Returns:
        None. Non-200 responses are reported on stdout and the file is
        skipped; exceptions raised while requesting or writing propagate
        to the caller.
    """
    # exist_ok avoids the check-then-create race; an empty path means the
    # current directory, which os.makedirs cannot create.
    if input_files_dir:
        os.makedirs(input_files_dir, exist_ok=True)

    for file_url in files:
        file_name = os.path.basename(file_url)
        output_path = os.path.join(input_files_dir, file_name)

        if os.path.exists(output_path):
            print(f"File already exists: {output_path}. Skipping.")
            continue

        print(f"Downloading {file_url}...")
        partial_path = output_path + ".part"
        # The context manager releases the streamed connection even when the
        # body is never read (non-200) or writing fails part-way through.
        with requests.get(file_url, stream=True, timeout=60) as response:
            if response.status_code != 200:
                print(f"Failed to download {file_url}. Status code: {response.status_code}")
                continue
            try:
                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        f.write(chunk)
                # Promote the finished file to its final name in one step.
                os.replace(partial_path, output_path)
            finally:
                # Only present here if the download did not complete.
                if os.path.exists(partial_path):
                    os.remove(partial_path)
        print(f"Downloaded: {output_path}")

def main(country_code, input_files_dir):
    """
    Fetch every IGRA file for one country code into a local folder.

    The archive listing is queried for the country code and each match is
    handed to the downloader. Any exception raised along the way is caught
    and reported on stdout rather than propagated.

    Args:
        country_code (str): Country code (e.g., 'US').
        input_files_dir (str): Folder to save downloaded files.
    """
    archive_url = "https://www.ncei.noaa.gov/pub/data/igra/data/data-por/"
    try:
        matches = list_igra_files(archive_url, country_code)
        if matches:
            print(f"Found {len(matches)} files for country code {country_code}.")
            download_igra_files(matches, input_files_dir)
        else:
            print(f"No files found for country code: {country_code}")
    except Exception as err:
        print(f"An error occurred: {err}")

if __name__ == "__main__":
    # Run the downloader once per entry in country_codes, saving into
    # input_files_dir; both values are imported from the config module.
    for country_code in country_codes:
        main(country_code, input_files_dir)
