import os
import pandas as pd
from config import year, output_files_dir, input_files_dir

def load_station_metadata(metadata_file):
    """
    Load IGRA station metadata, including elevation.

    The station list is read as fixed-width text: the station ID is taken
    from characters 0-10 of each line and the elevation from characters
    31-36. Lines without a station ID (for example blank lines) and lines
    whose elevation field is not a number are skipped, so a trailing empty
    line or a truncated record does not abort the whole load.

    Args:
        metadata_file (str): Path to the IGRA station metadata file.

    Returns:
        dict: Dictionary with station IDs as keys and elevation (meters) as
            values. The elevation is truncated toward zero to an ``int``.
    """
    station_data = {}

    with open(metadata_file, 'r') as file:
        for line in file:
            station_id = line[:11].strip()
            if not station_id:
                # Blank or whitespace-only line: nothing to record.
                continue

            try:
                # float() tolerates the padding of the fixed-width field;
                # int() then drops the fractional part.
                elevation = int(float(line[31:37]))
            except (ValueError, OverflowError):
                # Empty, non-numeric or non-finite elevation field.
                continue

            station_data[station_id] = elevation

    return station_data


def _igra_int(field):
    """Return a fixed-width IGRA field as an int, or None when it holds no usable value."""
    text = field.strip()
    # -9999 marks a missing value and -8888 a value removed by IGRA quality
    # assurance; neither is a measurement.
    if text in ("", "-9999", "-8888"):
        return None
    try:
        return int(text)
    except ValueError:
        return None


def _fsl_text(value, missing="99999"):
    """Return the value as text, substituting the missing marker for None."""
    return missing if value is None else str(value)


def parse_igra(file_path, year, site_id):
    """
    Parse IGRA sounding data into a structured format.

    The file is read as fixed-width text. Lines starting with ``#`` open a
    new sounding; every following line of at least 50 characters is a data
    level of that sounding. Soundings without any data level are dropped.

    Args:
        file_path (str): Path to the IGRA data file.
        year: Year of the data required. ``None`` keeps every year.
        site_id: Station to keep, given either as the full IGRA station ID
            or as the short ID derived from it. ``None`` keeps every station.

    Returns:
        list[dict]: List of header dictionaries, one for each sounding.
        list[pd.DataFrame]: List of DataFrames, one for each sounding's data.
            All cells are strings. Pressure is in tenths of hPa (the IGRA
            value in Pa divided by 10), Dewpoint is temperature minus the
            IGRA dew point depression, and missing values are written as
            99999 (9999 for Height).

    Raises:
        ValueError: If a header line has a non-numeric day, hour, latitude
            or longitude field.
    """
    columns = ["Pressure", "Height", "Temperature", "Dewpoint", "Wind_Dir", "Wind_Speed"]

    # None means "no filter"; str(None) must never be compared to the data.
    wanted_year = None if year is None else str(year).strip()
    wanted_site = None if site_id is None else str(site_id).strip()

    headers = []
    dataframes = []
    current_data = []
    current_header = None

    with open(file_path, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()  # Remove leading and trailing whitespaces

            if not line:
                # Skip empty lines
                continue

            if line[0] == "#":  # Header line
                if current_header and current_data:
                    # Save the previous sounding if there's existing data
                    dataframes.append(pd.DataFrame(current_data, columns=columns))
                    headers.append(current_header)
                current_data = []

                # Parse a new header
                current_header = {
                    "rawID": line[1:12].strip(),
                    "YEAR": line[13:17],
                    "MONTH": line[18:20].strip(),
                    "DAY": line[21:23].strip(),
                    "HOUR": line[24:26].strip(),
                    # NUMLEV is four characters wide; starting one character
                    # later would truncate counts of 1000 levels or more.
                    "LEVELS": line[32:36].strip(),
                    # The last digit of LAT/LON is deliberately left out, so
                    # the values are in thousandths of a degree.
                    "LAT": float(line[55:61].strip()) if len(line) > 61 else None,
                    "LON": float(line[63:70].strip()) if len(line) > 70 else None,
                }

                # Derive the short ID: the last five characters padded with a
                # space when the sixth-from-last character is '0', otherwise
                # the last six characters.
                raw_id = current_header['rawID']
                if len(raw_id) > 5 and raw_id[-6] == '0':
                    current_header['ID'] = " " + raw_id[-5:]
                else:
                    current_header['ID'] = raw_id[-6:]

                # Ensure day and hour are correctly formatted
                if int(current_header['DAY']) < 10:
                    current_header['DAY'] = " " + current_header['DAY']
                if int(current_header['HOUR']) < 10:
                    current_header['HOUR'] = " " + current_header['HOUR']

                # Skip if year or site_id is specified and doesn't match
                year_ok = wanted_year is None or current_header['YEAR'].strip() == wanted_year
                site_ok = wanted_site is None or wanted_site in (raw_id, current_header['ID'].strip())
                if not (year_ok and site_ok):
                    current_header = None
                continue

            # Data line
            if current_header is None:
                # Belongs to a sounding that was filtered out (or precedes
                # the first header), so there is nothing to attach it to.
                continue
            if len(line) < 50:
                # Skip lines that are too short to be valid data lines
                continue

            # The pressure field is six characters wide; reading all of it is
            # what lets the -9999 missing flag be recognised.
            pressure = _igra_int(line[9:15])
            height = _igra_int(line[16:21])
            temp = _igra_int(line[22:27])
            depression = _igra_int(line[34:39])
            wdir = _igra_int(line[40:45])
            wspd = _igra_int(line[46:51])

            # IGRA stores dew point depression, not dew point.
            dewpt = None if temp is None or depression is None else temp - depression

            current_data.append([
                # Pa -> tenths of hPa
                _fsl_text(None if pressure is None else pressure // 10),
                _fsl_text(height, missing='9999'),  # height is 4 9s
                _fsl_text(temp),
                _fsl_text(dewpt),
                _fsl_text(wdir),
                _fsl_text(wspd),
            ])

    # Save the last sounding if present
    if current_header and current_data:
        dataframes.append(pd.DataFrame(current_data, columns=columns))
        headers.append(current_header)

    return headers, dataframes


def _fsl_coordinate(value, positive, negative):
    """Format a coordinate given in thousandths of a degree as e.g. '37.76N'."""
    if value is None:
        # No position in the header: emit the same 99999 marker this file
        # uses for other unavailable fields instead of failing on None / 1000.
        return "99999"
    degrees = value / 1000
    hemisphere = negative if degrees < 0 else positive
    return f"{abs(degrees):.2f}{hemisphere}"


def format_to_fsl(header, df):
    """
    Format data into the required FSL format.

    The header dictionary is only read, never modified, so the same header
    can be formatted more than once.

    Args:
        header (dict): Parsed header information. Uses the keys ``HOUR``,
            ``DAY``, ``MONTH`` (month number), ``YEAR``, ``ID``, ``LAT`` and
            ``LON`` (thousandths of a degree, or ``None``) and, if present,
            ``ELEV`` (defaults to 99999).
        df (pd.DataFrame): Sounding data as a DataFrame with the columns
            ``Pressure``, ``Height``, ``Temperature``, ``Dewpoint``,
            ``Wind_Dir`` and ``Wind_Speed``.

    Returns:
        str: Formatted FSL data as a string: four identification lines
            (types 254, 1, 2, 3) followed by one line per DataFrame row.
    """
    fsl_data = []

    # convert month to JAN, FEB, MAR, APR etc
    months = ["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]
    month_name = months[int(header['MONTH']) - 1]

    hour = header['HOUR']
    elevation = header.get('ELEV', 99999)

    # Add header line
    fsl_data.append(f"{254:>7}{hour:>7}{header['DAY']:>7}{month_name:>9}{header['YEAR']:>8}")

    # linetype 1: station identification, position, elevation, release time
    lat_formatted = _fsl_coordinate(header['LAT'], "N", "S")
    lon_formatted = _fsl_coordinate(header['LON'], "E", "W")
    release_time = str(hour) + "00"
    fsl_data.append(
        f"{1:>7}{99999:>7}{header['ID']:>7}{lat_formatted:>8}{lon_formatted:>7}{elevation:>6}{release_time:>7}"
    )

    # linetype 2: the line count covers the rows actually written below plus
    # the 4 identification lines, so it cannot disagree with the output.
    levels = len(df) + 4
    fsl_data.append(f"{2:>7}{99999:>7}{99999:>7}{99999:>7}{str(levels):>7}{99999:>7}{0:>7}")

    # IGRA uses ms not kt
    wind_speed_unit = 'ms'

    # linetype 3
    fsl_data.append(f"{3:>7}{' ':>7}{9999:>7}{' ':>7}{' ':>7}{99999:>7}{wind_speed_unit:>7}")

    # Add data lines. The first row by position (not by index label) is the
    # surface level: line type 9 with the station elevation as its height.
    # Every other row is line type 5 with its own height.
    for position, (_, row) in enumerate(df.iterrows()):
        if position == 0:
            lintyp, height = 9, elevation
        else:
            lintyp, height = 5, row['Height']

        fsl_data.append(
            f"{lintyp:>7}{row['Pressure']:>7}{height:>7}{row['Temperature']:>7}"
            f"{row['Dewpoint']:>7}{row['Wind_Dir']:>7}{row['Wind_Speed']:>7}"
        )

    return "\n".join(fsl_data)


def convert_igra_to_fsl(input_file, output_folder, metadata_file, year=None, site_id=None):
    """
    Convert IGRA sounding data to FSL format.

    Every parsed sounding is appended to ``<site>_<year>.FSL`` inside
    ``output_folder``, where ``<site>`` is the sounding's short station ID
    without surrounding spaces or leading zeros. Because soundings are
    appended, output files left over from an earlier run would end up with
    duplicated data; if any file this call is about to write already exists,
    the user is asked interactively whether to delete it. Answering anything
    other than ``y`` skips this input file. Only the files this call would
    write are considered, so output belonging to other input files is left
    alone.

    Args:
        input_file (str): Path to the IGRA input file.
        output_folder (str): Folder to save the converted FSL files. It is
            created if it does not exist.
        metadata_file (str): Path to the IGRA station list, used to look up
            each station's elevation (99999 when the station is not listed).
        year: Year to convert, passed on to ``parse_igra``.
        site_id: Station to convert, passed on to ``parse_igra``.
    """
    print(f"Processing input file: {input_file}")

    # read the file
    station_metadata = load_station_metadata(metadata_file)
    headers, dataframes = parse_igra(input_file, year, site_id)
    print(f"Parsed {len(headers)} headers and {len(dataframes)} dataframes.")

    # Ensure the output folder exists
    if os.path.exists(output_folder):
        print(f"Output folder already exists: {output_folder}")
    else:
        os.makedirs(output_folder)
        print(f"Created output folder: {output_folder}")

    # Work out the destination of every sounding up front so stale output can
    # be detected before anything is appended.
    output_files = []
    for header in headers:
        # The short ID may be padded with a space; strip it first so the
        # leading-zero removal works and the file name has no leading blank.
        site_code = header['ID'].strip().lstrip("0")
        output_files.append(os.path.join(output_folder, f"{site_code}_{header['YEAR']}.FSL"))

    stale_files = sorted({path for path in output_files if os.path.isfile(path)})
    if stale_files:
        print("Existing output files for this input:")
        for file_path in stale_files:
            print(f"  {file_path}")
        response = input(f"Do you want to delete these existing files in the {output_folder} folder? (y/n): ")
        if response.strip().lower() != 'y':
            print(f"Skipping {input_file}: existing output files were kept.")
            return

        print("Deleting existing files in the output folder.")
        for file_path in stale_files:
            try:
                os.unlink(file_path)
            except OSError as e:
                # Best effort: report the failure and carry on.
                print(f"Error deleting file: {file_path}")
                print(e)

    # Process each header-dataframe pair
    for header, df, output_file in zip(headers, dataframes, output_files):
        header['ELEV'] = station_metadata.get(header['rawID'], 99999)
        fsl_formatted_data = format_to_fsl(header, df)
        with open(output_file, 'a') as file:
            file.write(fsl_formatted_data + "\n")

# Batch run: every ".txt" entry of the configured input directory is treated
# as an IGRA data file and converted into the output folder for `year`.
file_list = os.listdir(input_files_dir)

for igra_file in file_list:
    if not igra_file.endswith(".txt"):
        # Anything else in the input directory is ignored.
        continue

    convert_igra_to_fsl(
        input_file=f"{input_files_dir}/{igra_file}",
        output_folder=f"{output_files_dir}/{year}",
        # Station list file, resolved relative to the working directory.
        metadata_file="igra2-station-list.txt",
        year=year,
        site_id=None,
    )