import os
import pandas as pd
from config import year, output_files_dir, input_files_dir

# Echo the configured output and input directories, one per line.
print(output_files_dir, input_files_dir, sep="\n")

def load_station_metadata(metadata_file):
    """
    Load IGRA station metadata, including elevation.

    Each line is read as a fixed-width record: the station ID comes from
    characters 0-10 and the elevation from characters 31-36.

    Lines without a station ID (e.g. blank lines) are skipped. When the
    elevation field cannot be parsed, or holds one of the station list's
    "no elevation" sentinels (-999.9 missing / -998.8 mobile, per the IGRA2
    station-list format description), the station is stored with 99999, the
    same placeholder process_sounding uses for stations that are not in the
    metadata at all.

    Args:
        metadata_file (str): Path to the IGRA station metadata file.

    Returns:
        dict: Dictionary with station IDs as keys and elevation as values
            (int, whole meters truncated toward zero; 99999 when unknown).
    """
    missing_elevation = 99999
    station_data = {}

    with open(metadata_file, 'r') as file:
        for line in file:
            station_id = line[:11].strip()
            if not station_id:
                # Blank or padding-only line: nothing to record.
                continue

            try:
                elevation = float(line[31:37])
            except ValueError:
                # Empty or non-numeric field (e.g. a truncated line).
                elevation = None

            # The range test also rejects NaN/inf, which int() cannot convert.
            if elevation is None or not (-998.0 < elevation < 99999.0):
                station_data[station_id] = missing_elevation
            else:
                station_data[station_id] = int(elevation)

    return station_data


def process_sounding(header, data_rows, output_file, station_metadata):
    """
    Format one sounding as FSL text and append it to the output file.

    Does nothing when there are no data rows. Otherwise the station elevation
    is looked up by header['rawID'] and stored in header['ELEV'] (99999 when
    the station is not in the metadata) before formatting.

    Args:
        header (dict): Parsed header information; updated in place with 'ELEV'.
        data_rows (list): List of data rows for this sounding.
        output_file (str): Path to the output file (opened in append mode).
        station_metadata (dict): Station metadata dictionary.
    """
    if data_rows:
        column_names = ["Pressure", "Height", "Temperature", "Dewpoint", "Wind_Dir", "Wind_Speed"]
        sounding_frame = pd.DataFrame(data_rows, columns=column_names)

        # Unknown stations fall back to the 99999 placeholder elevation.
        header['ELEV'] = station_metadata.get(header['rawID'], 99999)

        fsl_text = format_to_fsl(header, sounding_frame)
        with open(output_file, 'a') as out_handle:
            out_handle.write(f"{fsl_text}\n")


def parse_igra_streaming(file_path, year, site_id, output_folder, station_metadata):
    """
    Parse IGRA sounding data in a streaming fashion, writing each sounding immediately.

    Header lines start with "#"; every other non-empty line is a data line
    belonging to the most recent header. A finished sounding is handed to
    process_sounding, which appends it to
    "<output_folder>/<site code>_<year>.FSL".

    Args:
        file_path (str): Path to the IGRA data file.
        year: Year of the data required. None disables the filter, so
            soundings from every year are converted.
        site_id: site ID filter (currently unused)
        output_folder (str): Folder to save the converted FSL files.
        station_metadata (dict): Station metadata dictionary.

    Returns:
        int: Number of soundings processed.

    Raises:
        ValueError: If a header selected for conversion has a non-numeric
            day, hour, latitude or longitude field.
    """
    # IGRA2 sentinels: -9999 = missing, -8888 = removed by quality assurance
    # (per the IGRA2 data format description). Both become the FSL missing code.
    igra_missing = ("-9999", "-8888")

    def fsl_value(raw, fsl_missing):
        # Drop fixed-width padding and swap an IGRA sentinel for the FSL one.
        text = raw.strip()
        return fsl_missing if text in igra_missing else text

    def write_sounding(header, rows):
        # Append one finished sounding to its file; returns 1 if written, else 0.
        if not (header and rows):
            return 0
        # NOTE(review): header['ID'] may start with a space, so lstrip("0")
        # leaves it in place and the file name begins with a space. Kept
        # as-is because other tooling may rely on the current names - confirm.
        site_code = header['ID'].lstrip("0")
        output_file = os.path.join(output_folder, f"{site_code}_{header['YEAR']}.FSL")
        process_sounding(header, rows, output_file, station_metadata)
        return 1

    wanted_year = None if year is None else str(year).strip()
    current_data = []
    current_header = None
    soundings_count = 0

    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()  # Remove leading and trailing whitespaces
            if not line:
                continue

            if line[0] == "#":  # Header line
                # A new header closes the previous sounding.
                soundings_count += write_sounding(current_header, current_data)
                current_data = []
                current_header = None

                # Filter on the year first so unwanted soundings are not parsed.
                if wanted_year is not None and line[13:17].strip() != wanted_year:
                    continue

                header = {
                    "rawID": line[1:12].strip(),
                    "YEAR": line[13:17],
                    "MONTH": line[18:20].strip(),
                    "DAY": line[21:23].strip(),
                    "HOUR": line[24:26].strip(),
                    # NUMLEV is four characters wide (columns 33-36).
                    "LEVELS": line[32:36].strip(),
                    # Each slice stops one digit short of the field, so the
                    # value is 1/10 of the file's integer; format_to_fsl
                    # divides by 1000 to obtain degrees.
                    "LAT": float(line[55:61].strip()) if len(line) > 61 else None,
                    "LON": float(line[63:70].strip()) if len(line) > 70 else None,
                }

                # Keep the last six ID characters, replacing a leading zero
                # in that group with a space.
                raw_id = header['rawID']
                if len(raw_id) > 5 and raw_id[-6] == '0':
                    header['ID'] = " " + raw_id[-5:]
                else:
                    header['ID'] = raw_id[-6:]

                # Single-digit days/hours get a leading space.
                if int(header['DAY']) < 10:
                    header['DAY'] = " " + header['DAY']
                if int(header['HOUR']) < 10:
                    header['HOUR'] = " " + header['HOUR']

                current_header = header
                continue

            # Data line: ignore it when its sounding was filtered out or the
            # line is too short to hold all the fields read below.
            if current_header is None or len(line) < 50:
                continue

            # Pressure occupies six characters (index 9-14). Only the first
            # five are kept, which drops the last digit of the value. The
            # sentinel therefore has to be tested on the full field: its
            # five-character slice reads "-999".
            pressure = fsl_value(line[9:14], '99999')
            if line[9:15].strip() in igra_missing:
                pressure = '99999'

            # NOTE(review): per the IGRA2 format description, index 34-38 is
            # dewpoint depression (DPDP), not dewpoint. It is passed through
            # unchanged here - confirm whether temp - DPDP should be written.
            current_data.append([
                pressure,
                fsl_value(line[16:21], '9999'),  # height uses four 9s
                fsl_value(line[22:27], '99999'),
                fsl_value(line[34:39], '99999'),
                fsl_value(line[40:45], '99999'),
                fsl_value(line[46:51], '99999'),
            ])

    # Process the last sounding if present
    soundings_count += write_sounding(current_header, current_data)

    return soundings_count


def format_to_fsl(header, df):
    """
    Format data into the required FSL format.

    The record consists of a 254 header line, identification lines of type
    1, 2 and 3, then one line per row of df. The first row is written as
    line type 9 (surface) with the station elevation in place of its height;
    every other row is line type 5. The header dict is not modified.

    Args:
        header (dict): Parsed header information. Reads 'HOUR', 'DAY',
            'MONTH' (1-12), 'YEAR', 'ID', 'ELEV', and numeric 'LAT'/'LON'
            (divided by 1000 here to obtain degrees).
        df (pd.DataFrame): Sounding data with columns Pressure, Height,
            Temperature, Dewpoint, Wind_Dir and Wind_Speed.

    Returns:
        str: Formatted FSL data as a string (lines joined with newlines,
            no trailing newline).

    Raises:
        TypeError: If header['LAT'] or header['LON'] is None.
    """
    months = ["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]

    # Work on locals so the caller's header keeps its numeric month and
    # two-digit hour and can be formatted again.
    month_name = months[int(header['MONTH']) - 1]
    hour = header['HOUR']
    elevation = header['ELEV']

    def hemisphere(value, positive, negative):
        # Render an unsigned value with two decimals plus a hemisphere letter.
        return f"{abs(value):.2f}{negative}" if value < 0 else f"{value:.2f}{positive}"

    lat_formatted = hemisphere(header['LAT'] / 1000, "N", "S")
    lon_formatted = hemisphere(header['LON'] / 1000, "E", "W")

    # The line count covers the four identification lines plus the data
    # lines actually emitted below, so it stays correct even when the
    # header's level count disagrees with the rows that were parsed.
    line_count = len(df) + 4

    # IGRA uses ms not kt
    wind_speed_unit = 'ms'

    fsl_data = [
        f"{254:>7}{hour:>7}{header['DAY']:>7}{month_name:>9}{header['YEAR']:>8}",
        f"{1:>7}{99999:>7}{header['ID']:>7}{lat_formatted:>8}{lon_formatted:>7}{elevation:>6}{str(hour) + '00':>7}",
        f"{2:>7}{99999:>7}{99999:>7}{99999:>7}{str(line_count):>7}{99999:>7}{0:>7}",
        f"{3:>7}{' ':>7}{9999:>7}{' ':>7}{' ':>7}{99999:>7}{wind_speed_unit:>7}",
    ]

    # Pick the surface row by position, not by index label, so a DataFrame
    # with a non-default index is handled correctly.
    for position, (_, row) in enumerate(df.iterrows()):
        if position == 0:
            lintyp, height = 9, elevation
        else:
            lintyp, height = 5, row['Height']
        fsl_data.append(
            f"{lintyp:>7}{row['Pressure']:>7}{height:>7}{row['Temperature']:>7}"
            f"{row['Dewpoint']:>7}{row['Wind_Dir']:>7}{row['Wind_Speed']:>7}"
        )

    return "\n".join(fsl_data)


# Output folders already checked for leftover .FSL files during this run.
# Several input files may share one output folder; remembering the folder
# means the overwrite prompt appears once, and files written for an earlier
# input in the same run are never offered for deletion.
_CHECKED_OUTPUT_FOLDERS = set()


def convert_igra_to_fsl(input_file, output_folder, metadata_file, year=None, site_id=None):
    """
    Convert one IGRA sounding file to FSL format.

    The first time an output folder is used in a run, any ".FSL" files
    already in it trigger an interactive prompt. Answering "y" deletes them;
    any other answer leaves the folder untouched and skips this input file,
    so new soundings are not appended to old output. The folder is created
    when it does not exist.

    Args:
        input_file (str): Path to the IGRA input file.
        output_folder (str): Folder to save the converted FSL files.
        metadata_file (str): Path to the IGRA station metadata file, read
            with load_station_metadata.
        year: Year to extract; passed through to parse_igra_streaming.
        site_id: Passed through to parse_igra_streaming.

    Returns:
        None
    """
    print(f"Processing input file: {input_file}")

    folder_key = os.path.abspath(output_folder)

    if os.path.exists(output_folder):
        print(f"Output folder already exists: {output_folder}")
        if folder_key not in _CHECKED_OUTPUT_FOLDERS:
            # Match on the extension; testing `".FSL" in listing` would only
            # find a file named exactly ".FSL".
            stale_files = [name for name in os.listdir(output_folder) if name.endswith(".FSL")]
            if stale_files:
                response = input(f"Do you want to delete the existing files in the {output_folder} folder? (y/n): ")
                if response.lower() != 'y':
                    print("Existing files were kept; skipping conversion of this input file.")
                    return
                print("Deleting existing files in the output folder.")
                for name in stale_files:
                    file_path = os.path.join(output_folder, name)
                    try:
                        if os.path.isfile(file_path):
                            os.unlink(file_path)
                    except OSError as e:
                        # Best effort: report and carry on with the rest.
                        print(f"Error deleting file: {file_path}")
                        print(e)
    else:
        # Ensure the output folder exists
        os.makedirs(output_folder)
        print(f"Created output folder: {output_folder}")

    _CHECKED_OUTPUT_FOLDERS.add(folder_key)

    # read the file in streaming mode
    station_metadata = load_station_metadata(metadata_file)
    soundings_count = parse_igra_streaming(input_file, year, site_id, output_folder, station_metadata)
    print(f"Parsed and wrote {soundings_count} soundings.")

# Batch driver: convert every ".txt" file found in the input directory.
file_list = os.listdir(input_files_dir)

for igra_file in file_list:
    if not igra_file.endswith(".txt"):
        continue
    convert_igra_to_fsl(
        f"{input_files_dir}/{igra_file}",  # input IGRA file
        f"{output_files_dir}/{year}",      # output folder
        "igra2-station-list.txt",          # station list file (included in this repo)
        year=year,
        site_id=None,
    )
