import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

def _load_deltas(input_directory: str, pattern: str) -> pd.DataFrame:
    """Read every matching ``*_DELTA_*.xmlv3.csv`` file into one DataFrame.

    A file is selected when its name matches, from the start of the name,
    the regular expression ``<pattern>_DELTA_.*`` followed by a literal
    ``.xmlv3.csv``.

    Args:
        input_directory: Directory whose entries are scanned (not recursive).
        pattern: Regular-expression prefix placed in front of ``_DELTA_``.

    Returns:
        The row-wise concatenation of all matching files with a fresh
        0..n-1 index and an extra 'Task Set' column holding the source
        file name of each row.

    Raises:
        FileNotFoundError: If no file name in ``input_directory`` matches.
    """
    # Raw f-string: '\.' must reach the regex engine as an escaped dot; in a
    # non-raw literal it is an invalid string escape sequence.
    matcher = re.compile(rf'{pattern}_DELTA_.*\.xmlv3\.csv')

    # Collect the frames and concatenate once instead of growing a DataFrame
    # inside the loop, which re-copies all previously loaded rows each time.
    frames = []
    for filename in sorted(os.listdir(input_directory)):
        if matcher.match(filename):
            df = pd.read_csv(os.path.join(input_directory, filename))
            df['Task Set'] = filename  # Remember which task set a row came from
            frames.append(df)

    if not frames:
        raise FileNotFoundError(
            f"No files matching '{matcher.pattern}' found in '{input_directory}'"
        )

    return pd.concat(frames, ignore_index=True)


def _count_changes(melted: pd.DataFrame, draw_unchanged: bool) -> pd.DataFrame:
    """Count positive, negative and (optionally) zero differences per measure.

    Args:
        melted: Long-format frame with a 'Measure' column and a numeric
            'Difference' column (NaN entries are never counted).
        draw_unchanged: When true, add an 'Unchanged' column counting the
            differences that are exactly 0.

    Returns:
        A frame with a 'Measure' column followed by integer columns
        'Improvements', 'Deteriorations' and, if requested, 'Unchanged'.
    """
    difference = melted['Difference']
    selections = {
        'Improvements': difference > 0,
        'Deteriorations': difference < 0,
    }
    if draw_unchanged:
        selections['Unchanged'] = difference == 0

    counts = {
        label: melted[mask].groupby('Measure')['Difference'].count()
        for label, mask in selections.items()
    }

    # Each category is counted on its own filtered subset, so a measure that
    # has no rows in one category is missing from that Series and would show
    # up as NaN (turning the whole column into floats). A missing count is 0.
    return (
        pd.DataFrame(counts)
        .fillna(0)
        .astype(int)
        .rename_axis('Measure')
        .reset_index()
    )


def _build_title(pattern: str) -> str:
    """Return the plot title, with a subtitle line derived from ``pattern``.

    A pattern of the form ``(\\d+)_.*`` is taken to select task sets by number
    of tasks; otherwise a pattern of the form ``[^\\d]*(\\d+)`` is taken to
    select task sets by CPU load. Any other pattern gets no subtitle.
    """
    title = 'Number of Improvements and Deteriorations by Response Time Measure'
    if (match := re.match(r'(\d+)_.*', pattern)):
        return f"{title}\nTask sets with {match.group(1)} Tasks"
    if (match := re.match(r'[^\d]*(\d+)$', pattern)):
        return f"{title}\nTask sets with {match.group(1)}% CPU load"
    return title


def _annotate_bars(ax) -> None:
    """Write each bar's value on the bar chart drawn on ``ax``.

    Bars taller than 90% of the upper y-limit get a white label just inside
    their top edge; all other bars get a black label just above them.
    """
    threshold = 0.9 * ax.get_ylim()[1]
    for patch in ax.patches:
        height = patch.get_height()
        inside = height > threshold
        ax.annotate(
            f'{height:.0f}',  # Counts are whole numbers; never print '3.0'
            (patch.get_x() + patch.get_width() / 2., height),
            ha='center',
            va='top' if inside else 'center',
            xytext=(0, -10 if inside else 10),
            textcoords='offset points',
            color='white' if inside else 'black',
        )


def main(input_directory, pattern):
    """Plot how many response-time differences improved or deteriorated.

    Loads every file in ``input_directory`` whose name matches
    ``<pattern>_DELTA_.*`` followed by ``.xmlv3.csv``, drops the 'missed'
    column, treats every remaining column except 'task' as a measure, and
    counts per measure how many differences are > 0 (improvements) and
    < 0 (deteriorations). If a file or directory named 'draw_unchanged'
    exists in the current working directory, differences equal to 0 are
    counted as well and drawn as a third bar.

    The grouped bar chart is saved as a PNG in ``input_directory`` and the
    plotted counts are saved next to it as a CSV with the same base name.
    The plot title is also printed to standard output.

    Args:
        input_directory: Directory that is scanned for input files and that
            receives the output files.
        pattern: Regular-expression prefix selecting the input files; also
            used (stripped of special characters) in the output file names.

    Raises:
        FileNotFoundError: If no input file matches ``pattern``.
    """
    # Marker file in the current directory switches on the 'Unchanged' bar
    draw_unchanged = os.path.exists('draw_unchanged')

    df_all_deltas = _load_deltas(input_directory, pattern)

    # Dropping irrelevant columns. The rows are deliberately not sorted:
    # only per-measure counts are produced, and those do not depend on the
    # row order.
    df_all_deltas = df_all_deltas.drop(columns=['missed', 'Task Set'])

    # Melting into long format: one row per (task, measure) pair
    df_all_deltas_melted = pd.melt(df_all_deltas, id_vars=['task'],
                                   var_name='Measure', value_name='Difference')

    # Converting 'Difference' to numeric; non-numeric entries become NaN and
    # are therefore not counted in any category
    df_all_deltas_melted['Difference'] = pd.to_numeric(
        df_all_deltas_melted['Difference'], errors='coerce')

    bar_data = _count_changes(df_all_deltas_melted, draw_unchanged)

    plot_title = _build_title(pattern)
    print(plot_title)

    # Creating the grouped bar chart of the counts
    fig, ax = plt.subplots(figsize=(12, 8))
    bar_data.plot(x='Measure', kind='bar', stacked=False, ax=ax)

    # Larger fonts for title, axis labels and tick labels
    ax.set_title(plot_title, fontsize=16)
    ax.set_xlabel('Response Time Measure', fontsize=14)
    ax.set_ylabel('Count', fontsize=14)
    ax.tick_params(axis='x', labelrotation=0, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)

    # Horizontal grid lines only, and no padding around the data so the
    # tallest bar reaches the top of the axes
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    ax.margins(x=0, y=0)
    fig.tight_layout()

    _annotate_bars(ax)

    # Remove special characters and use the cleaned pattern in the output
    # file names; PNG and CSV share the same base name
    clean_pattern = re.sub(r'[^a-zA-Z0-9_]', '', pattern)
    suffix = '' if draw_unchanged else '_no_unchanged'
    output_stem = os.path.join(
        input_directory,
        f'response_time_diffs_by_kind_grouped_{clean_pattern}{suffix}')

    fig.savefig(f'{output_stem}.png')

    # Debug: dump the plotted numbers alongside the figure
    bar_data.to_csv(f'{output_stem}.csv', index=False)

    # Release the figure so repeated calls do not accumulate open figures.
    # To inspect the plot interactively, call plt.show() before this line.
    plt.close(fig)

if __name__ == "__main__":
    # The script name plus at most two optional positional arguments.
    if not 1 <= len(sys.argv) <= 3:
        print("Usage: python script.py [ input_directory [pattern]  ]")
        sys.exit(1)

    # Omitted arguments fall back to the current directory and a
    # match-everything pattern.
    cli_args = sys.argv[1:]
    input_directory = cli_args[0] if cli_args else '.'
    pattern = cli_args[1] if len(cli_args) > 1 else '.*'

    # Echo the effective settings before doing any work.
    print(input_directory, pattern)

    main(input_directory, pattern)