import os
import re
import sys

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns  # kept from the original script; not referenced below


def _load_deltas(input_directory, pattern):
    """Read every matching delta CSV in *input_directory* into one DataFrame.

    A file matches when its name starts with ``{pattern}_DELTA_`` followed by
    anything and then ``.xmlv3.csv`` (``re.match`` semantics, so the match is
    anchored at the start of the name only).

    Raises:
        FileNotFoundError: if no file name in the directory matches.
    """
    # Raw f-string so that "\." reaches the regex engine as an escaped dot
    # without relying on Python passing an invalid escape sequence through.
    file_regex = re.compile(rf'{pattern}_DELTA_.*\.xmlv3\.csv')

    frames = []
    for filename in sorted(os.listdir(input_directory)):
        if not file_regex.match(filename):
            continue
        df = pd.read_csv(os.path.join(input_directory, filename))
        df['Task Set'] = filename  # Remember which file each row came from
        frames.append(df)

    if not frames:
        raise FileNotFoundError(
            f"No file matching '{file_regex.pattern}' found in '{input_directory}'"
        )

    # One concat at the end instead of growing a DataFrame inside the loop.
    return pd.concat(frames).reset_index(drop=True)


def _count_changes(df_all_deltas, draw_unchanged):
    """Count positive, negative and (optionally) zero differences per measure.

    Every column other than 'task', 'missed' and 'Task Set' is treated as a
    measure. Returns a DataFrame with a 'Measure' column followed by integer
    'Improvements', 'Deteriorations' and, if *draw_unchanged*, 'Unchanged'.
    """
    measures_only = df_all_deltas.drop(columns=['missed', 'Task Set'])

    # Long format: one row per (task, measure) pair.
    melted = pd.melt(measures_only, id_vars=['task'],
                     var_name='Measure', value_name='Difference')
    # Non-numeric entries become NaN and therefore fall into no category.
    melted['Difference'] = pd.to_numeric(melted['Difference'], errors='coerce')

    def count_where(mask):
        return melted[mask].groupby('Measure')['Difference'].count()

    counts = {
        'Improvements': count_where(melted['Difference'] > 0),
        'Deteriorations': count_where(melted['Difference'] < 0),
    }
    if draw_unchanged:
        counts['Unchanged'] = count_where(melted['Difference'] == 0)

    # A measure missing from one of the series would otherwise show up as NaN
    # (and turn the whole column into floats); it really means "zero rows".
    return (pd.DataFrame(counts)
            .fillna(0)
            .astype(int)
            .rename_axis('Measure')
            .reset_index())


def _build_title(pattern):
    """Return the plot title, with a subtitle derived from *pattern* if possible.

    A pattern of the form '(\\d+)_.*' is read as a task count; otherwise a
    pattern ending in digits (with no digits before them) is read as a CPU
    load percentage. Any other pattern yields no subtitle.
    """
    subtitle = ''
    match = re.match(r'(\d+)_.*', pattern)
    if match:
        subtitle = f"Task sets with {match.group(1)} Tasks"
    elif (match := re.match(r'[^\d]*(\d+)$', pattern)):
        subtitle = f"Task sets with {match.group(1)}% CPU load"

    plot_title = 'Number of Improvements and Deteriorations by Response Time Measure'
    if subtitle:
        plot_title += f"\n{subtitle}"  # Subtitle goes on its own line
    return plot_title


def _annotate_bars(ax):
    """Write each bar's count above it, or inside it when it is near the top."""
    threshold = 0.9 * ax.get_ylim()[1]  # 90% of the y-axis upper limit
    for patch in ax.patches:
        x = patch.get_x() + patch.get_width() / 2.
        y = patch.get_height()

        if y > threshold:
            # No room above the bar: put white text inside, near the top.
            vertical_alignment = 'top'
            y_offset = -10
            text_color = 'white'
        else:
            vertical_alignment = 'center'
            y_offset = 10
            text_color = 'black'

        # Counts are whole numbers; show them without a trailing ".0".
        ax.annotate(str(int(y)), (x, y), ha='center', va=vertical_alignment,
                    xytext=(0, y_offset), textcoords='offset points',
                    color=text_color)


def main(input_directory, pattern):
    """Plot, per measure, how many response-time deltas improved or worsened.

    Reads all files in *input_directory* whose names match
    ``{pattern}_DELTA_.*\\.xmlv3\\.csv``, counts for every measure column the
    rows with a difference > 0 (improvements) and < 0 (deteriorations), and
    saves a grouped bar chart (PNG) plus the underlying counts (CSV) into
    *input_directory*.

    If a file or directory named 'draw_unchanged' exists in the current
    working directory, a third bar with the number of zero differences is
    added and the output names lose their '_no_unchanged' suffix.

    Args:
        input_directory: directory holding the delta CSV files; also the
            output directory.
        pattern: regular-expression prefix selecting the task sets.

    Raises:
        FileNotFoundError: if no file in *input_directory* matches.
    """
    draw_unchanged = os.path.exists('draw_unchanged')

    df_all_deltas = _load_deltas(input_directory, pattern)
    bar_data = _count_changes(df_all_deltas, draw_unchanged)

    plot_title = _build_title(pattern)
    print (plot_title)

    # Strip special characters so the pattern can be used in a file name.
    clean_pattern = re.sub(r'[^a-zA-Z0-9_]', '', pattern)
    output_stem = f'response_time_diffs_by_kind_grouped_{clean_pattern}'
    if not draw_unchanged:
        output_stem += '_no_unchanged'

    title_fontsize = 16
    axes_label_fontsize = 14

    # Grouped bar chart of the counts
    fig = plt.figure(figsize=(12, 8))
    try:
        ax = bar_data.plot(x='Measure', kind='bar', stacked=False, ax=plt.gca())

        plt.title(plot_title, fontsize=title_fontsize)
        plt.xlabel('Response Time Measure', fontsize=axes_label_fontsize)
        plt.ylabel('Count', fontsize=axes_label_fontsize)

        plt.xticks(rotation=0, fontsize=12)
        plt.yticks(fontsize=12)

        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.margins(x=0, y=0)  # No padding around the bars
        plt.tight_layout()

        _annotate_bars(ax)

        # Save the plot as a PNG figure next to the input files
        plt.savefig(os.path.join(input_directory, f'{output_stem}.png'))
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

    # Debug: dump the plotted counts alongside the figure
    bar_data.to_csv(os.path.join(input_directory, f'{output_stem}.csv'),
                    index=False)


if __name__ == "__main__":
    # Both arguments are optional; only too many arguments is an error.
    if len(sys.argv) > 3:
        print("Usage: python script.py [ input_directory [pattern] ]")
        sys.exit(1)

    input_directory = sys.argv[1] if len(sys.argv) > 1 else '.'
    pattern = sys.argv[2] if len(sys.argv) > 2 else '.*'

    print (input_directory, pattern)
    try:
        main(input_directory, pattern)
    except FileNotFoundError as err:
        sys.exit(str(err))