From ccd03dbc23130d334dab53c1b56467cf50d80a49 Mon Sep 17 00:00:00 2001 From: KyleKlenk <kyle.c.klenk@gmail.com> Date: Wed, 26 Apr 2023 14:33:57 -0600 Subject: [PATCH] Update README Do not need separate config file for specifying threads. Can use $SLURM_CPUS_PER_TASK --- {bin => .vscode}/caf-application.conf | 0 README.md | 37 ++-- build/source/actors/main.cpp | 2 +- utils/StatisticsScripts/plots.py | 258 ++++++++++++++++++++++++++ 4 files changed, 280 insertions(+), 17 deletions(-) rename {bin => .vscode}/caf-application.conf (100%) create mode 100644 utils/StatisticsScripts/plots.py diff --git a/bin/caf-application.conf b/.vscode/caf-application.conf similarity index 100% rename from bin/caf-application.conf rename to .vscode/caf-application.conf diff --git a/README.md b/README.md index da7ed4b..c1daf29 100644 --- a/README.md +++ b/README.md @@ -50,39 +50,44 @@ SUMMA-Actors supports four build types: Debug, Cluster, Release, and Cluster_Deb Running SUMMA-Actors is done with the following command: ./summa_actor -g startGRU -n numGRU -c path_to_config_file +If you are running SUMMA-Actors on a cluster, you will need to specify the number of threads when not using whole nodes. + This can be done with the --caf.scheduler.max-threads option + ./summa_actor -g startGRU -n numGRU -c path_to_config_file --caf.scheduler.max-threads $SLURM_CPUS_PER_TASK + The values for -g and -n are integers where -c is the full path to the configuraiton file for summa actors. The configuration file is a json file. 
The contents of the JSON file are below: - { +{ "Distributed_Settings": { "distributed_mode": false, - "servers_list": [{"hostname": "simlab01"}, {"hostname": "simlab05"}], + "servers_list": [{"hostname": "cnic-giws-cpu-19001-04"}, {"hostname": "cnic-giws-utl-19002"}, {"hostname": "cnic-giws-utl-19003"}], "port": 4444, - "total_hru_count": 517315, - "num_hru_per_batch": 1000 + "total_hru_count": 800, + "num_hru_per_batch": 50 }, - + "Summa_Actor": { - "max_gru_per_job": 500 + "max_gru_per_job": 4000 }, - + "File_Access_Actor": { - "num_partitions_in_output_buffer": 4, + "num_partitions_in_output_buffer": 8, "num_timesteps_in_output_buffer": 500 }, - + "Job_Actor": { - "file_manager_path": "/gladwell/kck540/Sundials_Settings/fileManager_actors.txt", - "output_csv": false, - "csv_path": "" + "file_manager_path": "/scratch/gwf/gwf_cmt/kck540/Summa-Actors/settings/file_manager_actors.txt", + "max_run_attempts": 3 }, - + "HRU_Actor": { - "print_output": true, - "output_frequency": 1000 + "print_output": true, + "output_frequency": 100000, + "dt_init_factor": 1 } - } +} + The settings above should work for most use cases, some of the feautures we want to automate such as max_gru_per_job. However, the only field that you should have to adjust is the `file_manager_path`. This is the path to the file that manages the complete configuration of the SUMMA simulation. 
The summa confiuration files are explained in more depth in the follwoing (documentation)[https://summa.readthedocs.io/en/latest/input_output/SUMMA_input/] diff --git a/build/source/actors/main.cpp b/build/source/actors/main.cpp index 9451b50..ca5b8ee 100644 --- a/build/source/actors/main.cpp +++ b/build/source/actors/main.cpp @@ -155,7 +155,7 @@ void caf_main(actor_system& sys, const config& cfg) { "fileManger is set with the \"-c\" option\n"; aout(self) << "EXAMPLE: ./summaMain -g 1 -n 10 -c location/of/config \n"; return; - } + } auto summa = sys.spawn(summa_actor, cfg.startGRU, cfg.countGRU, summa_actor_settings, file_access_actor_settings, job_actor_settings, hru_actor_settings, self);
diff --git a/utils/StatisticsScripts/plots.py b/utils/StatisticsScripts/plots.py
new file mode 100644
index 0000000..6522089
--- /dev/null
+++ b/utils/StatisticsScripts/plots.py
@@ -0,0 +1,258 @@
+"""Plots comparing SUMMA-Actors runs against the original SUMMA.
+
+Every plotting function saves a PNG into the current working directory.
+Running this file as a script draws the wall-clock comparison for the two
+summary logs named in ACTORS_SUMMARY_CSV and ORIGINAL_SUMMARY_CSV.
+"""
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from pylab import cm
+# from mpl_toolkits.axes_grid.anchored_artists import AnchoredText
+
+# MATPLOTLIBRC stuff
+import matplotlib as mpl
+# mpl.use('AGG') # for systems not running a GUI
+
+font = {'family' : 'serif',
+        'weight' : 'normal',
+        'size' : 20
+}
+mpl.rc('font', **font)
+
+# Summary logs of the two runs being compared (data sets 1 and 2, used for the paper).
+ACTORS_SUMMARY_CSV = "/scratch/gwf/gwf_cmt/kck540/Single_CPU_Test/actors/logs/_log_summaryActors_sorted.csv"
+ORIGINAL_SUMMARY_CSV = "/scratch/gwf/gwf_cmt/kck540/Single_CPU_Test/non-actors/logs/_log_summaryOriginal_sorted.csv"
+# Log of re-run failed jobs that add_failures() reads by default.
+FAILED_RERUN_CSV = 'SummaOriginal_rerun_failed_Jul-09.csv'
+# Number of histogram bins in the comparison plots.
+NUM_BINS = 50
+
+
+def time_convert(seconds):
+    """Convert a duration in seconds to hours, rounded to two decimals.
+
+    NOTE(review): the input is reduced modulo 24 hours first, so a duration of
+    a day or more wraps around (90000 s gives 1.0). Behaviour is unchanged
+    here; confirm that no input can reach one day.
+    """
+    seconds = seconds % (24 * 3600)
+    return round(seconds / 3600, 2)
+
+
+def add_failures(failed_start_hru, failures_csv=FAILED_RERUN_CSV):
+    """Return the wall-clock time, in hours, of one re-run failed job.
+
+    failed_start_hru is matched against the "startHRU" column of failures_csv
+    and the first matching "Wall-Clock Time" value goes through time_convert().
+
+    Raises IndexError when no row matches.
+    """
+    failure_data = pd.read_csv(failures_csv)
+    matches = failure_data.loc[failure_data["startHRU"] == failed_start_hru, "Wall-Clock Time"]
+    if matches.empty:
+        # Same exception type as indexing the empty result, with a usable message.
+        raise IndexError("no row with startHRU == {} in {}".format(failed_start_hru, failures_csv))
+    return time_convert(matches.values[0])
+
+
+def summa_worst_case():
+    """Save a bar chart of hours per sub-task to SUMMA-worst-case.png."""
+    # wall clock times of the processes
+    batches = ["1", "2", "3", "4", "5", "6", "7", "8"]
+    results = [0.87, 0.75, 2.27, 0.87, 0.88, 0.75, 0.76, 1.48]
+    original_color = plt.get_cmap('viridis')(0.5)
+
+    plt.figure(figsize=(12, 8))
+    plt.bar(batches, results, color=original_color, alpha=0.6, edgecolor=original_color)
+    plt.xlabel('Sub-task')
+    plt.ylabel('Hours')
+    plt.savefig("SUMMA-worst-case.png", bbox_inches="tight")
+
+
+def summa_best_case():
+    """Placeholder: no best-case plot is drawn yet."""
+    batches = []
+
+
+def _shared_bin_edges(values_a, values_b, nbins):
+    """Return nbins + 1 equally spaced edges (nbins bins) spanning both samples."""
+    return np.histogram_bin_edges(np.concatenate((values_a, values_b)), bins=nbins)
+
+
+def _count_wins(actors_values, original_values, higher_is_better):
+    """Count, pair by pair, how often SUMMA-Actors beats SUMMA.
+
+    Returns (actors_count, summa_count); a tie is counted for SUMMA. Pairs are
+    formed with zip(), so a longer input is truncated instead of raising.
+    """
+    actors_count = 0
+    summa_count = 0
+    for actors_value, original_value in zip(actors_values, original_values):
+        if higher_is_better:
+            actors_wins = actors_value > original_value
+        else:
+            actors_wins = actors_value < original_value
+        if actors_wins:
+            actors_count += 1
+        else:
+            summa_count += 1
+    return actors_count, summa_count
+
+
+def cpuComparsion(data_set_1, data_set_2):
+    """Save overlaid CPU-efficiency histograms to CPU-Efficiency.png.
+
+    data_set_1 holds the SUMMA-Actors runs and data_set_2 the SUMMA runs; both
+    need a "CPU Efficiency" column. Averages and per-pair win counts are
+    printed.
+    """
+    SummaActors = pd.DataFrame(data_set_1)["CPU Efficiency"].values
+    SummaOriginal = pd.DataFrame(data_set_2)["CPU Efficiency"].values
+    print("Average CPU-Efficiency SUMMA-Actors =",sum(SummaActors) / len(SummaActors))
+    print("Average CPU-Efficiency SUMMA =",sum(SummaOriginal) / len(SummaOriginal))
+
+    actors_count, summa_count = _count_wins(SummaActors, SummaOriginal, higher_is_better=True)
+    print("TOTAL ACTORS COUNT =", actors_count)
+    print("TOTAL SUMMA COUNT =", summa_count)
+
+    string_text = "SUMMA-Actors Range: [{}% - {}%]\nSUMMA Range: [{}% - {}%]".format(
+        min(SummaActors), max(SummaActors), min(SummaOriginal), max(SummaOriginal))
+
+    # Setup colors
+    cmap = plt.get_cmap('viridis')
+    actors_color = cmap(0.02)
+    print("Actors color is: ", actors_color)
+    original_color = cmap(0.5)
+
+    # One set of edges for both histograms keeps their bins aligned. They are
+    # computed directly, not by drawing throwaway histograms on another figure.
+    linbins = _shared_bin_edges(SummaActors, SummaOriginal, NUM_BINS)
+
+    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
+    ax.hist(SummaActors, bins=linbins, alpha=0.75, label="SUMMA-Actors", edgecolor=actors_color, linewidth=0.5, color=actors_color)
+    ax.hist(SummaOriginal, bins=linbins, alpha=0.6, label="SUMMA", edgecolor=original_color, linewidth=0.5, color=original_color)
+
+    # Set labels
+    ax.set_xlabel('CPU Efficiency as Percentage')
+    ax.set_ylabel('Count')
+
+    # Text box with the min/max of each data set
+    props = dict(boxstyle='round', facecolor="cornflowerblue", alpha=0.5, edgecolor='black')
+    ax.text(0.02, 0.8, string_text, transform=ax.transAxes, verticalalignment='top', bbox=props)
+
+    # plot legend
+    ax.legend(loc=0, edgecolor='black')
+
+    plt.savefig("CPU-Efficiency.png", bbox_inches="tight")
+
+
+def _print_time_totals(name, job_durations, reads, writes):
+    """Print total, read, write, IO and remaining (CPU) time for one data set."""
+    total = sum(job_durations)
+    total_read = sum(reads)
+    total_write = sum(writes)
+    print("Total Time For {} = ".format(name), total)
+    print(" Total Read Time = ", total_read)
+    print(" Total Write Time = ", total_write)
+    print(" Total IO Time = ", total_read + total_write)
+    print(" Total CPU Time = ", total - (total_read + total_write))
+
+
+def wallClockComparison(data_set_1, data_set_2):
+    """Save the wall-clock histogram with cumulative overlay to WallClock.png.
+
+    data_set_1 holds the SUMMA-Actors runs and data_set_2 the SUMMA runs; both
+    need "job_duration", "read_duration" and "write_duration" columns.
+
+    NOTE(review): read and write durations go through time_convert() while
+    job_duration is used as read, so the printed "Total CPU Time" only makes
+    sense if job_duration is already in hours -- confirm against the logs.
+    """
+    actors_durations = data_set_1["job_duration"].values
+    original_durations = data_set_2["job_duration"].values
+
+    _print_time_totals(
+        "SUMMA-Actors", actors_durations,
+        list(map(time_convert, data_set_1["read_duration"].values)),
+        list(map(time_convert, data_set_1["write_duration"].values)))
+    _print_time_totals(
+        "SUMMA", original_durations,
+        list(map(time_convert, data_set_2["read_duration"].values)),
+        list(map(time_convert, data_set_2["write_duration"].values)))
+
+    actors_count, summa_count = _count_wins(actors_durations, original_durations, higher_is_better=False)
+    print("TOTAL ACTORS COUNT =", actors_count)
+    print("TOTAL SUMMA COUNT =", summa_count)
+
+    cmap = plt.get_cmap('viridis')
+    sa_color = cmap(0.02)
+    so_color = cmap(0.5)
+
+    # Shared edges keep the bars and the cumulative steps on the same bins.
+    linbins = _shared_bin_edges(actors_durations, original_durations, NUM_BINS)
+
+    fig2, ax2 = plt.subplots(1, 1, figsize=(12, 8))
+
+    # Create the bar graph
+    ax2.hist(actors_durations, bins=linbins, alpha=0.75, label="SUMMA-Actors", edgecolor=sa_color, linewidth=0.5, color=sa_color)
+    ax2.hist(original_durations, bins=linbins, alpha=0.6, edgecolor=so_color, linewidth=0.5, label="SUMMA", color=so_color)
+
+    ax2.set_title("Wall-Clock Time Comparison Of Jobs On Each CPU", fontsize=20)
+    ax2.set_xlabel('Wall-Clock Time (Hours)')
+    ax2.set_ylabel('Count')
+
+    # Merge graphs: cumulative counts go on a second y-axis
+    ax3 = ax2.twinx()
+    ax3.hist(actors_durations, bins=linbins, linewidth=1.5, histtype='step', cumulative=True, label="SUMMA-Actors", color="#CC00CC")
+    ax3.hist(original_durations, bins=linbins, linewidth=1.5, histtype='step', cumulative=True, label="SUMMA", color="#00CCCC")
+    ax3.set_ylabel('Total Number of Bins', labelpad=15)
+
+    # Get rid of line that goes straight down at the end of this plot
+    for poly in ax3.get_children():
+        if isinstance(poly, mpl.patches.Polygon):
+            poly.set_xy(poly.get_xy()[:-1])
+
+    handles1, labels1 = ax2.get_legend_handles_labels()
+    handles2, labels2 = ax3.get_legend_handles_labels()
+
+    # The bar handles get empty labels; only the cumulative lines carry a name in the legend.
+    ax2.legend(handles1 + handles2, [''] * len(labels1) + labels2,
+               loc='right', ncol=2, handlelength=3, fontsize=16)
+    plt.savefig("WallClock.png", bbox_inches="tight")
+
+
+def boxPlot(data_set_1, data_set_2):
+    """Save a horizontal box plot of read and write times to BoxPlot.png.
+
+    Both data sets need "read_duration" and "write_duration" columns.
+    """
+    fig, ax = plt.subplots(1, 1, figsize=(14, 8))
+
+    ax.set_title("Box Plot Of Read And Write Times", fontsize=20)
+    ax.set_xlabel('Read/Write', fontsize=14)
+    ax.set_ylabel('Time (Seconds)', fontsize=14)
+
+    ax.boxplot(
+        [data_set_1["read_duration"].values, data_set_2["read_duration"].values,
+         data_set_1["write_duration"].values, data_set_2["write_duration"].values],
+        labels=["SUMMA-Actors Read", "SUMMA Read", "SUMMA-Actors Write", "SUMMA Write"],
+        flierprops=dict(markerfacecolor='r', marker='D'), vert=False)
+    # set size of tick labels
+    ax.tick_params(axis='both', which='major', labelsize=14)
+
+    plt.savefig("BoxPlot.png", bbox_inches="tight")
+
+
+def main():
+    """Assemble the two data sets and draw the wall-clock comparison."""
+    data_set_1 = pd.read_csv(ACTORS_SUMMARY_CSV)
+    data_set_2 = pd.read_csv(ORIGINAL_SUMMARY_CSV)
+
+    wallClockComparison(data_set_1, data_set_2)
+    # cpuComparsion() and boxPlot() accept the same two data sets.
+
+
+if __name__ == "__main__":
+    main()
-- 
GitLab