Skip to content
Snippets Groups Projects
Commit ccd03dbc authored by KyleKlenk's avatar KyleKlenk
Browse files

Update README

Do not need separate config file for specifying threads. Can use $SLURM_CPUS_PER_TASK
parent e16e5949
No related branches found
No related tags found
No related merge requests found
File moved
......@@ -50,39 +50,44 @@ SUMMA-Actors supports four build types: Debug, Cluster, Release, and Cluster_Deb
Running SUMMA-Actors is done with the following command:
./summa_actor -g startGRU -n numGRU -c path_to_config_file
If you are running SUMMA-Actors on a cluster, you will need to specify the number of threads when not using whole nodes.
This can be done with the --caf.scheduler.max-threads option
./summa_actor -g startGRU -n numGRU -c path_to_config_file --caf.scheduler.max-threads $SLURM_CPUS_PER_TASK
The values for -g and -n are integers, while -c is the full path to the configuration file for SUMMA-Actors.
The configuration file is a json file. The contents of the JSON file are below:
{
{
"Distributed_Settings": {
"distributed_mode": false,
"servers_list": [{"hostname": "simlab01"}, {"hostname": "simlab05"}],
"servers_list": [{"hostname": "cnic-giws-cpu-19001-04"}, {"hostname": "cnic-giws-utl-19002"}, {"hostname": "cnic-giws-utl-19003"}],
"port": 4444,
"total_hru_count": 517315,
"num_hru_per_batch": 1000
"total_hru_count": 800,
"num_hru_per_batch": 50
},
"Summa_Actor": {
"max_gru_per_job": 500
"max_gru_per_job": 4000
},
"File_Access_Actor": {
"num_partitions_in_output_buffer": 4,
"num_partitions_in_output_buffer": 8,
"num_timesteps_in_output_buffer": 500
},
"Job_Actor": {
"file_manager_path": "/gladwell/kck540/Sundials_Settings/fileManager_actors.txt",
"output_csv": false,
"csv_path": ""
"file_manager_path": "/scratch/gwf/gwf_cmt/kck540/Summa-Actors/settings/file_manager_actors.txt",
"max_run_attempts": 3
},
"HRU_Actor": {
"print_output": true,
"output_frequency": 1000
"print_output": true,
"output_frequency": 100000,
"dt_init_factor": 1
}
}
}
The settings above should work for most use cases; some of these settings, such as max_gru_per_job, are ones we want to automate in the future. However, the only field that you should have to adjust is the `file_manager_path`. This is the path to the file that manages the complete configuration of the SUMMA simulation. The SUMMA configuration files are explained in more depth in the following [documentation](https://summa.readthedocs.io/en/latest/input_output/SUMMA_input/)
......
......@@ -155,7 +155,7 @@ void caf_main(actor_system& sys, const config& cfg) {
"fileManger is set with the \"-c\" option\n";
aout(self) << "EXAMPLE: ./summaMain -g 1 -n 10 -c location/of/config \n";
return;
}
}
auto summa = sys.spawn(summa_actor, cfg.startGRU, cfg.countGRU, summa_actor_settings,
file_access_actor_settings, job_actor_settings, hru_actor_settings, self);
......
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pylab import cm
# from mpl_toolkits.axes_grid.anchored_artists import AnchoredText
# MATPLOTLIBRC stuff
import matplotlib as mpl
# mpl.use('AGG') # for systems not running a GUI
# Default font for all matplotlib text, installed into the 'font' rc group.
font = dict(family='serif', weight='normal', size=20)
mpl.rc('font', **font)
def time_convert(seconds):
    """Convert a duration in seconds to hours, rounded to two decimals.

    The duration is first reduced modulo one day (24 * 3600 seconds), so a
    value of 24 hours or more wraps around to its remainder within a day.
    """
    seconds_per_hour = 3600
    within_one_day = seconds % (24 * seconds_per_hour)
    return round(within_one_day / seconds_per_hour, 2)
def add_failures(failed_start_hru):
    """Return the wall-clock time, in hours, recorded for a re-run failed job.

    Reads 'SummaOriginal_rerun_failed_Jul-09.csv' (relative to the current
    working directory), selects the rows whose "startHRU" equals
    ``failed_start_hru`` and converts the first matching "Wall-Clock Time"
    value with ``time_convert``.

    Raises:
        IndexError: if no row matches ``failed_start_hru``.
    """
    reruns = pd.read_csv('SummaOriginal_rerun_failed_Jul-09.csv')
    is_requested_hru = reruns["startHRU"] == failed_start_hru
    wall_clock_values = reruns.loc[is_requested_hru, "Wall-Clock Time"].values
    return time_convert(wall_clock_values[0])
def summa_worst_case():
    """Save a bar chart of the hard-coded sub-task times as 'SUMMA-worst-case.png'."""
    # Wall-clock time of each sub-task, keyed by its label on the x axis.
    hours_by_sub_task = {
        "1": 0.87, "2": 0.75, "3": 2.27, "4": 0.87,
        "5": 0.88, "6": 0.75, "7": 0.76, "8": 1.48,
    }

    plt.figure(figsize=(12, 8))
    bar_color = plt.get_cmap('viridis').copy()(0.5)
    plt.bar(
        list(hours_by_sub_task.keys()),
        list(hours_by_sub_task.values()),
        color=bar_color,
        alpha=0.6,
        edgecolor=bar_color,
    )
    plt.xlabel('Sub-task')
    plt.ylabel('Hours')
    plt.savefig("SUMMA-worst-case.png", bbox_inches="tight")
def summa_best_case():
    """Unfinished counterpart of ``summa_worst_case``.

    Currently only creates an empty local list and returns ``None``.
    """
    batches = list()
def _cpu_default_bin_edges(values, nbins):
    """Return the edges an ``nbins``-bin histogram of ``values`` spans by default.

    NaNs are ignored when determining the range, which matches what a plain
    ``plt.hist(values, bins=nbins)`` call would report, without drawing
    anything.
    """
    return np.histogram_bin_edges(
        values, bins=nbins, range=(np.nanmin(values), np.nanmax(values))
    )


def cpuComparsion(data_set_1, data_set_2):
    """Compare the CPU efficiency of SUMMA-Actors jobs against SUMMA jobs.

    Prints the average efficiency of each data set and how many pairwise
    comparisons each side wins, then saves overlaid histograms of both
    distributions to 'CPU-Efficiency.png'.

    Args:
        data_set_1: SUMMA-Actors results; anything ``pd.DataFrame`` accepts
            that yields a "CPU Efficiency" column.
        data_set_2: SUMMA results with the same "CPU Efficiency" column.

    Raises:
        KeyError: if either data set lacks a "CPU Efficiency" column.
        ZeroDivisionError: if either data set has no rows.
    """
    SummaActors = pd.DataFrame(data_set_1)["CPU Efficiency"].values
    SummaOriginal = pd.DataFrame(data_set_2)["CPU Efficiency"].values

    print("Average CPU-Efficiency SUMMA-Actors =", sum(SummaActors) / len(SummaActors))
    print("Average CPU-Efficiency SUMMA =", sum(SummaOriginal) / len(SummaOriginal))

    # Row-by-row comparison over the rows both data sets share; a tie counts
    # for SUMMA. zip() avoids indexing one array with the other's length.
    actors_count = 0
    summa_count = 0
    for actors_efficiency, original_efficiency in zip(SummaActors, SummaOriginal):
        if actors_efficiency > original_efficiency:
            actors_count += 1
        else:
            summa_count += 1
    print("TOTAL ACTORS COUNT =", actors_count)
    print("TOTAL SUMMA COUNT =", summa_count)

    string_text = "SUMMA-Actors Range: [{}% - {}%]\nSUMMA Range: [{}% - {}%]".format(
        min(SummaActors), max(SummaActors), min(SummaOriginal), max(SummaOriginal)
    )

    nbins = 50

    # Setup colors
    cmap = plt.get_cmap('viridis').copy()
    actors_color = cmap(0.02)
    print("Actors color is: ", actors_color)
    original_color = cmap(0.5)

    # Use one common set of bin edges so the two histograms are directly
    # comparable. The edges are computed numerically instead of by drawing
    # throwaway histograms, which would land on whatever figure is current.
    actors_bins = _cpu_default_bin_edges(SummaActors, nbins)
    original_bins = _cpu_default_bin_edges(SummaOriginal, nbins)
    lower = min(actors_bins[0], original_bins[0])
    upper = max(actors_bins[-1], original_bins[-1])
    # NOTE: nbins edges give nbins - 1 bars; kept as-is so the figure does not change.
    linbins = np.linspace(lower, upper, nbins)

    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
    ax.hist(SummaActors, bins=linbins, alpha=0.75, label="SUMMA-Actors",
            edgecolor=actors_color, linewidth=0.5, color=actors_color)
    ax.hist(SummaOriginal, bins=linbins, alpha=0.6, label="SUMMA",
            edgecolor=original_color, linewidth=0.5, color=original_color)

    # Set labels
    ax.set_xlabel('CPU Efficiency as Percentage')
    ax.set_ylabel('Count')

    # Text box showing the min/max range of each data set
    props = dict(boxstyle='round', facecolor="cornflowerblue", alpha=0.5, edgecolor='black')
    ax.text(0.02, 0.8, string_text, transform=ax.transAxes,
            verticalalignment='top', bbox=props)

    # Legend: SUMMA-Actors first, then SUMMA (the order they were drawn in)
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:2], labels[:2], loc=0, edgecolor='black')

    plt.savefig("CPU-Efficiency.png", bbox_inches="tight")
def _wall_clock_default_bin_edges(values, nbins):
    """Return the edges an ``nbins``-bin histogram of ``values`` spans by default.

    NaNs are ignored when determining the range, which matches what a plain
    ``plt.hist(values, bins=nbins)`` call would report, without drawing
    anything.
    """
    return np.histogram_bin_edges(
        values, bins=nbins, range=(np.nanmin(values), np.nanmax(values))
    )


def _print_wall_clock_totals(label, job_durations, read_hours, write_hours):
    """Print total, read, write, IO and non-IO ("CPU") time for one data set."""
    total_time = sum(job_durations)
    total_read = sum(read_hours)
    total_write = sum(write_hours)
    total_io = total_read + total_write
    print(f"Total Time For {label} = ", total_time)
    print(" Total Read Time = ", total_read)
    print(" Total Write Time = ", total_write)
    print(" Total IO Time = ", total_io)
    print(" Total CPU Time = ", total_time - total_io)


def wallClockComparison(data_set_1, data_set_2):
    """Compare job wall-clock times of SUMMA-Actors against SUMMA.

    Prints time totals for each data set and how many pairwise comparisons
    each side wins, then saves a figure to 'WallClock.png' containing
    overlaid histograms of the job durations plus their cumulative counts on
    a second y axis.

    Args:
        data_set_1: SUMMA-Actors results, indexable by the column names
            "job_duration", "read_duration" and "write_duration", each
            yielding an object with a ``.values`` array.
        data_set_2: SUMMA results with the same three columns.

    Note:
        "read_duration" and "write_duration" are passed through
        ``time_convert`` (seconds to hours) while "job_duration" is used
        unchanged; presumably it is already in hours, as the x-axis label
        says -- TODO confirm against the log format.
    """
    nbins = 50

    data_set_1_values = data_set_1["job_duration"].values
    data_set_1_reads = [time_convert(s) for s in data_set_1["read_duration"].values]
    data_set_1_writes = [time_convert(s) for s in data_set_1["write_duration"].values]

    data_set_2_values = data_set_2["job_duration"].values
    data_set_2_reads = [time_convert(s) for s in data_set_2["read_duration"].values]
    data_set_2_writes = [time_convert(s) for s in data_set_2["write_duration"].values]

    _print_wall_clock_totals("SUMMA-Actors", data_set_1_values, data_set_1_reads, data_set_1_writes)
    _print_wall_clock_totals("SUMMA", data_set_2_values, data_set_2_reads, data_set_2_writes)

    # Row-by-row comparison over the rows both data sets share; the shorter
    # job wins and a tie counts for SUMMA. zip() avoids indexing one array
    # with the other's length.
    actors_count = 0
    summa_count = 0
    for actors_duration, original_duration in zip(data_set_1_values, data_set_2_values):
        if actors_duration < original_duration:
            actors_count += 1
        else:
            summa_count += 1
    print("TOTAL ACTORS COUNT =", actors_count)
    print("TOTAL SUMMA COUNT =", summa_count)

    # Use one common set of bin edges so the two histograms are directly
    # comparable. The edges are computed numerically instead of by drawing
    # throwaway histograms, which would land on whatever figure is current.
    sa_bins = _wall_clock_default_bin_edges(data_set_1_values, nbins)
    so_bins = _wall_clock_default_bin_edges(data_set_2_values, nbins)
    lower = min(sa_bins[0], so_bins[0])
    upper = max(sa_bins[-1], so_bins[-1])
    # NOTE: nbins edges give nbins - 1 bars; kept as-is so the figure does not change.
    linbins = np.linspace(lower, upper, nbins)

    fig2, ax2 = plt.subplots(1, 1, figsize=(12, 8))
    cmap = plt.get_cmap('viridis').copy()
    sa_color = cmap(0.02)
    so_color = cmap(0.5)

    # Create the bar graph
    _, sa_bins, _ = ax2.hist(data_set_1_values, bins=linbins, alpha=0.75, label="SUMMA-Actors",
                             edgecolor=sa_color, linewidth=0.5, color=sa_color)
    _, so_bins, _ = ax2.hist(data_set_2_values, bins=linbins, alpha=0.6, edgecolor=so_color,
                             linewidth=0.5, label="SUMMA", color=so_color)
    ax2.set_title("Wall-Clock Time Comparison Of Jobs On Each CPU", fontsize=20)
    ax2.set_xlabel('Wall-Clock Time (Hours)')
    ax2.set_ylabel('Count')

    # Overlay the cumulative counts on a second y axis sharing the same x axis
    ax3 = ax2.twinx()
    ax3.hist(data_set_1_values, bins=sa_bins, linewidth=1.5, histtype='step',
             cumulative=True, label="SUMMA-Actors", color="#CC00CC")
    ax3.hist(data_set_2_values, bins=so_bins, linewidth=1.5, histtype='step',
             cumulative=True, label="SUMMA", color="#00CCCC")
    ax3.set_ylabel('Total Number of Bins', labelpad=15)

    # Drop the last vertex of each step outline to get rid of the line that
    # goes straight down at the right-hand end of the cumulative curves.
    for poly in ax3.get_children():
        if isinstance(poly, mpl.patches.Polygon):
            poly.set_xy(poly.get_xy()[:-1])

    # Single two-column legend: bar handles (with blank labels) sit next to
    # the matching step handles, which carry the text.
    handles1, labels1 = ax2.get_legend_handles_labels()
    handles2, labels2 = ax3.get_legend_handles_labels()
    ax2.legend((*handles1, *handles2), (*len(labels1)*[''], *labels2),
               loc='right', ncol=2, handlelength=3, fontsize=16)

    plt.savefig("WallClock.png", bbox_inches="tight")
def boxPlot(data_set_1, data_set_2):
    """Save horizontal box plots of read and write durations to 'BoxPlot.png'.

    Args:
        data_set_1: SUMMA-Actors results, indexable by the column names
            "read_duration" and "write_duration", each yielding an object
            with a ``.values`` array.
        data_set_2: SUMMA results with the same two columns.
    """
    # One box per entry, in insertion order.
    durations_by_label = {
        "SUMMA-Actors Read": data_set_1["read_duration"].values,
        "SUMMA Read": data_set_2["read_duration"].values,
        "SUMMA-Actors Write": data_set_1["write_duration"].values,
        "SUMMA Write": data_set_2["write_duration"].values,
    }

    fig, ax = plt.subplots(1, 1, figsize=(14, 8))
    ax.set_title("Box Plot Of Read And Write Times", fontsize=20)
    # vert=False below draws the boxes horizontally, so the durations run
    # along the x axis and the read/write categories along the y axis.
    ax.set_xlabel('Time (Seconds)', fontsize=14)
    ax.set_ylabel('Read/Write', fontsize=14)
    ax.boxplot(
        list(durations_by_label.values()),
        labels=list(durations_by_label.keys()),
        flierprops=dict(markerfacecolor='r', marker='D'),
        vert=False,
    )

    # set size of tick labels
    ax.tick_params(axis='both', which='major', labelsize=14)
    plt.savefig("BoxPlot.png", bbox_inches="tight")
# Assemble data: load the sorted log summaries of the SUMMA-Actors run
# (data_set_1) and of the original SUMMA run (data_set_2) from hard-coded
# paths. These reads happen at module level, i.e. whenever the file is run.
data_set_1 = pd.read_csv("/scratch/gwf/gwf_cmt/kck540/Single_CPU_Test/actors/logs/_log_summaryActors_sorted.csv")
data_set_2 = pd.read_csv("/scratch/gwf/gwf_cmt/kck540/Single_CPU_Test/non-actors/logs/_log_summaryOriginal_sorted.csv")
# data_set 1 and 2 are used for the paper
# Produces WallClock.png in the current working directory.
wallClockComparison(data_set_1, data_set_2)
# boxPlot(data_set_1, data_set_2)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment