diff --git a/build/source/actors/FileAccessActor.h b/build/source/actors/FileAccessActor.h index 39408ff99af54dd8e3381509d8c15c85772f3b66..61c4da36f79906cc810fba78f1f865ec09ae1ccc 100644 --- a/build/source/actors/FileAccessActor.h +++ b/build/source/actors/FileAccessActor.h @@ -22,8 +22,15 @@ behavior file_access_actor(stateful_actor<file_access_state>* self, int startGRU self->state.numGRU = numGRU; self->state.startGRU = startGRU; self->state.outputStrucSize = outputStrucSize; - parseSettings(self, configPath); - aout(self) << "\nFile Access Actor Started\n"; + + // Get Settings from configuration file + if (parseSettings(self, configPath) == -1) { + aout(self) << "Error with JSON Settings File!!!\n"; + self->quit(); + } else { + aout(self) << "\nSETTINGS FOR FILE_ACCESS_ACTOR\n" << + "Number of Vectors in Output Structure = " << self->state.num_vectors_in_output_manager << "\n"; + } initalizeFileAccessActor(self); return { diff --git a/config/configuration.py b/config/configuration.py index d126391c56f5ff2581f359170365a30e265eee97..751b6bde5d9212cb88f720583d5cc7b9a7d35584 100644 --- a/config/configuration.py +++ b/config/configuration.py @@ -98,9 +98,14 @@ def create_output_path(outputPath): # The job will not be submitted without a file name outputSlurm += "slurm-%A_%a.out" - return outputNetCDF, outputSlurm + return outputNetCDF, outputSlurm, outputCSV +""" +Function to create the file manager for SummaActors. +This is a text file that is created from the settings in the Configuration section +in the JSON file. 
+""" def create_file_manager(): json_file = open("Summa_Actors_Settings.json") fileManagerSettings = json.load(json_file) @@ -109,7 +114,7 @@ def create_file_manager(): # add the date for the run outputPath = fileManagerSettings["Configuration"]["outputPath"] if exists(outputPath): - outputNetCDF, outputSlurm = create_output_path(outputPath) + outputNetCDF, outputSlurm, outputCSV = create_output_path(outputPath) fileManagerSettings["Configuration"]["outputPath"] = outputNetCDF else: print("Output path does not exist, Ensure it exists before running this setup") @@ -123,6 +128,7 @@ def create_file_manager(): with open("Summa_Actors_Settings.json") as settings_file: data = json.load(settings_file) data["JobActor"]["FileManagerPath"] = os.getcwd() + "/" + "fileManager.txt" + data["JobActor"]["csvPath"] = outputCSV with open("Summa_Actors_Settings.json", "w") as updated_settings: json.dump(data, updated_settings, indent=2) @@ -150,47 +156,6 @@ def create_caf_config(): caf_config_path += caf_config_name return caf_config_path -""" -Function to create the a list of the jobs will run -This is used for submitting the array job -""" -def create_job_list(): - json_file = open("Summa_Actors_Settings.json") - SummaSettings = json.load(json_file) - json_file.close() - - numberOfTasks = SummaSettings["JobSubmissionParams"]["numHRUs"] - GRUPerJob = SummaSettings["JobSubmissionParams"]["maxGRUsPerSubmission"] - numCPUs = SummaSettings["JobSubmissionParams"]["cpus-per-task"] - print(numberOfTasks) - print(GRUPerJob) - print(numCPUs) - - # we need to get the full path of the summa binary - os.chdir("../build") - summaPath = os.getcwd() - summaPath += "/summaMain" - os.chdir("../config") - config_dir = os.getcwd() - caf_config_path = create_caf_config(numCPUs) - - - # we want to assemble the job list - job_list = open("job_list.txt", "w") - gruStart = 1 - jobCount = 0 - while gruStart < numberOfTasks: - if (numberOfTasks - gruStart < GRUPerJob): - job_list.write("{} -g {} -n {} -c {} 
--config-file={}\n".format(summaPath,\ - gruStart, numberOfTasks - gruStart, config_dir, caf_config_path)) - else: - job_list.write("{} -g {} -n {} -c {} --config-file={}\n".format(summaPath,\ - gruStart, GRUPerJob, config_dir, caf_config_path)) - gruStart += GRUPerJob - jobCount += 1 - - return jobCount - def create_sbatch_file(outputSlurm, configFile): json_file = open("Summa_Actors_Settings.json") @@ -243,7 +208,6 @@ def init_run(): if exists('./Summa_Actors_Settings.json'): print("File Exists, What do we do next") outputSlurm = create_file_manager() - # jobCount = create_job_list() configFile = create_caf_config() create_sbatch_file(outputSlurm, configFile) diff --git a/utils/StatisticsScripts/ramUsage.py b/utils/StatisticsScripts/ramUsage.py new file mode 100644 index 0000000000000000000000000000000000000000..bb03fa1d08ef0b98daf98cdeab34338d7982bd7d --- /dev/null +++ b/utils/StatisticsScripts/ramUsage.py @@ -0,0 +1,76 @@ +import numpy as np +import pandas as pd +import statistics as stat +import csv +import matplotlib as mpl +import matplotlib.pyplot as plt + +def time_convert(x): + h,m,s = map(int,x.split(':')) + return (h*60+m)*60+s + + +def ramUsage(): + data_set_1 = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/Jun-06-2022/SummaActors_jobStats_61721504.csv") + data_set_2 = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/May-13-2022/SummaActors_jobStatistics_60829543.csv") + data_set_4 = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/May-26-2022/SummaActors_jobStats_61263427.csv") + + # data_set_1 = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/Apr-28-2022/SummaOrginal-60232429_jobStatistics.csv") + # data_set_2 = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/May-27-2022/SummaOriginal_jobStats_61377500.csv") + # data_set_4 = 
pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/May-30-2022/SummaOriginal_jobStats_61415123.csv") + + + df1 = pd.DataFrame(data_set_1) + df2 = pd.DataFrame(data_set_2) + df4 = pd.DataFrame(data_set_4) + + usageStat1 = [] + for x in df1["Wall-Clock Time"].values: + usageStat1.append(round((time_convert(x) / 60) / 60, 2)) + usageStat2 = [] + for x in df2["Wall-Clock Time"].values: + usageStat2.append(round((time_convert(x) / 60) / 60, 2)) + usageStat4 = [] + for x in df4["Wall-Clock Time"].values: + usageStat4.append(round((time_convert(x) / 60) / 60, 2)) + + totalRam = [sum(usageStat1), sum(usageStat2), sum(usageStat4)] + print("usageStat1 Total Ram Used = ", sum(usageStat1)) + print("usageStat1 Mean Ram Used = ", stat.mean(usageStat1)) + print("usageStat2 Total Ram Used = ", sum(usageStat2)) + print("usageStat2 Mean Ram Used = ", stat.mean(usageStat2)) + print("usageStat4 Total Ram Used = ", sum(usageStat4)) + print("usageStat4 Mean Ram Used = ", stat.mean(usageStat4)) + print() + print("variation = ", stat.stdev(totalRam) / stat.mean(totalRam)) + csvFile = open("VarationStats.csv", 'w') + header = ["relative standard deviation"] + + csvFile.write("{}\n".format("relative standard deviation")) + + for i in range(0, len(usageStat1)): + l = [usageStat1[i], usageStat2[i], usageStat4[i]] + csvFile.write("{}\n".format(stat.stdev(l) / stat.mean(l))) + + +def scatterPlot(): + data_set_1 = pd.read_csv("/home/kklenk/SummaProjects/Summa-Actors/utils/StatisticsScripts/VarationStats.csv") + + df = pd.DataFrame(data_set_1) + + d = df["relative standard deviation"].values + x = [] + for i in range(1, 515): + x.append(i) + print(len(x)) + print(len(d)) + plt.scatter(x, d) + plt.title("Coefficient of Variation Plot") + plt.xlabel("Job number") + plt.ylabel("Relative Standard Deviation") + plt.savefig("RSD-Actors.pdf", format="pdf", bbox_inches="tight") + plt.show() + + +# ramUsage() +scatterPlot() \ No newline at end of file diff --git 
a/utils/StatisticsScripts/resourageUsage.py b/utils/StatisticsScripts/resourageUsage.py index 13680d6ed54547decd0b1f74af38b585d26a32b3..5a824bee53e662f1fe0ed9e51a4616499c7f5175 100644 --- a/utils/StatisticsScripts/resourageUsage.py +++ b/utils/StatisticsScripts/resourageUsage.py @@ -33,7 +33,6 @@ def seffCommand(jobId, numJobs): writer.writerow(header) - startHRU = 1 numHRU = 1000 for i in range(0, int(numJobs)): print("Job", i)