From ab9aa27f138fac44e73a5c998f91bc0901ac638f Mon Sep 17 00:00:00 2001
From: kck540 <kyle.klenk@usask.ca>
Date: Thu, 9 Jun 2022 12:38:13 -0400
Subject: [PATCH] Printed out settings for FileAccessActor

---
 build/source/actors/FileAccessActor.h     | 11 +++-
 config/configuration.py                   | 52 +++-------------
 utils/StatisticsScripts/ramUsage.py       | 76 +++++++++++++++++++++++
 utils/StatisticsScripts/resourageUsage.py |  1 -
 4 files changed, 93 insertions(+), 47 deletions(-)
 create mode 100644 utils/StatisticsScripts/ramUsage.py

diff --git a/build/source/actors/FileAccessActor.h b/build/source/actors/FileAccessActor.h
index 39408ff..61c4da3 100644
--- a/build/source/actors/FileAccessActor.h
+++ b/build/source/actors/FileAccessActor.h
@@ -22,8 +22,15 @@ behavior file_access_actor(stateful_actor<file_access_state>* self, int startGRU
     self->state.numGRU = numGRU;
     self->state.startGRU = startGRU;
     self->state.outputStrucSize = outputStrucSize;
-    parseSettings(self, configPath);
-    aout(self) << "\nFile Access Actor Started\n";
+
+    // Get Settings from configuration file
+    if (parseSettings(self, configPath) == -1) {
+        aout(self) << "Error with JSON Settings File!!!\n";
+        self->quit();
+    } else {
+        aout(self) << "\nSETTINGS FOR FILE_ACCESS_ACTOR\n" <<
+        "Number of Vectors in Output Structure = " << self->state.num_vectors_in_output_manager << "\n";
+    }
     initalizeFileAccessActor(self);
 
     return {
diff --git a/config/configuration.py b/config/configuration.py
index d126391..751b6bd 100644
--- a/config/configuration.py
+++ b/config/configuration.py
@@ -98,9 +98,14 @@ def create_output_path(outputPath):
     # The job will not be submitted without a file name
     outputSlurm += "slurm-%A_%a.out"
     
-    return outputNetCDF, outputSlurm
+    return outputNetCDF, outputSlurm, outputCSV
 
 
+"""
+Function to create the file manager for SummaActors.
+This is a text file that is created from the settings in the Configuration section
+of the JSON file.
+"""
 def create_file_manager():
     json_file = open("Summa_Actors_Settings.json")
     fileManagerSettings = json.load(json_file)
@@ -109,7 +114,7 @@ def create_file_manager():
     # add the date for the run
     outputPath = fileManagerSettings["Configuration"]["outputPath"]
     if exists(outputPath):
-        outputNetCDF, outputSlurm = create_output_path(outputPath)
+        outputNetCDF, outputSlurm, outputCSV = create_output_path(outputPath)
         fileManagerSettings["Configuration"]["outputPath"] = outputNetCDF
     else:
         print("Output path does not exist, Ensure it exists before running this setup")
@@ -123,6 +128,7 @@ def create_file_manager():
     with open("Summa_Actors_Settings.json") as settings_file:
         data = json.load(settings_file)
         data["JobActor"]["FileManagerPath"] = os.getcwd() + "/" + "fileManager.txt"
+        data["JobActor"]["csvPath"] = outputCSV
 
     with open("Summa_Actors_Settings.json", "w") as updated_settings:
         json.dump(data, updated_settings, indent=2) 
@@ -150,47 +156,6 @@ def create_caf_config():
     caf_config_path += caf_config_name
     return caf_config_path
 
-"""
-Function to create the a list of the jobs will run
-This is used for submitting the array job
-"""
-def create_job_list():
-    json_file = open("Summa_Actors_Settings.json")
-    SummaSettings = json.load(json_file)
-    json_file.close()
-
-    numberOfTasks = SummaSettings["JobSubmissionParams"]["numHRUs"]
-    GRUPerJob = SummaSettings["JobSubmissionParams"]["maxGRUsPerSubmission"]
-    numCPUs = SummaSettings["JobSubmissionParams"]["cpus-per-task"]
-    print(numberOfTasks)
-    print(GRUPerJob)
-    print(numCPUs)
-
-    # we need to get the full path of the summa binary
-    os.chdir("../build")
-    summaPath = os.getcwd()
-    summaPath += "/summaMain"
-    os.chdir("../config")
-    config_dir = os.getcwd()
-    caf_config_path = create_caf_config(numCPUs)
-
-
-    # we want to assemble the job list
-    job_list = open("job_list.txt", "w")
-    gruStart = 1
-    jobCount = 0
-    while gruStart < numberOfTasks:
-        if (numberOfTasks - gruStart < GRUPerJob):
-            job_list.write("{} -g {} -n {} -c {} --config-file={}\n".format(summaPath,\
-                gruStart, numberOfTasks - gruStart, config_dir, caf_config_path))
-        else:
-            job_list.write("{} -g {} -n {} -c {} --config-file={}\n".format(summaPath,\
-                gruStart, GRUPerJob, config_dir, caf_config_path))
-        gruStart += GRUPerJob
-        jobCount += 1
-    
-    return jobCount
-
 
 def create_sbatch_file(outputSlurm, configFile):
     json_file = open("Summa_Actors_Settings.json")
@@ -243,7 +208,6 @@ def init_run():
     if exists('./Summa_Actors_Settings.json'):
         print("File Exists, What do we do next")
         outputSlurm = create_file_manager()
-        # jobCount = create_job_list()
         configFile = create_caf_config()
         create_sbatch_file(outputSlurm, configFile)
         
diff --git a/utils/StatisticsScripts/ramUsage.py b/utils/StatisticsScripts/ramUsage.py
new file mode 100644
index 0000000..bb03fa1
--- /dev/null
+++ b/utils/StatisticsScripts/ramUsage.py
@@ -0,0 +1,76 @@
+import numpy as np
+import pandas as pd
+import statistics as stat
+import csv
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+
+def time_convert(x):  # "[D-]H:M:S" elapsed-time string -> whole seconds
+    days, _, clock = x.rpartition('-')  # Slurm writes "D-HH:MM:SS" for runs past 24 h
+    h, m, s = map(int, clock.split(':'))
+    return ((int(days or 0) * 24 + h) * 60 + m) * 60 + s
+
+def ramUsage():
+    """Summarise per-job wall-clock hours of three runs and write their RSD.
+
+    Reads the "Wall-Clock Time" column of three hard-coded job-statistics
+    CSV files, prints the total and mean per run, prints the relative
+    standard deviation of the three totals, and writes one relative standard
+    deviation per job to "VarationStats.csv" in the working directory.
+
+    NOTE(review): the printed labels say "Ram" but the values are wall-clock
+    hours -- confirm which metric is intended.
+    """
+    csv_paths = [
+        "/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/Jun-06-2022/SummaActors_jobStats_61721504.csv",
+        "/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/May-13-2022/SummaActors_jobStatistics_60829543.csv",
+        "/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/May-26-2022/SummaActors_jobStats_61263427.csv",
+    ]
+    # Alternative inputs kept from the original script (swap into csv_paths):
+    # "/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/Apr-28-2022/SummaOrginal-60232429_jobStatistics.csv"
+    # "/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/May-27-2022/SummaOriginal_jobStats_61377500.csv"
+    # "/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/May-30-2022/SummaOriginal_jobStats_61415123.csv"
+    labels = ["usageStat1", "usageStat2", "usageStat4"]
+
+    # Hours per job for each run, rounded to two decimals.
+    usage = [
+        [round((time_convert(t) / 60) / 60, 2) for t in pd.read_csv(p)["Wall-Clock Time"].values]
+        for p in csv_paths
+    ]
+
+    totalRam = [sum(hours) for hours in usage]
+    for label, hours, total in zip(labels, usage, totalRam):
+        print(label + " Total Ram Used = ", total)
+        print(label + " Mean Ram Used = ", stat.mean(hours))
+    print()
+    print("variation = ", stat.stdev(totalRam) / stat.mean(totalRam))
+
+    # "with" guarantees the CSV is flushed and closed even if a row fails.
+    with open("VarationStats.csv", 'w') as csvFile:
+        csvFile.write("{}\n".format("relative standard deviation"))
+        for i in range(len(usage[0])):  # indexed on purpose: a shorter run still raises
+            row = [hours[i] for hours in usage]
+            csvFile.write("{}\n".format(stat.stdev(row) / stat.mean(row)))
+
+
+def scatterPlot():
+    """Plot the per-job relative standard deviation and save it as a PDF."""
+    df = pd.read_csv("/home/kklenk/SummaProjects/Summa-Actors/utils/StatisticsScripts/VarationStats.csv")
+    d = df["relative standard deviation"].values
+
+    # Number the jobs 1..N from the data itself; the former fixed range(1, 515)
+    # made plt.scatter raise whenever the CSV did not hold exactly 514 rows.
+    x = list(range(1, len(d) + 1))
+    print(len(x))
+    print(len(d))
+
+    plt.scatter(x, d)
+    plt.title("Coefficient of Variation Plot")
+    plt.xlabel("Job number")
+    plt.ylabel("Relative Standard Deviation")
+    plt.savefig("RSD-Actors.pdf", format="pdf", bbox_inches="tight")
+    plt.show()
+
+    
+# ramUsage()  # disabled; when re-enabled it writes VarationStats.csv to the working directory
+scatterPlot()
\ No newline at end of file
diff --git a/utils/StatisticsScripts/resourageUsage.py b/utils/StatisticsScripts/resourageUsage.py
index 13680d6..5a824be 100644
--- a/utils/StatisticsScripts/resourageUsage.py
+++ b/utils/StatisticsScripts/resourageUsage.py
@@ -33,7 +33,6 @@ def seffCommand(jobId, numJobs):
 
     writer.writerow(header)
 
-    startHRU = 1
     numHRU = 1000
     for i in range(0, int(numJobs)):
         print("Job", i)
-- 
GitLab