"""Ad-hoc statistics over SUMMA job-statistics CSV files.

The CSVs are expected to carry at least the columns used below
("Wall-Clock Time", "Status", "initDuration", "relative standard deviation");
presumably they are produced by resourageUsage.py -- verify against the
files you point the functions at.

Run as a script to print the total wall-clock hours of the default
SUMMA-Actors and SUMMA-Original array jobs.
"""
import csv
import statistics as stat

import numpy as np
import pandas as pd

try:
    import matplotlib as mpl
    import matplotlib.pyplot as plt
except ImportError:  # only scatterPlot() needs matplotlib
    mpl = plt = None

# Default inputs. Every function takes the path as an optional argument so a
# different run can be analysed without editing the function bodies.
ACTORS_JOB_STATS = "/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/Jul-08-2022/SummaActors_jobStats_63007640_Filled_failed.csv"
ORIGINAL_JOB_STATS = "/home/kklenk/SummaProjects/Summa-Actors/utils/StatisticsScripts/SummaOriginal_jobStats_63155456.csv"
VARIATION_STATS = "/home/kklenk/SummaProjects/Summa-Actors/utils/StatisticsScripts/VarationStats.csv"
INIT_DURATION_STATS = "/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/Jun-06-2022/csv/Success1.csv"
COMPARE_ACTORS_STATS = "/home/kklenk/SummaProjects/Summa-Actors/utils/StatisticsScripts/SummaActors_jobStats_62666948.csv"
COMPARE_ORIGINAL_STATS = "/home/kklenk/SummaProjects/Summa-Actors/utils/StatisticsScripts/SummaOriginal_jobStats_62667162.csv"


def time_convert(x):
    """Convert a wall-clock string to whole seconds.

    Accepts ``HH:MM:SS`` and the long-job form ``D-HH:MM:SS``.

    Raises:
        ValueError: if the clock part is not three ':'-separated integers.
    """
    # Jobs longer than a day are reported with a leading "<days>-" prefix.
    days, _, clock = x.strip().rpartition('-')
    h, m, s = map(int, clock.split(':'))
    total_hours = (int(days) if days else 0) * 24 + h
    return (total_hours * 60 + m) * 60 + s


def _wall_clock_hours(df):
    """Return each row's "Wall-Clock Time" in hours, rounded to 2 decimals."""
    return [round((time_convert(t) / 60) / 60, 2)
            for t in df["Wall-Clock Time"].values]


def ramUsage(actors_csv=ACTORS_JOB_STATS, original_csv=ORIGINAL_JOB_STATS):
    """Print the total wall-clock hours of an Actors run and an Original run.

    Despite its historical name, this reports wall-clock time, not memory.

    Args:
        actors_csv: path (or file-like object) of the SUMMA-Actors CSV.
        original_csv: path (or file-like object) of the SUMMA-Original CSV.

    Returns:
        Tuple ``(actors_total_hours, original_total_hours)``.
    """
    # Read both inputs first so a missing file fails before anything prints.
    df_actors = pd.read_csv(actors_csv)
    df_original = pd.read_csv(original_csv)

    actors_total = sum(_wall_clock_hours(df_actors))
    print("SUMMA-Actors Array Job 1 Total Wall-Clock =", actors_total)

    original_total = sum(_wall_clock_hours(df_original))
    print()
    print("SUMMA-Original Array Job 1 Total Wall-Clock =", original_total)
    return actors_total, original_total


def scatterPlot(variation_csv=VARIATION_STATS, output_pdf="RSD-Actors.pdf"):
    """Scatter-plot the "relative standard deviation" column by job number.

    The figure is saved to ``output_pdf`` and then shown.

    Raises:
        ImportError: if matplotlib is not installed.
    """
    if plt is None:
        raise ImportError("matplotlib is required for scatterPlot()")

    d = pd.read_csv(variation_csv)["relative standard deviation"].values
    # One x value per data point (job numbers start at 1); a fixed count
    # would make plt.scatter fail for any CSV of a different length.
    x = list(range(1, len(d) + 1))
    print(len(x))
    print(len(d))
    plt.scatter(x, d)
    plt.title("Coefficient of Variation Plot")
    plt.xlabel("Job number")
    plt.ylabel("Relative Standard Deviation")
    plt.savefig(output_pdf, format="pdf", bbox_inches="tight")
    plt.show()


def initDuration(stats_csv=INIT_DURATION_STATS):
    """Print the sum of the "initDuration" column of ``stats_csv``."""
    df = pd.read_csv(stats_csv)
    print(sum(df["initDuration"].values))


def findRow(df, startHRU):
    """Report whether any row's first column equals ``startHRU``.

    Prints "found <startHRU>" or "did not find <startHRU>".

    Returns:
        True when a matching row exists, otherwise False.
    """
    # A frame without columns cannot contain a match.
    found = df.shape[1] > 0 and bool((df.iloc[:, 0] == startHRU).any())
    if found:
        print("found", startHRU)
    else:
        print("did not find", startHRU)
    return found


def compareCompleted(actors_csv=COMPARE_ACTORS_STATS,
                     original_csv=COMPARE_ORIGINAL_STATS):
    """Check which non-TIMEOUT Original jobs also appear in the Actors run.

    Rows whose "Status" is "TIMEOUT" are dropped from both inputs; then, for
    every remaining Original row, findRow() reports whether its first-column
    value occurs in the first column of the remaining Actors rows.
    """
    df_actors = pd.read_csv(actors_csv, index_col=False)
    df_original = pd.read_csv(original_csv, index_col=False)

    df_actors = df_actors[df_actors["Status"] != "TIMEOUT"]
    df_original = df_original[df_original["Status"] != "TIMEOUT"]

    if df_original.shape[1] == 0:
        return
    for start_hru in df_original.iloc[:, 0]:
        findRow(df_actors, start_hru)


if __name__ == "__main__":
    ramUsage()
pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/May-27-2022/SummaOriginal_jobStats_61377500.csv") - # data_set_4 = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/May-30-2022/SummaOriginal_jobStats_61415123.csv") - - - df1 = pd.DataFrame(data_set_1) - df2 = pd.DataFrame(data_set_2) - df4 = pd.DataFrame(data_set_4) - - usageStat1 = [] - for x in df1["Wall-Clock Time"].values: - usageStat1.append(round((time_convert(x) / 60) / 60, 2)) - usageStat2 = [] - for x in df2["Wall-Clock Time"].values: - usageStat2.append(round((time_convert(x) / 60) / 60, 2)) - usageStat4 = [] - for x in df4["Wall-Clock Time"].values: - usageStat4.append(round((time_convert(x) / 60) / 60, 2)) - - totalRam = [sum(usageStat1), sum(usageStat2), sum(usageStat4)] - print("usageStat1 Total Ram Used = ", sum(usageStat1)) - print("usageStat1 Mean Ram Used = ", stat.mean(usageStat1)) - print("usageStat2 Total Ram Used = ", sum(usageStat2)) - print("usageStat2 Mean Ram Used = ", stat.mean(usageStat2)) - print("usageStat4 Total Ram Used = ", sum(usageStat4)) - print("usageStat4 Mean Ram Used = ", stat.mean(usageStat4)) - print() - print("variation = ", stat.stdev(totalRam) / stat.mean(totalRam)) - csvFile = open("VarationStats.csv", 'w') - header = ["relative standard deviation"] - - csvFile.write("{}\n".format("relative standard deviation")) - - for i in range(0, len(usageStat1)): - l = [usageStat1[i], usageStat2[i], usageStat4[i]] - csvFile.write("{}\n".format(stat.stdev(l) / stat.mean(l))) - - -def scatterPlot(): - data_set_1 = pd.read_csv("/home/kklenk/SummaProjects/Summa-Actors/utils/StatisticsScripts/VarationStats.csv") - - df = pd.DataFrame(data_set_1) - - d = df["relative standard deviation"].values - x = [] - for i in range(1, 515): - x.append(i) - print(len(x)) - print(len(d)) - plt.scatter(x, d) - plt.title("Coefficient of Variation Plot") - plt.xlabel("Job number") - plt.ylabel("Relative Standard Deviation") - plt.savefig("RSD-Actors.pdf", 
format="pdf", bbox_inches="tight") - plt.show() - - -# ramUsage() -scatterPlot() \ No newline at end of file diff --git a/utils/StatisticsScripts/resourageUsage.py b/utils/StatisticsScripts/resourageUsage.py index 5a824bee53e662f1fe0ed9e51a4616499c7f5175..49e9e8ef8a88a98606799dbe2750ed3cd30fd71b 100644 --- a/utils/StatisticsScripts/resourageUsage.py +++ b/utils/StatisticsScripts/resourageUsage.py @@ -13,8 +13,9 @@ This function uses the seff command and can get the following data: - CPU-Efficiency - Wall-Clock Time - Memory Used + - Completion Status ''' -def seffCommand(jobId, numJobs): +def seffCommand(jobId, numJobs, gru_per_job): input_prompt = "SummaActors: a\nSummaOriginal: o\n" # Get input from the user user_response = input(input_prompt) @@ -27,14 +28,14 @@ def seffCommand(jobId, numJobs): raise Exception("Something went wrong") csvFile = open(output_csv_name, 'w') - header = ["startHRU", "numHRU", "#-CPU", "CPU Efficiency", "Wall-Clock Time", "Memory Used"] + header = ["startHRU", "numHRU", "#-CPU", "CPU Efficiency", "Wall-Clock Time", "Memory Used", "Status"] writer = csv.writer(csvFile) writer.writerow(header) - numHRU = 1000 - for i in range(0, int(numJobs)): + numHRU = gru_per_job + for i in range(0, numJobs): print("Job", i) rowData = [] rowData = [numHRU * i + 1, numHRU] @@ -48,6 +49,7 @@ def seffCommand(jobId, numJobs): if b'CPU Efficiency:' in line: effeciency = line.decode().split(" ")[2] effeciency = effeciency.strip() + effeciency = effeciency.replace('%', '') if b'Job Wall-clock time:' in line: wallClock = line.decode().split(" ")[-1] @@ -56,11 +58,16 @@ def seffCommand(jobId, numJobs): if b'Memory Utilized:' in line: memory = line.decode().split(" ")[2] memory = memory.strip() + + if b'State:' in line: + status = line.decode().split(" ")[1] + status = status.strip() rowData.append(cores) rowData.append(effeciency) rowData.append(wallClock) rowData.append(memory) + rowData.append(status) writer.writerow(rowData) csvFile.close() @@ -71,6 +78,9 
"""Compare wall-clock time and CPU efficiency of two job-statistics data sets.

Each data set must provide the columns "Wall-Clock Time" and
"CPU Efficiency". Run as a script to compare the default SUMMA-Actors and
SUMMA-Original CSV files configured below.
"""
import csv
import statistics as stat

import numpy as np
import pandas as pd

try:
    import matplotlib as mpl
    import matplotlib.pyplot as plt
except ImportError:  # not needed by the statistics in this file
    mpl = plt = None

ACTORS_JOB_STATS = "/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/Jul-13-2022/SummaActors_jobStats_63221110.csv"
ORIGINAL_JOB_STATS = "/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/Jul-09-2022/SummaOriginal_jobStats_63155456.csv"


def time_convert(x):
    """Convert a wall-clock string to whole seconds.

    Accepts ``HH:MM:SS`` and the long-job form ``D-HH:MM:SS``.

    Raises:
        ValueError: if the clock part is not three ':'-separated integers.
    """
    # Jobs longer than a day are reported with a leading "<days>-" prefix.
    days, _, clock = x.strip().rpartition('-')
    h, m, s = map(int, clock.split(':'))
    total_hours = (int(days) if days else 0) * 24 + h
    return (total_hours * 60 + m) * 60 + s


def _wall_clock_hours(data_set):
    """Return each row's "Wall-Clock Time" in hours, rounded to 2 decimals."""
    frame = pd.DataFrame(data_set)
    return [round((time_convert(t) / 60) / 60, 2)
            for t in frame["Wall-Clock Time"].values]


def _cpu_efficiencies(data_set):
    """Return the "CPU Efficiency" column as floats.

    String cells may carry a trailing '%', which is stripped before
    conversion.
    """
    values = []
    for raw in pd.DataFrame(data_set)["CPU Efficiency"].values:
        if isinstance(raw, str):
            raw = raw.strip().rstrip('%')
        values.append(float(raw))
    return values


def _mean(values):
    """Return the arithmetic mean of ``values`` (NaN for an empty list)."""
    return sum(values) / len(values) if values else float("nan")


def wallClockTime(data_set_1, data_set_2):
    """Print and return the total wall-clock hours of both data sets.

    Returns:
        Tuple ``(total_hours_1, total_hours_2)``.
    """
    total_1 = sum(_wall_clock_hours(data_set_1))
    print("Total Wall Clock for data_set_1 =", total_1)
    total_2 = sum(_wall_clock_hours(data_set_2))
    print("Total Wall Clock for data_set_2 =", total_2)
    return total_1, total_2


def cpuEfficiency(data_set_1, data_set_2):
    """Print and return the average CPU efficiency of both data sets.

    Each average is taken over that data set's own row count.

    Returns:
        Tuple ``(average_1, average_2)``; an empty data set yields NaN.
    """
    average_1 = _mean(_cpu_efficiencies(data_set_1))
    print("Average CPU Efficiency for data_set_1 =", average_1)
    # Divide by the length of data_set_2, not data_set_1.
    average_2 = _mean(_cpu_efficiencies(data_set_2))
    print("Average CPU Efficiency for data_set_2 =", average_2)
    return average_1, average_2


def main():
    """Compare the default Actors and Original job-statistics CSVs."""
    data_set_actors = pd.read_csv(ACTORS_JOB_STATS)
    data_set_original = pd.read_csv(ORIGINAL_JOB_STATS)

    wallClockTime(data_set_actors, data_set_original)
    print("")
    cpuEfficiency(data_set_actors, data_set_original)


if __name__ == "__main__":
    main()
end of file diff --git a/utils/netcdf/OutputVerification/compareOutput.py b/utils/netcdf/OutputVerification/compareOutput.py index 2bc563e48d1aa7871bab4dd4b34428f77b2d6a17..8f4d3adea1ec3c377890ebc37eb47f063e151846 100644 --- a/utils/netcdf/OutputVerification/compareOutput.py +++ b/utils/netcdf/OutputVerification/compareOutput.py @@ -3,7 +3,7 @@ from os.path import isfile, join from pathlib import Path import xarray as xr -numHRU = 25 +numHRU = 125 time = 'time' scalarSWE = 'scalarSWE' @@ -28,8 +28,8 @@ varList = [time, scalarSWE, scalarCanopyWat, scalarAquiferStorage, scalarTotalSo scalarTotalET, scalarTotalRunoff, scalarNetRadiation] filename = "out.txt" -originalPath = Path('/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/May-13-2022/netcdf/SummaBE_G000001-000125_day.nc') -actorsPath = Path('/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/May-26-2022/netcdf/SummaActorsGRU1-500_day.nc') +originalPath = Path('/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/May-30-2022/netcdf/SummaBE_G001001-001125_day.nc') +actorsPath = Path('/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/Jun-18-2022/netcdf/SummaActorsGRU1001-1000_day.nc') originalDataset = xr.open_dataset(originalPath) actorsDataset = xr.open_dataset(actorsPath) @@ -56,6 +56,7 @@ for i in range(0, numHRU): dataAct.append(data) print("Original", len(dataOrig)) print("Actors", len(dataAct)) + print("HRU = ", i) marginOfError = 0 if var == time: for a in range(0, len(dataAct)):