Skip to content
Snippets Groups Projects
Commit c8a64fff authored by Kyle Klenk (kck540)'s avatar Kyle Klenk (kck540)
Browse files

Committing before merge. Utility files have changes to paths

parent d036e018
No related branches found
No related tags found
No related merge requests found
......@@ -7,7 +7,7 @@ module load openblas
module load caf
#### Specify Master Directory, parent of build directory
export F_MASTER=/globalhome/kck540/HPC/SummaProjects/Summa-Actors
export F_MASTER=/home/kklenk/SummaProjects/Summa-Actors
#### Specify Compilers ####
export FC=gfortran
......
import numpy as np
import pandas as pd
import statistics as stat
import csv
import matplotlib as mpl
import matplotlib.pyplot as plt
def time_convert(x):
    """Convert an elapsed-time string to a whole number of seconds.

    Accepts ``HH:MM:SS`` and also ``D-HH:MM:SS``, the form Slurm tools use
    for runs lasting a day or longer (the plain ``HH:MM:SS`` parse would
    raise ``ValueError`` on the ``D-`` prefix).

    Args:
        x: Elapsed time as text, e.g. ``"01:02:03"`` or ``"1-01:02:03"``.

    Returns:
        The elapsed time in seconds as an ``int``.

    Raises:
        ValueError: If the text does not contain exactly three
            colon-separated integer fields (after any day prefix).
    """
    text = x.strip()
    head, dash, tail = text.partition('-')
    if dash and head:
        # "D-HH:MM:SS": the part before the dash is a day count.
        days, clock = int(head), tail
    else:
        # Plain "HH:MM:SS" (a leading '-' is left to int() as before).
        days, clock = 0, text
    h, m, s = map(int, clock.split(':'))
    return ((days * 24 + h) * 60 + m) * 60 + s
def ramUsage():
    """Print total wall-clock hours for one SUMMA-Actors and one SUMMA-Original array job.

    Despite the name, this reads the "Wall-Clock Time" column of two
    hard-coded job-statistics CSV files, converts every entry to hours
    (rounded to two decimals per job) and prints the sum for each file.
    """

    def _hours_per_job(frame):
        # Each job is rounded to 2 decimals *before* summing.
        return [
            round((time_convert(stamp) / 60) / 60, 2)
            for stamp in frame["Wall-Clock Time"].values
        ]

    # Both files are loaded up front, before anything is printed.
    actors_jobs = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/Jul-08-2022/SummaActors_jobStats_63007640_Filled_failed.csv")
    original_jobs = pd.read_csv("/home/kklenk/SummaProjects/Summa-Actors/utils/StatisticsScripts/SummaOriginal_jobStats_63155456.csv")
    # Other runs that were compared previously (currently disabled):
    #   SummaActorsOutput/Jun-17-2022/SummaActors_jobStats_62270590.csv
    #   SummaActorsOutput/May-26-2022/SummaActors_jobStats_61263427.csv
    #   SummaOriginalOuput/May-27-2022/SummaOriginal_jobStats_61377500.csv
    #   SummaOriginalOuput/May-30-2022/SummaOriginal_jobStats_61415123.csv

    actors_hours = _hours_per_job(actors_jobs)
    print("SUMMA-Actors Array Job 1 Total Wall-Clock =", sum(actors_hours))

    original_hours = _hours_per_job(original_jobs)
    print()
    print("SUMMA-Original Array Job 1 Total Wall-Clock =", sum(original_hours))
# usageStat4 = []
# for x in df4["Wall-Clock Time"].values:
# usageStat4.append(round((time_convert(x) / 60) / 60, 2))
# print("Total Time Actor = ", sum(usageStat1))
# print("Max Actor = ", max(usageStat1))
# print("Min Actor = ", min(usageStat1))
# print("----------------------------------------")
# print("Total Time Original = ", sum(usageStat2))
# print("Max Original = ", max(usageStat2))
# print("Min Original = ", min(usageStat2))
# # totalRam = [sum(usageStat1), sum(usageStat2), sum(usageStat4)]
# print("usageStat1 Total Ram Used = ", sum(usageStat1))
# # print("usageStat1 Mean Ram Used = ", stat.mean(usageStat1))
# print("usageStat2 Total Ram Used = ", sum(usageStat2))
# print("usageStat2 Mean Ram Used = ", stat.mean(usageStat2))
# print("usageStat4 Total Ram Used = ", sum(usageStat4))
# print("usageStat4 Mean Ram Used = ", stat.mean(usageStat4))
# print()
# print("variation = ", stat.stdev(totalRam) / stat.mean(totalRam))
# csvFile = open("VarationStats.csv", 'w')
# header = ["relative standard deviation"]
# csvFile.write("{}\n".format("relative standard deviation"))
# for i in range(0, len(usageStat1)):
# l = [usageStat1[i], usageStat2[i], usageStat4[i]]
# csvFile.write("{}\n".format(stat.stdev(l) / stat.mean(l)))
def scatterPlot():
    """Scatter-plot the relative standard deviation of every job.

    Reads the "relative standard deviation" column of the hard-coded
    VarationStats.csv file, plots it against a 1-based job number, saves
    the figure as ``RSD-Actors.pdf`` in the current directory and shows it.
    """
    stats = pd.read_csv("/home/kklenk/SummaProjects/Summa-Actors/utils/StatisticsScripts/VarationStats.csv")
    d = stats["relative standard deviation"].values
    # One x position per data row. The job count used to be hard-coded to
    # 514, which made plt.scatter fail for a CSV of any other length.
    x = list(range(1, len(d) + 1))
    print(len(x))
    print(len(d))
    plt.scatter(x, d)
    plt.title("Coefficient of Variation Plot")
    plt.xlabel("Job number")
    plt.ylabel("Relative Standard Deviation")
    plt.savefig("RSD-Actors.pdf", format="pdf", bbox_inches="tight")
    plt.show()
def initDuration():
    """Print the sum of the "initDuration" column of the hard-coded Success1.csv file."""
    success_runs = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/Jun-06-2022/csv/Success1.csv")
    total_init = sum(success_runs["initDuration"].values)
    print(total_init)
def findRow(df, startHRU):
    """Report whether ``startHRU`` occurs in the first column of ``df``.

    Prints ``found <startHRU>`` when at least one row's first-column value
    equals ``startHRU`` and ``did not find <startHRU>`` otherwise.

    Args:
        df: pandas DataFrame whose first column is searched.
        startHRU: Value to look for.

    Returns:
        None; the result is only printed.
    """
    if df.shape[1] == 0:
        # No columns at all: there is nothing to search.
        present = False
    else:
        # Compare the whole first column at once instead of walking the
        # frame row by row with iterrows().
        present = bool((df.iloc[:, 0] == startHRU).any())

    if present:
        print("found", startHRU)
    else:
        print("did not find", startHRU)
def compareCompleted():
    """Check which non-timed-out SUMMA-Original jobs also appear among the SUMMA-Actors jobs.

    Loads two hard-coded job-statistics CSV files, removes every row whose
    Status is "TIMEOUT" from both, and calls findRow once per remaining
    SUMMA-Original row, passing that row's first-column value.
    """
    actors_jobs = pd.read_csv("/home/kklenk/SummaProjects/Summa-Actors/utils/StatisticsScripts/SummaActors_jobStats_62666948.csv", index_col=False)
    original_jobs = pd.read_csv("/home/kklenk/SummaProjects/Summa-Actors/utils/StatisticsScripts/SummaOriginal_jobStats_62667162.csv", index_col=False)

    # Keep only the rows that did not time out.
    actors_done = actors_jobs[~(actors_jobs.Status == "TIMEOUT")]
    original_done = original_jobs[~(original_jobs.Status == "TIMEOUT")]

    for _, job in original_done.iterrows():
        findRow(actors_done, job.iloc[0])
# Script entry point: only the wall-clock comparison runs; the other
# analyses are kept here, commented out, so they can be switched on by hand.
ramUsage()
# compareCompleted()
# initDuration()
import numpy as np
import pandas as pd
import statistics as stat
import csv
import matplotlib as mpl
import matplotlib.pyplot as plt
def time_convert(x):
    """Convert an elapsed-time string to a whole number of seconds.

    Accepts ``HH:MM:SS`` and also ``D-HH:MM:SS``, the form Slurm tools use
    for runs lasting a day or longer (the plain ``HH:MM:SS`` parse would
    raise ``ValueError`` on the ``D-`` prefix).

    Args:
        x: Elapsed time as text, e.g. ``"01:02:03"`` or ``"1-01:02:03"``.

    Returns:
        The elapsed time in seconds as an ``int``.

    Raises:
        ValueError: If the text does not contain exactly three
            colon-separated integer fields (after any day prefix).
    """
    text = x.strip()
    head, dash, tail = text.partition('-')
    if dash and head:
        # "D-HH:MM:SS": the part before the dash is a day count.
        days, clock = int(head), tail
    else:
        # Plain "HH:MM:SS" (a leading '-' is left to int() as before).
        days, clock = 0, text
    h, m, s = map(int, clock.split(':'))
    return ((days * 24 + h) * 60 + m) * 60 + s
def ramUsage():
    """Summarise wall-clock hours of three SUMMA-Actors runs and write their per-job variation.

    Despite the name and the "Ram Used" wording of the printed labels, the
    values come from the "Wall-Clock Time" column of three hard-coded
    job-statistics CSV files, converted to hours (rounded to two decimals
    per job). The function prints the total and mean for each run and the
    relative standard deviation of the three totals, then writes
    ``VarationStats.csv`` in the current directory: a
    "relative standard deviation" header followed by one stdev/mean value
    per job position across the three runs.

    Raises:
        IndexError: If the second or third run has fewer rows than the first.
    """

    def _hours_per_job(frame):
        # Each job is rounded to 2 decimals before any aggregation.
        return [
            round((time_convert(stamp) / 60) / 60, 2)
            for stamp in frame["Wall-Clock Time"].values
        ]

    data_set_1 = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/Jun-06-2022/SummaActors_jobStats_61721504.csv")
    data_set_2 = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/May-13-2022/SummaActors_jobStatistics_60829543.csv")
    data_set_4 = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/May-26-2022/SummaActors_jobStats_61263427.csv")
    # SUMMA-Original equivalents (swap in to analyse the original runs):
    # data_set_1 = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/Apr-28-2022/SummaOrginal-60232429_jobStatistics.csv")
    # data_set_2 = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/May-27-2022/SummaOriginal_jobStats_61377500.csv")
    # data_set_4 = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/May-30-2022/SummaOriginal_jobStats_61415123.csv")

    usageStat1 = _hours_per_job(data_set_1)
    usageStat2 = _hours_per_job(data_set_2)
    usageStat4 = _hours_per_job(data_set_4)

    totalRam = [sum(usageStat1), sum(usageStat2), sum(usageStat4)]
    print("usageStat1 Total Ram Used = ", sum(usageStat1))
    print("usageStat1 Mean Ram Used = ", stat.mean(usageStat1))
    print("usageStat2 Total Ram Used = ", sum(usageStat2))
    print("usageStat2 Mean Ram Used = ", stat.mean(usageStat2))
    print("usageStat4 Total Ram Used = ", sum(usageStat4))
    print("usageStat4 Mean Ram Used = ", stat.mean(usageStat4))
    print()
    print("variation = ", stat.stdev(totalRam) / stat.mean(totalRam))

    # The context manager guarantees the file is flushed and closed, even
    # if a row fails part-way through (the handle used to be left open).
    with open("VarationStats.csv", 'w') as csvFile:
        csvFile.write("{}\n".format("relative standard deviation"))
        for i, first in enumerate(usageStat1):
            same_job = [first, usageStat2[i], usageStat4[i]]
            csvFile.write("{}\n".format(stat.stdev(same_job) / stat.mean(same_job)))
def scatterPlot():
    """Scatter-plot the relative standard deviation of every job.

    Reads the "relative standard deviation" column of the hard-coded
    VarationStats.csv file, plots it against a 1-based job number, saves
    the figure as ``RSD-Actors.pdf`` in the current directory and shows it.
    """
    stats = pd.read_csv("/home/kklenk/SummaProjects/Summa-Actors/utils/StatisticsScripts/VarationStats.csv")
    d = stats["relative standard deviation"].values
    # One x position per data row. The job count used to be hard-coded to
    # 514, which made plt.scatter fail for a CSV of any other length.
    x = list(range(1, len(d) + 1))
    print(len(x))
    print(len(d))
    plt.scatter(x, d)
    plt.title("Coefficient of Variation Plot")
    plt.xlabel("Job number")
    plt.ylabel("Relative Standard Deviation")
    plt.savefig("RSD-Actors.pdf", format="pdf", bbox_inches="tight")
    plt.show()
# Script entry point: only the plot is produced. scatterPlot() reads a
# VarationStats.csv from a fixed path; ramUsage() writes a file of that name
# to the current directory and is disabled here.
# ramUsage()
scatterPlot()
\ No newline at end of file
......@@ -13,8 +13,9 @@ This function uses the seff command and can get the following data:
- CPU-Efficiency
- Wall-Clock Time
- Memory Used
- Completion Status
'''
def seffCommand(jobId, numJobs):
def seffCommand(jobId, numJobs, gru_per_job):
input_prompt = "SummaActors: a\nSummaOriginal: o\n"
# Get input from the user
user_response = input(input_prompt)
......@@ -27,14 +28,14 @@ def seffCommand(jobId, numJobs):
raise Exception("Something went wrong")
csvFile = open(output_csv_name, 'w')
header = ["startHRU", "numHRU", "#-CPU", "CPU Efficiency", "Wall-Clock Time", "Memory Used"]
header = ["startHRU", "numHRU", "#-CPU", "CPU Efficiency", "Wall-Clock Time", "Memory Used", "Status"]
writer = csv.writer(csvFile)
writer.writerow(header)
numHRU = 1000
for i in range(0, int(numJobs)):
numHRU = gru_per_job
for i in range(0, numJobs):
print("Job", i)
rowData = []
rowData = [numHRU * i + 1, numHRU]
......@@ -48,6 +49,7 @@ def seffCommand(jobId, numJobs):
if b'CPU Efficiency:' in line:
effeciency = line.decode().split(" ")[2]
effeciency = effeciency.strip()
effeciency = effeciency.replace('%', '')
if b'Job Wall-clock time:' in line:
wallClock = line.decode().split(" ")[-1]
......@@ -56,11 +58,16 @@ def seffCommand(jobId, numJobs):
if b'Memory Utilized:' in line:
memory = line.decode().split(" ")[2]
memory = memory.strip()
if b'State:' in line:
status = line.decode().split(" ")[1]
status = status.strip()
rowData.append(cores)
rowData.append(effeciency)
rowData.append(wallClock)
rowData.append(memory)
rowData.append(status)
writer.writerow(rowData)
csvFile.close()
......@@ -71,6 +78,9 @@ print(jobId)
numJobs = argv[2]
print(numJobs)
seffCommand(jobId, numJobs)
gru_per_job = argv[3]
print(gru_per_job)
seffCommand(jobId, int(numJobs), int(gru_per_job))
import numpy as np
import pandas as pd
import statistics as stat
import csv
import matplotlib as mpl
import matplotlib.pyplot as plt
def time_convert(x):
    """Convert an elapsed-time string to a whole number of seconds.

    Accepts ``HH:MM:SS`` and also ``D-HH:MM:SS``, the form Slurm tools use
    for runs lasting a day or longer (the plain ``HH:MM:SS`` parse would
    raise ``ValueError`` on the ``D-`` prefix).

    Args:
        x: Elapsed time as text, e.g. ``"01:02:03"`` or ``"1-01:02:03"``.

    Returns:
        The elapsed time in seconds as an ``int``.

    Raises:
        ValueError: If the text does not contain exactly three
            colon-separated integer fields (after any day prefix).
    """
    text = x.strip()
    head, dash, tail = text.partition('-')
    if dash and head:
        # "D-HH:MM:SS": the part before the dash is a day count.
        days, clock = int(head), tail
    else:
        # Plain "HH:MM:SS" (a leading '-' is left to int() as before).
        days, clock = 0, text
    h, m, s = map(int, clock.split(':'))
    return ((days * 24 + h) * 60 + m) * 60 + s
def wallClockTime(data_set_1, data_set_2):
    """Print the total wall-clock time, in hours, of two job-statistics data sets.

    Args:
        data_set_1: Data accepted by ``pd.DataFrame`` that has a
            "Wall-Clock Time" column of elapsed-time strings.
        data_set_2: Second data set with the same column.

    Every entry is converted to hours and rounded to two decimals before
    the per-data-set sum is printed.
    """
    # Both frames are built before anything is printed.
    frame_a = pd.DataFrame(data_set_1)
    frame_b = pd.DataFrame(data_set_2)

    hours_a = [
        round((time_convert(stamp) / 60) / 60, 2)
        for stamp in frame_a["Wall-Clock Time"].values
    ]
    print("Total Wall Clock for data_set_1 =", sum(hours_a))

    hours_b = [
        round((time_convert(stamp) / 60) / 60, 2)
        for stamp in frame_b["Wall-Clock Time"].values
    ]
    print("Total Wall Clock for data_set_2 =", sum(hours_b))
def cpuEfficiency(data_set_1, data_set_2):
    """Print the average "CPU Efficiency" of two job-statistics data sets.

    Args:
        data_set_1: Data accepted by ``pd.DataFrame`` that has a
            "CPU Efficiency" column (assumed numeric -- verify against the
            CSV producer).
        data_set_2: Second data set with the same column.

    Raises:
        ZeroDivisionError: If either data set has no rows.
    """
    df1 = pd.DataFrame(data_set_1)
    df2 = pd.DataFrame(data_set_2)

    df1_stat = list(df1["CPU Efficiency"].values)
    print("Average CPU Efficiency for data_set_1 =", sum(df1_stat) / len(df1_stat))

    df2_stat = list(df2["CPU Efficiency"].values)
    # Divide by data_set_2's own row count; using data_set_1's count gave a
    # wrong average whenever the two data sets differed in length.
    print("Average CPU Efficiency for data_set_2 =", sum(df2_stat) / len(df2_stat))
# Script body: load one SUMMA-Actors and one SUMMA-Original job-statistics CSV
# from fixed paths, then print their wall-clock totals and CPU-efficiency
# averages, separated by a blank line.
data_set_actors = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/Jul-13-2022/SummaActors_jobStats_63221110.csv")
data_set_original = pd.read_csv("/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/Jul-09-2022/SummaOriginal_jobStats_63155456.csv")
wallClockTime(data_set_actors, data_set_original)
print("")
cpuEfficiency(data_set_actors, data_set_original)
\ No newline at end of file
......@@ -3,7 +3,7 @@ from os.path import isfile, join
from pathlib import Path
import xarray as xr
numHRU = 25
numHRU = 125
time = 'time'
scalarSWE = 'scalarSWE'
......@@ -28,8 +28,8 @@ varList = [time, scalarSWE, scalarCanopyWat, scalarAquiferStorage, scalarTotalSo
scalarTotalET, scalarTotalRunoff, scalarNetRadiation]
filename = "out.txt"
originalPath = Path('/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/May-13-2022/netcdf/SummaBE_G000001-000125_day.nc')
actorsPath = Path('/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/May-26-2022/netcdf/SummaActorsGRU1-500_day.nc')
originalPath = Path('/home/kklenk/projects/rpp-kshook/kklenk/SummaOriginalOuput/May-30-2022/netcdf/SummaBE_G001001-001125_day.nc')
actorsPath = Path('/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/Jun-18-2022/netcdf/SummaActorsGRU1001-1000_day.nc')
originalDataset = xr.open_dataset(originalPath)
actorsDataset = xr.open_dataset(actorsPath)
......@@ -56,6 +56,7 @@ for i in range(0, numHRU):
dataAct.append(data)
print("Original", len(dataOrig))
print("Actors", len(dataAct))
print("HRU = ", i)
marginOfError = 0
if var == time:
for a in range(0, len(dataAct)):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment