Skip to content
Snippets Groups Projects
Commit 983cd5c0 authored by Kyle Klenk (kck540)'s avatar Kyle Klenk (kck540)
Browse files

Add convenience scripts

Scripts for going over data within Summa
parent 6076ce84
No related branches found
No related tags found
No related merge requests found
import pandas as pd
# Locations of the unsorted job summary and of the sorted copy to produce
unsorted_csv = '/home/kklenk/scratch/Single_CPU_TEST/non-actors/logs/_log_summaryOriginal.csv'
sorted_csv = '/home/kklenk/scratch/Single_CPU_TEST/non-actors/logs/_log_summaryOriginal_sorted.csv'
# Load the summary and order its rows by the start_hru column
df = pd.read_csv(unsorted_csv).sort_values('start_hru')
# Save the ordered rows without the DataFrame index column
df.to_csv(sorted_csv, index=False)
\ No newline at end of file
# Kyle Klenk, (kyle.klenk@usask.ca)
# This file will summarize the files that are output by summa
import os
import re
import sys
import csv
summaryFile = '_log_summaryActors.txt'
ext = ".out"
def get_job_stats(folder,file):
outFile = open(folder + file, 'r')
print(outFile)
lines = outFile.readlines()
if len(sys.argv) == 1:
sys.exit('Error: no input folder specified')
else:
start_hru = int(''.join(filter(str.isdigit, file)))
max_lines_to_read = 40
lines_read_counter = 1
max_items_looking_for = 3
items_found = 0
row_data = [start_hru, -99, -99, -99]
for line in reversed(lines):
if lines_read_counter > max_lines_to_read:
return row_data
elif items_found == max_items_looking_for:
return row_data
elif "Hours" in line:
hours = re.findall("\d+\.\d+", line)
row_data[1] = hours[0]
lines_read_counter += 1
folder = sys.argv[1]
elif "Total Read Duration" in line:
seconds = re.findall("\d+\.\d+", line)
row_data[2] = seconds[0]
lines_read_counter += 1
elif "Total Write Duration" in line:
seconds = re.findall("\d+\.\d+", line)
row_data[3] = seconds[0]
lines_read_counter += 1
def determine_output(folder, file):
    """Look for the "Hours" line near the end of a log file.

    The last 30 lines of the log are scanned from the bottom up for a line
    containing the text "Hours".

    Args:
        folder: Directory holding the log file (trailing separator optional).
        file: Name of the log file inside ``folder``.

    Returns:
        A list with every decimal number (as strings) found on the "Hours"
        line, or -1 when no such line with a decimal number exists within
        the last 30 lines.

    Raises:
        OSError: If the log file cannot be opened.
    """
    max_lines_to_read = 30
    path = os.path.join(folder, file)
    try:
        with open(path, 'r') as out_file:
            print(out_file)
            lines = out_file.readlines()
    except UnicodeDecodeError:
        # Some logs are not valid in the default encoding; ISO-8859-1 can
        # decode any byte sequence, so retry with it.
        with open(path, encoding="ISO-8859-1") as out_file:
            lines = out_file.readlines()

    for counter, line in enumerate(reversed(lines), start=1):
        if counter > max_lines_to_read:
            return -1
        if "Hours" in line:
            hours = re.findall(r"\d+\.\d+", line)
            # An "Hours" line without a decimal number carries no result;
            # keep scanning instead of handing the caller an empty list.
            if hours:
                return hours
    # Short log without an "Hours" line: report it like any other miss
    # rather than implicitly returning None.
    return -1
lines_read_counter += 1
output_file = '_log_summaryActors.csv'
ext = ".txt"
# Check command line args
if len(sys.argv) == 1:
sys.exit('Error: no input folder specified')
else:
folder = sys.argv[1]
try:
os.remove(folder + "/" + summaryFile)
os.remove(folder + "/" + output_file)
except OSError:
pass
......@@ -44,20 +67,26 @@ for file in os.listdir(folder):
files.sort()
total_success = []
computation_time = []
with open(folder + '/' + summaryFile, "w") as sf:
sf.write('Summarizing log files in ' + folder + '\n \n')
sf.write('Log files' + '\n')
csv_file = open(folder + '/' + output_file, "w")
writer = csv.writer(csv_file)
csv_header = ["start_hru", "job_duration", "read_duration", "write_duration"]
writer.writerow(csv_header)
for file in files:
row_data = get_job_stats(folder, file)
if row_data is None:
start_hru = int(''.join(filter(str.isdigit, file)))
row_data = [start_hru, -99, -99, -99]
writer.writerow(row_data)
csv_file.close()
for file in files:
value = determine_output(folder, file)
if value == -1:
sf.write("{} - Still Running or Failed\n".format(file))
else:
sf.write("{} - Success after {} hours \n".format(file, value[0]))
# Kyle Klenk, (kyle.klenk@usask.ca)
# This file will summarize the files that are output by summa
import os
import re
import sys
import csv
def get_job_stats(folder, file):
    """Collect timing values from the tail of one log file.

    The last 40 lines of the log are scanned from the bottom up. A line
    containing both "h" and "or" supplies the job duration, a line containing
    "elapsed read" the read duration and a line containing "elapsed write"
    the write duration; in each case the first decimal number on the line is
    used. Scanning stops early at a line containing "FATAL ERROR" or once
    three values have been recorded.

    Args:
        folder: Directory holding the log file (trailing separator optional).
        file: Name of the log file; all digits in the name are concatenated
            to form ``start_hru``.

    Returns:
        ``[start_hru, job_duration, read_duration, write_duration]``. Values
        that were found are the matched decimal strings; values that were
        not found stay at -99.

    Raises:
        OSError: If the log file cannot be opened.
        ValueError: If the file name contains no digits.
    """
    max_lines_to_read = 40
    max_items_looking_for = 3
    decimal = re.compile(r"\d+\.\d+")

    with open(os.path.join(folder, file), 'r') as out_file:
        print(out_file)
        lines = out_file.readlines()

    start_hru = int(''.join(filter(str.isdigit, file)))
    row_data = [start_hru, -99, -99, -99]

    items_found = 0
    for lines_read, line in enumerate(reversed(lines), start=1):
        if lines_read > max_lines_to_read:
            break
        if items_found == max_items_looking_for:
            break
        if "FATAL ERROR" in line:
            break

        if "h" in line and "or" in line:
            column = 1
        elif "elapsed read" in line:
            column = 2
        elif "elapsed write" in line:
            column = 3
        else:
            continue

        # A keyword line without a decimal number is ignored instead of
        # raising IndexError.
        match = decimal.search(line)
        if match:
            row_data[column] = match.group(0)
            items_found += 1

    # Always hand back the row, including when the log is shorter than the
    # scan window, so values already found are not lost.
    return row_data
# Name of the CSV summary written into the log folder, and the extension
# that identifies the log files to summarize.
output_file = '_log_summaryOriginal.csv'
ext = ".txt"

# Check command line args
if len(sys.argv) == 1:
    sys.exit('Error: no input folder specified')
else:
    folder = sys.argv[1]

# Remove a summary left over from a previous run; a missing file is fine.
try:
    os.remove(folder + "/" + output_file)
except OSError:
    pass

# Collect the log files in a stable, sorted order.
files = sorted(name for name in os.listdir(folder) if name.endswith(ext))

csv_header = ["start_hru", "job_duration", "read_duration", "write_duration"]

# newline='' is what the csv module expects for files it writes to; the
# context manager closes the file even if a log fails to parse.
with open(folder + '/' + output_file, "w", newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(csv_header)
    for file in files:
        row_data = get_job_stats(folder, file)
        if row_data is None:
            # No statistics came back: emit a placeholder row for this job.
            start_hru = int(''.join(filter(str.isdigit, file)))
            row_data = [start_hru, -99, -99, -99]
        writer.writerow(row_data)
......@@ -80,15 +80,15 @@ def get_output_vars(model_output_file):
num_hru = 1
num_hru = 125
print("Checking output for", num_hru, "HRUs")
dataset_1 = "/home/kklenk/scratch/Kinsol/netcdf/SummaActorsGRU1-1_timestep.nc"
dataset_2 = "/home/kklenk/scratch/Kinsol/netcdf/SummaActors_kinsolGRU1-1_timestep.nc"
dataset_1 = "/home/kklenk/scratch/Single_CPU_TEST/actors/netcdf/SummaActorsGRU6126-125_day.nc"
dataset_2 = "/home/kklenk/scratch/Single_CPU_TEST/non-actors/netcdf/SummaOriginal_G006126-006250_day.nc"
# dataset_1 = "/scratch/kck540/Summa_Sundials/non-actors/SummaOriginal-BE_G000001-000002_timestep.nc"
# dataset_2 = "/scratch/kck540/Summa_Sundials/actors/SummaActors-BEGRU1-2_timestep.nc"
model_output_file = "/home/kklenk/projects/rpp-kshook/kklenk/settings/SummaActorsSettings/outputControl.txt"
model_output_file = "/home/kklenk/scratch/Single_CPU_TEST/settings/outputControl.txt"
output_vars = get_output_vars(model_output_file)
verify_data(dataset_1, dataset_2, num_hru, output_vars)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment