diff --git a/utils/StatisticsScripts/sortCSV.py b/utils/StatisticsScripts/sortCSV.py
new file mode 100644
index 0000000000000000000000000000000000000000..61eac2b8405de2e96967cf921d73c354ecb187aa
--- /dev/null
+++ b/utils/StatisticsScripts/sortCSV.py
@@ -0,0 +1,10 @@
+import pandas as pd
+
+# Read CSV file into a DataFrame
+df = pd.read_csv('/home/kklenk/scratch/Single_CPU_TEST/non-actors/logs/_log_summaryOriginal.csv')
+
+# Sort DataFrame by the start_hru column
+df = df.sort_values('start_hru')
+
+# Write sorted DataFrame back to CSV file
+df.to_csv('/home/kklenk/scratch/Single_CPU_TEST/non-actors/logs/_log_summaryOriginal_sorted.csv', index=False)
\ No newline at end of file
diff --git a/utils/StatisticsScripts/summarize_summaActors.py b/utils/StatisticsScripts/summarize_summaActors.py
index 4a629be59f7b60180c1121cd34b2e2777aff508c..372b8bdfa3ea7a621c95046ef4b28ec119593148 100644
--- a/utils/StatisticsScripts/summarize_summaActors.py
+++ b/utils/StatisticsScripts/summarize_summaActors.py
@@ -1,39 +1,62 @@
+# Kyle Klenk (kyle.klenk@usask.ca)
+# This file summarizes the log files that are output by SUMMA
 import os
 import re
 import sys
+import csv
 
-summaryFile = '_log_summaryActors.txt'
-ext = ".out"
+def get_job_stats(folder, file):
+    outFile = open(folder + file, 'r')
+    print(outFile)
+
+    lines = outFile.readlines()
 
-if len(sys.argv) == 1:
-    sys.exit('Error: no input folder specified')
-else:
+    start_hru = int(''.join(filter(str.isdigit, file)))
+
+
+    max_lines_to_read = 40
+    lines_read_counter = 1
+    max_items_looking_for = 3
+    items_found = 0
+    row_data = [start_hru, -99, -99, -99]
+
+    for line in reversed(lines):
+        if lines_read_counter > max_lines_to_read:
+            return row_data
+
+        elif items_found == max_items_looking_for:
+            return row_data
+
+        elif "Hours" in line:
+            hours = re.findall("\d+\.\d+", line)
+            row_data[1] = hours[0]
+            lines_read_counter += 1
-    folder = sys.argv[1]
+        elif "Total Read Duration" in line:
+            seconds = re.findall("\d+\.\d+", line)
+            row_data[2] = seconds[0]
+            lines_read_counter += 1
+
+        elif "Total Write Duration" in line:
+            seconds = re.findall("\d+\.\d+", line)
+            row_data[3] = seconds[0]
+            lines_read_counter += 1
-def determine_output(folder,file):
-    outFile = open(folder + file, 'r')
-    print(outFile)
-    try:
-        lines = outFile.readlines()
-    except UnicodeDecodeError:
-        outFile.close()
-        outFile = open(folder + file, encoding = "ISO-8859-1")
-        lines = outFile.readlines()
-    counter = 1
-    for line in reversed(lines):
-        if counter > 30:
-            return -1
         else:
-            if "Hours" in line:
-                hours = re.findall("\d+\.\d+", line)
-                return hours
-            counter += 1
-
+            lines_read_counter += 1
+
+output_file = '_log_summaryActors.csv'
+ext = ".txt"
+
+# Check command line args
+if len(sys.argv) == 1:
+    sys.exit('Error: no input folder specified')
+else:
+    folder = sys.argv[1]
 
 try:
-    os.remove(folder + "/" + summaryFile)
+    os.remove(folder + "/" + output_file)
 except OSError:
     pass
@@ -44,20 +67,26 @@ for file in os.listdir(folder):
 
 files.sort()
+
 total_success = []
 computation_time = []
-with open(folder + '/' + summaryFile, "w") as sf:
-    sf.write('Summarizing log files in ' + folder + '\n \n')
-    sf.write('Log files' + '\n')
+csv_file = open(folder + '/' + output_file, "w")
+writer = csv.writer(csv_file)
+csv_header = ["start_hru", "job_duration", "read_duration", "write_duration"]
+writer.writerow(csv_header)
+
+for file in files:
+    row_data = get_job_stats(folder, file)
+    if row_data is None:
+        start_hru = int(''.join(filter(str.isdigit, file)))
+        row_data = [start_hru, -99, -99, -99]
+
+    writer.writerow(row_data)
+
+csv_file.close()
-    for file in files:
-        value = determine_output(folder, file)
-        if value == -1:
-            sf.write("{} - Still Running or Failed\n".format(file))
-        else:
-            sf.write("{} - Success after {} hours \n".format(file, value[0]))
diff --git a/utils/StatisticsScripts/summarize_summaOrig.py b/utils/StatisticsScripts/summarize_summaOrig.py
new file mode 100644
index 0000000000000000000000000000000000000000..465de9de73bd96639cb8495175b34203b5eb5b85
--- /dev/null
+++ b/utils/StatisticsScripts/summarize_summaOrig.py
@@ -0,0 +1,89 @@
+# Kyle Klenk (kyle.klenk@usask.ca)
+# This file summarizes the log files that are output by SUMMA
+import os
+import re
+import sys
+import csv
+
+
+def get_job_stats(folder, file):
+    outFile = open(folder + file, 'r')
+    print(outFile)
+
+    lines = outFile.readlines()
+
+    start_hru = int(''.join(filter(str.isdigit, file)))
+
+    max_lines_to_read = 40
+    lines_read_counter = 1
+    max_items_looking_for = 3
+    items_found = 0
+    row_data = [start_hru, -99, -99, -99]
+
+    for line in reversed(lines):
+        if lines_read_counter > max_lines_to_read:
+            return row_data
+
+        elif items_found == max_items_looking_for:
+            return row_data
+
+        elif "FATAL ERROR" in line:
+            return row_data
+
+        elif "h" in line and "or" in line:
+            hours = re.findall("\d+\.\d+", line)
+            row_data[1] = hours[0]
+            lines_read_counter += 1
+            items_found += 1
+
+        elif "elapsed read" in line:
+            seconds = re.findall("\d+\.\d+", line)
+            row_data[2] = seconds[0]
+            lines_read_counter += 1
+            items_found += 1
+
+        elif "elapsed write" in line:
+            seconds = re.findall("\d+\.\d+", line)
+            row_data[3] = seconds[0]
+            lines_read_counter += 1
+            items_found += 1
+
+        else:
+            lines_read_counter += 1
+
+output_file = '_log_summaryOriginal.csv'
+ext = ".txt"
+
+# Check command line args
+if len(sys.argv) == 1:
+    sys.exit('Error: no input folder specified')
+else:
+    folder = sys.argv[1]
+
+try:
+    os.remove(folder + "/" + output_file)
+except OSError:
+    pass
+
+files = []
+for file in os.listdir(folder):
+    if file.endswith(ext):
+        files.append(file)
+
+files.sort()
+
+csv_file = open(folder + '/' + output_file, "w")
+writer = csv.writer(csv_file)
+csv_header = ["start_hru", "job_duration", "read_duration", "write_duration"]
+writer.writerow(csv_header)
+
+for file in files:
+    row_data = get_job_stats(folder, file)
+    if row_data is None:
+        start_hru = int(''.join(filter(str.isdigit, file)))
+        row_data = [start_hru, -99, -99, -99]
+
+    writer.writerow(row_data)
+
+csv_file.close()
+
diff --git a/utils/netcdf/OutputVerification/checkOutput.py b/utils/netcdf/OutputVerification/checkOutput.py
index 675ee57d9bf7c9e59a2499e9d462fd75ed541087..104f83fc4f56654fc7fa07f7f85e587f6dfeb447 100644
--- a/utils/netcdf/OutputVerification/checkOutput.py
+++ b/utils/netcdf/OutputVerification/checkOutput.py
@@ -80,15 +80,15 @@ def get_output_vars(model_output_file):
 
 
 
-num_hru = 1
+num_hru = 125
 
 print("Checking output for", num_hru, "HRUs")
-dataset_1 = "/home/kklenk/scratch/Kinsol/netcdf/SummaActorsGRU1-1_timestep.nc"
-dataset_2 = "/home/kklenk/scratch/Kinsol/netcdf/SummaActors_kinsolGRU1-1_timestep.nc"
+dataset_1 = "/home/kklenk/scratch/Single_CPU_TEST/actors/netcdf/SummaActorsGRU6126-125_day.nc"
+dataset_2 = "/home/kklenk/scratch/Single_CPU_TEST/non-actors/netcdf/SummaOriginal_G006126-006250_day.nc"
 
 # dataset_1 = "/scratch/kck540/Summa_Sundials/non-actors/SummaOriginal-BE_G000001-000002_timestep.nc"
 # dataset_2 = "/scratch/kck540/Summa_Sundials/actors/SummaActors-BEGRU1-2_timestep.nc"
"/home/kklenk/projects/rpp-kshook/kklenk/settings/SummaActorsSettings/outputControl.txt" +model_output_file = "/home/kklenk/scratch/Single_CPU_TEST/settings/outputControl.txt" output_vars = get_output_vars(model_output_file) verify_data(dataset_1, dataset_2, num_hru, output_vars)