From fb8835b7b21c955447eee04c3aece939ddb457f7 Mon Sep 17 00:00:00 2001 From: kck540 <kyle.klenk@usask.ca> Date: Thu, 21 Apr 2022 13:31:29 -0400 Subject: [PATCH] Used refGRU for clearer output of which GRU finished Started configuration.py to streamline job submissions --- build/makefile | 16 ++-- build/source/actors/FileAccessActor.h | 5 +- build/source/actors/JobActor.h | 14 +-- build/source/actors/SummaActor.h | 5 +- build/source/actors/messageAtoms.h | 2 +- build/source/engine/conv_funcs.f90 | 3 +- build/source/engine/derivforce.f90 | 1 - .../file_access_actor/initOutputStruc.f90 | 3 - config/Summa_Actors_Settings.json | 44 ++++++++- config/caf-application.conf | 2 +- config/configuration.py | 92 +++++++++++++++++++ config/fileManager.txt | 22 +++++ utils/netcdf/checkOutput.py | 21 +++++ 13 files changed, 201 insertions(+), 29 deletions(-) create mode 100644 config/configuration.py create mode 100644 config/fileManager.txt create mode 100644 utils/netcdf/checkOutput.py diff --git a/build/makefile b/build/makefile index b20c9bf..94e0ae8 100644 --- a/build/makefile +++ b/build/makefile @@ -17,16 +17,16 @@ # gfortran compiler flags ifeq "$(FC)" "gfortran" # Production runs -FLAGS_NOAH = -O3 -ffree-form -ffree-line-length-none -fmax-errors=0 -fPIC -FLAGS_COMM = -O3 -ffree-line-length-none -fmax-errors=0 -fPIC -FLAGS_SUMMA = -O3 -ffree-line-length-none -fmax-errors=0 -fPIC -FLAGS_ACTORS = -O3 +# FLAGS_NOAH = -O3 -ffree-form -ffree-line-length-none -fmax-errors=0 -fPIC +# FLAGS_COMM = -O3 -ffree-line-length-none -fmax-errors=0 -fPIC +# FLAGS_SUMMA = -O3 -ffree-line-length-none -fmax-errors=0 -fPIC +# FLAGS_ACTORS = -O3 # # Debug runs -# FLAGS_NOAH = -p -g -O0 -ffree-form -ffree-line-length-none -fmax-errors=0 -fbacktrace -Wno-unused -Wno-unused-dummy-argument -fPIC -# FLAGS_COMM = -p -g -O0 -Wall -ffree-line-length-none -fmax-errors=0 -fbacktrace -fcheck=bounds -fPIC -# FLAGS_SUMMA = -p -g -O0 -Wall -ffree-line-length-none -fmax-errors=0 -fbacktrace -fcheck=bounds 
-fPIC -# FLAGS_ACTORS = -g -O0 -Wall +FLAGS_NOAH = -p -g -O0 -ffree-form -ffree-line-length-none -fmax-errors=0 -fbacktrace -Wno-unused -Wno-unused-dummy-argument -fPIC +FLAGS_COMM = -p -g -O0 -Wall -ffree-line-length-none -fmax-errors=0 -fbacktrace -fcheck=bounds -fPIC +FLAGS_SUMMA = -p -g -O0 -Wall -ffree-line-length-none -fmax-errors=0 -fbacktrace -fcheck=bounds -fPIC +FLAGS_ACTORS = -g -O0 -Wall endif # ifort compiler flags diff --git a/build/source/actors/FileAccessActor.h b/build/source/actors/FileAccessActor.h index d805659..4750f5a 100644 --- a/build/source/actors/FileAccessActor.h +++ b/build/source/actors/FileAccessActor.h @@ -15,10 +15,13 @@ behavior file_access_actor(stateful_actor<file_access_state>* self, int startGRU self->state.numGRU = numGRU; self->state.startGRU = startGRU; self->state.outputStrucSize = outputStrucSize; + + aout(self) << "\nFile Access Actor Started\n"; initalizeFileAccessActor(self); return { - [=](initalize_outputStrucure) { + [=](initalize_outputStructure) { + aout(self) << "Initalizing Output Structure" << std::endl; Init_OutputStruct(self->state.handle_forcFileInfo, &self->state.outputStrucSize, &self->state.numGRU, &self->state.err); }, diff --git a/build/source/actors/JobActor.h b/build/source/actors/JobActor.h index ac00049..387f4f4 100644 --- a/build/source/actors/JobActor.h +++ b/build/source/actors/JobActor.h @@ -45,7 +45,7 @@ behavior job_actor(stateful_actor<job_state>* self, int startGRU, int numGRU, return { [=](done_file_access_actor_init) { // Init GRU Actors and the Output Structure - self->send(self->state.file_access_actor, initalize_outputStrucure_v); + self->send(self->state.file_access_actor, initalize_outputStructure_v); self->send(self, init_hru_v); }, @@ -78,7 +78,9 @@ behavior job_actor(stateful_actor<job_state>* self, int startGRU, int numGRU, [=](done_hru, int indxGRU, double totalDuration, double initDuration, double forcingDuration, double runPhysicsDuration, double writeOutputDuration) { - 
aout(self) << "GRU " << indxGRU << " Done\n"; + aout(self) << "GRU:" << self->state.GRUList[indxGRU - 1]->getRefGRU() + << "indxGRU = " << indxGRU << "Done \n"; + self->state.GRUList[indxGRU - 1]->doneRun(totalDuration, initDuration, forcingDuration, runPhysicsDuration, writeOutputDuration); @@ -136,8 +138,10 @@ behavior job_actor(stateful_actor<job_state>* self, int startGRU, int numGRU, }, [=](run_failure, int indxGRU, int err) { - aout(self) << "GRU:" << indxGRU << "Failed \n" << - "Will have to wait until all GRUs are done before it can be re-tried\n"; + aout(self) << "GRU:" << self->state.GRUList[indxGRU - 1]->getRefGRU() + << "indxGRU = " << indxGRU << "Failed \n" + << "Will have to wait until all GRUs are done before it can be re-tried\n"; + self->state.numGRUFailed++; self->state.numGRUDone++; self->state.GRUList[indxGRU - 1]->updateFailed(); @@ -260,8 +264,6 @@ void restartFailures(stateful_actor<job_state>* self) { gru->updateGRU(newGRU); gru->updateCurrentAttempt(); self->send(gru->getActor(), dt_init_factor_v, gru->getDt_init()); - } else { - aout(self) << "We are done \n"; } } } diff --git a/build/source/actors/SummaActor.h b/build/source/actors/SummaActor.h index a9840ef..bdad565 100644 --- a/build/source/actors/SummaActor.h +++ b/build/source/actors/SummaActor.h @@ -57,6 +57,7 @@ void spawnJob(stateful_actor<summa_manager>* self) { // Ensure we do not start a job with too many GRUs if (self->state.numGRU > self->state.maxGRUPerJob) { // spawn the job actor + aout(self) << "\n Starting Job with startGRU = " << self->state.startGRU << "\n"; self->state.currentJob = self->spawn(job_actor, self->state.startGRU, self->state.maxGRUPerJob, self->state.configPath, self->state.outputStrucSize, self); @@ -83,8 +84,8 @@ void parseSettings(stateful_actor<summa_manager>* self, std::string configPath) json SummaActorConfig = settings["SummaActor"]; // Find the desired OutputStrucSize - if (SummaActorConfig.find("OuputStrucureSize") != SummaActorConfig.end()) { - 
self->state.outputStrucSize = SummaActorConfig["OuputStrucureSize"]; + if (SummaActorConfig.find("OuputStructureSize") != SummaActorConfig.end()) { + self->state.outputStrucSize = SummaActorConfig["OuputStructureSize"]; } else { aout(self) << "Error Finding OutputStructureSize in JOSN - Reverting to default value\n"; self->state.outputStrucSize = 250; diff --git a/build/source/actors/messageAtoms.h b/build/source/actors/messageAtoms.h index f31f319..9f38386 100644 --- a/build/source/actors/messageAtoms.h +++ b/build/source/actors/messageAtoms.h @@ -18,7 +18,7 @@ CAF_BEGIN_TYPE_ID_BLOCK(summa, first_custom_type_id) CAF_ADD_ATOM(summa, done_file_access_actor_init) CAF_ADD_ATOM(summa, file_access_actor_done) // FileAccess Actor - CAF_ADD_ATOM(summa, initalize_outputStrucure) + CAF_ADD_ATOM(summa, initalize_outputStructure) CAF_ADD_ATOM(summa, access_forcing) CAF_ADD_ATOM(summa, access_first_forcing_file) CAF_ADD_ATOM(summa, access_forcing_internal) diff --git a/build/source/engine/conv_funcs.f90 b/build/source/engine/conv_funcs.f90 index 2a07b7c..b4bc8f2 100755 --- a/build/source/engine/conv_funcs.f90 +++ b/build/source/engine/conv_funcs.f90 @@ -341,8 +341,7 @@ do iter=1,maxiter ! check if achieved tolerance if(abs(f0) < Xtol) exit ! check convergence - ! TODO: Changed the below to continue the hru computation - ! if(iter==maxiter)stop 'failed to converge in WETBULBTMP' +! if(iter==maxiter)stop 'failed to converge in WETBULBTMP' if(iter==maxiter)print*, 'failed to converge in WETBULBTMP' end do ! (iterating) diff --git a/build/source/engine/derivforce.f90 b/build/source/engine/derivforce.f90 index 8fb7905..a2d75e7 100755 --- a/build/source/engine/derivforce.f90 +++ b/build/source/engine/derivforce.f90 @@ -258,7 +258,6 @@ contains ! ensure wind speed is above a prescribed minimum value if(windspd < minwind) windspd=minwind - ! compute relative humidity (-) relhum = SPHM2RELHM(spechum, airpres, airtemp) ! 
if relative humidity exceeds saturation, then set relative and specific humidity to saturation diff --git a/build/source/interface/file_access_actor/initOutputStruc.f90 b/build/source/interface/file_access_actor/initOutputStruc.f90 index 29895e7..f9d48fd 100644 --- a/build/source/interface/file_access_actor/initOutputStruc.f90 +++ b/build/source/interface/file_access_actor/initOutputStruc.f90 @@ -23,9 +23,6 @@ subroutine initalizeOutput(forcFileInfo, maxSteps, nGRU, err) USE multiconst,only:secprday ! number of seconds in a day USE data_types,only:file_info_array USE var_lookup,only:maxvarFreq ! maximum number of output files - - - implicit none type(file_info_array), pointer :: forcFileInfo diff --git a/config/Summa_Actors_Settings.json b/config/Summa_Actors_Settings.json index 83b59a0..db2de25 100644 --- a/config/Summa_Actors_Settings.json +++ b/config/Summa_Actors_Settings.json @@ -1,17 +1,53 @@ { + "JobSubmissionParams": { + "cpus-per-task": 8, + "memory": "32G", + "job-name": "SummaActors", + "account": "rpp-kshook", + "numHRUs": 517315, + "maxNumberOfJobs": 1000, + "maxGRUsPerSubmission": 1000, + "executablePath": "/home/kklenk/SummaProjects/Summa-Actors/bin/summaMain" + }, + + "Configuration": { + "controlVersion": "SUMMA_FILE_MANAGER_V3.0.0", + "simStartTime": "1979-01-01 00:00", + "simEndTime": "2019-12-31 23:00", + "tmZoneInfo": "utcTime", + "settingsPath": "/project/6008034/kklenk/settings/SummaActorsSettings/", + "forcingPath": "/project/6008034/kklenk/forcingChunked/", + "outputPath": "/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/", + "forcingFreq": "month", + "forcingStart": "1979-01-01", + "decisionsFile": "modelDecisions.txt", + "outputControlFile": "outputControl.txt", + "globalHruParamFile": "localParamInfo.txt", + "globalGruParamFile": "basinParamInfo.txt", + "attributeFile": "attributes.nc", + "trialParamFile": "trialParams.nc", + "forcingListFile": "forcingFileList.txt", + "initConditionFile": "coldState.nc", + "outFilePrefix": 
"SummaActors", + "vegTableFile": "TBL_VEGPARM.TBL", + "soilTableFile": "TBL_SOILPARM.TBL", + "generalTableFile": "TBL_GENPARM.TBL", + "noahmpTableFile": "TBL_MPTABLE.TBL" + }, + "SummaActor": { - "OuputStrucureSize": 250, - "maxGRUPerJob": 1 + "OuputStructureSize": 250, + "maxGRUPerJob": 250 }, "JobActor": { - "FileManagerPath": "/home/k13nk/SummaProject/SummaActorsSettings/fileManager.txt", + "FileManagerPath": "/home/kklenk/projects/rpp-kshook/kklenk/settings/SummaActorsSettings/fileManager.txt", "outputCSV": false, "csvPath": "" }, "HRUActor": { "printOutput": true, - "outputFrequency": 1000 + "outputFrequency": 500 } } \ No newline at end of file diff --git a/config/caf-application.conf b/config/caf-application.conf index 83aa913..bf14c0d 100644 --- a/config/caf-application.conf +++ b/config/caf-application.conf @@ -1,6 +1,6 @@ caf { # Parameters selecting a default scheduler. scheduler { - max-threads = 4 + max-threads = 1 } } \ No newline at end of file diff --git a/config/configuration.py b/config/configuration.py new file mode 100644 index 0000000..b429768 --- /dev/null +++ b/config/configuration.py @@ -0,0 +1,92 @@ +import json +import os +from os.path import exists +from datetime import date + +def actor_setting(actor_id, setting_name, setting_value): + new_dic = {actor_id: {}} + + +""" +Function to create the inital summa_actors_settings file +""" +def create_init_config(): + Summa_Actor_Settings = ["OutputStructureSize", "maxGRUPerJob"] + Job_Actor_Settings = ["FileManagerPath", "outputCSV", "csvPath"] + HRU_Actor_Settings = ["printOutput", "outputFrequency"] + +def create_output_path(outputPath): + print("The output path exists, now seperating this run by today's date") + today = date.today() + todays_date = today.strftime("%b-%d-%Y") + outputPath += "{}/".format(todays_date) + if not exists(outputPath): + os.mkdir(outputPath) + print("Directory Created. 
Now Creating sub directories for SLURM Data and NetCDF data") + outputNetCDF = outputPath + "netcdf/" + outputSlurm = outputPath + "slurm/" + if not exists(outputNetCDF): + os.mkdir(outputNetCDF) + if not exists(outputSlurm): + os.mkdir(outputSlurm) + + return outputNetCDF, outputSlurm + + +def create_file_manager(): + json_file = open("Summa_Actors_Settings.json") + fileManagerSettings = json.load(json_file) + json_file.close() + + # add the date for the run + outputPath = fileManagerSettings["Configuration"]["outputPath"] + if exists(outputPath): + outputNetCDF, outputSlurm = create_output_path(outputPath) + fileManagerSettings["Configuration"]["outputPath"] = outputNetCDF + else: + print("Output path does not exist, Ensure it exists before running this setup") + return -1 + + fileManager = open("fileManager.txt", "w") + for key,value in fileManagerSettings["Configuration"].items(): + fileManager.write(key + " \'{}\'\n".format(value)) + fileManager.close() + print("File Manager for this job has been created") + + + +""" +Function to create a list of the jobs that will run +This is used for submitting the array job +""" +def create_job_list(): + json_file = open("Summa_Actors_Settings.json") + SummaSettings = json.load(json_file) + json_file.close() + + numberOfTasks = SummaSettings["JobSubmissionParams"]["numHRUs"] + maxGRU = SummaSettings["JobSubmissionParams"]["maxGRUsPerSubmission"] + numCPUs = SummaSettings["JobSubmissionParams"]["cpus-per-task"] + print(numberOfTasks) + print(maxGRU) + + + +""" +Function checks if the Summa_Actors_Settings.json file exists. 
+If yes: + move on +If no: + create it +""" +def init_run(): + Summa_Settings_Path = './Summa_Actors_Settings.json' + if exists('./Summa_Actors_Settings.json'): + print("File Exists, What do we do next") + create_file_manager() + create_job_list() + else: + print("File Does not Exist and we need to create it") + create_init_config() + +init_run() \ No newline at end of file diff --git a/config/fileManager.txt b/config/fileManager.txt new file mode 100644 index 0000000..06fb406 --- /dev/null +++ b/config/fileManager.txt @@ -0,0 +1,22 @@ +controlVersion 'SUMMA_FILE_MANAGER_V3.0.0' +simStartTime '1979-01-01 00:00' +simEndTime '2019-12-31 23:00' +tmZoneInfo 'utcTime' +settingsPath '/project/6008034/kklenk/settings/SummaActorsSettings/' +forcingPath '/project/6008034/kklenk/forcingChunked/' +outputPath '/home/kklenk/projects/rpp-kshook/kklenk/SummaActorsOutput/Apr-18-2022/netcdf/' +forcingFreq 'month' +forcingStart '1979-01-01' +decisionsFile 'modelDecisions.txt' +outputControlFile 'outputControl.txt' +globalHruParamFile 'localParamInfo.txt' +globalGruParamFile 'basinParamInfo.txt' +attributeFile 'attributes.nc' +trialParamFile 'trialParams.nc' +forcingListFile 'forcingFileList.txt' +initConditionFile 'coldState.nc' +outFilePrefix 'SummaActors' +vegTableFile 'TBL_VEGPARM.TBL' +soilTableFile 'TBL_SOILPARM.TBL' +generalTableFile 'TBL_GENPARM.TBL' +noahmpTableFile 'TBL_MPTABLE.TBL' diff --git a/utils/netcdf/checkOutput.py b/utils/netcdf/checkOutput.py new file mode 100644 index 0000000..a9d4718 --- /dev/null +++ b/utils/netcdf/checkOutput.py @@ -0,0 +1,21 @@ +from pathlib import Path +import xarray as xr + + +airtemp = "airtemp" + +filename = "outputChunked.txt" + +datasetPath = Path("/home/kklenk/projects/rpp-kshook/kklenk/forcingChunked/NorthAmerica_remapped_1983-05-01-00-00-00-chunked.nc") + +dataset = xr.open_dataset(datasetPath) + +data = [] + +data.append(dataset.isel(hru=492001).copy()) + +file = open(filename, "w") +for i in data[0][airtemp].values: + 
file.write("{}\n".format(i)) + +file.close() -- GitLab