diff --git a/build/includes/file_access_actor/file_access_actor.hpp b/build/includes/file_access_actor/file_access_actor.hpp index 26aa1abd89e89c9cbec9eeaddb6a5cdccbc440cb..90c835bc8db4a09869059906524f7e9b83211f18 100644 --- a/build/includes/file_access_actor/file_access_actor.hpp +++ b/build/includes/file_access_actor/file_access_actor.hpp @@ -9,7 +9,7 @@ #include "message_atoms.hpp" #include "forcing_file_info.hpp" #include "json.hpp" -#include "summa_init_struc.hpp" +// #include "summa_init_struc.hpp" /********************************************* * File Access Actor Fortran Functions @@ -48,7 +48,7 @@ struct file_access_state { File_Access_Actor_Settings file_access_actor_settings; - std::unique_ptr<SummaInitStruc> summa_init_struc; + // std::unique_ptr<SummaInitStruc> summa_init_struc; std::unique_ptr<forcingFileContainer> forcing_files; bool write_params_flag = true; diff --git a/build/includes/file_access_actor/summa_init_struc.hpp b/build/includes/file_access_actor/summa_init_struc.hpp index b92d1d9d00528a392591504e5ed17e5174ae8c50..2303cb0e54c23aa706cf060418e4158b3df3c9c7 100644 --- a/build/includes/file_access_actor/summa_init_struc.hpp +++ b/build/includes/file_access_actor/summa_init_struc.hpp @@ -1,10 +1,11 @@ #define SUMMA_INIT_STRUC #ifdef SUMMA_INIT_STRUC - +#include "settings_functions.hpp" extern "C" { void initialize_init_struc(int& num_gru, int& err, void* message); void paramSetup_fortran(int& err, void* message); void readRestart_fortran(int& err, void* message); + void getInitTolerance_fortran(double& rtol, double& atol); void deallocate_init_struc(); } @@ -16,6 +17,7 @@ class SummaInitStruc { int allocate(int num_gru); // allocate space in Fortran int summa_paramSetup(); // call summa_paramSetup int summa_readRestart(); // call summa_readRestart + void getInitTolerance(HRU_Actor_Settings& hru_settings); }; diff --git a/build/includes/job_actor/gru_struc.hpp b/build/includes/job_actor/gru_struc.hpp index d14622659dfa5c089aaedaf21841224a01106bf8..1d9daf49f14f0eed97aa41deecb27e0c0ad7a7a4 100644 --- a/build/includes/job_actor/gru_struc.hpp +++ b/build/includes/job_actor/gru_struc.hpp @@ -33,12 +33,21 @@ class GruStruc { inline void incrementNumGRUDone() { num_gru_done_++; } inline void incrementNumGRUFailed() { num_gru_failed_++; num_gru_done_++;} inline void decrementRetryAttempts() { num_retry_attempts_left_--; } + inline void decrementNumGRUFailed() { num_gru_failed_--; num_gru_done_--;} inline GRU* getGRU(int index) { return gru_info_[index-1].get(); } inline bool isDone() { return num_gru_done_ >= num_gru_; } inline bool hasFailures() { return num_gru_failed_ > 0; } inline bool shouldRetry() { return num_retry_attempts_left_ > 0; } + int getFailedIndex() { + for (int i = 0; i < gru_info_.size(); i++) { + if (gru_info_[i]->getStatus() == gru_state::failed) { + return gru_info_[i]->getIndexJob(); + } + } + return -1; + } private: // Inital Information about the GRUs diff --git a/build/includes/job_actor/job_actor.hpp b/build/includes/job_actor/job_actor.hpp index 7ba37f2700a10eae931e08531fbfb9ab9e09c353..f69871c4cbbe4fa1ea7aafa77325cad7112d3608 100644 --- a/build/includes/job_actor/job_actor.hpp +++ b/build/includes/job_actor/job_actor.hpp @@ -17,6 +17,7 @@ #include <cmath> #include <vector> #include <tuple> +#include "summa_init_struc.hpp" @@ -54,6 +55,9 @@ struct job_state { NumGRUInfo num_gru_info; GRU_Container gru_container; + + std::unique_ptr<SummaInitStruc> summa_init_struc; + // Variables for GRU monitoring int dt_init_start_factor = 1; // Initial Factor for dt_init (coupled_em) int num_gru_done = 0; // The number of GRUs that have completed diff --git a/build/source/file_access_actor/file_access_actor.cpp b/build/source/file_access_actor/file_access_actor.cpp index eaaace071b7139e7f94a9f60ad1b0a9764a46f1a..928478b9cba1b4a7cd2c2eb7c2d7c2c4470cb4cc 100644 --- a/build/source/file_access_actor/file_access_actor.cpp +++ b/build/source/file_access_actor/file_access_actor.cpp @@ -39,19 +39,19 @@ behavior file_access_actor(stateful_actor<file_access_state>* self, int num_hru = self->state.num_gru; - self->state.summa_init_struc = std::make_unique<SummaInitStruc>(); - if (self->state.summa_init_struc->allocate(self->state.num_gru) != 0) { - aout(self) << "ERROR -- File_Access_Actor: SummaInitStruc allocation failed\n"; - return -1; - } - if (self->state.summa_init_struc->summa_paramSetup() != 0) { - aout(self) << "ERROR -- File_Access_Actor: SummaInitStruc paramSetup failed\n"; - return -1; - } - if (self->state.summa_init_struc->summa_readRestart()!= 0) { - aout(self) << "ERROR -- File_Access_Actor: SummaInitStruc readRestart failed\n"; - return -1; - } + // self->state.summa_init_struc = std::make_unique<SummaInitStruc>(); + // if (self->state.summa_init_struc->allocate(self->state.num_gru) != 0) { + // aout(self) << "ERROR -- File_Access_Actor: SummaInitStruc allocation failed\n"; + // return -1; + // } + // if (self->state.summa_init_struc->summa_paramSetup() != 0) { + // aout(self) << "ERROR -- File_Access_Actor: SummaInitStruc paramSetup failed\n"; + // return -1; + // } + // if (self->state.summa_init_struc->summa_readRestart()!= 0) { + // aout(self) << "ERROR -- File_Access_Actor: SummaInitStruc readRestart failed\n"; + // return -1; + // } // Get the information about the forcing files self->state.forcing_files = std::make_unique<forcingFileContainer>(); diff --git a/build/source/file_access_actor/output_container.cpp b/build/source/file_access_actor/output_container.cpp index 65a900410c1c866c604407cc405893fb63b07e60..1799d666795a2e50ae5c66b61f4683e1e1ad819a 100644 --- a/build/source/file_access_actor/output_container.cpp +++ b/build/source/file_access_actor/output_container.cpp @@ -152,6 +152,10 @@ int Output_Container::findPartition(int local_gru_index) { if (it != this->failed_gru_index_list.end()) { return std::distance(this->failed_gru_index_list.begin(), it); } else { + std::cout << "GRU index: " << local_gru_index << std::endl; + for (int i = 0; i < this->failed_gru_index_list.size(); i++) { + std::cout << this->failed_gru_index_list[i] << std::endl; + } throw std::runtime_error("GRU index not found in failed GRU index list"); } } diff --git a/build/source/file_access_actor/summa_init_struc.cpp b/build/source/file_access_actor/summa_init_struc.cpp index 03d27b121f54d5ade29f69f6393a515f3f3a786d..ad99941c316e72ae8a4efba81295d98c24156d81 100644 --- a/build/source/file_access_actor/summa_init_struc.cpp +++ b/build/source/file_access_actor/summa_init_struc.cpp @@ -32,3 +32,7 @@ int SummaInitStruc::summa_readRestart() { return err; } +void SummaInitStruc::getInitTolerance(HRU_Actor_Settings& hru_settings) { + getInitTolerance_fortran(hru_settings.rel_tol, hru_settings.abs_tol); +} + diff --git a/build/source/file_access_actor/summa_init_struc.f90 b/build/source/file_access_actor/summa_init_struc.f90 index 4788ce83cd66feb86cbc992fec2707a2f327e39f..4f21fd01b1e8f9a791efe3afdbca273f212ef2b6 100644 --- a/build/source/file_access_actor/summa_init_struc.f90 +++ b/build/source/file_access_actor/summa_init_struc.f90 @@ -6,6 +6,7 @@ module summa_init_struc public :: initialize_init_struc public :: paramSetup_fortran public :: deallocate_init_struc + public :: getInitTolerance_fortran ! Used to get all the inital conditions for the model -- allows calling summa_setup.f90 type(summa1_type_dec),allocatable,save,public :: init_struc @@ -205,6 +206,26 @@ subroutine readRestart_fortran(err, message_r) bind(C, name="readRestart_fortran end subroutine readRestart_fortran +subroutine getInitTolerance_fortran(rtol, atol) & + bind(C, name="getInitTolerance_fortran") + USE globalData,only:model_decisions ! model decision structure + USE var_lookup,only:iLookDECISIONS + USE var_lookup,only:iLookPARAM + implicit none + ! dummy variables + real(c_double), intent(out) :: rtol + real(c_double), intent(out) :: atol + + if (model_decisions(iLookDECISIONS%num_method)%iDecision == 83) then + rtol = init_struc%mparStruct%gru(1)%hru(1)%var(iLookPARAM%relTolWatSnow)%dat(1) + atol = init_struc%mparStruct%gru(1)%hru(1)%var(iLookPARAM%absTolWatSnow)%dat(1) + else + rtol = -9999 + atol = -9999 + end if + +end subroutine getInitTolerance_fortran + subroutine deallocate_init_struc() bind(C, name="deallocate_init_struc") USE globalData,only:startTime,finshTime,refTime,oldTime implicit none diff --git a/build/source/hru_actor/hru_actor.cpp b/build/source/hru_actor/hru_actor.cpp index 4933c3a60b4617ebb2da53412cac311168da8585..114769ba7ba2e4daf078805388bcc752934bf927 100644 --- a/build/source/hru_actor/hru_actor.cpp +++ b/build/source/hru_actor/hru_actor.cpp @@ -200,10 +200,12 @@ void Initialize_HRU(stateful_actor<hru_state>* self) { } #ifdef SUNDIALS_ACTIVE if (self->state.hru_actor_settings.rel_tol > 0 && - self->state.hru_actor_settings.abs_tol > 0) + self->state.hru_actor_settings.abs_tol > 0) { + aout(self) << "Setting Sundials Tolerances\n"; set_sundials_tolerances(self->state.hru_data, - &self->state.hru_actor_settings.rel_tol, - &self->state.hru_actor_settings.abs_tol); + &self->state.hru_actor_settings.rel_tol, + &self->state.hru_actor_settings.abs_tol); + } #endif } diff --git a/build/source/job_actor/async_mode.cpp b/build/source/job_actor/async_mode.cpp index 57b990a1f7fd175c731c1c97d7e815fc934ed866..20eacb1e93accdafdb7992880af8e1da2a5ecdc8 100644 --- a/build/source/job_actor/async_mode.cpp +++ b/build/source/job_actor/async_mode.cpp @@ -19,6 +19,36 @@ behavior async_mode(stateful_actor<job_state>* self) { [=] (restart_failures) { aout(self) << "Async Mode: Restarting GRUs that Failed\n"; + if (self->state.hru_actor_settings.rel_tol > 0 && + self->state.hru_actor_settings.abs_tol > 0) { + self->state.hru_actor_settings.rel_tol /= 10; + self->state.hru_actor_settings.abs_tol /= 10; + } else { + self->state.hru_actor_settings.dt_init_factor *= 2; + } + + // notify file_access_actor + self->send(self->state.file_access_actor, restart_failures_v); + + while(self->state.gru_struc->getNumGRUFailed() > 0) { + int job_index = self->state.gru_struc->getFailedIndex(); + aout(self) << "Async Mode: Restarting GRU: " << job_index << "\n"; + int netcdf_index = job_index + self->state.gru_struc->getStartGru() - 1; + auto gru = self->spawn(hru_actor, netcdf_index, job_index, + self->state.hru_actor_settings, + self->state.file_access_actor, self); + self->send(gru, init_hru_v); + self->send(gru, update_hru_async_v); + self->state.gru_struc->decrementNumGRUFailed(); + std::unique_ptr<GRU> gru_obj = std::make_unique<GRU>( + netcdf_index, job_index, gru, self->state.dt_init_start_factor, + self->state.hru_actor_settings.rel_tol, + self->state.hru_actor_settings.abs_tol, + self->state.max_run_attempts); + self->state.gru_struc->addGRU(std::move(gru_obj)); + } + self->state.gru_struc->decrementRetryAttempts(); + }, [=](finalize) { finalizeJob(self); }, diff --git a/build/source/job_actor/job_actor.cpp b/build/source/job_actor/job_actor.cpp index 375408833f288ac9e3ca64e0eeab7c42f15b1bad..14cd9843969425a4235724edfc03e82cdcb8bc2d 100644 --- a/build/source/job_actor/job_actor.cpp +++ b/build/source/job_actor/job_actor.cpp @@ -56,6 +56,22 @@ behavior job_actor(stateful_actor<job_state>* self, int start_gru, int num_gru, return {}; } + + self->state.summa_init_struc = std::make_unique<SummaInitStruc>(); + if (self->state.summa_init_struc->allocate(self->state.num_gru) != 0) { + aout(self) << "ERROR -- Job_Actor: SummaInitStruc allocation failed\n"; + return {}; + } + if (self->state.summa_init_struc->summa_paramSetup() != 0) { + aout(self) << "ERROR -- Job_Actor: SummaInitStruc paramSetup failed\n"; + return {}; + } + if (self->state.summa_init_struc->summa_readRestart()!= 0) { + aout(self) << "ERROR -- Job_Actor: SummaInitStruc readRestart failed\n"; + return {}; + } + self->state.summa_init_struc->getInitTolerance(self->state.hru_actor_settings); + self->state.num_gru_info = NumGRUInfo(self->state.start_gru, self->state.start_gru, self->state.num_gru, @@ -174,40 +190,3 @@ behavior job_actor(stateful_actor<job_state>* self, int start_gru, int num_gru, }; } - - - - - -// void handleGRUError(stateful_actor<job_state>* self, caf::actor src) { -// auto it = std::find_if(self->state.gru_container.gru_list.begin(), -// self->state.gru_container.gru_list.end(), -// [src](auto& gru) { -// return gru->getActorRef() == src; -// }); - -// if (it != self->state.gru_container.gru_list.end()) { -// (*it)->setFailed(); -// (*it)->decrementAttemptsLeft(); - -// self->state.gru_container.num_gru_done++; -// self->state.gru_container.num_gru_failed++; -// self->send(self->state.file_access_actor, run_failure_v, (*it)->getIndexJob()); -// } else { -// aout(self) << "ERROR: Job_Actor: Could not find GRU in GRU_Container\n"; -// } - -// // Check if all GRUs are finished -// if (self->state.gru_container.num_gru_done >= self->state.gru_container.num_gru_in_run_domain) { -// // Check for failures -// if(self->state.gru_container.num_gru_failed == 0 || self->state.max_run_attempts == 1) { -// self->send(self, finalize_v); -// } else { -// self->send(self, restart_failures_v); -// } -// } - -// } - - -