From c638ba9b125f3003df2310546944d34f50662657 Mon Sep 17 00:00:00 2001
From: KyleKlenk <kyle.c.klenk@gmail.com>
Date: Thu, 26 Oct 2023 12:24:02 -0600
Subject: [PATCH] Restart feature is working with sundials

---
 build/includes/global/message_atoms.hpp       |  1 +
 build/includes/hru_actor/hru_actor.hpp        | 57 ++++++++++++----
 .../hru_actor_subroutine_wrappers.hpp         | 36 ----------
 .../hru_actor/serialize_data_structure.hpp    | 32 ---------
 build/includes/job_actor/job_actor.hpp        | 41 ++++++++----
 .../job_actor_subroutine_wrappers.hpp         | 12 ----
 build/source/actors/hru_actor/hru_actor.cpp   | 42 +++++-------
 build/source/actors/hru_actor/hru_init.f90    |  1 -
 .../source/actors/hru_actor/hru_modelRun.f90  | 65 +++++++++++++++++++
 build/source/actors/job_actor/job_actor.cpp   | 25 ++++---
 10 files changed, 175 insertions(+), 137 deletions(-)
 delete mode 100644 build/includes/hru_actor/hru_actor_subroutine_wrappers.hpp
 delete mode 100644 build/includes/hru_actor/serialize_data_structure.hpp
 delete mode 100644 build/includes/job_actor/job_actor_subroutine_wrappers.hpp

diff --git a/build/includes/global/message_atoms.hpp b/build/includes/global/message_atoms.hpp
index 070cacb..49c13a5 100644
--- a/build/includes/global/message_atoms.hpp
+++ b/build/includes/global/message_atoms.hpp
@@ -47,6 +47,7 @@ CAF_BEGIN_TYPE_ID_BLOCK(summa, first_custom_type_id)
     // Reciever: 
     // Summary:
     CAF_ADD_ATOM(summa, err)
+    CAF_ADD_ATOM(summa, err_atom)
     // Sender:
     // Reciever: 
     // Summary:
diff --git a/build/includes/hru_actor/hru_actor.hpp b/build/includes/hru_actor/hru_actor.hpp
index 7f8f8fe..a714886 100644
--- a/build/includes/hru_actor/hru_actor.hpp
+++ b/build/includes/hru_actor/hru_actor.hpp
@@ -5,12 +5,44 @@
 #include "auxilary.hpp"
 #include "timing_info.hpp"
 #include "settings_functions.hpp"
-#include <chrono>
 #include <string>
+#include "message_atoms.hpp"
+#include "global.hpp"
 
 
+/*********************************************
+ * HRU Actor Fortran Functions
+ *********************************************/
+extern "C" {
+  // Initialize HRU data_structures
+    void initHRU(int* indxGRU, int* num_steps, void* hru_data, int* err);
+    
+    void setupHRUParam( int* indxGRU, int* indxHRU, void* hru_data, double* upArea, int* err);
+    
+    // Setup summa_readRestart File if this option has been chosen 
+    void summa_readRestart(int* indxGRU, int* indxHRU, void* hru_data, double* dtInit, int* err);
+    
+    // Run the model for one timestep
+    void RunPhysics(int* id, int* stepIndex, void* hru_data, double* dt, int* dt_int_factor, int* err);
+    
+    void hru_writeOutput(int* index_hru, int* index_gru, int* timestep, int* output_step, void* hru_data, int* err);
+    
+    void setTimeZoneOffset(int* iFile, void* hru_data, int* err);
 
+    void HRU_readForcing(int* index_gru, int* iStep, int* iRead, int* iFile, void* hru_data,  int* err);
 
+    void get_sundials_tolerances(void* hru_data, double* relTol, double* absTol);
+    void set_sundials_tolerances(void* hru_data, double* relTol, double* absTol);
+
+    void setIDATolerances(void* hru_data, double* relTolTempCas, double* absTolTempCas, double* relTolTempVeg,
+                          double* absTolTempVeg, double* relTolWatVeg, double* absTolWatVeg, double* relTolTempSoilSnow,
+                          double* absTolTempSoilSnow, double* relTolWatSnow, double* absTolWatSnow, double* relTolMatric,
+                          double* absTolMatric, double* relTolAquifr, double* absTolAquifr);
+}
+
+/*********************************************
+ * HRU Actor state variables
+ *********************************************/
 namespace caf {
 struct hru_state {
 	// Actor References
@@ -24,14 +56,14 @@ struct hru_state {
     int refGRU;			// The actual ID of the GRU we are
 
     // Variables for forcing structures
-	  int stepsInCurrentFFile;        // number of time steps in current forcing file
+	int stepsInCurrentFFile;        // number of time steps in current forcing file
     int num_steps_until_write;      // number of time steps until we pause for FA_Actor to write
 
     // HRU data structures (formerly summa_type)
     void *hru_data = new_handle_hru_type();
 
     // Misc Variables
-	  int 		timestep = 1;	    // Current Timestep of HRU simulation
+	int     timestep = 1;	    // Current Timestep of HRU simulation
     int     forcingStep = 1;    // index of current time step in current forcing file
     int     num_steps = 0;      // number of time steps
     int     iFile = 1;              // index of current forcing file from forcing file list
@@ -39,12 +71,15 @@ struct hru_state {
     int     output_structure_step_index = 1; // index of current time step in output structure
     double  dt_init;            // used to initialize the length of the sub-step for each HRU
     double	upArea;             // area upslope of each HRU
+    int     err = 0;	
+
+    // Sundials variables
+    double rtol = -9999; // -9999 uses default
+    double atol = -9999; // -9999 uses default		        
 
 
     // Settings
     HRU_Actor_Settings hru_actor_settings;
-    // error control
-    int err = 0;			        
     
     ~hru_state() {
         delete_handle_hru_type(hru_data);
@@ -55,14 +90,14 @@ behavior hru_actor(stateful_actor<hru_state>* self, int refGRU, int indxGRU,
     HRU_Actor_Settings hru_actor_settings, caf::actor file_access_actor, 
     caf::actor parent);
 
-/**
- Function to initalize the HRU for running
- */
+/*********************************************
+ * Functions for the HRU Actor
+ *********************************************/
+
+/** Function to initalize the HRU for running */
 void Initialize_HRU(stateful_actor<hru_state>* self);
 
-/**
- Function runs all of the hru time_steps
- */
+/** Function runs all of the hru time_steps */
 int Run_HRU(stateful_actor<hru_state>* self);
 
 }
\ No newline at end of file
diff --git a/build/includes/hru_actor/hru_actor_subroutine_wrappers.hpp b/build/includes/hru_actor/hru_actor_subroutine_wrappers.hpp
deleted file mode 100644
index 0c07df6..0000000
--- a/build/includes/hru_actor/hru_actor_subroutine_wrappers.hpp
+++ /dev/null
@@ -1,36 +0,0 @@
-#pragma once
-
-extern "C" {
-  // Initialize HRU data_structures
-    void initHRU(int* indxGRU, int* num_steps, void* hru_data, int* err);
-    
-    void setupHRUParam( int* indxGRU, int* indxHRU, void* hru_data, double* upArea, int* err);
-    
-    // Setup summa_readRestart File if this option has been chosen 
-    void summa_readRestart(int* indxGRU, int* indxHRU, void* hru_data, double* dtInit, int* err);
-    
-    // Run the model for one timestep
-    void RunPhysics(int* id, int* stepIndex, void* hru_data, double* dt, int* dt_int_factor, int* err);
-    
-    void hru_writeOutput(int* index_hru, int* index_gru, int* timestep, int* output_step, void* hru_data, int* err);
-    
-    void setTimeZoneOffset(int* iFile, void* hru_data, int* err);
-
-    void HRU_readForcing(int* index_gru, int* iStep, int* iRead, int* iFile, void* hru_data,  int* err);
-
-    void setIDATolerances(void* hru_data,
-                        double* relTolTempCas,
-                        double* absTolTempCas,
-                        double* relTolTempVeg,
-                        double* absTolTempVeg,
-                        double* relTolWatVeg,
-                        double* absTolWatVeg,
-                        double* relTolTempSoilSnow,
-                        double* absTolTempSoilSnow,
-                        double* relTolWatSnow,
-                        double* absTolWatSnow,
-                        double* relTolMatric,
-                        double* absTolMatric,
-                        double* relTolAquifr,
-                        double* absTolAquifr);
-}
\ No newline at end of file
diff --git a/build/includes/hru_actor/serialize_data_structure.hpp b/build/includes/hru_actor/serialize_data_structure.hpp
deleted file mode 100644
index c150069..0000000
--- a/build/includes/hru_actor/serialize_data_structure.hpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#pragma once
-
-template<typename T>
-struct Summa_Variable {
-    std::vector<T> dat; // needs to be general as each array can have its own type
-    int var_type;  // maps to varType() in Fortran-Side
-    int var_fortran_index; // maps to the index of the var portion of the summa_variable
-};
-
-extern "C" {
-    void getSummaVariableInfo(int* var_type, int* var_fortran_index, void* data_struct);
-}
-
-
-
-
-template <typename T>
-std::vector<Summa_Variable<T>> init_summa_variable_vector(int num_variables) {
-    std::vector<Summa_Variable<T>> summa_variables(num_variables);
-
-    int variable_index = 1; // value to index into the fortran structure of the array
-    for (Summa_Variable var : summa_variables) {
-        var.var_fortran_index = variable_index;
-        getSummaVariableInfo(var.var_type, var.var_fortran_index);
-        variable_index++;
-    }
-
-    return summa_variables;
-}
-
-
-
diff --git a/build/includes/job_actor/job_actor.hpp b/build/includes/job_actor/job_actor.hpp
index 47fa10b..c6926a0 100644
--- a/build/includes/job_actor/job_actor.hpp
+++ b/build/includes/job_actor/job_actor.hpp
@@ -4,14 +4,31 @@
 #include "GRU.hpp"
 #include "timing_info.hpp"
 #include "settings_functions.hpp"
+#include "global.hpp"
+#include "json.hpp"
+#include "hru_actor.hpp"
+#include "message_atoms.hpp"
+#include "file_access_actor.hpp"
 #include <unistd.h>
 #include <limits.h>
-#include "global.hpp"
 
+
+/*********************************************
+ * Job Actor Fortran Functions
+ *********************************************/
+extern "C" {
+    void job_init_fortran(char const* file_manager, int* start_gru_index, int* num_gru, int* num_hru, int* err);
+
+    void deallocateJobActor(int* err);
+}
+
+
+/*********************************************
+ * Job Actor state variables
+ *********************************************/
 namespace caf {
 using chrono_time = std::chrono::time_point<std::chrono::system_clock>;
 
-
 // Holds information about the GRUs
 struct GRU_Container {
     std::vector<GRU*> gru_list;
@@ -28,21 +45,17 @@ struct job_state {
     caf::actor parent;            // actor reference to the top-level SummaActor
 
     // Job Parameters
-    int start_gru;                 // Starting GRU for this job
-    int num_gru;                   // Number of GRUs for this job
+    int start_gru;                // Starting GRU for this job
+    int num_gru;                  // Number of GRUs for this job
     int num_hru;
-    int max_run_attempts = 1;         // Max number of attemtps to solve a GRU
-
-
+    int max_run_attempts = 1;     // Max number of attempts to solve a GRU
 
     GRU_Container gru_container;
 
-
-
     // Variables for GRU monitoring
-    int dt_init_start_factor = 1;   // Initial Factor for dt_init (coupled_em)
-    int num_gru_done = 0;             // The number of GRUs that have completed
-    int num_gru_failed = 0;           // Number of GRUs that have failed
+    int dt_init_start_factor = 1; // Initial Factor for dt_init (coupled_em)
+    int num_gru_done = 0;         // The number of GRUs that have completed
+    int num_gru_failed = 0;       // Number of GRUs that have failed
 
     // Timing Variables
     TimingInfo job_timing;
@@ -66,6 +79,10 @@ behavior job_actor(stateful_actor<job_state>* self,
                    actor parent);
 
 
+/*********************************************
+ * Functions for the Job Actor
+ *********************************************/
+
 /** Get the information for the GRUs that will be written to the netcdf file */
 std::vector<serializable_netcdf_gru_actor_info> getGruNetcdfInfo(int max_run_attempts, 
                                                                  std::vector<GRU*> &gru_list);
diff --git a/build/includes/job_actor/job_actor_subroutine_wrappers.hpp b/build/includes/job_actor/job_actor_subroutine_wrappers.hpp
deleted file mode 100644
index a0073c9..0000000
--- a/build/includes/job_actor/job_actor_subroutine_wrappers.hpp
+++ /dev/null
@@ -1,12 +0,0 @@
-#pragma once
-
-extern "C" {
-    void job_init_fortran(char const* file_manager,
-                          int* start_gru_index, 
-                          int* num_gru, 
-                          int* num_hru, 
-                          int* err);
-
-    void deallocateJobActor(int* err);
-    
-}
\ No newline at end of file
diff --git a/build/source/actors/hru_actor/hru_actor.cpp b/build/source/actors/hru_actor/hru_actor.cpp
index 38af62b..4939a6f 100644
--- a/build/source/actors/hru_actor/hru_actor.cpp
+++ b/build/source/actors/hru_actor/hru_actor.cpp
@@ -1,11 +1,11 @@
-#include "caf/all.hpp"
 #include "hru_actor.hpp"
-#include "global.hpp"
-#include "message_atoms.hpp"
-#include "hru_actor_subroutine_wrappers.hpp"
-#include "serialize_data_structure.hpp"
-#include <thread>
 
+// Compiler set flag for sundials
+#ifdef SUNDIALS_ACTIVE
+    bool sundials_active = true;
+#else
+    bool sundials_active = false;
+#endif
 
 namespace caf {
 
@@ -66,8 +66,15 @@ behavior hru_actor(stateful_actor<hru_state>* self, int refGRU, int indxGRU,
 
                 err = Run_HRU(self); // Simulate a Timestep
                 if (err != 0) {
-                    // We should have already printed the error to the screen if we get here
-                    self->send(self->state.parent, hru_error::run_physics_unhandleable, self);
+                
+                    if (sundials_active) {
+                        get_sundials_tolerances(self->state.hru_data, &self->state.rtol, &self->state.atol);
+                        self->send(self->state.parent, err_atom_v, self, self->state.rtol, self->state.atol);
+                    } else {
+                        self->send(self->state.parent, hru_error::run_physics_unhandleable, self);
+                    }
+
+
                     self->quit();
                     return;
                 }
@@ -141,23 +148,8 @@ void Initialize_HRU(stateful_actor<hru_state>* self) {
         return;
     }
 
-    // Set HRU Tolerances
-    // setIDATolerances(self->state.hru_data,
-    //                  &self->state.hru_actor_settings.relTolTempCas,
-    //                  &self->state.hru_actor_settings.absTolTempCas,
-    //                  &self->state.hru_actor_settings.relTolTempVeg,
-    //                  &self->state.hru_actor_settings.absTolTempVeg,
-    //                  &self->state.hru_actor_settings.relTolWatVeg,
-    //                  &self->state.hru_actor_settings.absTolWatVeg,
-    //                  &self->state.hru_actor_settings.relTolTempSoilSnow,
-    //                  &self->state.hru_actor_settings.absTolTempSoilSnow,
-    //                  &self->state.hru_actor_settings.relTolWatSnow,
-    //                  &self->state.hru_actor_settings.absTolWatSnow,
-    //                  &self->state.hru_actor_settings.relTolMatric,
-    //                  &self->state.hru_actor_settings.absTolMatric,
-    //                  &self->state.hru_actor_settings.relTolAquifr,
-    //                  &self->state.hru_actor_settings.absTolAquifr);
-            
+    if (self->state.hru_actor_settings.rel_tol > 0 && self->state.hru_actor_settings.abs_tol > 0)
+        set_sundials_tolerances(self->state.hru_data, &self->state.hru_actor_settings.rel_tol, &self->state.hru_actor_settings.abs_tol);            
 }
 
 int Run_HRU(stateful_actor<hru_state>* self) {
diff --git a/build/source/actors/hru_actor/hru_init.f90 b/build/source/actors/hru_actor/hru_init.f90
index fb71c40..7e4c77b 100755
--- a/build/source/actors/hru_actor/hru_init.f90
+++ b/build/source/actors/hru_actor/hru_init.f90
@@ -61,7 +61,6 @@ private
 public::initHRU
 public::setupHRUParam
 public::summa_readRestart
-public::setIDATolerances
 contains
 ! **************************************************************************************************
 ! public subroutine initHRU: ! used to declare and allocate summa data structures and initialize model state to known values
diff --git a/build/source/actors/hru_actor/hru_modelRun.f90 b/build/source/actors/hru_actor/hru_modelRun.f90
index 6867737..7e53b11 100644
--- a/build/source/actors/hru_actor/hru_modelRun.f90
+++ b/build/source/actors/hru_actor/hru_modelRun.f90
@@ -69,6 +69,8 @@ USE mDecisions_module,only:&               ! look-up values for LAI decisions
 implicit none
 private
 public::runPhysics
+public::get_sundials_tolerances
+public::set_sundials_tolerances
 contains
 
 ! Runs the model physics for an HRU
@@ -344,4 +346,67 @@ subroutine runPhysics(&
 
 end subroutine runPhysics
 
+! *******************************************************************************************
+! *** get_sundials_tolerances
+! *******************************************************************************************
+subroutine get_sundials_tolerances(handle_hru_data, rtol, atol) bind(C, name='get_sundials_tolerances')
+  USE var_lookup,only: iLookPARAM
+  implicit none
+
+  ! dummy variables
+  type(c_ptr),    intent(in), value         :: handle_hru_data        ! c_ptr to -- hru data
+  real(c_double), intent(out)               :: rtol                   ! relative tolerance
+  real(c_double), intent(out)               :: atol                   ! absolute tolerance
+  ! local variables
+  type(hru_type),pointer                    :: hru_data               ! hru data
+  call c_f_pointer(handle_hru_data, hru_data)
+
+  ! get tolerances
+  rtol = hru_data%mparStruct%var(iLookPARAM%relTolWatSnow)%dat(1) 
+  atol = hru_data%mparStruct%var(iLookPARAM%absTolWatSnow)%dat(1)
+end subroutine get_sundials_tolerances
+
+! *******************************************************************************************
+! *** get_sundials_tolerances
+! *******************************************************************************************
+subroutine set_sundials_tolerances(handle_hru_data, rtol, atol) bind(C, name='set_sundials_tolerances')
+  USE var_lookup,only: iLookPARAM
+  implicit none
+
+  ! dummy variables
+  type(c_ptr),    intent(in), value         :: handle_hru_data        ! c_ptr to -- hru data
+  real(c_double), intent(in)               :: rtol                   ! relative tolerance
+  real(c_double), intent(in)               :: atol                   ! absolute tolerance
+  ! local variables
+  type(hru_type),pointer                    :: hru_data               ! hru data
+  call c_f_pointer(handle_hru_data, hru_data)
+
+
+  ! Set rtols
+  hru_data%mparStruct%var(iLookPARAM%relConvTol_liquid)%dat(1) = rtol  
+  hru_data%mparStruct%var(iLookPARAM%relConvTol_matric)%dat(1) = rtol  
+  hru_data%mparStruct%var(iLookPARAM%relConvTol_energy)%dat(1) = rtol  
+  hru_data%mparStruct%var(iLookPARAM%relConvTol_aquifr)%dat(1) = rtol  
+  hru_data%mparStruct%var(iLookPARAM%relTolTempCas)%dat(1) = rtol  
+  hru_data%mparStruct%var(iLookPARAM%relTolTempVeg)%dat(1) = rtol  
+  hru_data%mparStruct%var(iLookPARAM%relTolWatVeg)%dat(1) = rtol  
+  hru_data%mparStruct%var(iLookPARAM%relTolTempSoilSnow)%dat(1) = rtol  
+  hru_data%mparStruct%var(iLookPARAM%relTolWatSnow)%dat(1) = rtol  
+  hru_data%mparStruct%var(iLookPARAM%relTolMatric)%dat(1) = rtol  
+  hru_data%mparStruct%var(iLookPARAM%relTolAquifr)%dat(1) = rtol  
+  ! Set atols
+  hru_data%mparStruct%var(iLookPARAM%absConvTol_liquid)%dat(1) = atol 
+  hru_data%mparStruct%var(iLookPARAM%absConvTol_matric)%dat(1) = atol 
+  hru_data%mparStruct%var(iLookPARAM%absConvTol_energy)%dat(1) = atol 
+  hru_data%mparStruct%var(iLookPARAM%absConvTol_aquifr)%dat(1) = atol 
+  hru_data%mparStruct%var(iLookPARAM%absTolTempCas)%dat(1) = atol 
+  hru_data%mparStruct%var(iLookPARAM%absTolTempVeg)%dat(1) = atol 
+  hru_data%mparStruct%var(iLookPARAM%absTolWatVeg)%dat(1) = atol 
+  hru_data%mparStruct%var(iLookPARAM%absTolTempSoilSnow)%dat(1) = atol 
+  hru_data%mparStruct%var(iLookPARAM%absTolWatSnow)%dat(1) = atol 
+  hru_data%mparStruct%var(iLookPARAM%absTolMatric)%dat(1) = atol 
+  hru_data%mparStruct%var(iLookPARAM%absTolAquifr)%dat(1) = atol 
+end subroutine set_sundials_tolerances
+
+
 end module summa_modelRun
diff --git a/build/source/actors/job_actor/job_actor.cpp b/build/source/actors/job_actor/job_actor.cpp
index 34b06d2..e8da6c9 100644
--- a/build/source/actors/job_actor/job_actor.cpp
+++ b/build/source/actors/job_actor/job_actor.cpp
@@ -1,11 +1,4 @@
 #include "job_actor.hpp"
-#include "file_access_actor.hpp"
-#include "json.hpp"
-#include <chrono>
-#include <thread>
-#include "message_atoms.hpp"
-#include "job_actor_subroutine_wrappers.hpp"
-#include "hru_actor.hpp"
 
 using json = nlohmann::json;
 using chrono_time = std::chrono::time_point<std::chrono::system_clock>;
@@ -164,11 +157,19 @@ behavior job_actor(stateful_actor<job_state>* self,
 
           self->send(self->state.file_access_actor, restart_failures_v); // notify file_access_actor
 
+          // Set Sundials tolerance or decrease timestep length
+          if (self->state.hru_actor_settings.rel_tol > 0 && self->state.hru_actor_settings.abs_tol > 0) {
+            self->state.hru_actor_settings.rel_tol /= 10;
+            self->state.hru_actor_settings.abs_tol /= 10;
+          } else {
+            self->state.hru_actor_settings.dt_init_factor *= 2;
+          }
+
+
           for(auto GRU : self->state.gru_container.gru_list) {
             if(GRU->isFailed()) {
               GRU->setRunning();
               GRU->decrementAttemptsLeft();
-              self->state.hru_actor_settings.dt_init_factor *= 2;
               auto global_gru_index = GRU->getGlobalGRUIndex();
               auto local_gru_index = GRU->getLocalGRUIndex();
               auto gru_actor = self->spawn(hru_actor, 
@@ -224,6 +225,13 @@ behavior job_actor(stateful_actor<job_state>* self,
 
         },
 
+        // Handle Sundials Error
+        [=](err_atom, caf::actor src, double rtol, double atol) {
+          self->state.hru_actor_settings.rel_tol = rtol;
+          self->state.hru_actor_settings.abs_tol = atol;
+          handleGRUError(self, src);
+        },
+
         [=](const error& err, caf::actor src) {
           
           aout(self) << "\n\n ********** ERROR HANDLER \n";
@@ -290,6 +298,7 @@ void handleGRUError(stateful_actor<job_state>* self, caf::actor src) {
   if (it != self->state.gru_container.gru_list.end()) {
     (*it)->setFailed();
     (*it)->decrementAttemptsLeft();
+
     self->state.gru_container.num_gru_done++;
     self->state.gru_container.num_gru_failed++;
     self->send(self->state.file_access_actor, run_failure_v, (*it)->getLocalGRUIndex());
-- 
GitLab