diff --git a/build/CMakeLists.txt b/build/CMakeLists.txt index 575348f0a413f9263a98b4f27758e41d31c0b854..9b57ffdc52f279e4c2bf5387a9818d45b9d3e1c8 100644 --- a/build/CMakeLists.txt +++ b/build/CMakeLists.txt @@ -16,8 +16,11 @@ set(CMAKE_CONFIGURATION_TYPES BE BE_Debug BE_Cluster BE_Cluster_Debug) if(CMAKE_BUILD_TYPE MATCHES Debug) message("\nSetting Debug Options\n") add_compile_definitions(DEBUG) - set(FLAGS_NOAH -g -O0 -fbacktrace -fbounds-check -ffree-form -ffree-line-length-none -fmax-errors=0 -fPIC -Wfatal-errors ${FLAGS_OPT}) - set(FLAGS_ALL -g -O0 -fbacktrace -fbounds-check -ffree-line-length-none -fmax-errors=0 -fPIC -Wfatal-errors -cpp ${FLAGS_OPT}) + set(FLAGS_NOAH -g -O0 -fbacktrace -fbounds-check -ffree-form + -ffree-line-length-none -fmax-errors=0 -fPIC -Wfatal-errors + ${FLAGS_OPT}) + set(FLAGS_ALL -g -O0 -fbacktrace -fbounds-check -ffree-line-length-none + -fmax-errors=0 -fPIC -Wfatal-errors -cpp ${FLAGS_OPT}) set(FLAGS_CXX -g -O0 -fbounds-check -Wfatal-errors -std=c++17 ${FLAGS_OPT}) else() message("\nSetting Release Options") @@ -74,7 +77,7 @@ set(NETCDF_DIR ${F_MASTER}/build/source/netcdf) set(NOAHMP_DIR ${F_MASTER}/build/source/noah-mp) # Define Actors specific directories -set(ACTORS_DIR ${PARENT_DIR}/build/source/actors) +set(ACTORS_DIR ${PARENT_DIR}/build/source) set(FILE_ACCESS_DIR ${ACTORS_DIR}/file_access_actor) set(JOB_ACTOR_DIR ${ACTORS_DIR}/job_actor) set(HRU_ACTOR_DIR ${ACTORS_DIR}/hru_actor) @@ -229,9 +232,14 @@ set(FILE_ACCESS_ACTOR ${ACTORS_DIR}/file_access_actor/output_container.cpp) set(JOB_ACTOR ${ACTORS_DIR}/job_actor/GRU.cpp - ${ACTORS_DIR}/job_actor/job_actor.cpp) + ${ACTORS_DIR}/job_actor/job_actor.cpp + ${ACTORS_DIR}/job_actor/async_mode.cpp + ${ACTORS_DIR}/job_actor/distributed_job_actor.cpp + ${ACTORS_DIR}/job_actor/node_actor.cpp) set(HRU_ACTOR - ${ACTORS_DIR}/hru_actor/hru_actor.cpp) + ${ACTORS_DIR}/hru_actor/hru_actor.cpp + ${ACTORS_DIR}/hru_actor/hru_batch_actor.cpp) + 
#========================================================================================= # COMPILE PART 3: Collect the subroutines into build groups depending on build type diff --git a/build/build_scripts/build_v4_cluster.sh b/build/build_scripts/build_v4_cluster.sh index 4c70781c927c4a75a6a353b724e1154053188cc1..c00347e5e11fa10dc7bb4d46673d852e1545c48c 100755 --- a/build/build_scripts/build_v4_cluster.sh +++ b/build/build_scripts/build_v4_cluster.sh @@ -6,9 +6,7 @@ module load StdEnv/2020 module load gcc/9.3.0 module load openblas/0.3.17 module load netcdf-fortran/4.5.2 - -# for Actors -module load caf +module load caf # actors export FLAGS_OPT="-flto=1;-fuse-linker-plugin" export SUNDIALS_PATH=/globalhome/kck540/HPC/Libraries/sundials/v7.0/instdir diff --git a/build/includes/job_actor/job_actor.hpp b/build/includes/job_actor/job_actor.hpp index d0ab14ee072f5404f6df05cf82198251affa757a..0476edfb4dbe9e14771f6048c1ff54be2d91cca6 100644 --- a/build/includes/job_actor/job_actor.hpp +++ b/build/includes/job_actor/job_actor.hpp @@ -151,6 +151,11 @@ behavior job_actor(stateful_actor<job_state>* self, Job_Actor_Settings job_actor_settings, HRU_Actor_Settings hru_actor_settings, actor parent); +// TODO: Implement the following behaviors +// behavior data_assimilation_mode() +behavior async_mode(stateful_actor<job_state>* self); + + /** The Job Actor For Internode Communication */ behavior distributed_job_actor(stateful_actor<distributed_job_state>* self, int start_gru, int num_gru, @@ -169,6 +174,10 @@ void spawnHRUActors(stateful_actor<job_state>* self, bool normal_mode); void spawnHRUBatches(stateful_actor<job_state>* self); +void handleFinishedGRU(stateful_actor<job_state>* self, int local_gru_index); + +void finalizeJob(stateful_actor<job_state>* self); + /** Get the information for the GRUs that will be written to the netcdf file */ std::vector<serializable_netcdf_gru_actor_info> getGruNetcdfInfo( int max_run_attempts, std::vector<GRU*> &gru_list); diff --git 
a/build/source/file_access_actor/cppwrap_fileAccess.f90 b/build/source/file_access_actor/cppwrap_fileAccess.f90 index e65b1755fafebfe26fedd1d755dc21ac25e7bd29..50b4b689398e7f7ed670ad967135a370adbbc9fe 100644 --- a/build/source/file_access_actor/cppwrap_fileAccess.f90 +++ b/build/source/file_access_actor/cppwrap_fileAccess.f90 @@ -73,25 +73,26 @@ subroutine fileAccessActor_init_fortran(& ! Variables for forcing USE globalData,only:iRunMode ! define the current running mode USE globalData,only:checkHRU ! index of the HRU for a single HRU run - ! look-up values for the choice of heat capacity computation - USE mDecisions_module,only:enthalpyFD,enthalpyFDlu ! heat capacity using enthalpy - + USE mDecisions_module,only:& monthlyTable,& ! LAI/SAI taken directly from a monthly table for different vegetation classes specified,& ! LAI/SAI computed from green vegetation fraction and winterSAI and summerLAI parameters sameRulesAllLayers, & ! SNTHERM option: same combination/sub-dividion rules applied to all layers rulesDependLayerIndex ! CLM option: combination/sub-dividion rules depend on layer index - USE enthalpyTemp_module,only:T2H_lookup_snow ! module to calculate a look-up table for the snow temperature-enthalpy conversion - USE enthalpyTemp_module,only:T2L_lookup_soil ! module to calculate a look-up table for the soil temperature-enthalpy conversion - - - USE NOAHMP_VEG_PARAMETERS,only:SAIM,LAIM ! 2-d tables for stem area index and leaf area index (vegType,month) USE NOAHMP_VEG_PARAMETERS,only:HVT,HVB ! height at the top and bottom of vegetation (vegType) USE globalData,only:numtim ! number of time steps in the simulation USE globalData,only:fileout ! name of the output file USE globalData,only:ncid ! id of the output file + + ! Modules that pertain to Version 4 (Sundials addition) +#ifdef V4_ACTIVE + USE mDecisions_module,only:enthalpyFD,enthalpyFDlu ! look-up values for the choice of heat capacity computation + USE enthalpyTemp_module,only:T2H_lookup_snow ! 
module to calculate a look-up table for the snow temperature-enthalpy conversion + USE enthalpyTemp_module,only:T2L_lookup_soil ! module to calculate a look-up table for the soil temperature-enthalpy conversion +#endif + implicit none type(c_ptr), intent(in), value :: handle_forcFileInfo @@ -124,19 +125,27 @@ subroutine fileAccessActor_init_fortran(& ! Variables for forcing call c_f_pointer(handle_forcFileInfo, forcFileInfo) call c_f_pointer(handle_output_ncid, output_ncid) - ! Get the initial forcing file information + ! ***************************************************************************** + ! *** read description of model forcing datafile used in each HRU + ! ***************************************************************************** call ffile_info(indxGRU, forcFileInfo, num_forcing_files, err, message) if(err/=0)then; print*, trim(message); return; endif - ! Get and save the model decisions as integers + ! ***************************************************************************** + ! *** read model decisions + ! ***************************************************************************** + ! NOTE: Must be after ffile_info because mDecisions uses the data_step call mDecisions(err,message) if(err/=0)then; print*,trim(message); return; endif + ! TODO: This can be moved to a simple getter the file_access_actor calls num_timesteps = numtim ! Returns to the file_access_actor +#ifdef V4_ACTIVE ! decide if computing enthalpy lookup tables, if need enthalpy and not using hypergeometric function needLookup = .false. if(model_decisions(iLookDECISIONS%nrgConserv)%iDecision == enthalpyFDlu) needLookup = .true. +#endif ! get the maximum number of snow layers select case(model_decisions(iLookDECISIONS%snowLayers)%iDecision) @@ -255,7 +264,7 @@ subroutine fileAccessActor_init_fortran(& ! Variables for forcing ! ! loop through GRUs do iGRU=1,num_gru ! 
calculate the fraction of runoff in future time steps - call fracFuture(outputStructure(1)%bparStruct%gru(iGRU)%var, & ! vector of basin-average model parameters + call fracFuture(outputStructure(1)%bparStruct%gru(iGRU)%var, & ! vector of basin-average model parameters outputStructure(1)%bvarStruct_init%gru(iGRU), & ! data structure of basin-average variables err,message) ! error control if(err/=0)then; print*, trim(message); return; endif @@ -281,6 +290,7 @@ subroutine fileAccessActor_init_fortran(& ! Variables for forcing call paramCheck(outputStructure(1)%mparStruct%gru(iGRU)%hru(iHRU),err,message) if(err/=0)then; print*, message; return; endif +#ifdef V4_ACTIVE ! calculate a look-up table for the temperature-enthalpy conversion of snow for future snow layer merging ! NOTE2: H is the mixture enthalpy of snow liquid and ice call T2H_lookup_snow(outputStructure(1)%mparStruct%gru(iGRU)%hru(iHRU),err,message) @@ -289,7 +299,6 @@ subroutine fileAccessActor_init_fortran(& ! Variables for forcing ! calculate a lookup table for the temperature-enthalpy conversion of soil ! NOTE: L is the integral of soil Clapeyron equation liquid water matric potential from temperature ! multiply by Cp_liq*iden_water to get temperature component of enthalpy -#ifdef V4_ACTIVE if(needLookup)then call T2L_lookup_soil(gru_struc(iGRU)%hruInfo(iHRU)%nSoil, & ! intent(in): number of soil layers outputStructure(1)%mparStruct%gru(iGRU)%hru(iHRU), & ! intent(in): parameter data structure @@ -297,6 +306,11 @@ subroutine fileAccessActor_init_fortran(& ! Variables for forcing err,message) ! intent(out): error control if(err/=0)then; print*, message; return; endif endif +#else + ! V4_ACTIVE not defined: calculate a look-up table for the temperature-enthalpy conversion + call E2T_lookup(outputStructure(1)%mparStruct%gru(iGRU)%hru(iHRU),err,message) + if(err/=0)then; message=trim(message); print*, message; return; endif + #endif ! 
overwrite the vegetation height HVT(outputStructure(1)%typeStruct%gru(iGRU)%hru(iHRU)%var(iLookTYPE%vegTypeIndex)) = outputStructure(1)%mparStruct%gru(iGRU)%hru(iHRU)%var(iLookPARAM%heightCanopyTop)%dat(1) diff --git a/build/source/global/auxiliary.cpp b/build/source/global/auxiliary.cpp index aad469de59e0650309fb9081b316bbc9009af22a..2afef1e0e432ef3ae390cd4bbc2a8aa83cd94d75 100644 --- a/build/source/global/auxiliary.cpp +++ b/build/source/global/auxiliary.cpp @@ -497,12 +497,12 @@ std::vector<int> get_flagVec_by_indx(void* handle, int struct_indx) { return array; } +#ifdef V4_ACTIVE std::vector<std::vector<std::vector<double>>> get_lookup_struct(void *handle) { int size_z; get_size_z_lookup(handle, &size_z); if (size_z == 0) return std::vector<std::vector<std::vector<double>>>(); - std::vector<std::vector<std::vector<double>>> lookup_struct; for (int z = 1; z <= size_z; z++) { int size_var; @@ -520,6 +520,7 @@ std::vector<std::vector<std::vector<double>>> get_lookup_struct(void *handle) { return lookup_struct; } +#endif void get_scalar_data(void* handle, double fracJulDay, double tmZoneOffsetFracDay, int year_length, int computeVegFlux) { diff --git a/build/source/hru_actor/hru_actor.cpp b/build/source/hru_actor/hru_actor.cpp index 3b7282f4fe776b3815229964c2eba52862c3381f..37bd152084cf409db21d6c12c656e664f73f6ae8 100644 --- a/build/source/hru_actor/hru_actor.cpp +++ b/build/source/hru_actor/hru_actor.cpp @@ -62,10 +62,6 @@ behavior hru_actor(stateful_actor<hru_state>* self, int refGRU, int indxGRU, } self->state.num_steps_until_write--; - if (self->state.timestep == 5) { - self->send(self, serialize_hru_v); - } - err = Run_HRU(self); // Simulate a Timestep if (err != 0) { #ifdef SUNDIALS_ACTIVE diff --git a/build/source/hru_actor/hru_modelRun.f90 b/build/source/hru_actor/hru_modelRun.f90 index e4bc6ce77a7a50fedc11ec26d57600650e0b6b27..b72f6b445d16e4dd25f5a4f1223eac9bd5a38fb9 100644 --- a/build/source/hru_actor/hru_modelRun.f90 +++ 
b/build/source/hru_actor/hru_modelRun.f90 @@ -149,10 +149,10 @@ subroutine runPhysics(& ! (compute the exposed LAI and SAI and whether veg is buried by snow) call vegPhenlgy(& ! model control - hru_data%fracJulDay, & ! intent(in): fractional julian days since the start of year - hru_data%yearLength, & ! intent(in): number of days in the current year - ! input/output: data structures model_decisions, & ! intent(in): model decisions + hru_data%fracJulDay, & ! intent(in): fractional julian days since the start of year + hru_data%yearLength, & ! intent(in): number of days in the current year + ! input/output: data structures hru_data%typeStruct, & ! intent(in): type of vegetation and soil hru_data%attrStruct, & ! intent(in): spatial attributes hru_data%mparStruct, & ! intent(in): model parameters diff --git a/build/source/job_actor/async_mode.cpp b/build/source/job_actor/async_mode.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0e494079116aba18a2716f1b9b24f3e34614f56f --- /dev/null +++ b/build/source/job_actor/async_mode.cpp @@ -0,0 +1,107 @@ +#include "job_actor.hpp" +namespace caf { + +behavior async_mode(stateful_actor<job_state>* self) { + aout(self) << "Async Mode Started\n"; + + return { + /*** From file access actor after it spawns ***/ + [=](init_file_access_actor, int num_timesteps) { + aout(self) << "Async Mode: init_file_access_actor\n"; + spawnHRUActors(self, true); + for(auto& gru : self->state.gru_container.gru_list) { + self->send(gru->getGRUActor(), init_hru_v); + self->send(gru->getGRUActor(), update_hru_async_v); + } + }, + + [=](done_hru, int local_gru_index) { + aout(self) << "HRU Done: " << local_gru_index << "\n"; + handleFinishedGRU(self, local_gru_index); + }, + + [=](finalize) { finalizeJob(self); }, + + }; +} + +void finalizeJob(stateful_actor<job_state>* self) { + std::vector<serializable_netcdf_gru_actor_info> netcdf_gru_info = + getGruNetcdfInfo( + self->state.max_run_attempts,self->state.gru_container.gru_list); + 
+ self->state.num_gru_failed = std::count_if(netcdf_gru_info.begin(), + netcdf_gru_info.end(), [](auto& gru_info) { + return !gru_info.successful; + }); + + self->request(self->state.file_access_actor, infinite, finalize_v).await( + [=](std::tuple<double, double> read_write_duration) { + int err = 0; + for (auto GRU : self->state.gru_container.gru_list) + delete GRU; + self->state.gru_container.gru_list.clear(); + self->state.job_timing.updateEndPoint("total_duration"); + aout(self) << "\n________________" + << "PRINTING JOB_ACTOR TIMING INFO RESULTS" + << "________________\n" + << "Total Duration = " + << self->state.job_timing.getDuration("total_duration") + .value_or(-1.0) << " Seconds\n" + << "Total Duration = " + << self->state.job_timing.getDuration("total_duration") + .value_or(-1.0) / 60 << " Minutes\n" + << "Total Duration = " + << (self->state.job_timing.getDuration("total_duration") + .value_or(-1.0) / 60) / 60 << " Hours\n" + << "Job Init Duration = " + << self->state.job_timing.getDuration("init_duration") + .value_or(-1.0) << " Seconds\n" + << "_________________________________" + << "_______________________________________\n\n"; + + deallocateJobActor(&err); + + // Tell Parent we are done + self->send(self->state.parent, done_job_v, self->state.num_gru_failed, + self->state.job_timing.getDuration("total_duration").value_or(-1.0), + std::get<0>(read_write_duration), + std::get<1>(read_write_duration)); + self->quit(); + }); +} + +void handleFinishedGRU(stateful_actor<job_state>* self, int local_gru_index) { + using namespace std::chrono; + auto& gru_container = self->state.gru_container; + chrono_time end_point = high_resolution_clock::now(); + double total_duration = duration_cast<seconds>(end_point - + gru_container.gru_start_time).count(); + gru_container.num_gru_done++; + + aout(self) << "GRU Finished: " << gru_container.num_gru_done << "/" + << gru_container.num_gru_in_run_domain << " -- GlobalGRU=" + << 
gru_container.gru_list[local_gru_index-1]->getGlobalGRUIndex() + << " -- LocalGRU=" << local_gru_index << "\n"; + + gru_container.gru_list[local_gru_index-1]->setRunTime(total_duration); + gru_container.gru_list[local_gru_index-1]->setInitDuration(-1); + gru_container.gru_list[local_gru_index-1]->setForcingDuration(-1); + gru_container.gru_list[local_gru_index-1]->setRunPhysicsDuration(-1); + gru_container.gru_list[local_gru_index-1]->setWriteOutputDuration(-1); + gru_container.gru_list[local_gru_index-1]->setSuccess(); + + + // Check if all GRUs are done + if (gru_container.num_gru_done >= gru_container.num_gru_in_run_domain) { + if(gru_container.num_gru_failed == 0 || self->state.max_run_attempts == 1) + self->send(self, finalize_v); + else + self->send(self, restart_failures_v); + } +} + + + + +} // End of Namespace \ No newline at end of file diff --git a/build/source/job_actor/job_actor.cpp b/build/source/job_actor/job_actor.cpp index b7848be68b22c4f5fc4472038c81ddef09fc22a9..625b718840dc6ff96ec4194d6eae09f5e756acff 100644 --- a/build/source/job_actor/job_actor.cpp +++ b/build/source/job_actor/job_actor.cpp @@ -13,56 +13,41 @@ behavior job_actor(stateful_actor<job_state>* self, HRU_Actor_Settings hru_actor_settings, caf::actor parent) { self->set_down_handler([=](const down_msg& dm) { - aout(self) << "\n\n ********** DOWN HANDLER ********** \n"; - aout(self) << "Lost Connection With A Connected Actor\n"; - aout(self) << "Reason: " << to_string(dm.reason) << "\n"; + aout(self) << "\n\n ********** DOWN HANDLER ********** \n" + << "Lost Connection With A Connected Actor\n" + << "Reason: " << to_string(dm.reason) << "\n"; }); self->set_exit_handler([=](const exit_msg& em) { - aout(self) << "\n\n ********** EXIT HANDLER ********** \n"; - aout(self) << "Exit Reason: " << to_string(em.reason) << "\n"; + aout(self) << "\n\n ********** EXIT HANDLER ********** \n" + << "Exit Reason: " << to_string(em.reason) << "\n"; }); - // Timing Information self->state.job_timing = 
TimingInfo(); self->state.job_timing.addTimePoint("total_duration"); self->state.job_timing.updateStartPoint("total_duration"); - // Set Job Variables self->state.start_gru = start_gru; self->state.num_gru = num_gru; self->state.parent = parent; - - - // Set the settings variables self->state.file_access_actor_settings = file_access_actor_settings; self->state.job_actor_settings = job_actor_settings; self->state.hru_actor_settings = hru_actor_settings; self->state.max_run_attempts = job_actor_settings.max_run_attempts; - // Init the GRU Container self->state.gru_container.num_gru_in_run_domain = num_gru; - char host[HOST_NAME_MAX]; gethostname(host, HOST_NAME_MAX); self->state.hostname = host; - // Initalize global variables calling Fortran Routines - int err = 0; - - - /* - Calls: - - summa_SetTimesDirsAndFiles - - summa_defineGlobalData - - read_icond_nlayers - - Allocates time structures - */ + /* Calls: summa_SetTimesDirsAndFiles(), summa_defineGlobalData(), + read_icond_nlayers(), Allocates time structures */ self->state.job_timing.addTimePoint("init_duration"); int file_gru = 0; + int err = 0; job_init_fortran(self->state.job_actor_settings.file_manager_path.c_str(), &self->state.start_gru, &self->state.num_gru, &self->state.num_hru, &file_gru, &err); @@ -75,15 +60,23 @@ behavior job_actor(stateful_actor<job_state>* self, self->state.start_gru, self->state.num_gru, self->state.num_gru, file_gru, false); - // Spawn the file_access_actor. 
self->state.file_access_actor = self->spawn(file_access_actor, - self->state.num_gru_info, - self->state.file_access_actor_settings, self); + self->state.num_gru_info, self->state.file_access_actor_settings, self); self->send(self->state.file_access_actor, def_output_v, file_gru); self->state.job_timing.updateEndPoint("init_duration"); aout(self) << "Job Actor Initialized \n"; + + if (job_actor_settings.data_assimilation_mode) { + aout(self) << "Job_Actor: Data Assimilation Mode\n"; + } else { + self->become(async_mode(self)); + return {}; + } + + + return { /*** From file access actor after it spawns ***/ [=](init_file_access_actor, int num_timesteps) { @@ -390,23 +383,20 @@ void spawnHRUActors(stateful_actor<job_state>* self, bool normal_mode) { gru_container.run_attempts_left--; for (int i = 0; i < gru_container.num_gru_in_run_domain; i++) { - auto global_gru_index = gru_container.gru_list.size() - + self->state.start_gru; + auto global_gru_index = gru_container.gru_list.size() + + self->state.start_gru; auto local_gru_index = gru_container.gru_list.size() + 1; auto gru = self->spawn(hru_actor, global_gru_index, local_gru_index, - self->state.hru_actor_settings, - self->state.file_access_actor, self); + self->state.hru_actor_settings, self->state.file_access_actor, self); // Create the GRU object (Job uses this to keep track of GRU status) gru_container.gru_list.push_back(new GRU(global_gru_index, - local_gru_index, gru, - self->state.dt_init_start_factor, - self->state.hru_actor_settings.rel_tol, - self->state.hru_actor_settings.abs_tol, - self->state.max_run_attempts)); + local_gru_index, gru, self->state.dt_init_start_factor, + self->state.hru_actor_settings.rel_tol, + self->state.hru_actor_settings.abs_tol, self->state.max_run_attempts)); - if (normal_mode) self->send(gru, update_hru_async_v); + // if (normal_mode) self->send(gru, update_hru_async_v); } }