diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 103d4b22f..7bdd2c883 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: [windows-latest, ubuntu-latest] - python-version: [3.8] # , 3.7, 3.6] + python-version: [3.7] # , 3.7, 3.6] run-type: [std] # test_repo: [""] # test_dir: [""] diff --git a/documentation/c6e8be34c728676210e0fa7bc01f3102b8843c1f.wmf b/documentation/c6e8be34c728676210e0fa7bc01f3102b8843c1f.wmf new file mode 100644 index 000000000..721a196e3 Binary files /dev/null and b/documentation/c6e8be34c728676210e0fa7bc01f3102b8843c1f.wmf differ diff --git a/documentation/pestpp_users_guide_v5.1.13.docx b/documentation/pestpp_users_guide_v5.1.18.docx similarity index 67% rename from documentation/pestpp_users_guide_v5.1.13.docx rename to documentation/pestpp_users_guide_v5.1.18.docx index 4c758d51e..9409c38f2 100644 Binary files a/documentation/pestpp_users_guide_v5.1.13.docx and b/documentation/pestpp_users_guide_v5.1.18.docx differ diff --git a/documentation/pestpp_users_manual.md b/documentation/pestpp_users_manual.md index 15898fe26..92e092b9e 100644 --- a/documentation/pestpp_users_manual.md +++ b/documentation/pestpp_users_manual.md @@ -1,13 +1,13 @@ A close up of a purple sign Description automatically generated -# Version 5.1.13 +# Version 5.1.18 PEST++ Development Team -February 2022 +July 2022 # Acknowledgements @@ -70,7 +70,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI # Table of Contents -- [Version 5.1.13](#s1) +- [Version 5.1.18](#s1) - [Acknowledgements](#s2) - [Preface](#s3) - [License](#s4) @@ -153,157 +153,152 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI - [4.15 Prior Information Section](#s8-15) - [4.16 Regularization Section](#s8-16) - [4.17 Control Variables for PEST++ Programs ](#s8-17) -- [](#s9) - - [4.18 Keyword and External File Control File Format](#s9-1) - - [4.18.1 Keyword and Consolidated Algorithmic Variables](#s9-2) - - [4.18.2 External file support](#s9-3) -- [](#s10) -- [5. Running PEST++ Programs](#s11) - - [5.1 General](#s11-1) - - [5.2 Model Runs in Serial](#s11-2) - - [5.2.1 Concepts](#s11-2-1) - - [5.2.2 Running PESTPP-XXX](#s11-2-2) - - [5.3 Model Runs in Parallel](#s11-3) - - [5.3.1 Concepts](#s11-3-1) - - [5.3.2 Manager to Agent Communication](#s11-3-2) - - [5.3.3 Running PESTPP-XXX as Manager and Agent](#s11-3-3) - - [5.3.4 Run Management Record File](#s11-3-4) - - [5.3.5 Run Management Control Variables ](#s11-3-5) - - [5.4 Run Book-Keeping Files](#s11-4) -- [6. PESTPP-GLM](#s12) - - [6.1 Introduction](#s12-1) - - [6.2.1 Basic Equations](#s12-1-1) - - [6.2.2 Choosing the Regularization Weight Factor](#s12-1-2) - - [6.2.3 Inter-Regularization Group Weighting](#s12-1-3) - - [6.2.4 Choosing Values for the Marquardt Lambda](#s12-1-4) - - [6.2.5 Singular Value Decomposition](#s12-1-5) - - [6.2.6 SVD-Assist ](#s12-1-6) - - [6.2.7 Expediting the First Iteration](#s12-1-7) - - [6.2.8 First Order, Second Moment Uncertainty Analysis and Monte Carlo](#s12-1-8) - - [6.2.9 Model Run Failure](#s12-1-9) - - [6.2.10 Composite Parameter Sensitivities](#s12-1-10) - - [6.2.11 Other Controls](#s12-1-11) - - [6.2.12 Running PESTPP-GLM](#s12-1-12) - - [6.2.13 PESTPP-GLM Output Files](#s12-1-13) - - [6.3.4 Running PESTPP](#s12-1-14) - - [6.3.5 PESTPP-GLM Output Files](#s12-1-15) - - [6.4 Summary of PESTPP-GLM Control Variables](#s12-2) - - [6.4.1 General](#s12-2-1) - - [6.4.2 Control Variables in the PEST Control File ](#s12-2-2) - - [6.4.3 PEST++ Control Variables](#s12-2-3) -- [7. PESTPP-SEN](#s13) - - [7.1 Introduction](#s13-1) - - [7.1.1 General](#s13-1-1) - - [7.1.2 Grouped Parameters](#s13-1-2) - - [7.2 Method of Morris](#s13-2) - - [7.2.1 Elementary Effects](#s13-2-1) - - [7.2.2 Sampling Scheme](#s13-2-2) - - [7.2.3 Control Variables](#s13-2-3) - - [7.3 Method of Sobol](#s13-3) - - [7.3.1 Sensitivity Indices](#s13-3-1) - - [7.3.2 Control Variables](#s13-3-2) - - [7.4 PESTPP-SEN Output Files](#s13-4) -- [8. PESTPP-OPT](#s14) - - [8.1 Introduction](#s14-1) - - [8.1.1 A Publication](#s14-1-1) - - [8.1.2 Overview](#s14-1-2) - - [8.1.3 Calculation of Uncertainty](#s14-1-3) - - [8.1.4 Optimization](#s14-1-4) - - [8.1.5 Chance Constraints](#s14-1-5) - - [8.2 Using PESTPP-OPT](#s14-2) - - [8.2.1The PEST Control File ](#s14-2-1) - - [8.2.2 Decision Variables and Parameters](#s14-2-2) - - [8.2.3 Defining the Objective Function](#s14-2-3) - - [8.2.4 Constraints](#s14-2-4) - - [8.2.5 Observations](#s14-2-5) - - [8.2.6 Regularization ](#s14-2-6) - - [8.2.7 Prior Covariance Matrix](#s14-2-7) - - [8.2.8 Risk](#s14-2-8) - - [8.2.9 Jacobian and Response Matrices](#s14-2-9) - - [8.2.10 Solution Convergence](#s14-2-10) - - [8.2.11 Other Control Variables](#s14-2-11) - - [8.2.12 Final Model Run](#s14-2-12) - - [8.2.13 Restarts](#s14-2-13) - - [8.2.14 Zero Run Solution](#s14-2-14) - - [8.3 PESTPP-OPT Output Files](#s14-3) - - [8.4 Summary of Control Variables](#s14-4) -- [9. PESTPP-IES](#s15) - - [9.1 Introduction](#s15-1) - - [9.1.1 Publications](#s15-1-1) - - [9.1.2 Overview](#s15-1-2) - - [9.1.3 Ensemble Kalman Filters and Ensemble Smoothers](#s15-1-3) - - [9.1.4 Some Repercussions of Using Ensembles](#s15-1-4) - - [9.1.5 Iterations](#s15-1-5) - - [9.1.6 Measurement Noise](#s15-1-6) - - [9.1.7 Regularization](#s15-1-7) - - [9.1.8 Base Realization](#s15-1-8) - - [9.1.9 Parameter Transformation Status](#s15-1-9) - - [9.1.10 Inequality Observations](#s15-1-10) - - [9.1.11 Localization](#s15-1-11) - - [9.1.12 Use of observation noise covariance matrices](#s15-1-12) - - [9.1.13 Detecting and resolving prior-data conflict](#s15-1-13) - - [9.1.14 Multi-modal solution process](#s15-1-14) - - [9.2 Using PESTPP-IES](#s15-2) - - [9.2.1 General](#s15-2-1) - - [9.2.2 Initial Realizations](#s15-2-2) - - [9.2.3 “Regularization”](#s15-2-3) - - [9.2.4 Prior Parameter Scaling](#s15-2-4) - - [9.2.5 The Marquardt Lambda](#s15-2-5) - - [9.2.6 Restarting](#s15-2-6) - - [9.2.7 Failed Model Runs](#s15-2-7) - - [9.2.8 Reporting ](#s15-2-8) - - [9.2.9 Termination Criteria, Objective Functions, and Upgrade Acceptance ](#s15-2-9) - - [9.3 PESTPP-IES Output Files](#s15-3) - - [9.3.1 CSV Output Files](#s15-3-1) - - [9.3.2 Non-CSV Output Files](#s15-3-2) - - [9.4 Summary of Control Variables](#s15-4) -- [10. PESTPP-SWP](#s16) - - [10.1 Introduction](#s16-1) - - [10.2 Using PESTPP-SWP](#s16-2) - - [10.3 Summary of Control Variables](#s16-3) - - [11.1 Introduction](#s16-4) - - [11.1.2 Multi-Objective Particle Swarm optimization](#s16-4-1) - - [11.1.2 Decision Variable Transformations](#s16-4-2) - - [11.1 Using PESTPP-PSO](#s16-5) - - [11.1.1 General](#s16-5-1) - - [11.1.2 Estimation Mode](#s16-5-2) - - [11.2.3. Pareto mode](#s16-5-3) - - [](#s16-6) - - [11.2 PESTPP-PSO Output Files](#s16-7) -- [](#s17) -- [12. PESTPP-DA](#s18) - - [12.1 Introduction](#s18-1) - - [12.2 Theory](#s18-2) - - [12.2.1 Background and Basic Equations](#s18-2-1) - - [12.2.2 Schemes for Assimilating Temporal Data](#s18-2-2) - - [12.2.2.1 Batch Data Assimilation with PESTPP-DA](#s18-2-3) - - [12.2.2.2 Sequential Data Assimilation with PESTPP-DA](#s18-2-4) - - [12.2.4 State estimation, parameter estimation and joint state-parameter estimation](#s18-2-5) - - [12.2.4 Parameter, Observation and Weight Cycle Tables](#s18-2-6) - - [12.2.5 Steps for Data Assimilation implementation](#s18-2-7) - - [12.2.12 Running PESTPP-DA](#s18-2-8) - - [12.2.13 Other uses for PESTPP-DA](#s18-2-9) - - [12.2.14 PESTPP-DA Output Files](#s18-2-10) - - [12.4 Summary of PESTPP-DA Control Variables](#s18-3) - - [12.4.1 General](#s18-3-1) - - [12.4.2 Control Variables in the PEST Control File ](#s18-3-2) - - [12.4.3 PEST++ Control Variables](#s18-3-3) -- [13. PESTPP-MOU](#s19) - - [13.1 Introduction](#s19-1) - - [13.2 Theory](#s19-2) - - [13.2.1 Background and Basic Equations](#s19-2-1) - - [13.2.2 Evaluating chances in a population-based algorithm](#s19-2-2) - - [](#s19-2-3) - - [13.2.3 PESTPP-MOU workflow](#s19-2-4) - - [13.2.4 Advanced functionality](#s19-2-5) - - [13.2.5 Running PESTPP-MOU](#s19-2-6) - - [13.2.6 PESTPP-DA Output Files](#s19-2-7) - - [](#s19-3) - - [13.4 Summary of PESTPP-MOU Control Variables](#s19-4) - - [13.4.1 General](#s19-4-1) - - [13.4.2 Control Variables in the PEST Control File ](#s19-4-2) - - [13.4.3 PEST++ Control Variables](#s19-4-3) -- [14. References](#s20) + - [4.18 Keyword and External File Control File Format](#s8-18) + - [4.18.1 Keyword and Consolidated Algorithmic Variables](#s8-18-1) + - [4.18.2 External file support](#s8-18-2) +- [5. Running PEST++ Programs](#s9) + - [5.1 General](#s9-1) + - [5.2 Model Runs in Serial](#s9-2) + - [5.2.1 Concepts](#s9-2-1) + - [5.2.2 Running PESTPP-XXX](#s9-2-2) + - [5.3 Model Runs in Parallel](#s9-3) + - [5.3.1 Concepts](#s9-3-1) + - [5.3.2 Manager to Agent Communication](#s9-3-2) + - [5.3.3 Running PESTPP-XXX as Manager and Agent](#s9-3-3) + - [5.3.4 Run Management Record File](#s9-3-4) + - [5.3.5 Run Management Control Variables ](#s9-3-5) + - [5.4 Run Book-Keeping Files](#s9-4) +- [6. PESTPP-GLM](#s10) + - [6.1 Introduction](#s10-1) + - [6.2.1 Basic Equations](#s10-1-1) + - [6.2.2 Choosing the Regularization Weight Factor](#s10-1-2) + - [6.2.3 Inter-Regularization Group Weighting](#s10-1-3) + - [6.2.4 Choosing Values for the Marquardt Lambda](#s10-1-4) + - [6.2.5 Singular Value Decomposition](#s10-1-5) + - [6.2.6 SVD-Assist ](#s10-1-6) + - [6.2.7 Expediting the First Iteration](#s10-1-7) + - [6.2.8 First Order, Second Moment Uncertainty Analysis and Monte Carlo](#s10-1-8) + - [6.2.9 Model Run Failure](#s10-1-9) + - [6.2.10 Composite Parameter Sensitivities](#s10-1-10) + - [6.2.11 Other Controls](#s10-1-11) + - [6.2.12 Running PESTPP-GLM](#s10-1-12) + - [6.2.13 PESTPP-GLM Output Files](#s10-1-13) + - [6.3.4 Running PESTPP](#s10-1-14) + - [6.3.5 PESTPP-GLM Output Files](#s10-1-15) + - [6.4 Summary of PESTPP-GLM Control Variables](#s10-2) + - [6.4.1 General](#s10-2-1) + - [6.4.2 Control Variables in the PEST Control File ](#s10-2-2) + - [6.4.3 PEST++ Control Variables](#s10-2-3) +- [7. PESTPP-SEN](#s11) + - [7.1 Introduction](#s11-1) + - [7.1.1 General](#s11-1-1) + - [7.1.2 Grouped Parameters](#s11-1-2) + - [7.2 Method of Morris](#s11-2) + - [7.2.1 Elementary Effects](#s11-2-1) + - [7.2.2 Sampling Scheme](#s11-2-2) + - [7.2.3 Control Variables](#s11-2-3) + - [7.3 Method of Sobol](#s11-3) + - [7.3.1 Sensitivity Indices](#s11-3-1) + - [7.3.2 Control Variables](#s11-3-2) + - [7.4 PESTPP-SEN Output Files](#s11-4) +- [8. PESTPP-OPT](#s12) + - [8.1 Introduction](#s12-1) + - [8.1.1 A Publication](#s12-1-1) + - [8.1.2 Overview](#s12-1-2) + - [8.1.3 Calculation of Uncertainty](#s12-1-3) + - [8.1.4 Optimization](#s12-1-4) + - [8.1.5 Chance Constraints](#s12-1-5) + - [8.2 Using PESTPP-OPT](#s12-2) + - [8.2.1The PEST Control File ](#s12-2-1) + - [8.2.2 Decision Variables and Parameters](#s12-2-2) + - [8.2.3 Defining the Objective Function](#s12-2-3) + - [8.2.4 Constraints](#s12-2-4) + - [8.2.5 Observations](#s12-2-5) + - [8.2.6 Regularization ](#s12-2-6) + - [8.2.7 Prior Covariance Matrix](#s12-2-7) + - [8.2.8 Risk](#s12-2-8) + - [8.2.9 Jacobian and Response Matrices](#s12-2-9) + - [8.2.10 Solution Convergence](#s12-2-10) + - [8.2.11 Other Control Variables](#s12-2-11) + - [8.2.12 Final Model Run](#s12-2-12) + - [8.2.13 Restarts](#s12-2-13) + - [8.2.14 Zero Run Solution](#s12-2-14) + - [8.3 PESTPP-OPT Output Files](#s12-3) + - [8.4 Summary of Control Variables](#s12-4) +- [9. PESTPP-IES](#s13) + - [9.1 Introduction](#s13-1) + - [9.1.1 Publications](#s13-1-1) + - [9.1.2 Overview](#s13-1-2) + - [9.1.3 Ensemble Kalman Filters and Ensemble Smoothers](#s13-1-3) + - [9.1.4 Some Repercussions of Using Ensembles](#s13-1-4) + - [9.1.5 Iterations](#s13-1-5) + - [9.1.6 Measurement Noise](#s13-1-6) + - [9.1.7 Regularization](#s13-1-7) + - [9.1.8 Base Realization](#s13-1-8) + - [9.1.9 Parameter Transformation Status](#s13-1-9) + - [9.1.10 Inequality Observations](#s13-1-10) + - [9.1.11 Localization](#s13-1-11) + - [9.1.12 Use of observation noise covariance matrices](#s13-1-12) + - [9.1.13 Detecting and resolving prior-data conflict](#s13-1-13) + - [9.1.14 Multi-modal solution process](#s13-1-14) + - [9.2 Using PESTPP-IES](#s13-2) + - [9.2.1 General](#s13-2-1) + - [9.2.2 Initial Realizations](#s13-2-2) + - [9.2.3 “Regularization”](#s13-2-3) + - [9.2.4 Prior Parameter Scaling](#s13-2-4) + - [9.2.5 The Marquardt Lambda](#s13-2-5) + - [9.2.6 Restarting](#s13-2-6) + - [9.2.7 Failed Model Runs](#s13-2-7) + - [9.2.8 Reporting ](#s13-2-8) + - [9.2.9 Termination Criteria, Objective Functions, and Upgrade Acceptance ](#s13-2-9) + - [9.3 PESTPP-IES Output Files](#s13-3) + - [9.3.1 CSV Output Files](#s13-3-1) + - [9.3.2 Non-CSV Output Files](#s13-3-2) + - [9.4 Summary of Control Variables](#s13-4) +- [10. PESTPP-SWP](#s14) + - [10.1 Introduction](#s14-1) + - [10.2 Using PESTPP-SWP](#s14-2) + - [10.3 Summary of Control Variables](#s14-3) + - [11.1 Introduction](#s14-4) + - [11.1.2 Multi-Objective Particle Swarm optimization](#s14-4-1) + - [11.1.2 Decision Variable Transformations](#s14-4-2) + - [11.1 Using PESTPP-PSO](#s14-5) + - [11.1.1 General](#s14-5-1) + - [11.1.2 Estimation Mode](#s14-5-2) + - [11.2.3. Pareto mode](#s14-5-3) + - [11.2 PESTPP-PSO Output Files](#s14-6) +- [12. PESTPP-DA](#s15) + - [12.1 Introduction](#s15-1) + - [12.2 Theory](#s15-2) + - [12.2.1 Background and Basic Equations](#s15-2-1) + - [12.2.2 Schemes for Assimilating Temporal Data](#s15-2-2) + - [12.2.2.1 Batch Data Assimilation with PESTPP-DA](#s15-2-3) + - [12.2.2.2 Sequential Data Assimilation with PESTPP-DA](#s15-2-4) + - [12.2.4 State estimation, parameter estimation and joint state-parameter estimation](#s15-2-5) + - [12.2.4 Parameter, Observation and Weight Cycle Tables](#s15-2-6) + - [12.2.5 Steps for Data Assimilation implementation](#s15-2-7) + - [12.2.12 Running PESTPP-DA](#s15-2-8) + - [12.2.13 Other uses for PESTPP-DA](#s15-2-9) + - [12.2.14 PESTPP-DA Output Files](#s15-2-10) + - [12.4 Summary of PESTPP-DA Control Variables](#s15-3) + - [12.4.1 General](#s15-3-1) + - [12.4.2 Control Variables in the PEST Control File ](#s15-3-2) + - [12.4.3 PEST++ Control Variables](#s15-3-3) +- [13. PESTPP-MOU](#s16) + - [13.1 Introduction](#s16-1) + - [13.2 Theory](#s16-2) + - [13.2.1 Background and Basic Equations](#s16-2-1) + - [13.2.2 Evaluating chances in a population-based algorithm](#s16-2-2) + - [](#s16-2-3) + - [13.2.3 PESTPP-MOU workflow](#s16-2-4) + - [13.2.4 Advanced functionality](#s16-2-5) + - [13.2.5 Running PESTPP-MOU](#s16-2-6) + - [13.2.6 PESTPP-DA Output Files](#s16-2-7) + - [13.4 Summary of PESTPP-MOU Control Variables](#s16-3) + - [13.4.1 General](#s16-3-1) + - [13.4.2 Control Variables in the PEST Control File ](#s16-3-2) + - [13.4.3 PEST++ Control Variables](#s16-3-3) +- [14. References](#s17) # 1. Introduction @@ -1478,9 +1473,7 @@ Values that are supplied with a keyword can be integer, real or text (for exampl If a program of the PEST++ suite does not use a keyword, it simply ignores it. Hence a PEST control file can contain keywords that are pertinent to a number of members of the suite. The PEST++ program that is currently using the PEST control file only reads values for control variables that it recognizes. If a ++ argument is not recognized, this will raise an exception; if users want to “forgive” unrecognized ++ args, the “++forgive_unknown_args(true)” should be supplied. -# - -## 4.18 Keyword and External File Control File Format +## 4.18 Keyword and External File Control File Format As of version 4.3.0, the programs in the PEST++ suite support an enhanced control file format, which has been designed to support an increasingly diverse set of tools. This new format requires significantly less “in depth” knowledge of the algorithmic controls over the PEST++ tools from the user as all of these control variables now have internal default values, so arguments that are not specified in the control file simply use these internal defaults. Additionally, the sections of the control file with listed of data (e.g., “\* parameter data”, “\* observation data”, among others) can now be stored in external files; only the name of this file and some optional parsing information is needed in the control file. @@ -1488,7 +1481,7 @@ Empty blanks lines are tolerated in the enhanced format as are lines that start Below is a more detailed description of this enhanced format. -## 4.18.1 Keyword and Consolidated Algorithmic Variables +### 4.18.1 Keyword and Consolidated Algorithmic Variables The enhanced control file format now accepts a “\* control data keyword” section. This section replaces the following section in the standard control file format: @@ -1502,7 +1495,7 @@ The enhanced control file format now accepts a “\* control data keyword” sec Therefore, if users construct a “\* control data keyword” section, these cannot also be listed–an error message will be issued if you try. The format of the “\* control data keyword”, as expected is by keywords. An example section is shown on Figure 4.13. Keyword-value pairs should be separated by one or more whitespace (tabs acceptable as well). For values that have multiple entries (like the PESTPP-GLM control variable “lambdas”), users should comma separate each separate value (as shown on Figure 4.13). If a ++ argument is not recognized, this will raise an exception; if users want to “forgive” unrecognized ++ args, the “forgive_unknown_args true” should be supplied. -## 4.18.2 External file support +### 4.18.2 External file support As shown in Figure 4.13, the enhanced control file format allows users to store list-directed input in external files. These files must have the same number of entries on each line and the location of these files in the user’s directory structure must be the path from where the control file is located to where the external file is located. For example, if the control file is saved in the directory “model” and parameter data is stored in the file “parameters.csv”, the entry in the “\* parameter data external” must be “parameters.csv”, regardless of where the calling program is instantiated. @@ -1580,17 +1573,15 @@ Note that not all external files or even every row in an external file must have Figure 4.13 An enhanced PEST control file. -# - -# 5. Running PEST++ Programs +# 5. Running PEST++ Programs -## 5.1 General +## 5.1 General To simplify the following discussion, let PESTPP-XXX signify the name of a program belonging to the PEST++ suite. This can be any of the programs listed in table 1.1. -## 5.2 Model Runs in Serial +## 5.2 Model Runs in Serial -### 5.2.1 Concepts +### 5.2.1 Concepts Programs of the PEST++ suite have in common the fact that they run a model many times. These runs can be undertaken in serial or in parallel. Where a PEST++ program undertakes model runs in serial, it issues a command directly to the operating system whenever it requires that a model run be undertaken. The command which it issues is provided in the “model command line” section of the PEST control file. @@ -1598,7 +1589,7 @@ On most occasions of its use, execution of PESTPP-XXX should be initiated from t If the model program resides in the folder from which PESTPP-XXX is run, or resides in a folder that is cited in the PATH environment variable, there is no need to prefix the model command with an absolute or relative pathname in the PEST control file. If the model is a batch or shell script, the same applies to executable programs which are cited in this script. However, if this is not the case, then pathnames are required. -### 5.2.2 Running PESTPP-XXX +### 5.2.2 Running PESTPP-XXX Where model runs are undertaken in serial, PESTPP-XXX is run using the command @@ -1609,9 +1600,9 @@ where *case* is the filename base of the PEST control file. If you wish, you can Note that all members of the PEST++ suite support multithreaded template and instruction file processing, which can be an important consideration for very high-dimensional problems where the number of template and instruction files may number in the hundreds. The number of threads to use for processing template and instruction files is controlled by the *num_tpl_ins_threads* option, which is set to 1 by default. Note the parallel agents also use this argument. Also note that using multiple threads to process template and instruction files can consume significantly more memory and clock cycles than a single thread. -## 5.3 Model Runs in Parallel +## 5.3 Model Runs in Parallel -### 5.3.1 Concepts +### 5.3.1 Concepts Tasks that are carried out by programs of the PEST++ suite require that a model be run many times. By undertaking these model runs in parallel, the time required for completion of an inversion or optimization task can be reduced considerably. Nowadays, most modellers have ready access to parallel computing facilities. Modern-day personal computers have multiple CPUs. Most offices have multiple computers connected to each other through an office network. Many modellers have access to a high-performance computing cluster which may provide hundreds of cores. All modellers have access to the computing cloud. @@ -1625,13 +1616,13 @@ When the PESTPP-XXX manager wishes that a model run be carried out, it chooses o In order to write model input files and read model output files, each instance of the PESTPP-XXX agent must have access to template files and instruction files. Normally copies of all template and instruction files that are listed in the PEST control file used by the PESTPP-XXX manager are placed in the working folder of each PESTPP-XXX agent, together with files required by the model. As will be discussed shortly, the PESTPP-XXX agent knows of the existence of these template and instruction files because it reads the PEST control pertaining to the current problem as it commences execution. -### 5.3.2 Manager to Agent Communication +### 5.3.2 Manager to Agent Communication The PESTPP-XXX manager and the PESTPP-XXX agent communicate with each other using the TCP/IP communications protocol. Where a agent resides on a different machine from that of the manager, network management must permit this kind of communication between them. If the manager’s machine can be “pinged” from the agent’s machine, and if the agent’s machine can be “pinged” from the manager’s machine, then you have this permission. When the PESTPP-XXX manager commences execution, it opens a so-called “port”. Agents must be informed of the IP address or hostname of the machine on which the manager is operating, and of the number of this port (see below). In contrast, the PESTPP-XXX manager does not need to know the locations of its agents. It knows that a agent exists through the TCP/IP connection which the agent initiates when it commences execution. Acceptance of this connection is sufficient for the communications pathway to exist. Then, whenever the manager requires that a agent carry out a model run, it sends parameter values to that agent through this connection. When the model run that is supervised by the agent is complete, the manager receives the values of model-calculated observations read from model output files by the agent through the same connection. It is oblivious to the location of the agent, and hence of the computer and folder in which these model runs are being carried out. -### 5.3.3 Running PESTPP-XXX as Manager and Agent +### 5.3.3 Running PESTPP-XXX as Manager and Agent When model runs are parallelized, execution of the PESTPP-XXX manager must be initiated using the following command: @@ -1664,11 +1655,11 @@ in a command line window to ascertain the hostname of a machine. If, for some reason, a agent ceases execution, or the computer on which it resides loses its connection with the manager, it should be re-started using the above command. A agent does not have to be restarted using the “/r” switch as its tasks are repetitive and simple, namely to receive parameters, run the model, and then send model outputs to the PESTPP-XXX manager. -### 5.3.4 Run Management Record File +### 5.3.4 Run Management Record File The PESTPP-XXX manager records all communications between it and its agents in a run management record file. This file is named *case.rmr* where *case* is the filename base of the PEST control file. The agent that execute runs write all information related to communications with the master to the *panther_agent.rec* file, which is written in the local agent directory. -### 5.3.5 Run Management Control Variables +### 5.3.5 Run Management Control Variables A number of PESTPP-XXX control variables are used to control parallel run management. These are now described. @@ -1718,13 +1709,13 @@ In some cases, users may want to retrieve one or more model output files from th **panther_poll_interval** Once a panther agent is initialized, it will start to try to connect to the master instance. On some operating systems, this act of trying connect actually results in a OS-level “file handle” being opened, which, if substantial time passes, can accumulate to a large number of open file handles. To prevent this, the panther agents will “sleep” for a given number of seconds before trying to connect to the master again. The length of time the agent sleeps is controlled by the *panther_poll_interval*, which an interger value of seconds to sleep. By default, this value is 1 second. -## 5.4 Run Book-Keeping Files +## 5.4 Run Book-Keeping Files After running a program of the PEST++ suite, you may notice a number of (possibly large) files in the folder from which it was run. These are *case.rns*, *case.rnu* and *case.rnj*, where *case* is the filename base of the PEST control file. These are binary files that are used for temporary storage of “raw” run results. They contain information that assists in parallel run management, and that facilitates restart of an interrupted PEST++ run – if PESTPP-XXX exits gracefully, these files are removed. These run storage files can be read and processed using pyEMU. -# 6. PESTPP-GLM +# 6. PESTPP-GLM -## 6.1 Introduction +## 6.1 Introduction PESTPP-GLM was the original member of the PEST++ suite; its original name was “PESTPP”. The intention behind its creation was to reproduce much of the functionality of PEST in code that is modular, object oriented and supportive of collaborative programming. At the same time, it was hoped that certain aspects of PEST’s performance could be improved by taking advantage of the “new slate” that was offered by PESTPP-GLM. @@ -1734,7 +1725,7 @@ Like PEST, PESTPP-GLM undertakes highly parameterized inversion. However, if req 6.2 Highly Parameterized Inversion -### 6.2.1 Basic Equations +### 6.2.1 Basic Equations When used to undertake highly parameterized inversion, PESTPP-GLM implements theory and methodologies that are programmed into PEST. However, many implementation details have been improved. In order to explain these details, it is necessary to present some theory. This theory employs matrices and vectors. These are used to describe the linearized inverse problem on which so-called “gradient methods” are based. Through repeated linearization of the inverse problem over successive iterations, these methods achieve their purpose of model calibration, notwithstanding the nonlinear relationship that exists between model outputs and model parameters. Full details of this theory are presented in Doherty (2015). @@ -1801,7 +1792,7 @@ If PESTPP-GLM is not run in “regularization” mode then, or course, regulariz PESTPP-GLM also offers users the option of using “regularized Gauss Levenburg Marquardt” of Hanke (1996), where prior parameter covariance matrix based regularization is “baked in” to the upgrade calculation process. This form of upgrade calculations is activated with the *glm_normal_form(prior)* option. Users can specify a prior parameter covariance matrix via the *parcov* option; if a covariance matrix is not supplied, then one is constructed on the fly using the parameter bounds and the optional *par_sigma_range* argument. In this case, MAXSING and EIGTHRESH become the “knobs” to control regularization – “regularization” mode and the associated variables in the “\* regularization” section are not allowed in this mode of operation. -### 6.2.2 Choosing the Regularization Weight Factor +### 6.2.2 Choosing the Regularization Weight Factor The value that is assigned to *μ*2 strongly influences the inversion process. If it is too small, then overfitting may occur. If it is too large, then the fit that is attained with the calibration dataset may not be satisfactory. @@ -1809,7 +1800,7 @@ When using PESTPP-GLM (and PEST), a user does not have to choose a value for *μ As was discussed in section 4.16, the level of fit that can be achieved with a calibration dataset is often difficult to predict. To accommodate this, it may be useful to endow PHIMLIM with a very low value (for example 1.0E-10). This allows you to find out just how good a fit you can get with the calibration dataset, possibly at the cost of over-fitting that dataset. At the same time, the value of the FRACPHIM control variable should be set to 0.1. If FRACPHIM is set to a value greater than zero, PESTPP-GLM adjusts the target measurement objective function internally to be FRACPHIM times the current value of the actual measurement objective function. Hence regularization is operative, notwithstanding the pursuit of what may turn out to be too good a fit with the calibration dataset. Then, on a subsequent PESTPP-GLM run, PHIMLIM should be set about five percent greater than the best measurement objective function attained through the preceding “range finder” run. -### 6.2.3 Inter-Regularization Group Weighting +### 6.2.3 Inter-Regularization Group Weighting Experience gained in using PEST suggests that a Tikhonov-regularised inversion process can benefit from automatic balancing of weights between different regularization groups (i.e., observation groups that contain regularisation observations). Ideally, a higher weight factor should be applied to regularization groups that feature parameters that are mildly influenced by the calibration dataset than to groups which feature parameters whose values are heavily influenced by the calibration dataset. Not only can this ensure that the former set of parameters do not depart much from their preferred values (which should also be their initial values); it can contribute to numerical stability of the inversion process. @@ -1823,7 +1814,7 @@ where *m* is the number of adjustable parameters featured in the PEST control fi If IREGADJ is set to 1, PESTPP-GLM multiplies the weights pertaining to all members of each regularization group by a group-specific factor. This factor is chosen so that, after this operation has been performed, the total composite sensitivities of all regularization groups are the same. -### 6.2.4 Choosing Values for the Marquardt Lambda +### 6.2.4 Choosing Values for the Marquardt Lambda In contrast to PEST, PESTPP-GLM does not use the control variables specified on the fifth line of the “control data” section of the PEST control file to govern how it chooses Marquardt lambdas. Instead, it receives Marquardt lambda control information from PEST++ control variables. Two of these are *lambdas()* and *lambda_scale_fac()*. Default settings for these variables are as follows: @@ -1835,7 +1826,7 @@ As is apparent, more than one value can be associated with each of these control If only a single value is supplied for *lambdas()*, then this value of lambda is used in all iterations of the inversion process. In contrast, if more than one value is supplied for *lambdas()*, then PESTPP-GLM expands the user-supplied lambda list over the course of the inversion process. The expanded list includes other lambda values that are above and/or below those already comprising the list. A lambda is added to the list if the best parameters obtained during any particular iteration of the inversion process were calculated using an end member of the current list. The value chosen for a new lambda ensures that the previous best lambda is bracketed by members of the expanded list. -### 6.2.5 Singular Value Decomposition +### 6.2.5 Singular Value Decomposition In order to simplify the following equations, it is assumed that regularization observations and/or prior information equations are assimilated into the rows of Z using a weight factor that has already been evaluated. @@ -1889,7 +1880,7 @@ The default values for *svd_pack()* is “redsvd”. This is the method which PE Two variables that are recorded in the PEST control file can also affect PESTPP’s deployment of singular value decomposition to solve an inverse problem. These are the MAXSING and EIGTHRESH variables featured in the “singular value decomposition” section of that file. If a “singular value decomposition” section is not provided in the PEST control file that is read by PESTPP-GLM, then PESTPP-GLM assigns MAXSING a value that is equal to the number of adjustable parameters that define the current inverse problem; at the same time, EIGTHRESH is assigned a value of 1.0E-7. (Recall from section 4.7 that MAXSING and EIGTHRESH are used to define the singular value truncation point.) However, if the PEST control file includes a “singular value decomposition” section, then PESTPP-GLM employs values for MAXSING and EIGTHRESH that it reads from this file. Note also that if *glm_normal_form(prior)* is specified, then MAXSING and EIGTHRESH also function as the consolidated regularization controls. -### 6.2.6 SVD-Assist +### 6.2.6 SVD-Assist Use of PEST’s “SVD-assist” methodology can promulgate significant increases in the numerical efficiency of highly parameterized inversion. In implementing this methodology, PEST estimates the values of so-called “super parameters” in place of the parameters themselves. @@ -1927,7 +1918,7 @@ The handling of Jacobian matrix files (i.e., JCO files) is also somewhat differe Note, however, that if PESTPP-GLM is not undertaking SVD-assisted inversion, then the JCO file that is recorded at the end of each iteration of the inversion process is named *case.jco*. Under these circumstances, this file contains sensitivities with respect to base model parameters. -### 6.2.7 Expediting the First Iteration +### 6.2.7 Expediting the First Iteration In the normal course of events, PESTPP-GLM commences an inversion process by running the model in order to determine the value of the objective function based on initial parameter values. In doing this, it also determines the reference values of all model outputs for use in finite difference derivatives calculation. It then commences the long process of filling the Jacobian matrix. As has been explained, this requires at least as many model runs as there are adjustable parameters. @@ -1937,7 +1928,7 @@ Significant savings can also be made by employing an already-calculated Jacobian If both of the *hotstart_resfile()* and *base_jacobian()* options are selected at the same time, PESTPP-GLM does not need to run the model at all prior to calculating and testing parameter upgrades. This can sometimes be useful when fine-tuning PESTPP-GLM settings for optimal inversion performance. -### 6.2.8 First Order, Second Moment Uncertainty Analysis and Monte Carlo +### 6.2.8 First Order, Second Moment Uncertainty Analysis and Monte Carlo A Jacobian matrix calculated by PESTPP-GLM can be used as a basis for first-order, second-moment (FOSM) parameter and predictive uncertainty analysis as well as FOSM-based Monte Carlo. The equations used by PESTPP-GLM for implementation of FOSM analysis are derived from Bayes equation. They are outlined by Fienen et al (2010) and Doherty (2015). These same equations form the basis for analyses undertaken by the PEST PREDUNC suite of utility programs and by the PyEMU library. @@ -1979,7 +1970,7 @@ Through the *glm_accept_mc_phi* argument, PESTPP-GLM will accept the lowest-phi As well as calculating parameter uncertainties, PESTPP-GLM can also be asked to calculate the prior and posterior uncertainties of some predictions. This functionality is activated through use of the *forecasts()* control variable. The values which must be supplied for this variable are the names of predictions whose uncertainties are sought, or, optionally, the name of a file that stores multiple entries. For example, *forecasts(ar10,ar11)* requests that prior and predictive uncertainties be evaluated for model outputs named “ar10” and “ar11” in the PEST control file on which PESTPP-GLM’s operations are based. Despite the fact that these model outputs are predictions, they must be listed in the “observation data” section of the PEST control file; hence sensitivities of these model outputs to parameters are available as rows of the Jacobian matrix which is calculated by PESTPP-GLM. Model predictions should be endowed with weights of zero in a PEST control file; this is because predictions are not used to constrain parameters, and hence do not form part of a calibration dataset. (PESTPP-GLM issues a warning message if this is not the case.). If the *forecasts* argument is not supplied and the *uncertainty* flag is true, then PESTPP-GLM will treat all zero-weighted observations as forecasts. The uncertainties and lower/upper bounds of forecasts that are specified in this way are listed in the PESTPP-GLM run record file, and in a comma-delimited file named *case.N.pred.usum.csv*. Posterior predictive lower and upper bounds are calculated by subtracting and adding two standard deviations from/to the value of the prediction as calculated by the model using initial or estimated parameter values. -### 6.2.9 Model Run Failure +### 6.2.9 Model Run Failure The inversion process implemented by PESTPP-GLM is an iterative procedure. Each iteration is subdivided into two parts. Finite-difference derivatives are calculated in the first part of each iteration; parameter upgrades are calculated and tested in the second part. Model run failure is much more likely to occur during the second of these parts than in the first of these parts as parameter values may vary significantly from model run to model run in the latter case. Where an updated parameter set precipitates model run failure, PESTPP-GLM deems the objective function to be very high; the offending parameter set is therefore judged to be far from optimal. @@ -1987,7 +1978,7 @@ Model run failure during finite difference derivatives calculation is more worri The *der_forgive()* control variable can be used to govern PESTPP’s behavior under these circumstances. It must be supplied as either *true* or *false*. If it is supplied as *true* (its default value) then model run failure when finite difference derivatives are being calculated is accommodated using the first of the above alternatives, that is through temporary freezing of the parameter at its current value. However, if it supplied as *false*, then model run failure during calculation of finite-difference derivatives precipitates cessation of PESTPP-GLM execution. -### 6.2.10 Composite Parameter Sensitivities +### 6.2.10 Composite Parameter Sensitivities PESTPP-GLM records composite parameter sensitivities in a file named *case.sen* where *case* is the filename base of the PEST control file. These are recorded during each iteration of the inversion process. Two composite parameter sensitivities are recorded. The first is the *csp* statistic of Doherty (2015), calculated using the equation @@ -1995,17 +1986,17 @@ cspj= ((JTQJ)0.5)j/n (6.18) where J is the Jacobian matrix, Q is the weight matrix and *n* is the number of non-zero-weighted observations. PESTPP-GLM also records the composite scaled sensitivity of Hill and Tiedeman (2007) in this same file; see that text for details of its computation. Where regularization is employed in the inversion process, two sets of these two composite sensitivities are calculated. Regularization observations and prior information equations are included in one of them, while these are excluded from the other. Where they are included, the weights applied to regularization are multiplied by the current regularization weight factor. -### 6.2.11 Other Controls +### 6.2.11 Other Controls If the control variable *iteration_summary()* is set to *true*, then PESTPP-GLM records (and continually updates) a comma-delimited file named *case.upg.csv* (where “upg” stands for “upgrade”). This file lists the values of parameters used for every model run in which parameter upgrades are tested. Iteration numbers, lambda values and fractional lengths along parameter upgrade vectors are also recorded in this file. PESTPP-GLM also records files named *case.ipar*, *case.iobj*, *case.isen* and *case.rid*. The first three of these are comma-delimited files; they list iteration-specific values of parameters, objective function components and composite parameter sensitivities respectively. *case.rid* links model runs undertaken for derivatives calculation to run identifiers in the parallel run management file; a user can thus be informed of the agent that undertook a particular finite-difference model run. If the *jac_scale()* control variable is set to *true*, the equations that are used for calculating parameter upgrades are slightly modified from those presented in section 6.2.1. Prior to estimation, parameters are scaled by their sensitivities. Estimated scaled parameters then undergo post-estimation back-transformation; see equation 5.4.6 of Doherty (2015). This strategy can reduce numerical errors in some instances; however, it can also increase computation times. The default value for *jac_scale()* is *true*. -### 6.2.12 Running PESTPP-GLM +### 6.2.12 Running PESTPP-GLM See section 5 of this manual for how to run PESTPP-GLM. As is described in that section, model runs can be undertaken in series or in parallel. In either case, a prematurely terminated PESTPP-GLM run can be restarted by commencing PESTPP-GLM execution using the “/r” command line switch. -### 6.2.13 PESTPP-GLM Output Files +### 6.2.13 PESTPP-GLM Output Files The following table summarizes the contents of files that are recorded by PESTPP-GLM when it is asked to undertake highly-parameterized inversion. Most of these have been discussed above. It is assumed that the PEST control file on which the inversion process is based is named *case.pst*. @@ -2044,11 +2035,11 @@ The following table summarizes the contents of files that are recorded by PESTPP Table 6.1. Files recorded by PESTPP-GLM. -### 6.3.4 Running PESTPP +### 6.3.4 Running PESTPP See chapter 5 of this manual for how to run PESTPP-GLM, with model runs undertaken in serial and with model runs undertaken in parallel. At the time of writing, a prematurely terminated PESTPP-GLM run cannot be restarted when implementing differential evolution. If started using the “/r” switch, it re-commences the DE process. -### 6.3.5 PESTPP-GLM Output Files +### 6.3.5 PESTPP-GLM Output Files When run in order to implement differential evolution optimization, a number of the output files recorded by PESTPP-GLM are actually empty as they pertain to gradient-based inversion. Other output files pertain to run management; these are discussed in section 6.2.13. The only output files that are relevant to DE-based optimization are those listed in the following table. In this table it is assumed that the PEST control file on which DE optimization is based is named *case.pst*. @@ -2060,13 +2051,13 @@ When run in order to implement differential evolution optimization, a number of Table 6.2 PESTPP-GLM output files that are pertinent to DE optimization. It is assumed that the name of the PEST control file is *case.pst*. -## 6.4 Summary of PESTPP-GLM Control Variables +## 6.4 Summary of PESTPP-GLM Control Variables -### 6.4.1 General +### 6.4.1 General This section summarizes variables that control the operation of PESTPP-GLM. First those that feature in the PEST control file are discussed; see chapter 4 of this manual for a full description of the functions that they perform. The roles of PEST++ variables which control the operation of PESTPP-GLM are listed in table 6.3. -### 6.4.2 Control Variables in the PEST Control File +### 6.4.2 Control Variables in the PEST Control File The PESTMODE variable determines whether PESTPP-GLM runs in “regularization” or “estimation” modes. @@ -2074,7 +2065,7 @@ When PESTPP-GLM undertakes gradient based inversion, the NOPTMAX, PHIREDSTP, NPH If the PEST control file on which the inversion process is based contains a “singular value decomposition” section, then the variables NUMSING and EIGTHRESH that appear in this section determine the singular value truncation point. If no “singular value decomposition” section is present in the PEST control file, then the default value for NUMSING is the number of adjustable parameters featured in the PEST control file; the default value for EIGTHRESH is 1.0E-7. -### 6.4.3 PEST++ Control Variables +### 6.4.3 PEST++ Control Variables Table 6.3 lists PEST++ control variables. All of these are optional. If a variable is not supplied, a default value is employed. The value of the default is presented along with the name of each variable in the table below. Variables are grouped in approximate accordance with their roles. @@ -2113,11 +2104,11 @@ Note also that the number of control variables may change with time. Refer to th Table 6.3 PESTPP-GLM control variables. Variables which control parallel run management can be supplied in addition to these. See section 5.3.6. -# 7. PESTPP-SEN +# 7. PESTPP-SEN -## 7.1 Introduction +## 7.1 Introduction -### 7.1.1 General +### 7.1.1 General The purpose of global sensitivity analysis (GSA) is to characterize how model parameters affect model outputs (or a function of model outputs such as an objective function) over a wide range of acceptable parameter values. In doing this, it strives for greater robustness, and for the provision of more information, than local sensitivity analysis based on partial derivatives of model outputs with respect to model parameters. Because local sensitivity analysis pertains to a single point in parameter space, the information that it yields is often insufficient to support an understanding of the behavior of nonlinear models whose outputs depend on combinations of model parameters in complicated and parameter-value-dependent ways. @@ -2139,15 +2130,15 @@ PESTPP-SEN currently supports two GSA meth­ods. These are The Method of Morris is a “one-at-a-time” method (Saltelli et al, 2004). It is computationally efficient and is therefore suitable for use with models whose run times are high. It provides estimates of the first two moments (mean and variance) of the effect that each parameter has on a model output of interest. These statistics acknowledge that a parameter’s sensitivity may be a function not just of its own value, but of the values of other parameters. In doing so, they reveal those parameters that have the most influence on model outputs of interest, and the consistency of these influences over parameter space. The information that it provides may justify the omission of some parameters from a calibration exercise; and/or it may support the design of a simple, fast-running, surrogate model. In contrast, the Method of Sobol has the potential to provide much more detailed information than the Method of Morris. Because it is based on decomposition of variance (Saltelli et al, 2004), it can reveal details of parameter nonlinearity that are beyond the reach of other methods. It can also reveal complex parameter interactions and, by inference, interaction of the processes to which these parameters pertain. Unfortunately, this information comes with a high computational cost. Hence unless Sobol-based global sensitivity analysis is restricted to only a few parameters and a relatively fast-running model, it is generally computationally unaffordable. -### 7.1.2 Grouped Parameters +### 7.1.2 Grouped Parameters There are many occasions where a modeler wishes to explore sensitivities of one or more model outputs to groups of parameters rather than to a single parameter. For example, in the groundwater modeling context, a modeler may wish to explore the sensitivity of a particular model output, or of the calibration objective function, to all of the pilot point parameters which collectively describe the vertical hydraulic conductivity of an aquitard. Analysis of the sensitivity of a model output to a group of parameters is easily accomplished by tying all but one of the members of that group to the remaining parameter; the latter then represents the group. In the above example, a single pilot point parameter would be selected as parent to all other pilot point parameters that collectively represent the vertical hydraulic conductivity of the aquitard. Where the sensitivity of the parent parameter is assessed using a global sensitivity analysis methodology, the joint sensitivity of it, and all of the parameters that are tied to it, are thereby assessed. In implementing this strategy, a modeler must ensure that the values assigned to tied parameters in relation to the parent parameter are realistic, because the ratio of these values will be preserved during the entire sensitivity analysis process. Note that all members of the PEST++ suite (including PESTPP-SEN) implement an alternative means by which all members of a group of parameters can be tied together as a single parameter. Use of the *tie_by_group()* control variable dispenses with the need to specify individual parameter linkages in the manner described above. It is important to note that tying parameters together can change the effect upper and lower bounds of the adjustable parameters – this is necessary to keep the tied parameters within their bounds. The effective bounds for each adjustable parameter are calculated at the start of PESTPP-SEN using the distance that each parameter is from its bounds; this information is reported to the run record file. -## 7.2 Method of Morris +## 7.2 Method of Morris -### 7.2.1 Elementary Effects +### 7.2.1 Elementary Effects The Method of Morris focuses on quantities called “elementary effects”. In describing this methodology, notation used by Morris (1991) and by Saltelli (2008) is adopted. The latter text explains the method particularly well (see chapter 3 of that text); hence the description provided herein is brief. @@ -2169,13 +2160,13 @@ The elementary effect of each parameter has a probability distribution. Using st For each adjustable parameter featured in the “parameter data” section of a PEST control file, PESTPP-SEN computes all of *μ*, *μ*\* and *σ* for the objective function (calculated using equation 3.3) and, optionally, for each model output (i.e., observation) featured in the “observation data” section of the PEST control file. -### 7.2.2 Sampling Scheme +### 7.2.2 Sampling Scheme While the Method of Morris does not have the same theoretical foundations as variance-based sensitivity analysis, its major attraction is that it can provide reasonably robust indications of parameter influence, and parameter influence variability, in a relatively few number of model runs. Its efficiency is an outcome of the way in which it chooses parameters on which to base these model runs. By varying a single, randomly-chosen parameter at a time by the amount Δ described above, the cost of each new addition to the pool through which *μ*, *μ*\* and *σ* are calculated for each parameter is only one model run. At the same time, the sequencing of model runs makes the effects of model nonlinearity and parameter interactions visible in these statistics. Model runs are performed in sequences of *m* runs (where *m* is the number of parameters used by the model), in which each parameter is varied in random order while all other parameters retain their values from the previous model run. At the end of this sequence a revised estimate of statistics pertaining to all EE*i*’s is available. A new model run sequence is then initiated, starting at another random point within the gridded parameter domain. See Morris (1991) and chapter 3 of Saltelli (2008) for full details. The number of model run sequences (denoted as *r* by Morris, 1991) is provided by the user. For a highly nonlinear model, a greater value of *r* leads to more robust estimates of *μ*, *μ*\* and *σ*. -### 7.2.3 Control Variables +### 7.2.3 Control Variables In common with other programs comprising the PEST++ suite, PESTPP-SEN obtains case-defining information from a PEST control file. PEST++ control variables which govern the operation of the Method of Morris can be placed in this file on lines that begin with the “++” character string. @@ -2202,9 +2193,9 @@ Saltelli et al. (2004) suggest the following values for Method of Morris control - 4 to 10 for *morris_r()* -## 7.3 Method of Sobol +## 7.3 Method of Sobol -### 7.3.1 Sensitivity Indices +### 7.3.1 Sensitivity Indices The Method of Sobol is based on the decomposition of variance. It employs theory derived by Sobol (2001) through which it can be shown that any function of an arbitrary number of parameters can be decomposed into summed functions of individual parameters, pairs of parameters, triplets of parameters, etc. From this it follows that the variance of any model output (or function of a model output) can be expressed as separate variances arising from individual parameters, pairs of parameters, triplets of parameters, etc. By discovering and separating these variances, the importance of each parameter to the model output can be revealed. So too can the extent to which the influence of any particular parameter on a model’s output results from its interaction with other parameters rather than being a direct outcome of its own influence. See Sobol (2001) and Homma and Saltelli (1996) for further details. See Saltelli et al. (2004, 2008) for a comprehensive and easily understood discussion of the method, and for further details of concepts that are presented in summary form below. @@ -2246,7 +2237,7 @@ where the symbol x\~i signifies all parameters but *xi* be Notwithstanding its ability to provide a comprehensive characterization of the relationship between a model output and all parameters employed by a model, use of Sobol’s method is compromised by its high model run requirements. Hence the Method of Morris is normally a more practical alternative unless model run times are minimal. -### 7.3.2 Control Variables +### 7.3.2 Control Variables PEST++ variables which control the operation of the Method of Sobol are listed in the following table. As for variables which control PESTPP-SEN’s implementation of the Method of Morris, these must be provided on “++” lines within a PEST control file. @@ -2260,7 +2251,7 @@ PEST++ variables which control the operation of the Method of Sobol are listed i Table 7.2 Variables used by PESTPP-SEN to control the operation of the Method of Sobol. -## 7.4 PESTPP-SEN Output Files +## 7.4 PESTPP-SEN Output Files PESTPP-SEN writes the following output files. It is assumed that the filename base of the PEST control file on which global sensitivity analysis is based is named *case.pst*. @@ -2282,15 +2273,15 @@ PESTPP-SEN writes the following output files. It is assumed that the filename ba Table 7.3 Files written by PESTPP-SEN. It is assumed that the name of the PEST control file is *case.pst*. Data elements in all of the above files are comma delimited. -# 8. PESTPP-OPT +# 8. PESTPP-OPT -## 8.1 Introduction +## 8.1 Introduction -### 8.1.1 A Publication +### 8.1.1 A Publication PESTPP-OPT is described by White et al (2018), where examples of its use are also provided. The following description summarizes information available from this source. See also Wagner and Gorelick (1987) where a similar methodology is described. -### 8.1.2 Overview +### 8.1.2 Overview Sustainable management of a natural system often requires that an optimization problem be solved. Something must be maximized or minimized through adjustment of so-called “decision variables”, subject to certain constraints. For example, it may be desirable to maximize the amount of water extracted from a number of wells (where pumping rates are the decision variables), subject to the constraints that flow in an adjacent stream does not fall below a specified rate, and that groundwater levels in certain observation wells are maintained above certain levels. Design of a contaminant remediation system may attempt to ensure that the cost of water extraction and treatment is minimized subject to the constraint that the contaminant is captured; pumping and injection rates, and the locations of pumping and injection wells, comprise the decision variables in this example. @@ -2302,7 +2293,7 @@ If uncertainty is to be taken into account in imposition of an optimization cons PESTPP-OPT not only solves a constrained optimization problem. It solves a constrained optimization problem that accommodates uncertainties in model outputs to which constraints are applied. These are often referred to as “chance constraints”. In applying chance constraints, PESTPP-OPT assumes that model predictive uncertainty is an outcome of model parameter uncertainty. The latter is, in turn, an outcome of prior parameter uncertainty (i.e., the uncertainty range that emerges from the stochastic nature of expert knowledge), and the extent to which this uncertainty is reduced through the model calibration process. Parameter uncertainty reduction is a function of the information content of the calibration dataset, and the extent to which flow of this information is hampered by the presence of noise within that dataset. -### 8.1.3 Calculation of Uncertainty +### 8.1.3 Calculation of Uncertainty PESTPP-OPT offers three options incorporate model-based constraint uncertainties: supplying model-based constraint “weights” in the pest control file as standard deviations, through the use of “stack-based” constraint uncertainty and through using linear methods. @@ -2352,7 +2343,7 @@ The stack-based form of chance constraint usage is the most rigorous in that it If a stack is “reused” for multiple iterations, the same process is used except the subtract the mean value from each column and add the remaining “anomalies” to the current constraint value. This effectively transfers the stack-based probability distribution (represented by each column of the observation stack) to the current constraint value. -### 8.1.4 Optimization +### 8.1.4 Optimization An optimization problem can be formulated in many ways. For the moment it will be characterized as minimizing an objective function. (A maximization problem can be turned into a minimization problem simply through reversing the sign of the objective function.) The objective function which PESTPP-OPT minimizes must be distinguished from that which is minimized through model calibration. Use of PESTPP-OPT assumes that the model has already been calibrated (or, if not, it assumes that it does not need to be calibrated). The set of parameters that it uses must therefore be those that emerge from the calibration process or (if the model has not been calibrated) those that are of minimized error variance from an expert knowledge point of view. Hence the objective function that is the focus of model calibration is not considered when using PESTPP-OPT. Nevertheless, as will be discussed below, it is implicitly taken into account through the weights that are assigned to observations comprising the calibration dataset that are featured in the PEST control file on which PESTPP-OPT’s operations are based. @@ -2382,7 +2373,7 @@ where *aij* is the element of A that occupies its *i*th ro The optimization algorithm employed by PESTPP-OPT employs a so-called “linear programming” or “simplex” methodology that is accessed through the open-source CLP optimization library (Forrest et al., 2016), developed through the Computational Infrastructure for Operations Research (COIN-OR) project; see Lougee-Heimer (2003). This algorithm is fast and efficient; it can handle hundreds of thousands of decision-variables. The assumption of a linear relationship between model outputs and decision variables is accommodated by repeating the linear optimization process in a series of iterations in which the decision variable response matrix (i.e., A of equation 8.5) is re-computed on each occasion. Where decision-variables are many, this can be a time-consuming process. The iterative nature of the optimization process earns it the name “sequential linear programming”, or simply SLP for short. See Ahlfield and Mulligan (2000) for further details. -### 8.1.5 Chance Constraints +### 8.1.5 Chance Constraints A user of PESTPP-OPT can inform it whether he/she would like the optimization process which it implements to be risk neutral, risk averse, or risk tolerant. In the latter two cases he/she can specify the degree of aversion or tolerance that should characterize that process. Tolerance or aversion is introduced through the way in which model output uncertainty affects the imposition of optimization constraints. @@ -2390,9 +2381,9 @@ Suppose that a user specifies that a model output *o* shall have a value no grea A PESTPP-OPT user must provide one number to characterize his/her approach to risk. This number must be between zero and one. A model-output-specific number, representing the uncertainty of that output, is then added or subtracted from it prior to imposition of optimization constraints on that output. Provision of a value of 0.5 for this variable (signifying risk neutrality) is equivalent to ignoring parameter, and hence predictive, uncertainty. Under these circumstances, PESTPP-OPT does not calculate model output uncertainties at all. This reduces input requirements, at the same time as it accelerates the optimization process by foregoing the need to (re)calculate the J matrix and/or y vectors of equations 8.1 to 8.3. On the other hand, a value of 0.95 specifies that constraints are applied to model outputs which are corrected to represent the upper end of the 95% one-sided confidence level of that prediction. -## 8.2 Using PESTPP-OPT +## 8.2 Using PESTPP-OPT -### 8.2.1The PEST Control File +### 8.2.1The PEST Control File Like other members of the PEST++ suite, execution of PESTPP-OPT is initiated using a command line that references a PEST control file. See chapter 5 of this manual for details. The PEST control file supplied to PESTPP-OPT must define the optimization problem that it must solve. In particular, this PEST control file must inform it of the following: @@ -2416,7 +2407,7 @@ As is the normal protocol for members of the PEST++ suite, variables which contr Each of the above issues is now discussed in detail. -### 8.2.2 Decision Variables and Parameters +### 8.2.2 Decision Variables and Parameters In a PEST control file, each parameter is assigned to a parameter group. PEST control variables which govern the operation of finite difference derivatives are assigned to these groups. These variables are just as important for PESTPP-OPT as they are for other members of the PEST++ suite (and indeed PEST), as PESTPP-OPT may be required to calculate multiple Jacobian and response matrices using finite parameter differences. @@ -2430,7 +2421,7 @@ The names of parameter groups which house external decision variables can be sup The simplex algorithm that PESTPP-OPT employs to minimize the objective function defined by equation 8.4 is quite different from that used by PEST and PESTPP-GLM to minimize the type of objective function that quantifies model-to-measurement misfit. Hence some of the control variables that are pertinent to the latter optimization process are not pertinent to the former process. In particular, decision variables that are adjusted by PESTPP-OPT cannot be log-transformed; however, they can be tied or fixed. As they are altered in order to minimize the management objective function, they are not subject to limits imposed by the FACPARMAX and RELPARMAX variables that are featured in the “control data” section of the PEST control file. -### 8.2.3 Defining the Objective Function +### 8.2.3 Defining the Objective Function The coefficients which are employed in formulating the objective function of equation 8.4 are supplied through the *opt_obj_func()* control variable. PESTPP-OPT is informed whether this function must be maximized or minimized through the *opt_direction()* control variable. The latter must be supplied as either “min” or “max”. @@ -2442,7 +2433,7 @@ Figure 8.1 An external file whose name is supplied with the *opt_obj_func()* var If the *opt_obj_func()* control variable is not provided in the PEST control file that is featured on the PESTPP-OPT command line, PESTPP-OPT assigns coefficients to decision variables itself; each is assigned a coefficient of 1.0. -### 8.2.4 Constraints +### 8.2.4 Constraints Constraints can be applied on model outputs (read from model output files using instruction files) or on prior information equations. PESTPP-OPT allows constraints to be either “less than” or “greater than” constraints; the latter are internally reformulated as “less than” constraints to meet the demands of the linear programming algorithm that it implements. Constraints are identified using observation groups. If an observation group contains “less than” constraints, then its name must begin with “l\_” (that is, the letter “el” followed by an underscore) or “less\_”; if an observation group contains “greater than” constraints then its name must begin with “g\_” or “greater\_”. Do not forget that prior information equations are also assigned to observation groups. @@ -2456,7 +2447,7 @@ In addition to constraints applied to model outputs and prior information equati As presently programmed, PESTPP-OPT requires that at least one constraint be applied to a model output. Hence if the only constraints that are specified in a PEST control file that is read by PESTPP-OPT are those on individual decision variables through their bounds, and/or on prior information equations that feature those decision variables, PESTPP-OPT will cease execution with an appropriate message. -### 8.2.5 Observations +### 8.2.5 Observations If an observation that is featured in a PEST control file is not denoted as a constraint, then it is used in the notional calibration process through which posterior parameter uncertainties are calculated. (It is important to note that the same does not apply to prior information equations; if a prior information equation does not feature a decision variable, then PESTPP_OPT simply ignores it.) These posterior parameter uncertainties are used to calculate the uncertainties of model outputs to which constraints are applied; see equations 8.1 to 8.3. The calibration process is described as “notional” because model parameters are not actually adjusted to fit the calibration dataset. Instead, PESTPP-OPT assumes that parameter values have already been adjusted. The posterior uncertainties associated with these parameters are then calculated using equation 8.2. Observations that are employed in the notional calibration process must be featured in the PEST control file that is provided to PESTPP-OPT so that it can calculate the terms of this equation. However, it is important to note that, in calculating posterior parameter and model output uncertainties, PESTPP-OPT takes no notice of the values of observations that must be supplied in the “observation data” section of the PEST control file, for these do not appear in equations 8.1 to 8.3; only sensitivities are featured in these equations. The same applies to parameters; hence their values do not need to change in implementing the notional calibration exercise that is embodied in these equations. @@ -2464,17 +2455,17 @@ The weights assigned to observations that are cited in the “observation data In some optimization contexts, it may be desirable to set all observation weights to zero. The notional calibration exercise through which a posterior covariance matrix is calculated from a prior covariance matrix is then foregone. In this case, PESTPP-OPT uses the prior covariance matrix to calculate the uncertainties associated with model outputs to which constraints are applied. (This covariance matrix is supplied to PESTPP-OPT in the manner described below.) Alternatively, if all weights are set to zero, a user may provide PESTPP-OPT with a self-calculated posterior parameter covariance matrix in place of the prior parameter covariance matrix for use in calculating parameter and model output uncertainty. For reasons which will be discussed below, this strategy may enhance the efficiency of PESTPP-OPT usage. -### 8.2.6 Regularization +### 8.2.6 Regularization It is recommended practice when calibrating a model to include Tikhonov regularization in the inversion process (often through a suite of prior information equations). Tikhonov regularization expresses expert knowledge as it pertains to parameters. When using PESTPP-OPT, this same expert knowledge is expressed through the prior covariance matrix featured in equations 8.1 and 8.2. All regularization that was employed in a previous calibration exercise should be removed from a PEST control file before using PESTPP-OPT. -### 8.2.7 Prior Covariance Matrix +### 8.2.7 Prior Covariance Matrix A prior covariance matrix can be supplied to PESTPP-OPT using the PEST++ *parcov()* control variable; this variable is also used by PESTPP-GLM and PESTPP-IES. The name of a covariance matrix file (with extension *.cov*), parameter uncertainty file (with extension *.unc*), or binary file containing a covariance matrix (with extension *.jco* or *.jcb*) can be supplied as the value of this keyword. (See appendix B of this manual for specifications of these file types.) Variances and covariances featured in this file must pertain to the logs (to base 10) of parameters that are declared as log-transformed in the PEST control file. There is no need to feature any decision variables in this file, as these do not appear in equations 8.1 and 8.2. The user is reminded that parameter covariance matrices can be calculated using PyEMU, as well as the PPCOV, PPCOV3D, PPCOV_SVA and PPCOV3D_SVA utilities available from the PEST Groundwater Data Utilities suite. If a covariance matrix is not supplied, then PESTPP-OPT calculates a prior covariance matrix itself. In doing so, it assumes that parameters are statistically independent, and that the difference between the upper and lower bounds of a parameter (with log transformation taken into account) is equal to 4 standard deviations of its prior probability distribution. (An alternative number of standard deviations can be provided through the *par_sigma_range()* control variable.) -### 8.2.8 Risk +### 8.2.8 Risk Using the *opt_risk()* control variable, a user specifies his/her disposition with respect to risk. The setting of this variable determines the value of δ*o* discussed in section 8.1. This is the value that is added/subtracted to/from a model output before a constraint is applied to that output. @@ -2484,7 +2475,7 @@ An *opt_risk()* setting of greater than 0.5 indicates risk aversion. For “less A setting for *opt_risk()* that is less than 0.5 indicates risk tolerance. For “less than” constraints, the constraint is applied to *o* - δ*o*. The opposite applies for “greater than” constraints. Suppose that *opt_risk()* is supplied as 0.05. Then, for a “less than” constraint, δ*o* is calculated to be such that there is a 5% chance that the real system state or flux corresponding to a certain set of decision variables is less than *o* - δ*o*. Similarly, for a “greater than” constraint, δ*o* is calculated to be such that there is a 5% chance that the real system state or flux corresponding to a certain set of decision variables is greater than *o* + δ*o*. -### 8.2.9 Jacobian and Response Matrices +### 8.2.9 Jacobian and Response Matrices During every iteration of the constrained optimization process, PESTPP-OPT calculates derivatives of model outputs to which constraints are applied to decision variables whose values are optimized. In accordance with the normal PEST/PEST++ protocol, control variables which govern calculation of finite-difference derivatives are read from the “parameter data” section of the PEST control file. @@ -2496,7 +2487,7 @@ It may be possible to avoid calculation of at least some partial derivatives thr Filling of the J matrix of equations 8.1 and 8.2 can also be avoided if weights assigned to all calibration-relevant observations in the “observation data” section of the PEST control file are set to zero, or if no calibration-relevant observations are included in this section at all. This signifies to PESTPP-OPT that the model is uncalibrated. PESTPP-OPT then uses prior parameter uncertainties, rather than posterior parameter uncertainties, for calculation of δ*o* values for constraint-relevant model outputs. Using the *parcov()* control variable, a user may wish to supply a covariance matrix to PESTPP-OPT instead of letting PESTPP-OPT calculate prior parameter uncertainties itself from parameter bounds (and/or optional *standard_deviation* in external files). Under these circumstances he/she may wish to provide PESTPP-OPT with a posterior parameter covariance matrix instead of a prior covariance matrix. Because PESTPP-OPT “thinks” that this is a prior parameter covariance matrix, and because it has been informed that this matrix does not need modification in accordance with the notional calibration exercise that is embedded in equations 8.1 and 8.2, it simply uses this matrix for calculation of δ*o*; it does not expend model runs to calculate J. This strategy can speed up the optimization process considerably, at the same time as it ensures that δ*o* is calculated using post-calibration uncertainties. -### 8.2.10 Solution Convergence +### 8.2.10 Solution Convergence Notwithstanding the nonlinear nature of most models, the constrained optimization problem that is solved by PESTPP-OPT is formulated as a linear problem. Model nonlinearities are accommodated by solving this problem in a progressive fashion through a series of iterations in which sensitivities to decision variables are re-calculated during every iteration. This sequential linear programming (SLP) process is deemed to be complete when neither the objective function, nor any decision variable, changes by more than a certain (small) amount from one iteration to the next. This amount is supplied by the user as the value of the *opt_iter_tol()* control variable. PESTPP-OPT provides a default value of 0.001 for this variable. @@ -2504,23 +2495,23 @@ As was stated above, PESTPP-OPT uses the open source CLP library supplied by the Nonlinearities of constraint-relevant model outputs with respect to parameters which are not decision variables can be accommodated through intermittent re-calculation of J and y during the SLP process. However, it is important to keep in mind that this strategy constitutes only partial accommodation of this type of nonlinearity, as model parameters which are not decision-variables are not actually varied from iteration to iteration of the SLP process. Re-calculation of J and y accommodates only the effect that changes in the values of decision variables have on these sensitivities. It does not accommodate changes in J and y that may be incurred by variability of model parameters over ranges denoted by their posterior uncertainties. Nor does it accommodate the fact that equations 8.1 to 8.3 assume model linearity with respect to these parameters. -### 8.2.11 Other Control Variables +### 8.2.11 Other Control Variables In common with all other members of the PEST++ suite, a PEST control file used by PESTPP-OPT can include variables that govern parallel run management. See section 5.3 of this manual. -### 8.2.12 Final Model Run +### 8.2.12 Final Model Run Once it has completed the constrained optimization process, PESTPP-OPT undertakes one final model run in which it employs optimized values of all decision variables. The optimized values of these variables thus remain on model input files when PESTPP-OPT ceases execution; model output files reflect these inputs. Undertaking of the final model run can be foregone through the use of the aptly named *opt_skip_final()* option. -### 8.2.13 Restarts +### 8.2.13 Restarts As presently programmed, a prematurely-terminated PESTPP-OPT run cannot be restarted. However, PESTPP-OPT does support use of the *hotstart_resfile()* and *base_jacobian()* control variables. Strategic use of functionality provided by these variables effectively constitutes a restart. -### 8.2.14 Zero Run Solution +### 8.2.14 Zero Run Solution PESTPP-OPT offers functionality for solving the chance-constrained SLP problem without the requirement for any model runs. If a user activates the *base_jacobian()*, *hotstart_resfile()* and *opt_skip_final()* options while setting the NOPTMAX control variable to 1, then PESTPP-OPT will not undertake any model runs at all. Instead, it will solve the chance-constrained linear programming problem specified in the control file, report optimal decision variable values and the final objective function, and then cease execution. This can be a useful strategy for exploring the implications of changing decision variable bounds, constraints, risk and/or any of the factors affecting chance constraints. The latter can include prior parameter uncertainties, and the number of observations (and their weights) used to condition parameters. -## 8.3 PESTPP-OPT Output Files +## 8.3 PESTPP-OPT Output Files Files recorded by PESTPP-OPT are listed in the following table. The contents of this table are based on the assumption that the PEST control file on which constrained optimization is based is named *case.pst*. @@ -2544,7 +2535,7 @@ Files recorded by PESTPP-OPT are listed in the following table. The contents of Table 8.1 PESTPP-OPT output files. It is assumed that the name of the PEST control file is *case.pst*. -## 8.4 Summary of Control Variables +## 8.4 Summary of Control Variables Table 8.2 tabulates PEST++ control variables used by PESTPP-OPT. All of these are optional. If a particular control variable is not supplied, then PESTPP-OPT provides a default value. Where appropriate, the value of the default is presented with the name of the variable in the table below. Variables discussed in section 5.3.6 that control parallel run management are not listed in the following table. @@ -2575,15 +2566,15 @@ Note also that the number of control variables may change with time. Refer to th Table 8.2 PESTPP-OPT control variables. Parallel run management variables can be supplied in addition to these. See section 5.3.6. -# 9. PESTPP-IES +# 9. PESTPP-IES -## 9.1 Introduction +## 9.1 Introduction -### 9.1.1 Publications +### 9.1.1 Publications PESTPP-IES is described by White (2018). The reader is referred to that paper for a description of what it does and how it works, together with an example of its use. Chen and Oliver (2013) describe in detail the theory on which the iterative ensemble smoother methodology implemented by PESTPP-IES rests; they also provide deployment examples. -### 9.1.2 Overview +### 9.1.2 Overview Predictions made by environmental models are accompanied by uncertainty. This applies particularly to models of fluid flow and mass transport through the subsurface, where hydraulic properties and system stresses are often only poorly known. Where models are used to support management decisions, the uncertainties associated with their predictions should be quantified. Unless this is done, it is not possible to assess the range of possible outcomes of a particular management action. Without knowledge of the risks associated with an envisioned management strategy, the basis for a decision to adopt it is flawed. @@ -2609,7 +2600,7 @@ Through a series of successive iterations, PESTPP-IES modifies parameter realiza It is apparent that use of PESTPP-IES constitutes a significant departure from the “calibrate first and do uncertainty analysis later” approach that underpins much environmental model history-matching. PESTPP-IES does not seek a parameter field that is deemed to “calibrate” a model. Instead, it seeks a suite of parameter fields which collectively express posterior parameter uncertainty. Astonishingly, as will be discussed below, the numerical cost of obtaining this suite is often low compared with that required to obtain a single “calibrated” parameter field of post-calibration minimum error variance, particular where parameter numbers are high (which is often required to avoid underestimation of predictive uncertainty). -### 9.1.3 Ensemble Kalman Filters and Ensemble Smoothers +### 9.1.3 Ensemble Kalman Filters and Ensemble Smoothers The Kalman filter is widely used in time series processing and in engineering control. It constitutes an efficient means of processing noisy measurements in order to provide increasingly better estimates of system state, and of parameters that govern processes which determine system state. @@ -2629,7 +2620,7 @@ It is apparent from the above brief description that, unlike PESTPP-GLM and PEST PESTPP-IES includes two different solution techniques: The standard Kalman update (Evensen, 2003) (including the iterative Multiple Data Assimilation scheme of Emerick and Reynolds (2013)) and the ensemble form of the Gauss-Levenburg-Marquardt equation (Chen and Oliver, 2013). These two solution techniques differ in how they calculate parameter updates and, by default, the GLM form of Chen and Oliver (2013) is used by default. Conceptually, if the model is a perfect simulator of the natural system and linear, then the standard Kalman update is optimal in that it is a linearized Bayesian update. However, rarely are natural system models perfect representations and rarely are they linear. In this situation, iteration is needed to resolve nonlinearity. The MDA scheme of Emerick and Reynolds (2013) uses a series of covariance inflation factors chosen such that the sum of the inverse of these factors sums to 1 such that the cumulative effect of the assimilation iterations is theoretically the same as the standard Kalman update. The GLM iterative ensemble smoother update scheme of Chen and Oliver takes a different approach by using the GLM trust region formulation that penalizes parameter changes so that it too can be derived from a linear Bayesian formulation. Some preliminary testing indicates that the GLM IES solution has stronger capabilities to resolve nonlinearities but both the MDA and GLM solution schemes are widely cited in the literature. -### 9.1.4 Some Repercussions of Using Ensembles +### 9.1.4 Some Repercussions of Using Ensembles The most obvious advantage of using ensembles is their ability to sample the posterior parameter probability distribution. This is critical to a model’s support for environmental decision-making. @@ -2641,13 +2632,13 @@ The use of an ensemble to calculate partial parameter derivatives does not come Another issue associated with the use of random parameter fields is that of model stability. If the model (or models) that is run by PESTPP-IES experiences numerical difficulties when provided with some parameter sets, then use of an ensemble of random parameter fields is likely to trigger occasional model run failures, or occasional simulations whose run times are excessively long. As will be discussed below, PESTPP-IES includes functionality to accommodate this problem. -### 9.1.5 Iterations +### 9.1.5 Iterations The parameter adjustment algorithm that is implemented by PESTPP-IES is referred to as an “iterative ensemble smoother”. This reflects the fact that the process through which a prior ensemble becomes a posterior ensemble requires a number of iterations of parameter field adjustment, these iterations being necessary to deal with nonlinearity of the parameter-to-model output relationship. During each of these iterations, a new approximation to the Jacobian matrix is calculated based on the latest ensemble. Upgrades to parameter sets are then calculated using this matrix. The process is then repeated. An important prerequisite to success of the parameter upgrade process is use of an appropriately valued Marquardt lambda. The Marquardt lambda plays the same role in ensemble parameter field adjustment as it does in single parameter field adjustment such as that undertaken by PESTPP-GLM and PEST. A high value of lambda results in calculation of a damped (i.e., shortened) parameter upgrade vector that is aligned with the gradient of the objective function. This promulgates rapid improvement in the objective function where the latter is far above its minimum. A low value of lambda promulgates better navigation of an objective function surface in which the minimum lies at the bottom of a narrow valley, the existence of which is an outcome of a high degree of post-calibration parameter correlation. Ideally the Marquardt lambda should fall from a high value to a low value as the iterative parameter adjustment process progresses. Like PEST and PESTPP-GLM, PESTPP-IES employs a trial-and-error procedure to find the best value of the Marquardt lambda to employ at any iteration of the ensemble-based inversion process. However, numerical efficiency precludes testing of multiple lambdas when adjusting each parameter field in the entire ensemble, as this would incur an unduly large computation burden. Instead, lambda values are tested using only a few parameter fields. Once the best lambda is identified through this process, the remaining members of the ensemble are adjusted using this lambda. As was stated above, these same runs that are used for testing parameter upgrades are also used for calculation of a new Jacobian matrix. -### 9.1.6 Measurement Noise +### 9.1.6 Measurement Noise Variability between parameter fields that sample the posterior parameter probability distribution (and hence quantify the uncertainty of this distribution) arises from three sources. The first is prior parameter uncertainty. The second is a lack of information in the calibration dataset by which to reduce this uncertainty. The third arises from the fact that the calibration dataset is contaminated by measurement noise. @@ -2665,7 +2656,7 @@ Optionally, users can forego the use of measurement noise realizations through t Still another option to cope with the need to define weights and noise that are non-commensurate is to employ the *external* control file section format for observation data (e.g., *\* observation data external*) and in the external observation data file(s), supply the column *standard_deviation* as values of expected noise and supply the *weight* column in the external files as the values necessary to form a balanced composite objective function. In this way, PESTPP-IES will generate realizations of observation noise using the covariance matrix implied by the *standard_deviation* entries and will use the weights during the upgrade calculation process and during objective function calculations. -### 9.1.7 Regularization +### 9.1.7 Regularization The term “regularization” generally describes numerical devices that are employed to achieve uniqueness of ill-posed inverse problems. Regularization is thus fundamental to the notion of calibration. If regularization is properly implemented, the calibrated parameter field is one of minimized error variance. In most calibration contexts, regularization eliminates from the calibrated parameter field any heterogeneity that is not supported by direct measurements of hydraulic properties or by the calibration dataset. @@ -2677,13 +2668,13 @@ It is important to note that if PESTPP-IES is provided with a PEST control file As with PEST(\_HP) and PESTPP-GLM, the SVD truncation controls (i.e., MAXSING and EIGTHRESH) can also be used to enforce regularization in PESTPP-IES since these inputs control the number of singular components used in the upgrade calculation process. Setting MAXSING \< the number of realizations will result in fewer singular components be used in the upgrade process, which effectively limits the parameter adjustments. Additional regularization can also be implemented by using large lambda values and small *lambda_scale_fac* (i.e., line search) values. The optimal regularization strategy to use for any given PESTPP-IES analysis depends heavily on the problem, but generally, MAXSING seems to be the most effective and efficient means of controlling the level of fit. -### 9.1.8 Base Realization +### 9.1.8 Base Realization Optionally (and by default), the parameter ensemble used by PESTPP-IES can include a “base realization”. Parameter values that comprise this realization are those which are listed in the “parameter data” section of the PEST control file which PESTPP-IES reads on commencement of execution. Ideally, this set of parameter values are those of minimum pre-calibration error variance; that is, they comprise the expected values of parameters from an expert knowledge point of view. PESTPP-IES pairs this realization with an observation dataset that has no “manufactured” measurement noise associated with it; this dataset is comprised of measurements that appear in the “observation data” section of the PEST control file. By monitoring the fate of the parameter set comprising this base realization, a user can witness the effect that the ensemble smoother process has on a non-random parameter field. Any bias that is introduced to this parameter field, or any incredulous heterogeneity that is introduced to this parameter field, is also presumably introduced to the other random parameter fields which comprise the ensemble. Inspection of this field can aid a modeller in assessing whether, in his/her opinion, the parameter field ensemble that emerges from the ensemble smoother process comprises a legitimate sample of the posterior parameter probability distribution. -### 9.1.9 Parameter Transformation Status +### 9.1.9 Parameter Transformation Status The number of individual parameters which comprise a realization is equal to the number of parameters that are listed in the “parameter data” section of a PEST control file. However, parameters that are fixed in the “parameter data” section of the PEST control file are never varied from their initial values. Tied parameters maintain a fixed ratio with parent parameters, this ratio being determined by values provided for respective parameters in the “parameter data” section of the PEST control file. Note however, that if maintenance of this ratio takes a tied parameter beyond its bound (as provided in the PEST control file), then the bound is ignored, regardless of the setting of the *ies_enforce_bounds()* control variable; see below. (PEST and PESTPP-GLM treat tied parameters in the same way.). @@ -2691,7 +2682,7 @@ Where a user provides his/her own parameter ensembles, there are circumstances w As is usual practice in the PEST++ suite, if a parameter is designated as log-transformed in the “parameter data” section of a PEST control file, then adjustments that are made to the value of the parameter are actually made to the log of the parameter’s value. This takes place behind the scenes, and is invisible to the user. Strategic log-transformation of parameters can render a nonlinear problem much more linear; this can enhance the speed with which the objective function is reduced as parameter realizations comprising an ensemble are adjusted. In fact, most parameters are better log transformed than untransformed; nevertheless, sometimes log-transformation is inappropriate, for example if a parameter can adopt both positive and negative values, or if it represents the value of a quantity whose datum is arbitrary. -### 9.1.10 Inequality Observations +### 9.1.10 Inequality Observations PESTPP-IES introduces a special observation type that is not available in PESTPP-GLM or PEST, but resembles constraints supported by PESTPP-OPT. This is the “one way observation” type, which is synonymous with the inequality constraints used in PESTPP-OPT. For observations of this type, a residual is zero unless model outputs are either greater than or less than its “measured” value; the user specifies which of these apply. This reflects the nature of some types of measurements. However, their use is broader than this. “Greater than” and “less than” observations can comprise a powerful mechanism for inserting “soft knowledge” into the history-matching process. @@ -2699,7 +2690,7 @@ If an observation belongs to an observation group whose name begins with the str Similarly, if an observation belongs to an observation group whose name begins with the string “l\_” or “less\_”, then this observation is a “less than” observation. No objective function penalty is incurred if the modelled value of the pertinent quantity is less than the measured value listed in the “observation data” section of the PEST control file. However, if the model-calculated value is greater than the measured value, the objective function penalty is calculated in the usual manner, that is as the squared residual times the squared weight. -### 9.1.11 Localization +### 9.1.11 Localization Calculating an empirical cross-covariance between large numbers of parameters and observations from a limited number of realizations is likely to result in spurious cross-correlations. Because of this, some parameters will be adjusted when they should not be adjusted. Furthermore, when large numbers of independent observations comprise a calibration dataset, a small ensemble size will almost certainly not provide enough degrees of freedom to reproduce these data. To combat these problems, users can employ localization. The term “localization” comes from ensemble Kalman filter parlance. It refers to a strategy whereby only “local” covariances inform unmeasured states in a spatially distributed filtering problem. @@ -2723,7 +2714,7 @@ The automatic adaptive localization process can be used in conjunction with a lo If the *ies_verbose_level()* flag is set to greater than 1, the automatic adaptive localization process implemented by PESTPP-IES will record the resulting localization matrix in a file named *case.N.autoadaloc.mat*, where case is the filename base of the PEST control file. It will also record a CSV file containing results of the adaptive localization process as *case.N.autodaloc.csv*. Both of these are recorded at the end of each iteration; *N* is the iteration number. The automatic adaptive localization process can be computationally demanding. However, it can be multi-threaded. This option is activated using the *ies_num_threads()* control variable. -### 9.1.12 Use of observation noise covariance matrices +### 9.1.12 Use of observation noise covariance matrices In standard operation model, PESTPP-IES will generate the observation noise covariance matrix (required in the PESTPP-IES parameter adjustment equation) as a diagonal matrix with diagonal entries equal to one over the squared weights listed in the control file. Using this matrix assumes there is no correlation between observations, an assumption that is not always valid, especially in the presence of model error (Doherty and Welter, 2010). Note also that with the version 2 control file, users can specify a “standard_deviation” column in an “observation data external” file – this standard deviation will be used in place of weights for generating the observation noise covariance matrix. @@ -2737,7 +2728,7 @@ In this way, one effectively treats the observation noise covariance matrix as a Note that as presently coded, the residual covariance matrix is a dense matrix and it is formed for all non-zero-weighted observations simultaneously. This means that if the number of non-zero-weighted observations is greater than about 20,000, this matrix will likely not fit in memory. -### 9.1.13 Detecting and resolving prior-data conflict +### 9.1.13 Detecting and resolving prior-data conflict Closely related to the concept of measurement noise and the associated ensemble (described previously) is the concept of prior-data conflict (Evans and Moshonov (2006), Alfonso and Oliver (2019)). In the most general sense, prior-data conflict is a situation where the simulated outputs from the prior parameter ensemble to not “agree” with the observed values (plus, optionally, measurement noise), where “agree” is measured by the statistical distance between the ensemble of simulated outputs vs the ensemble of observed values plus noise realizations. If these two ensembles do not “agree”, then that implies that extreme parameter values and/or extreme parameter combinations will be needed to ultimately reproduce these conflicted observation values. In this case, the term “extreme” can be used interchangeably with the term “biased”. It is easy to see that continuing with parameter adjustments in the presence of prior-data conflict is a sure way to generate parameter bias, and ultimately, forecast bias. @@ -2745,7 +2736,7 @@ While detecting prior-data conflict is relatively simple (and PESTPP-IES will do With these options active, PESTPP-IES will remove observations in a prior-data conflict state from the parameter adjustment process, that is, these observations will not feature in the residual matrix used for upgrade calculations. -### 9.1.14 Multi-modal solution process +### 9.1.14 Multi-modal solution process The theory that underpins PEST, PESTPP-GLM and PESTPP-IES is designed to seek a single minimum of the objective function (e.g., a single peak of the likelihood function). For many data assimilation problems however, the objective function surface is not convex with a single minimum located neatly at the bottom. Instead, it maybe pitted with local minimum and/or contain a curving high-dimensional trough of nearly equal objective function minimum. In the context of deterministic parameter estimation, the goal is to seek a unique minimum of the objective function, that is seeking a minimum error variance solution. However, in the context of uncertainty quantification, and especially in the context of evaluating the likelihood of an unwanted outcome, exploring local minima and/or this high-dimensional trough is paramount. @@ -2763,9 +2754,9 @@ Closely related to the multimodal solution process is the use of a “weights” Figure 9.2 – A demonstration of the multi-modal solution process using a weight ensemble on the ZDT1 benchmark problem. The standard solution process using single weight vector drives the posterior towards a single point, while the multi-modal upgrade process uses unique weights on each of the two objectives (observations in the control file) such that each realization targets a different point on the trade-off between the two objectives. -## 9.2 Using PESTPP-IES +## 9.2 Using PESTPP-IES -### 9.2.1 General +### 9.2.1 General The parameter adjust algorithm implemented by PESTPP-IES is described in detail by Chen and Oliver (2013). The reader is referred to that publication for a complete mathematical description of that algorithm; see also the description presented by White (2018). We now focus on PESTPP-IES implementation details, and on control variables that are available for tuning the Chen and Oliver algorithm implemented by PESTPP-IES to particular history-matching contexts. @@ -2773,7 +2764,7 @@ As is the usual protocol for members of the PEST++ suite, values of control vari Like most programs of the PEST++ suite, PESTPP-IES can be run without any PEST++ control variables. PESTPP-IES then provides default values for all of these variables. For example, the default position of PESTPP-IES is to assume that the prior parameter covariance matrix is diagonal, and that parameter bounds span four standard deviations of the prior probability distribution of each parameter. Nevertheless, users are encouraged to use PESTPP-IES control variables in order to ensure that its performance is optimized for their particular modelling context. -### 9.2.2 Initial Realizations +### 9.2.2 Initial Realizations **Realizations** Initial parameter realizations can be generated by PESTPP-IES (the default), or they can be supplied by the user. The number of realizations that PESTPP-IES generates is set by the *ies_num_reals()* control variable; the default value is 50. @@ -2815,7 +2806,7 @@ The optional *ies_observation_ensemble()* keyword provides the name of a CSV or That is worth saying again: it is important to note that PESTPP-IES does not require that user-supplied parameter and observation ensembles share realization names. If a user supplies either a parameter or observation ensemble, PESTPP-IES will check for realization name commonality between the initial parameter and observation ensemble, and if they share all realization names but are not aligned, PESTPP-IES will reorder the observation ensemble. If the realization names are not common between these two ensembles, but there are some shared names, PESTPP-IES will warn the user and continue. -### 9.2.3 “Regularization” +### 9.2.3 “Regularization” Chen and Oliver (2013) provide two equations through which parameters comprising a particular realization within an ensemble are adjusted to provide a better fit with the calibration dataset. These are equations 18 and 19 in their paper. Equation 18 is expensive to compute. It incorporates a term that penalizes adjusted parameter fields whose statistical properties depart too far from those which characterize the prior parameter distribution. Equation 19 is simpler, and less expensive to compute. It adjusts parameters on the basis of current model-to-measurement misfit only. A user can inform PESTPP-IES to use the simpler and less expensive equation by providing the value *true* to the *ies_use_approx()* control variable. When *ies_use_approx* is set to *false*, a penalty for changing parameter values is enforced within the upgrade calculation process. @@ -2845,13 +2836,13 @@ Where a user-supplied CSV or JCO/JCB file provides initial parameter realization Note that the default value for *ies_use_empirical_prior()* is *false*. Hence PESTPP-IES employs a user-supplied covariance matrix for the above roles if one is available. Note also that, even if *ies_use_empirical_prior()* is set to *true*, a user-supplied covariance matrix is always used in the first of the above roles if it can be accessed through a *parcov()* file. If this file is not available so that an empirical covariance matrix is used for this task, off-diagonal elements of this matrix are not automatically set to zero. -### 9.2.4 Prior Parameter Scaling +### 9.2.4 Prior Parameter Scaling Like PESTPP-GLM and PEST, PESTPP-IES uses a Jacobian matrix as a basis for parameter adjustment. The mathematics of parameter adjustment which all of these programs implement is very similar, differing only in some of the details of how to handle problem ill-posedness. The major difference between PESTPP-IES on the one hand and PESTPP/PEST on the other hand is in how the Jacobian matrix is calculated. PESTPP-IES does not use finite parameter differences. Instead, it runs the model using the suite of random parameter fields that comprise an ensemble. It then inspects model outputs that correspond to members of the calibration dataset and calculates cross-covariances between these and individual parameters. From these covariances, with some matrix manipulation, it calculates an approximation to the Jacobian matrix. Where the number of realizations that comprise an ensemble is less than the number of adjustable parameters featured in the PEST control file, this Jacobian matrix is column-rank-deficient. Nevertheless, provided its rank is higher than the dimensionality of the calibration solution space, it can support attainment of parameter values which provide a good fit between model outputs and members of the calibration dataset. In calculating model-output-to-parameter cross-covariances, certain numerical advantages can be gained if differences between individual parameter realizations and the mean parameter field are scaled. This is done by dividing the difference between each parameter and its mean by the prior standard deviation of that parameter. PESTPP-IES performs this scaling if the *ies_use_prior_scaling()* control variable is set to *true*. Experience has demonstrated that prior scaling can be beneficial for problems that involve a very high number of parameters (over three hundred thousand), but that it is not so effective for problems that involve fewer parameters. The default value for *ies_use_prior_scaling()* is *false*. -### 9.2.5 The Marquardt Lambda +### 9.2.5 The Marquardt Lambda The Marquardt lambda plays a pivotal role in gradient based inversion. Use of a high lambda value in early iterations of an inversion process, and a low lambda value later in that process, can have a large impact on the rate at which a good fit with the calibration dataset is attained. However, the best value of the Marquardt lambda to use during any particular iteration must often be determined by trial and error. PESTPP-GLM and PEST test a number of parameter upgrades, calculated using different values of the Marquardt lambda; the cost is one model run for each tested lambda. Optionally, upgrades calculated using fractional lengths along these parameter upgrade vectors can also be tested. The parameter set that leads to the lowest objective function is selected as the upgraded parameter set. @@ -2877,7 +2868,7 @@ If, after NPHINORED attempts, PESTPP-IES is not able to find a lambda and line s Alternatively, if the mean objective function attained through the lambda and line search factor process described above is less than *ies_accept_phi_fac()* times the prevailing mean objective function for the ensemble subset, PESTPP-IES applies the best lambda and line search factor to the remainder of the ensemble. If the mean objective function for the entire ensemble was reduced from its prevailing mean then, on the next iteration of the smoother process, PESTPP-IES lowers the Marquardt lambda by applying a factor of *lambda_dec_fac()* to its current value. The default value of *lambda_dec_fac()* is 0.75. -### 9.2.6 Restarting +### 9.2.6 Restarting As has already been discussed, if either you or PESTPP-IES have generated a set of random parameter fields (or PESTPP-IES has improved them from a set of previous parameter fields), these can be provided to a newly-restarted PESTPP-IES parameter adjustment process using the *ies_parameter_ensembles()* control variable. If a model run has been undertaken for each of these fields (either by you or by PESTPP-IES), then the iterative ensemble smoother can be initiated from these parameter fields in conjunction with the model outputs which correspond to them. PESTPP-IES is instructed to do this using the *ies_restart_obs_ensemble()* control variable. The value for this variable is the name of a CSV or JCO/JCB file containing model outputs corresponding to the set of parameter fields which PESTPP-IES already has in its possession. @@ -2889,7 +2880,7 @@ In the event of model run failure for certain realizations, the parameter and si PESTPP-IES makes an exception to this protocol, however, if realization names are the same in filenames supplied with the *ies_parameter_ensemble()* and *ies_restart_obs_ensemble()* keywords. (This happens automatically if these files were written by PESTPP_IES.) In this case PESTPP-IES links these names to realization names appearing in the *ies_observation_ensemble()* CSV file, ignoring “lost” realizations from this file in the process. Alternatively, a user can remove lost realizations from the *ies_observation_ensemble()* file him/herself. This is easily accomplished using the Python Pandas library. The easiest and safest way to restart PESTPP-IES is by supplying *ies_observation_ensemble()* and *ies_parameter_ensemble()* as the “base” observation ensemble (that is observation values plus noise realizations) and the initial parameter ensemble, respectively. Meanwhile *ies_restart_observation_ensemble()* and *ies_restart_parameter_ensemble()* should cite ensembles produced from the same iteration of a previous PESTPP-IES run. This ensures that failed runs are handled correctly and that any regularization enforcement is with respect to the initial (prior) parameter ensemble. -### 9.2.7 Failed Model Runs +### 9.2.7 Failed Model Runs Where model runs are based on random parameter realizations, the risk of occasional model run failure is high for some models. The parallel run manager used by programs of the PEST++ suite is able to accommodate model run failure in ways described in section 5.3 of this manual. When model run failure is encountered, PESTPP-IES drops the parameter set that precipitated this failure from the ensemble. The ensemble thus loses a member. @@ -2897,13 +2888,13 @@ PESTPP-IES provides a mechanism for detection of model run failure that extends To forestall excessive PESTPP-IES run times incurred by occasional model failure, it is a good idea to set the *max_run_fail()* model run control variable to 1 (the default value for PESTPP‑IES), and to choose values for the *overdue_giveup_fac()* and/or *overdue_giveup_minutes()* control variables judiciously; see section 5.3. Note also that model run failure does not hurt PESTPP-IES as much as it hurts PESTPP-GLM or PEST. This is because the value of any model run undertaken by PESTPP-IES is lower than that undertaken by PEST or PESTPP-GLM. For the latter programs a failed model run during finite-difference derivatives calculation may lead to an empty column of the Jacobian matrix. In contrast, because PESTPP-IES uses an entire ensemble to fill a Jacobian matrix, a single failed model run does not result in an empty Jacobian matrix column. The outcome of model run failure is that the number of model runs employed in the averaging process through which this column is calculated is reduced by one. -### 9.2.8 Reporting +### 9.2.8 Reporting PESTPP-IES records its progress to the screen and to its run record file. In addition to this, it records a plethora of output files–this is intentional. In the ensemble setting the cost of evaluating new model outputs is high, a rerun of an ensemble. It is therefore easier for PESTPP-IES to write as much information as possible to avoid these additional costs. The output are discussed in the next section. These output files can be supplemented by additional files that record, in ASCII format, matrices that PESTPP-IES formulates in the course of upgrading parameter realizations. The extent of its output file production can be controlled using the *ies_verbose_level()* variable. This can be awarded values of 0,1 or 2. The default is 1. If a model is numerically unstable, a user may wish to be informed of parameter values that precipitate run failure. As is discussed below, PESTPP-IES records the values of all parameters in all realizations comprising an ensemble, together with model run results, in iteration-specific CSV or JCB files. Parameter sets used in lambda testing can also be recorded if the *save_lambda_ensembles()* control variable is set to *true*. -### 9.2.9 Termination Criteria, Objective Functions, and Upgrade Acceptance +### 9.2.9 Termination Criteria, Objective Functions, and Upgrade Acceptance Like PEST and PESTPP-GLM, PESTPP-IES reads termination criteria from the eighth line of the “control data” section of a PEST control file. @@ -2921,9 +2912,9 @@ The “composite” objective function is simply the combination of the measurem The “actual” objective function is calculated using the current simulated outputs and the observation values in the control file (that is, without measurement noise realizations). Through the *ies_no_noise* option, users can make the “measurement” and “actual” objective functions one in the same. This is an important consideration when subjective weighting is used to balance the contribution of several types of observations to the objective function–a process that can result in very small weights, which implies very large measurement noise. -## 9.3 PESTPP-IES Output Files +## 9.3 PESTPP-IES Output Files -### 9.3.1 CSV Output Files +### 9.3.1 CSV Output Files PESTPP-IES writes a suite of output files. Many of these are comma-delimited files (i.e., CSV files). Alternatively, the contents of some of these files (those which hold parameter and observation ensembles) can be recorded in binary JCB files, this option being activated if the *ies_save_binary()* control variable is suppled as *true*. CSV and JCB files that are written by PESTPP-IES are discussed in the current sub-section. Other files that are written by PESTPP-IES are discussed in the following sub-section. @@ -2950,7 +2941,7 @@ As always, it is assumed that the filename base of the PEST control file on whic Table 9.2 CSV and JCB files written by PESTPP-IES. It is assumed that the name of the PEST control file is *case.pst*. -### 9.3.2 Non-CSV Output Files +### 9.3.2 Non-CSV Output Files Non-CSV output files written by PESTPP-IES are listed in the following table. @@ -2967,7 +2958,7 @@ Non-CSV output files written by PESTPP-IES are listed in the following table. Table 9.3 Non-CSV/JCB files written by PESTPP-IES. It is assumed that the name of the PEST control file is *case.pst*. -## 9.4 Summary of Control Variables +## 9.4 Summary of Control Variables Table 9.4 lists PESTPP-IES control variables. All of these are optional. If a variable is not supplied, then a default is assumed for its value. Where appropriate, the value of the default is presented along with the name of the variable in the table below. Variables discussed in section 5.3.6 of this manual that control parallel run management are not listed in the following table. @@ -2977,15 +2968,15 @@ Note also that the number of control variables may change with time. Refer to th Table 9.4 PESTPP-IES control variables with default values. Parallel run management variables can be supplied in addition to these. See section 5.3.6. -# 10. PESTPP-SWP +# 10. PESTPP-SWP -## 10.1 Introduction +## 10.1 Introduction PESTPP-SWP runs a model using a suite of parameter fields. Parameter values that comprise these fields are supplied in a comma-delimited file (i.e., a CSV file) or in a binary (enhanced) Jacobian matrix file (i.e., a JCO or JCB file). PESTPP-SWP records the values of model outputs calculated using these parameter fields in another CSV file, together with objective function components calculated from these outputs. The PEST control file which PESTPP-SWP reads informs it of observation values, observation weights, observation groups, and the setting of the PESTMODE control variable. If PESTMODE is set to “regularization”, then PESTPP-SWP calculates a regularization objective function in addition to the measurement objective function. On most occasions of PESTPP-SWP usage, model runs are conducted in parallel. Use of PESTPP-SWP gives a modeller easy access to model run parallelization for the completion of model runs undertaken for any purpose whatsoever. A significant amount of functionality available through PyEMU makes use of PESTPP-SWP to undertake parallelized model runs; it bases its calculations on the outcomes of these runs. -## 10.2 Using PESTPP-SWP +## 10.2 Using PESTPP-SWP As usual, variables which control how PESTPP-SWP operates must be placed in a PEST control file whose name is supplied on its command line; these variables should appear on lines that begin with the “++” character string. @@ -3013,7 +3004,7 @@ The control variable *sweep_chunk()* pertains to parallelization of model runs. Also note that PESTPP-SWP can be particularly useful if users need complete model output files for a given set of runs. In this case, the file transfer capabilities of the parallel run manager can be used with PESTPP-SWP to run a sweep of parameter values and model output files can be transferred back to the master directory. -## 10.3 Summary of Control Variables +## 10.3 Summary of Control Variables Table 10.1 tabulates PESTPP-SWP control variables. As usual, all of these variables are optional. If a variable is not supplied, then a default is assumed. Default values are presented along with the names of variables in the table below. Variables discussed in section 5.3.6 that control parallel run management are not listed in this table. @@ -3030,10 +3021,10 @@ The number of control variables may change with time. Refer to the PEST++ web si | *tie_by_group(false)* | Boolean | Flag to tie all adjustable parameters together within each parameter group. Initial parameter ratios are maintained as parameters are adjusted. Parameters that are designated as already tied, or that have parameters tied to them, are not affected. | | *ensemble_output_precision* | int | Number of significant digits to use in ASCII format ensemble files. Default is 6 | -Table 10.1 PESTPP-SWP control variables. Parallel run management variables can be supplied in addition to these; see section 5.3.6. +Table 10.1 PESTPP-SWP control variables. Parallel run management variables can be supplied in addition to these; see section 5.3.6 **PESTPP-PSO** -## 11.1 Introduction +## 11.1 Introduction **Publications** A complete description of the background on PESTPP-PSO (Particle Swarm Optimization within PEST++) and its basic operation can be found in *Siade et al*, (2019), along with three benchmark problems and two real-world case studies. Therefore, this manual will instead provide a more detailed description on how to implement the software. The reader is also referred to the work by *Coello et al*, (2004) for an additional detailed description on the multi-objective (Pareto) optimization framework that formed the basis of the corresponding method employed in this software. @@ -3069,7 +3060,7 @@ The basic single-objective PSO algorithm proceeds by updating each particle’s While basic PSO can approach such a problem, like all other optimization methods, if the problem is nonconvex it cannot guarantee a globally optimal solution. However, its global search approach to optimization makes it very effective at avoiding local minima. It is also important to point out that, like other evolutionary algorithms, the number of iterations required for convergence can be relatively high. This can be mitigated somewhat through the choice of values for inertia and the social and cognitive constants. It is recommended that one begin with a relatively high value for inertia (e.g., 0.7) and gradually lower the inertia over successive iterations, perhaps as low as 0.4. Another factor affecting convergence is the swarm size; the larger the swarm the faster the convergence. However, there comes a point where the speed-up in this trade-off diminishes; from the author’s experience this occurs somewhere around a swarm size of 50, but may still be problem-specific. See the sections regarding the use of this software for more details on how to manage these control variables. -### 11.1.2 Multi-Objective Particle Swarm optimization +### 11.1.2 Multi-Objective Particle Swarm optimization Multi-objective optimization studies often have numerous factors to consider, and some of these factors may be considered objectives (a Pareto front is desired for their trade-offs), or they may be considered as constraints (they are given a limit for which they cannot exceed). Generally, one could consider constraints as objectives in this context, as they can be mixed and matched depending on the perspective of the optimization problem (Equation 11.2). Additionally, the upper limit of the constraints may be perturbed slightly to examine its effects on the Pareto front; such constraints are often referred to as epsilon (*ε*) constraints. @@ -3079,13 +3070,13 @@ MOPSO, like most multi-objective optimization algorithms in use today, approxima The MOPSO algorithm employed in this software determines the Pareto optimal set iteratively, beginning with an initial swarm population. The initial swarm is executed through the simulation model and the set of non-dominated decision vectors amongst the initial swarm is stored in a *repository*. Then, MOPSO will update the swarm, according to a modified PSO method, and check the dominance relationships between the swarm and the repository (*Siade et al*, 2019). If new decision vectors are obtained that are non-dominated (thus far), they will be added to the repository, and conversely, if decision vectors in the repository become dominated by those in the swarm, they will be discarded. This repeats for a desired number of iterations. At each iteration, the repository objectives and decision vectors are stored to their associated output files (see Section 11.3). -### 11.1.2 Decision Variable Transformations +### 11.1.2 Decision Variable Transformations Currently, the decision variables (which could consist of parameter values, for example) have a pre-defined transformation status. This status is referred to as *eqlog*, which allows for logarithmic transformation, but with different logarithmic bases for each of the decision variables. The decision variable with the greatest difference between upper and lower bounds (in terms of magnitude) is assigned a logarithm base of 10 during transformation. This is equivalent to the *log* option employed in much of the PEST and PEST++ suite for the variable PARTRANS. The logarithm base for the remaining decision variables are set such that the transformed range for those variables is equivalent to that of the widest one, whose aforementioned base is 10. This ensures that the variability of all transformed decision variables appears exactly the same to the PSO procedure, which enhances overall performance. This could result in some decision variables essentially having no transformation (equivalent to *none* for PARTRANS) or even some variables experiencing an expansion effect, where their transformed range is wider than the original one. Please see *Siade et al*, (2019) for more details. -## 11.1 Using PESTPP-PSO +## 11.1 Using PESTPP-PSO -### 11.1.1 General +### 11.1.1 General PESTPP-PSO was developed using the FORTRAN interface provided within the PEST++ source code. Currently, PESTPP-PSO is only designed to operate in parallel, and the command to execute the “manager” is as follows (which differs slightly from the other PEST++ calling programs), @@ -3102,7 +3093,7 @@ PESTPP-PSO must use another PEST++ calling program to initiate the “agents”. Figure 11.1. Variables comprising a minimalist PEST control file (see Figure 4.1), where the control variables used by PESTPP-PSO are shaded in grey. Note that the very last line designates the PSO control file. -### 11.1.2 Estimation Mode +### 11.1.2 Estimation Mode The algorithm employed in *estimation* mode is equivalent to the very basic form of PSO originally introduced by *Eberhart and Kennedy* (1995). Much of the basic mechanics of the algorithm can be summarized by Equation (11.3). The PSO control file for estimation mode will have a format as follows (“\*” sections can be in any order), @@ -3172,7 +3163,7 @@ OBJNME is a character string and the name of the objective function being minimi CONNME is a character variable that defines the names of the constraints that are to be maintained during optimization (*f**i* in Equation 11.1). Each CONNME must correspond with an observation group in the PEST control file. CONMETH is similar to OBJMETH and determines if a constraint is comprised of a sum of squared residuals (enter a 1), or a general constraint that is treated as is (enter a 2). UPLIM is simply the upper limit applied to that constraint (*b**i* in Equation 11.1). Constraints with a lower limit can be converted to ones with an upper limit by simply multiplying the constraint value and its associated lower limit value by a -1. -### 11.2.3. Pareto mode +### 11.2.3. Pareto mode The algorithm employed in *pareto* mode (i.e., multi-objective optimization) is fundamentally based upon the basic form of PSO (Equation 11.3); however, the conceptualization and logical aspects of its operation are relatively complex, and the reader is referred to *Siade et al*, (2019) for these technical details. The PESTPP-PSO specs file for MOPSO is the same as that for standard PSO, with some minor modifications, @@ -3234,9 +3225,7 @@ Figure 11.5. Format of the (optional) initial-swarm external file that the user The external initial-swarm file can also be used in other ways. For example, if the user simply wishes to execute a large number of model-runs, e.g., from the output of a Monte Carlo algorithm, the user could develop an external initial-swarm file with these realizations listed. Then the user would set NPOP accordingly, along with NOPTMAX set to 0. Another example could be the case where the user wishes to restart the PSO algorithm from some iteration of a previous PSO run. In this case, the user could use the *case.pbs* (*estimation* mode) or the *case.par* (*pareto* mode) output file from a previous PSO run as the external initial-swarm file, as these output files use the same format as described in Figure 11.5. -## - -## 11.2 PESTPP-PSO Output Files +## 11.2 PESTPP-PSO Output Files Output files produced by PESTPP-PSO are listed in the following table. The contents of this table assume that the PEST control file for which PESTPP-PSO is executed is named *case.pst*. @@ -3252,11 +3241,9 @@ Table X.1 PESTPP-PSO output files. Note that each of these output files are upda | *case.rep (Pareto mode)* | This file contains the objective function values corresponding to the repository positions at the end of the simulation; that is, this file contains the Pareto front. This file will be updated at the end of each iteration. Furthermore, additional *case_x.rep* files will also be created at each iteration, where x is the iteration number. This helps the user visualise the convergence of the MOPSO algorithm. | | *case.par (Pareto mode)* | This file contains the decision variable (or parameter) values associated with the objective function values that comprise the Pareto front (i.e., the weakly Pareto optimal set), at the end of the simulation. This file is also updated after each iteration and uses the same format as the external initial-swarm file described in Figure 11.5. | -# +# 12. PESTPP-DA -# 12. PESTPP-DA - -## 12.1 Introduction +## 12.1 Introduction PESTPP-DA is a generic data assimilation tool. It supports both batch and sequential assimilation. The former being the standard protocol that all tools in the PEST and PEST++ suites support: running the model forward for the entire period of simulation each time a model run is requested. However, sequential estimation is a different beast all together. Usually, sequential estimation requires running the model for a specific period of time, extract observed simulated equivalent, implement data assimilation to update parameters or dynamic states, and restart the forward model to simulate the next period of time. To allow PESTPP-DA to handle a wide range of assimilation schemes, the “assimilation cycle” concept is introduced. A “cycle” is usually a period of time during which specific forcings are applied and/or specific discrete-time observations are available to be assimilated. When data assimilation is employed, the simulation period can be divided into as many cycles as required for the problem. Cycles can be used to represent one-time step; in this case the forward model will be run (advanced in time) for one time step to implement an Ensemble Kalman Filter. In other cases, the entire simulation period can be represented using one cycle to implement an Ensemble Smoother. The archetypal sequential data assimilation algorithm is the ensemble Kalman filter (Evensen 2003). “Assimilation cycles” and assimilation schemes are discussed in detail throughout this chapter. @@ -3264,9 +3251,9 @@ To facilitate PESTPP-DA’s ability to advance cycles, it will normally be neces It is important to note that the solution equations implemented in PESTPP-DA are identical to those implemented in PESTPP-IES: the standard and MDA iterative form (Emerick and Reynolds, 2013) of the Kalman update equations (Evensen, 2003), as well as the iterative ensemble smoother equations of Chen and Oliver (2013). However, PESTPP-DA facilitates arbitrary cycles of assimilation, while PESTPP-IES is batch estimation only. The arbitrary cycle definitions support in PESTPP-DA allow users to define any form of sequential data assimilation from hourly to daily to arbitrary mixtures of hours, days, months, seasons, decades, etc. This flexibility is unique to PESTPP-DA and makes PESTPP-DA a very flexible, thus powerful, data-assimilation tool. In the following section, the theory of data assimilation and terminology used is introduced. -## 12.2 Theory +## 12.2 Theory -### 12.2.1 Background and Basic Equations +### 12.2.1 Background and Basic Equations Data assimilation is the process of optimally combining uncertain model inputs (parameters and system states) and uncertain observations to estimate model parameters and states as the modelled system evolves in time. The following concepts are used to describe a data assimilation problem: @@ -3290,7 +3277,7 @@ Whereas the solution mechanism and approach are similar, the primary difference As with PESTPP-IES, PESTPP-DA uses the *NOPTMAX* control variable to define the number of iterations to apply the solution equation. And, as with PESTPP-IES, PESTPP-DA uses the *NOPTMAX* values of 0 and -1 to define a “control file parameter value” run (a single model run) and a prior Monte Carlo run, respectively. For the control file parameter value run, PESTPP-DA uses the values of parameters listed in the control file, along with the cycle information to advance through each cycle, evaluating the control file parameters, recording simulated outputs and updating dynamic states. As the name implies, the prior Monte Carlo analysis with PESTPP-DA evaluates the prior parameter ensemble for each cycle, recording the simulated outputs and updating the dynamic states. -### 12.2.2 Schemes for Assimilating Temporal Data +### 12.2.2 Schemes for Assimilating Temporal Data The frequency of assimilating observations depends on the problem and on the need of the practitioner. For example, for weather forecasting atmospheric observations are typically assimilated at high frequency (ref) (in the order of minutes), while groundwater systems, which evolve slowly, might need assimilation frequency in the order of months or years. Sometimes, practitioner might be interested in sequentially assimilating observations for every model simulated time period, or, in other settings, all available historic observations may be assimilated simultaneously. @@ -3300,11 +3287,11 @@ The time cycle might consist of a single model time step, multiple model time st Although EnKF and EnKS schemes assimilate data on multiple time cycles, they differ in the way they restart the model (Ref). EnKF (Figure \*\*\*) restarts the model using the most updated state as initial conditions, while EnKS (Figure \*\*\*) restarts the model from a user defined time point (typically from the simulation beginning). PESTPP-DA implements a highly flexible data assimilation approach using standard template files and instructions files associated with each time cycles. For example, to implement EnKF (Fig \*\*\*), the user designs the structure of template files and instruction files to update model inputs using the most updated dynamic state and restart the model starting from end of last time cycles. Instruction files and templates files are used to define how to restart the model, which allows user to choose a rich combination of EnKF, EnKS, and ES. -### 12.2.2.1 Batch Data Assimilation with PESTPP-DA +### 12.2.2.1 Batch Data Assimilation with PESTPP-DA Batch data assimilation “Ensemble Smoother” with PESTPP-DA is conceptually the same process as used in the (optionally iterative) solution process in PESTPP-IES. This is method is widely used in model calibration. If no cycle information is found in control file, PESTPP-DA will resort to a batch assimilation process by assigning all parameters, template and instruction files a cycle value of -1, and all observations a cycle value of 0; or User can explicitly assign all observations, states, and parameters to the same cycle (cycle = 0). See PESTPP-IES for more information on ensemble-based batch data assimilation. -### 12.2.2.2 Sequential Data Assimilation with PESTPP-DA +### 12.2.2.2 Sequential Data Assimilation with PESTPP-DA The concept of sequential data assimilation can be thought of as applying the solution scheme discretely for each cycle, then advancing to the next cycle. This process is repeated for all cycles. @@ -3316,7 +3303,7 @@ The use of sequential data assimilation has several important implications when Another implication of sequential assimilation within the PESTPP-DA framework is that some template and/or instruction files may only apply to a given cycle or group of cycles. This is in contrast to the standard batch assimilation, where all template and instruction files are used for every model run. This means users need to define cycle information not only for parameter and observation data control file sections, but also for the template and instruction file sections. The need to define cycle information was one of the driving factors behind the development of the version 2 pest control file format. -### 12.2.4 State estimation, parameter estimation and joint state-parameter estimation +### 12.2.4 State estimation, parameter estimation and joint state-parameter estimation In the standard batch assimilation (Ensemble Smoother) scheme (without dynamic states and with a single assimilation cycle), there are no dynamic states to be estimated, only static and dynamic parameters (recall dynamic parameters include quantities like forcings like stress period recharge rates which can still be estimated in a smoother/batch formulation). @@ -3345,7 +3332,7 @@ You can switch between using estimated and simulated final states by changing th Sequential (joint) Final-and-Initial-State-Parameter Estimation – Estimating the initial and final states of each cycle, along with any parameters. This formulation is setup the same as the Sequential (joint) Final-State-Parameter Estimation except the initial-state parameters are not fixed. Like the Sequential (joint) Final-State-Parameter Estimation, users are cautioned against the formulation because it also may violate the physics of the underlying simulation. -### 12.2.4 Parameter, Observation and Weight Cycle Tables +### 12.2.4 Parameter, Observation and Weight Cycle Tables In sequential assimilation, PESTPP-DA needs to have access to the dynamic states. Invariably, one or more of these states may also correspond to a historic observation. For example, in a groundwater model, at different times and spatial locations during the historic period, groundwater levels may have been measured. Since PESTPP-DA is tracking the final simulated conditions, it, by default, is tracking the simulated equivalent to these observations (if you ignore the need for spatial and temporal interpolation from model nodes to observations and model output times to observations). In this case, users need to supply an observation value and weight for these observations so that they can be used in the solution scheme, in words, so that the information in these historic observations can be assimilated. But *what if the same model node also has observations during other cycles and what if these other observations have differencing quality or should be assigned a unique weight for other reasons?* Users could accommodate this situation by making cycle specific instruction files with unique observation names so that they are listed as unique entries in the control file. However, *this may result in a large number* of nearly identical instruction files to read the same model output locations for multiple cycles, which can make the problem setup overly complicated. @@ -3353,7 +3340,7 @@ A more straightforward option is to specify the *da_observation_cycle_table* and In a similar way, when “fixed” parameters have values that may change across cycles, a “parameter cycle table” can be used. For example, one of the fixed (non-adjustable) parameter quantities listed in the control file may represent the length of the time simulated for each cycle. Rather than constructing a unique template file for each cycle and having a unique time-length parameter for each cycle, users can supply a parameter cycle table that lists the cycle-specific values for any/all fixed parameters – not all fixed parameters need to be listed in the parameter cycle table, only those fixed parameters that have values that change across cycles. The row index of this table should list parameter names and the column header lists the cycle values. Just as with the observation and weight cycle tables, any parameter listed in the parameter cycle table should have a cycle value of -1 because the cycles to which each parameter applies is determined from the column header. If a table-listed parameter does not apply to a given cycle, then that row-column entry in the table should be left blank or empty. Only “fixed” parameters should be listed in the parameter cycle table. -### 12.2.5 Steps for Data Assimilation implementation +### 12.2.5 Steps for Data Assimilation implementation Given a model with a set of ASCII input files, executable file (or a chain of executable files) that represent a forward model run simulating the physical system, a resulting set of output files, and a set of transient observations. The following steps would implement data assimilation: @@ -3379,7 +3366,7 @@ Given a model with a set of ASCII input files, executable file (or a chain of ex 11. Choose either iterative or MDA solution method. -### 12.2.12 Running PESTPP-DA +### 12.2.12 Running PESTPP-DA PESTPP-DA is run exactly like all other tools in the PEST++ suite – See section 5 of this manual for how to run the tools in the PEST++ suite. As is described in that section, model runs can be undertaken in series or in parallel. In either case, a prematurely terminated PESTPP-DA run can be restarted by supplying the requisite global parameter and observation ensemble files (described below). @@ -3405,7 +3392,7 @@ The “cycle” values assigned to the various components in the control file ca In this way, the string-based cycle values allow users to apply sophisticated rules about how parameters and/or observations are used across multiple cycles. -### 12.2.13 Other uses for PESTPP-DA +### 12.2.13 Other uses for PESTPP-DA Although PESTPP-DA is a tool designed for flexible sequential and batch data assimilation, the generalized nature of the cycle concept, in concert with the observation and weight cycle tables, also provides a range of other functionality. In this way, the cycle concept can be thought of as an outer iteration process. For example, users can undertake the advanced “direct predictive hypothesis testing” analysis (e.g., Moore et al., 2010) with PESTPP-DA by constructing a generic weight cycle table where each cycle includes increasing weight on a control file observation quantity that represents a simulated outcome of interest. For example, assume a model has been constructed to simulate surface-water/groundwater exchange (SGE) along an important river reach. Further assume that the simulated SGE along this reach is included in the control file as an observation. To test the hypothesis that the SGE for this reach could be zero, users should set the observation value quantity in the control file to 0.0 and set the weight to 1.0 (this weight will not be used but simply activates this quantity in the PESTPP-DA cycle process). Now users can construct a weight cycle table. Let’s use 10 cycles. For the historic observations that are being assimilated, the entries for all cycles in the weight cycle table for these observations should be identical to the weights in the control file. The entries for the SGE “observation” in the weight cycle table should slow increase from 0.0 in the first cycle to a value large enough to dominate the objective function in the last cycle. Conceptually, during each PESTPP-DA “cycle”, a (iterative) ensemble smoother formulation will be used to minimize the objective function, but as cycles progress, the desire to force the SGE towards zero increasingly features in the objective function. In this way, the compatibility between the fitting the historic observations and the ability to make SGE be zero is directly tested. If the ability to fit the past observations is maintained while also making the simulated SGE zero, then one cannot reject the hypothesis that the SGE could be zero on the basis of compatibility with historic observations. This technique is very similar to “pareto mode” in PEST(\_HP), except here, we can take advantage of the computational efficiency of the iterative ensemble solver in PESTPP-DA. Figure 12.XXX depicts the results of such an analysis @@ -3413,7 +3400,7 @@ Although PESTPP-DA is a tool designed for flexible sequential and batch data ass Figure 12.XXX. Results of a direct predictive hypothesis testing analysis where the relation between fitting historic observations and a desire to make surface-water/groundwater exchange (SGE) zero is evaluated. The ensemble-based pareto trade-off between these two quantities shows that simulating an SGE of zero is not compatible with the historic observations. -### 12.2.14 PESTPP-DA Output Files +### 12.2.14 PESTPP-DA Output Files The following table summarizes the contents of files that are recorded by PESTPP-DA when it is asked to undertake highly-parameterized inversion. Most of these have been discussed above. It is assumed that the PEST control file on which the inversion process is based is named *case.pst*. @@ -3423,21 +3410,21 @@ Since the parameters and observations being used can change across cycles, the P Table 12.1. Files recorded by PESTPP-DA. -## 12.4 Summary of PESTPP-DA Control Variables +## 12.4 Summary of PESTPP-DA Control Variables -### 12.4.1 General +### 12.4.1 General Like all the tools in the PEST++ suite, PESTPP-DA uses a control file. However, because the sequential assimilation process that is unique to PESTPP-DA, it requires the use of the version 2 control file, and this control file should use external csv files for all sections. These external csv files have a column labelled “cycle” for parameter data, observation data, model input and model output information. TODO: Add use case table images here -### 12.4.2 Control Variables in the PEST Control File +### 12.4.2 Control Variables in the PEST Control File As has been discussed, PESTPP-DA shares its solution techniques with PESTPP-IES, so, it stands to reason that PESTPP-DA would use many of the same optional control file arguments as PESTPP-IES. For example, both PESTPP-DA and PESTPP-IES use a prior parameter ensemble and users can supply these ensembles through existing files. In PESTPP-IES, this ensemble can optionally be supplied as *ies_parameter_ensemble.* In PESTPP-DA, the corresponding argument is *da_parameter_ensemble*. In fact, all PESTPP-IES arguments are also supported by PESTPP-DA – every single one! And users can have both *ies_parameter_ensemble* and *da_parameter_ensemble* listed and PESTPP-DA will use the “da” argument. However, if only the “ies” argument is suppled, PESTPP-DA will use that value. In this way, users can use the same arguments for both PESTPP-DA and PESTPP-IES. There are however, a few PESTPP-DA arguments that only apply to PESTPP-DA, these being the arguments that apply the cycle control process. -### 12.4.3 PEST++ Control Variables +### 12.4.3 PEST++ Control Variables Table 12.XXX lists PEST++ control variables that are specific to only PESTPP-DA; many, many, many other optional control variables that can be used with PESTPP-DA are listed in section 9.4 . All of these are optional. If a variable is not supplied, a default value is employed. The value of the default is presented along with the name of each variable in the table below. Variables are grouped in approximate accordance with their roles. @@ -3454,15 +3441,15 @@ Variables discussed in section 5.3.6 that control parallel run management are no Table 12.2. PESTPP-DA specific control arguments. PESTPP-DA shares all other control arguments with PESTPP-IES -# 13. PESTPP-MOU +# 13. PESTPP-MOU -## 13.1 Introduction +## 13.1 Introduction PESTPP-MOU is a tool for constrained single and multiple objective optimization under uncertainty (CMOU) with evolutionary heuristics. It implements several popular “global” evolutionary optimization algorithms including simulated binary cross over, differential evolution (including self-adaptive differential evolution), and particle swarm optimization. PESTPP-MOU uses the pareto dominance concepts and processes available in NSGA-II and SPEA-2 to seek multidimensional pareto frontiers. More importantly, PESTPP-MOU implements the same “chance” concepts and mechanics as PESTPP-OPT for model output quantities used in the optimization problem, which includes both model-based constraints and model-based objectives. -## 13.2 Theory +## 13.2 Theory -### 13.2.1 Background and Basic Equations +### 13.2.1 Background and Basic Equations The core concepts related to the use of PESTPP-MOU are: @@ -3500,7 +3487,7 @@ Available generators are: Users are encouraged to google these to find out more about their behavior -### 13.2.2 Evaluating chances in a population-based algorithm +### 13.2.2 Evaluating chances in a population-based algorithm In contrast to PESTPP-OPT, which operates on a single solution, PESTPP-MOU uses a population of decision variable sets, which means there is no single point in decision variable space to evaluate chances, so some decisions must be made about how to evaluate model-based constraints and/or objective uncertainty (and ultimately risk). Similar to PESTPP-OPT, PESTPP-MOU support both FOSM and stack-based chance processes. However, given that the expected use of PESTPP-MOU is in more nonlinear settings that PESTPP-OPT, it is expected that the stack-based chance formulation will be more appropriate. @@ -3508,9 +3495,9 @@ Stack-based chances can be evaluated in two ways with PESTPP-MOU: at each popula When chances are reused for generations, the PDFs/CDFs of the constraints/objectives are translated from the points in decision variable space where they were evaluated to the new population individual s in a minimum-Euclidean-distance sense. This assumes that points near each other in decision-variable space yield more similar chance results than points that are distance. The translation of PDFs/CDFs is done by differencing the simulated constraint/objective values between two points, assuming these values represent the mean of the PDFs. -### +### -### 13.2.3 PESTPP-MOU workflow +### 13.2.3 PESTPP-MOU workflow PESTPP-MOU commences execution by either generating or loading a decision variable population, depending on whether an existing population was supplied. It then evaluates this initial population by running the model once for each population individual. PESTPP-MOU then generates a new population of offspring from the initial population using the specified *mou_generator*. This new population is then evaluated by running it through the model. @@ -3520,13 +3507,13 @@ Once all requested runs have finished, PESTPP-MOU uses the designated environmen The selection of the “best” individuals depends on the problem formulation. For single objective problems, the “best” individuals are simply those that have objective function values closer to the requested extrema (minimum or maximum). For multiple objective problems, things are more complicated and the concept of “pareto dominance” is used to sort/rank the population according to each individual’s location in objective function space relative to all other individuals of the population. If constraints are included, things get even more complicated because now we don’t want to include infeasible individuals (individuals who violate constraints) in the population for the next generation unless there are not enough feasible individuals. PESTPP-MOU uses the constrained fast nondominated sorting process of NSGA-II and a variant of the SPEA-II ranking process for constrained multi-objective environmental sorting. -### 13.2.4 Advanced functionality +### 13.2.4 Advanced functionality PESTPP-MOU implements several advanced functionality elements to increase its capacity as a decision support tool. The first of these is the option to treat “risk” as an objective to be maximized simultaneously with the other objective(s). That is, this option transforms the *OPT_RISK* argument into an objective to be maximized (drive risk towards a value of 1.0). In this way, PESTPP-MOU seeks to map the trade-off between objectives and also risk at the same time. To activate this option, an adjustable parameter named “\_risk\_” (leading and trailing underscores required) must be included in the pest interface and *mou_risk_objective* should be passed as “true”. This functionality can be useful in setting where the risk stance is not known a priori or if the use of the desired risk stance results in largely infeasible solutions. However, it is important to note the treating risk as an objective can increase the complexity and nonlinearity of the optimization solution process, requiring more generations, and in some cases, degrading the quality of the solution. PESTPP-MOU also supports self-adaptive differential evolution, where the differential evolution algorithmic controls (“f” value, cross over rate, and mutation rate) are treated as decision variables. This functionality is activated automatically when decision variables named “\_DE_F”, “\_CR\_”, and/or “\_MR\_” are found in the decision variable set. Users must take care to ensure these algorithmic decision variables are given reasonable ranges. -### 13.2.5 Running PESTPP-MOU +### 13.2.5 Running PESTPP-MOU PESTPP-MOU is run exactly like all other tools in the PEST++ suite – See section 5 of this manual for how to run the tools in the PEST++ suite. As is described in that section, model runs can be undertaken in series or in parallel. In either case, a prematurely terminated PESTPP-MOU run can be restarted by supplying the requisite decision variable population file. @@ -3536,7 +3523,7 @@ Constraints/objectives are identified in exactly the same way as PESTPP-OPT: via Decision variables are distinguished from parameters through the *opt_dec_var_groups* option which lists parameter groups whose members should be treated as decision variables. If this option is not specified, then all adjustable parameters as treated as decision variables. As with the number of objectives, it is important to point out the global evolutionary optimization methods do not scale to high dimensions; a maximum realistic number of decision variables is likely hundreds. -### 13.2.6 PESTPP-DA Output Files +### 13.2.6 PESTPP-DA Output Files The following table summarizes the contents of files that are recorded by PESTPP-DA. Most of these have been discussed above. It is assumed that the PEST control file on which the inversion process is based is named *case.pst*. @@ -3544,17 +3531,15 @@ The following table summarizes the contents of files that are recorded by PESTPP Table 13.1. Files recorded by PESTPP-MOU. -## - -## 13.4 Summary of PESTPP-MOU Control Variables +## 13.4 Summary of PESTPP-MOU Control Variables -### 13.4.1 General +### 13.4.1 General Like all the tools in the PEST++ suite, PESTPP-MOU uses a control file, template files, and instruction files. -### 13.4.2 Control Variables in the PEST Control File +### 13.4.2 Control Variables in the PEST Control File -### 13.4.3 PEST++ Control Variables +### 13.4.3 PEST++ Control Variables Table 12.XXX lists PEST++ control variables that are specific to only PESTPP-MOU; many other optional control variables that can be used with PESTPP-MOU are listed in the PESTPP-OPT section of the manual. All of these are optional. If a variable is not supplied, a default value is employed. The value of the default is presented along with the name of each variable in the table below. @@ -3586,7 +3571,7 @@ Variables discussed in section 5.3.6 that control parallel run management are no Table 13.2. PESTPP-MOU specific control arguments. PESTPP-MOU shares many other control arguments with PESTPP-OPT -# 14. References +# 14. References Ahlfeld, D.P. and Mulligan, A.E., 2000. Optimal Management of Flow in Groundwater Systems. Vol 1. Academic Press. diff --git a/src/libs/common/config_os.h b/src/libs/common/config_os.h index 9aa0fdb39..9e318fb2d 100644 --- a/src/libs/common/config_os.h +++ b/src/libs/common/config_os.h @@ -2,7 +2,7 @@ #define CONFIG_OS_H_ -#define PESTPP_VERSION "5.1.17"; +#define PESTPP_VERSION "5.1.18"; #if defined(_WIN32) || defined(_WIN64) #define OS_WIN diff --git a/src/libs/common/utilities.cpp b/src/libs/common/utilities.cpp index a40425be9..016f546a4 100644 --- a/src/libs/common/utilities.cpp +++ b/src/libs/common/utilities.cpp @@ -377,7 +377,7 @@ map read_twocol_ascii_to_map(string filename, int header_lines, { map result; ifstream fin(filename); - if (!fin.good()) + if (!fin.good()) throw runtime_error("could not open file " + filename + " for reading"); string line; double value; @@ -406,7 +406,7 @@ vector read_onecol_ascii_to_vector(std::string filename) { vector result; ifstream fin(filename); - if (!fin.good()) + if (!fin.good()) throw runtime_error("could not open file " + filename + " for reading"); string line; vector tokens; @@ -430,7 +430,7 @@ void read_res(string& res_filename, Observations& obs) { map result; ifstream fin(res_filename); - if (!fin.good()) + if (!fin.good()) throw runtime_error("could not open residuals file " + res_filename + " for reading"); vector tokens; string line, name; @@ -720,7 +720,7 @@ void read_dense_binary(const string& filename, vector& row_names, vector stringstream ss; ifstream in; in.open(filename.c_str(), ifstream::binary); - if (in.bad()) + if (!in.good()) { ss.str(""); ss << "read_dense_binary() error opening binary file " << filename << " for reading"; @@ -754,7 +754,7 @@ void read_dense_binary(const string& filename, vector& row_names, vector for (int i = 0; i < n_obs_and_pi; i++) { in.read((char*)&(name_size), sizeof(name_size)); - if (in.bad()) + if (!in.good()) { ss.str(""); ss << "read_dense_binary(), dense format error reading size column name size for column number " << i; @@ -769,7 +769,7 @@ void read_dense_binary(const string& filename, vector& row_names, vector { char* col_name = new char[col_name_size]; in.read(col_name, col_name_size); - if (in.bad()) + if (!in.good()) { ss.str(""); ss << "read_dense_binary(), dense format error reading column name for column number " << i << ", size " << col_name_size; @@ -790,13 +790,14 @@ void read_dense_binary(const string& filename, vector& row_names, vector while (true) { //finished - if ((in.bad()) || (in.eof())) + //if ((in.bad()) || (in.eof())) + if ((i > 0) && (!in.good())) { break; } in.read((char*)&(name_size), sizeof(name_size)); - if (in.bad()) + if (!in.good()) { ss.str(""); ss << "read_dense_binary(), dense format incomplete record: error reading row name size for row number " << i << "...continuing"; @@ -805,7 +806,7 @@ void read_dense_binary(const string& filename, vector& row_names, vector } char* row_name = new char[name_size]; in.read(row_name, name_size); - if (in.bad()) + if (!in.good()) { ss.str(""); ss << "read_dense_binary(), dense format incomplete record: error reading row name for row number " << i << "...continuing"; @@ -815,7 +816,7 @@ void read_dense_binary(const string& filename, vector& row_names, vector name = string(row_name, name_size); pest_utils::strip_ip(name); pest_utils::upper_ip(name); - if (in.bad()) + if (!in.good()) { ss.str(""); ss << "read_dense_binary(), dense format incomplete record: error skipping values for row " << i << "...continuing "; @@ -828,7 +829,7 @@ void read_dense_binary(const string& filename, vector& row_names, vector in.read(row_name, sizeof(double)* col_names.size()); if (in.eof()) break; - if (in.bad()) + if (!in.good()) { ss.str(""); ss << "read_dense_binary(), dense format incomplete record: error skipping values for row " << i << "...continuing "; @@ -836,6 +837,7 @@ void read_dense_binary(const string& filename, vector& row_names, vector break; } row_names.push_back(name); + i++; } in.close(); @@ -852,7 +854,7 @@ void read_dense_binary(const string& filename, vector& row_names, vector in.seekg(sizeof(int) + row_names[i].size(), ios_base::cur); for (int j = 0; j < col_names.size(); j++) { - if (in.bad()) + if (!in.good()) { ss.str(""); ss << "read_dense_binary(), dense format incomplete record: error reading row,col value " << i << "," << j << "...continuing "; @@ -871,7 +873,7 @@ void read_binary_matrix_header(const string& filename, int& tmp1, int& tmp2, int stringstream ss; ifstream in; in.open(filename.c_str(), ifstream::binary); - if (in.bad()) + if (!in.good()) { ss.str(""); ss << "pest_utils::read_binary_matrix_header() error opening binary file " << filename << " for reading"; @@ -880,7 +882,7 @@ void read_binary_matrix_header(const string& filename, int& tmp1, int& tmp2, int in.read((char*)&tmp1, sizeof(tmp1)); in.read((char*)&tmp2, sizeof(tmp2)); in.read((char*)&tmp3, sizeof(tmp3)); - if (in.bad()) + if (!in.good()) { ss.str(""); ss << "pest_utils::read_binary_matrix_header() error header from binary file " << filename << " for reading"; @@ -929,7 +931,7 @@ bool read_binary(const string &filename, vector &row_names, vector Ensemble::get_empirical_cov_matrices(FileManager* fi } double scale = (num_reals / ((num_reals - 1.) * (num_reals - 1.) * (num_reals - 1.))) * wij_sum; scale = scale / demon; - cout << "optimal residual covariance matrix shrinkage factor: " << scale << endl; + //cout << "optimal residual covariance matrix shrinkage factor: " << scale << endl; file_manager_ptr->rec_ofstream() << "optimal residual covariance matrix shrinkage factor : " << scale << endl; Covariance rcov_diag; @@ -1977,6 +1977,7 @@ ParameterEnsemble::ParameterEnsemble(Pest *_pest_scenario_ptr, std::mt19937* _ra reals = _reals; var_names = _var_names; real_names = _real_names; + org_real_names = _real_names; tstat = transStatus::CTL; set_fixed_names(); } @@ -2258,7 +2259,7 @@ void ParameterEnsemble::from_binary(string file_name, bool forgive) map header_info = Ensemble::from_binary(file_name, names, false); unordered_setsvar_names(var_names.begin(), var_names.end()); vector missing; - for (auto& name : pest_scenario_ptr->get_ctl_ordered_adj_par_names()) + for (auto& name : names) { if (svar_names.find(name) == svar_names.end()) { @@ -2282,6 +2283,25 @@ void ParameterEnsemble::from_binary(string file_name, bool forgive) throw_ensemble_error("from_binary() error: the following adjustable parameter names in the control file are not in the binary parameter ensemble file:", missing); } } + missing.clear(); + svar_names.clear(); + names = pest_scenario_ptr->get_ctl_ordered_par_names(); + svar_names.insert(names.begin(),names.end()); + unordered_set::iterator send = svar_names.end(); + for (auto& name: var_names) + { + if (svar_names.find(name) == send) + { + missing.push_back(name); + } + } + if (missing.size() > 0) + { + drop_cols(missing); + } + + + prep_par_ensemble_after_read(header_info); } @@ -3735,6 +3755,23 @@ void ObservationEnsemble::from_binary(string file_name) if (missing.size() > 0) throw_ensemble_error("from_binary() error: the following non-zero-weighted obs names in the control file are not in the binary obs ensemble file:", missing); names = pest_scenario_ptr->get_ctl_ordered_obs_names(); + missing.clear(); + svar_names.clear(); + svar_names.insert(names.begin(),names.end()); + unordered_set::iterator send = svar_names.end(); + + for (auto& name : var_names) + { + if (svar_names.find(name) == send) + { + missing.push_back(name); + } + } + if (missing.size() > 0) + { + //drop these extra vars + drop_cols(missing); + } if (var_names.size() < names.size()) { update_var_map(); diff --git a/src/libs/pestpp_common/EnsembleMethodUtils.cpp b/src/libs/pestpp_common/EnsembleMethodUtils.cpp index 851d9e61c..6019afeb9 100644 --- a/src/libs/pestpp_common/EnsembleMethodUtils.cpp +++ b/src/libs/pestpp_common/EnsembleMethodUtils.cpp @@ -4133,6 +4133,10 @@ void EnsembleMethod::initialize(int cycle, bool run, bool use_existing) vector failed = run_ensemble(pe, oe, vector(), cycle); if (pe.shape().first == 0) throw_em_error("all realizations failed during initial evaluation"); + if (pest_scenario.get_pestpp_options().get_ies_debug_fail_remainder()) + { + failed.push_back(0); + } if (failed.size() > 0) { ss.str(""); @@ -4200,7 +4204,7 @@ void EnsembleMethod::initialize(int cycle, bool run, bool use_existing) ss << " with the prior simulated ensemble." << endl; message(0, ss.str()); - cout << "...see rec file or " << file_manager.get_base_filename() << ".pdc.csv" << "for listing of conflicted observations" << endl << endl; + cout << "...see rec file or " << file_manager.get_base_filename() << ".pdc.csv" << " for listing of conflicted observations" << endl << endl; ofstream& frec = file_manager.rec_ofstream(); frec << endl << "...conflicted observations: " << endl; for (auto oname : in_conflict) diff --git a/src/libs/pestpp_common/Jacobian_1to1.cpp b/src/libs/pestpp_common/Jacobian_1to1.cpp index 54d92148b..9df0c32f2 100644 --- a/src/libs/pestpp_common/Jacobian_1to1.cpp +++ b/src/libs/pestpp_common/Jacobian_1to1.cpp @@ -296,11 +296,12 @@ bool Jacobian_1to1::process_runs(ParamTransformSeq &par_transform, run_manager.get_model_parameters(par_run.second[0], run_list.back().ctl_pars); bool success = run_manager.get_observations_vec(par_run.second[0], run_list.back().obs_vec); run_list.back().numeric_derivative_par = cur_numeric_par_value; - /*if ((debug_fail) && (i_run == 1)) + if (debug_fail) { file_manager.rec_ofstream() << "NOTE: 'GLM_DEBUG_DER_FAIL' is true, failing jco run for parameter '" << cur_par_name << "'" << endl; success = false; - }*/ + debug_fail = false; + } if (success) { par_transform.model2ctl_ip(run_list.back().ctl_pars); diff --git a/src/libs/pestpp_common/Regularization.cpp b/src/libs/pestpp_common/Regularization.cpp index 6550fdadd..7e4d547ea 100644 --- a/src/libs/pestpp_common/Regularization.cpp +++ b/src/libs/pestpp_common/Regularization.cpp @@ -102,8 +102,11 @@ PestppOptions::ARG_STATUS DynamicRegularization::assign_value_by_key(const std:: pest_utils::convert_ip(value, wf_min); else if (key == "WFMAX") pest_utils::convert_ip(value, wf_max); - else if (key == "WFINIT") - pest_utils::convert_ip(value, wf_init); + else if (key == "WFINIT") { + pest_utils::convert_ip(value, wf_init); + tikhonov_weight = wf_init; + } + else if (key == "WFTOL") pest_utils::convert_ip(value, wftol); else if (key == "WFFAC") @@ -140,6 +143,7 @@ void DynamicRegularization::set_defaults() wffac = 0; wftol = 1000; wf_init = 1.0; + tikhonov_weight = 1.0; max_reg_iter = 20; } diff --git a/src/libs/pestpp_common/SQP.cpp b/src/libs/pestpp_common/SQP.cpp index e53e2581a..31356a0cf 100644 --- a/src/libs/pestpp_common/SQP.cpp +++ b/src/libs/pestpp_common/SQP.cpp @@ -18,7 +18,7 @@ #include "EnsembleSmoother.h" -bool SqpFilter::accept(double obj_val, double violation_val, int iter, double alpha) +bool SqpFilter::accept(double obj_val, double violation_val, int iter, double alpha,bool keep) { FilterRec candidate{ obj_val, violation_val,iter,alpha }; if (obj_viol_pairs.size() == 0) @@ -34,18 +34,23 @@ bool SqpFilter::accept(double obj_val, double violation_val, int iter, double al candidate.obj_val *= (1 - obj_tol); candidate.viol_val *= (1 + viol_tol); - bool accept = false; + bool accept = true; for (auto& p : obj_viol_pairs) - if (first_dominates_second(candidate, p)) + if (!first_partially_dominates_second(candidate, p)) { - accept = true; + accept = false; break; } + if ((keep) && (accept)) + { + //cout << "obj:" << obj_val << ", viol:" << violation_val << ", alpha:" << alpha << endl; + obj_viol_pairs.insert(candidate); + } return accept; } -bool SqpFilter::first_dominates_second(const FilterRec& first, const FilterRec& second) +bool SqpFilter::first_partially_dominates_second(const FilterRec& first, const FilterRec& second) { if (minimize) { @@ -62,22 +67,89 @@ bool SqpFilter::first_dominates_second(const FilterRec& first, const FilterRec& return false; } } + +bool SqpFilter::first_strictly_dominates_second(const FilterRec& first, const FilterRec& second) +{ + if (minimize) + { + if ((first.obj_val < second.obj_val) && (first.viol_val < second.viol_val)) + return true; + else + return false; + } + else + { + if ((first.obj_val > second.obj_val) && (first.viol_val < second.viol_val)) + return true; + else + return false; + } +} + +void SqpFilter::report(ofstream& frec, int iter) +{ + frec << "...SQP filter members (" << obj_viol_pairs.size() <<") for iteration " << iter << ":" << endl << " obj, violation" << endl; + double omin = 1.0e+300,omax = -1e+300,vmin = 1e+300,vmax = -1e+300; + for (auto& fr : obj_viol_pairs) + { + frec << setw(6) << setprecision(3) << fr.obj_val << "," << fr.viol_val << endl; + omin = min(fr.obj_val,omin); + omax = max(fr.obj_val,omax); + vmin = min(fr.viol_val,vmin); + vmax = max(fr.viol_val,vmax); + } + stringstream ss; + ss.str(""); + ss << endl << "... filter summary with " << obj_viol_pairs.size() << " pairs for iteration " << iter << ":" << endl; + ss << " obj min: " << setw(10) << omin << endl; + ss << " obj max: " << setw(10) << omax << endl; + ss << " violation min: " << setw(10) << vmin << endl; + ss << " violation max: " << setw(10) << vmax << endl; + ss << endl; + + frec << ss.str(); + cout << ss.str(); + +} + bool SqpFilter::update(double obj_val, double violation_val, int iter, double alpha) { - bool acc = accept(obj_val, violation_val,iter, alpha); - if (!acc) - return false; + //check if this candidate is nondom + //bool acc = accept(obj_val, violation_val,iter, alpha); + //if (!acc) + // return false; FilterRec candidate; candidate.obj_val = obj_val; candidate.viol_val = violation_val; candidate.iter = iter; candidate.alpha = alpha; - set updated{ candidate }; - for (auto& p : obj_viol_pairs) - { - if (first_dominates_second(p, candidate)) - updated.insert(p); - } + multiset updated; + obj_viol_pairs.insert(candidate); + bool i_is_dominated = false; + multiset::iterator first = obj_viol_pairs.begin(); + multiset::iterator second = obj_viol_pairs.begin(); + for (int i=0;i rnames = dv.get_real_names(); + vector rnames = _dv.get_real_names(); bool inpar = false; if (find(rnames.begin(), rnames.end(), BASE_REAL_NAME) != rnames.end()) { - message(1, "'base' realization already in parameter ensemble, ignoring '++ies_include_base'"); + message(1, "'base' realization already in parameter ensemble, ignoring 'include_base'"); inpar = true; } else { message(1, "adding 'base' parameter values to ensemble"); Parameters pars = pest_scenario.get_ctl_parameters(); - dv.get_par_transform().active_ctl2numeric_ip(pars); - vector drop{ dv.shape().first - 1 }; - dv.drop_rows(drop); - dv.append(BASE_REAL_NAME, pars); + pars.update_without_clear(dv_names,current_ctl_dv_values.get_data_vec(dv_names)); + _dv.get_par_transform().active_ctl2numeric_ip(pars); + vector drop{ _dv.shape().first - 1 }; + _dv.drop_rows(drop); + _dv.append(BASE_REAL_NAME, pars); } //check that 'base' isn't already in ensemble - rnames = oe.get_real_names(); + rnames = _oe.get_real_names(); if (find(rnames.begin(), rnames.end(), BASE_REAL_NAME) != rnames.end()) { - message(1, "'base' realization already in observation ensemble, ignoring '++ies_include_base'"); + message(1, "'base' realization already in observation ensemble, ignoring 'include_base'"); } else { Observations obs = pest_scenario.get_ctl_observations(); if (inpar) { - vector prnames = dv.get_real_names(); + vector prnames = _dv.get_real_names(); int idx = find(prnames.begin(), prnames.end(), BASE_REAL_NAME) - prnames.begin(); //cout << idx << "," << rnames.size() << endl; @@ -311,44 +385,22 @@ void SeqQuadProgram::add_bases() message(1, mess); vector drop; drop.push_back(oreal); - oe.drop_rows(drop); - oe.append(BASE_REAL_NAME, obs); + _oe.drop_rows(drop); + _oe.append(BASE_REAL_NAME, obs); //rnames.insert(rnames.begin() + idx, string(base_name)); rnames[idx] = BASE_REAL_NAME; - oe.reorder(rnames, vector()); + _oe.reorder(rnames, vector()); } else { message(1, "adding 'base' observation values to ensemble"); - vector drop{ oe.shape().first - 1 }; - oe.drop_rows(drop); - oe.append(BASE_REAL_NAME, obs); + vector drop{ _oe.shape().first - 1 }; + _oe.drop_rows(drop); + _oe.append(BASE_REAL_NAME, obs); } } } - -//template -//void SeqQuadProgram::message(int level, char* _message, vector _extras) -//{ -// string s(_message); -// message(level, s, _extras); -//} - -//void SeqQuadProgram::message(int level, char* _message) -//{ -// string s(_message); -// message(level, s); -//} - -//template -//void SeqQuadProgram::message(int level, char* _message, T extra) -//{ -// string s(_message); -// message(level, s, extra); -// -//} - template void SeqQuadProgram::message(int level, const string &_message, vector _extras, bool echo) { @@ -676,18 +728,6 @@ void SeqQuadProgram::initialize_parcov() } - -void SeqQuadProgram::initialize_obscov() -{ - message(1, "initializing observation noise covariance matrix"); - string obscov_filename = pest_scenario.get_pestpp_options().get_obscov_filename(); - - string how = obscov.try_from(pest_scenario, file_manager, false, true); - message(1, "obscov loaded ", how); - -} - - void SeqQuadProgram::initialize() { message(0, "initializing"); @@ -784,9 +824,10 @@ void SeqQuadProgram::initialize() message(2, "using all adjustable parameters as decision variables: ", act_par_names.size()); dv_names = act_par_names; } - initialize_objfunc(); + constraints.initialize(dv_names, numeric_limits::max()); constraints.initial_report(); + initialize_objfunc(); //some risk-based stuff here string chance_points = ppo->get_opt_chance_points(); if (chance_points == "ALL") @@ -854,7 +895,7 @@ void SeqQuadProgram::initialize() scale_vals = ppo->get_sqp_scale_facs(); - message(1, "using the following upgrade vector scale values (e.g. 'line search'):", scale_vals); + message(1, "using the following upgrade vector scale (e.g. 'line search') values:", scale_vals); //ofstream &frec = file_manager.rec_ofstream(); last_best = 1.0E+30; @@ -862,8 +903,8 @@ void SeqQuadProgram::initialize() warn_min_reals = 10; error_min_reals = 2; - vector scale_facs = pest_scenario.get_pestpp_options().get_lambda_scale_vec(); - message(1, "using scaling factors: ", scale_facs); + //vector scale_facs = pest_scenario.get_pestpp_options().get_lambda_scale_vec(); + //message(1, "using scaling factors: ", scale_facs); message(1, "max run fail: ", ppo->get_max_run_fail()); @@ -879,11 +920,9 @@ void SeqQuadProgram::initialize() echo = true; initialize_parcov(); - //I dont think SQP needs an obscov? - //initialize_obscov(); - //these will be the ones we track... + //this means the initial dv vals in the control file will be the "center" of the enopt ensemble current_ctl_dv_values = pest_scenario.get_ctl_parameters(); current_obs = pest_scenario.get_ctl_observations(); @@ -1072,9 +1111,11 @@ void SeqQuadProgram::make_gradient_runs(Parameters& _current_dv_vals, Observatio Parameters dv_par = _current_dv_vals.get_subset(dv_names.begin(),dv_names.end()); ofstream& frec = file_manager.rec_ofstream(); _dv.draw(pest_scenario.get_pestpp_options().get_sqp_num_reals(), dv_par, parcov, performance_log, 0, frec); + //todo: save _dv here in case something bad happens... ObservationEnsemble _oe(&pest_scenario, &rand_gen); _oe.reserve(_dv.get_real_names(), constraints.get_obs_constraint_names()); + add_current_as_bases(_dv, _oe); message(1, "running new dv ensemble"); run_ensemble(_dv, _oe); save(_dv, _oe); @@ -1177,18 +1218,14 @@ void SeqQuadProgram::prep_4_ensemble_grad() dv.transform_ip(ParameterEnsemble::transStatus::NUM); - //TODO: think about what adding the base would do for SQP? - /*if (pest_scenario.get_pestpp_options().get_ies_include_base()) - if (pp_args.find("SQP_RESTART_OBS_EN") != pp_args.end()) - { - message(1, "Warning: even though `sqp_include_base` is true, you passed a restart obs en, not adding 'base' realization..."); - } - else - add_bases();*/ - + //TODO: think about what adding the base would do for SQP + if (pp_args.find("SQP_RESTART_OBS_EN") != pp_args.end()) + { + message(1, "Warning: even though `sqp_include_base` is true, you passed a restart obs en, not adding 'base' realization..."); + } + else + add_current_as_bases(dv, oe); - //now we check to see if we need to try to align the par and obs en - //this would only be needed if either of these were not drawn if (!dv_drawn || !oe_drawn) { bool aligned = dv.try_align_other_rows(performance_log, oe); @@ -1217,12 +1254,10 @@ void SeqQuadProgram::prep_4_ensemble_grad() frec << ma << endl; } - - message(2, "checking for denormal values in dv"); dv.check_for_normal("initial transformed dv ensemble"); ss.str(""); - //TODO: setup an sqp save bin flag? or piggy back? + if (pest_scenario.get_pestpp_options().get_save_binary()) { ss << file_manager.get_base_filename() << ".0.par.jcb"; @@ -1238,19 +1273,7 @@ void SeqQuadProgram::prep_4_ensemble_grad() oe.check_for_normal("observation ensemble"); ss.str(""); - - /*if (pest_scenario.get_pestpp_options().get_ies_save_binary()) - { - ss << file_manager.get_base_filename() << ".obs.jcb"; - oe.to_binary(ss.str()); - } - else - { - ss << file_manager.get_base_filename() << ".obs.csv"; - oe.to_csv(ss.str()); - } - message(1, "saved initial observation ensemble to ", ss.str());*/ - message(1, "centering on ensemble mean vector"); + message(1, "centering on 'base' realization"); if (pest_scenario.get_control_info().noptmax == -2) { @@ -1275,16 +1298,7 @@ void SeqQuadProgram::prep_4_ensemble_grad() _oe.append("mean", pest_scenario.get_ctl_observations()); oe_base = _oe; oe_base.reorder(vector(), act_obs_names); - //initialize the phi handler - ph = L2PhiHandler(&pest_scenario, &file_manager, &oe_base, &dv_base, &parcov); - if (ph.get_lt_obs_names().size() > 0) - { - message(1, "less_than inequality defined for observations: ", ph.get_lt_obs_names().size()); - } - if (ph.get_gt_obs_names().size()) - { - message(1, "greater_than inequality defined for observations: ", ph.get_gt_obs_names().size()); - } + message(1, "running mean dv values"); vector failed_idxs = run_ensemble(_pe, _oe); @@ -1298,11 +1312,11 @@ void SeqQuadProgram::prep_4_ensemble_grad() _oe.to_csv(obs_csv); - //TODO: rather than report l2 phi, report actual obj func and feas status - ph.update(_oe, _pe); - message(0, "mean parameter phi report:"); - ph.report(true); - ph.write(0, 1); + Eigen::VectorXd o = _oe.get_real_vector("mean"); + current_obs = pest_scenario.get_ctl_observations(); + current_obs.update_without_clear(_oe.get_var_names(), o); + save_real_par_rei(pest_scenario, _pe, _oe, output_file_writer, file_manager, -1, "mean"); + constraints.sqp_report(0,current_ctl_dv_values, current_obs); return; } @@ -1354,48 +1368,6 @@ void SeqQuadProgram::prep_4_ensemble_grad() save_real_par_rei(pest_scenario, dv, oe, output_file_writer, file_manager, -1); - performance_log->log_event("calc pre-drop phi"); - //todo: I think we probably want to use the opt_objective_function arg - //to define the objective function. Support and obs or a pi. - //what about an external file like pestpp-opt with just coefficients? - - - //initialize the phi handler - //todo: can we also use the l2 norm of residuals to ident - //"bad" realizations? - - /*ph = L2PhiHandler(&pest_scenario, &file_manager, &oe_base, &dv_base, &parcov); - - if (ph.get_lt_obs_names().size() > 0) - { - message(1, "less_than inequality defined for observations: ", ph.get_lt_obs_names().size()); - } - if (ph.get_gt_obs_names().size()) - { - message(1, "greater_than inequality defined for observations: ", ph.get_gt_obs_names().size()); - } - - ph.update(oe, dv); - message(0, "pre-drop initial phi summary"); - ph.report(true); - drop_bad_phi(dv, oe); - if (oe.shape().first == 0) - { - throw_sqp_error(string("all realizations dropped as 'bad'")); - } - if (oe.shape().first <= error_min_reals) - { - message(0, "too few active realizations:", oe.shape().first); - message(1, "need at least ", error_min_reals); - throw_sqp_error(string("too few active realizations, cannot continue")); - } - if (oe.shape().first < warn_min_reals) - { - ss.str(""); - ss << "WARNING: less than " << warn_min_reals << " active realizations...might not be enough"; - string s = ss.str(); - message(0, s); - }*/ pcs = ParChangeSummarizer(&dv_base, &file_manager, &output_file_writer); @@ -1412,88 +1384,7 @@ void SeqQuadProgram::prep_4_ensemble_grad() } -void SeqQuadProgram::drop_bad_phi(ParameterEnsemble &_pe, ObservationEnsemble &_oe, bool is_subset) -{ - //don't use this assert because _pe maybe full size, but _oe might be subset size - if (!is_subset) - if (_pe.shape().first != _oe.shape().first) - throw_sqp_error("SeqQuadProgram::drop_bad_phi() error: _pe != _oe and not subset"); - - double bad_phi = pest_scenario.get_pestpp_options().get_ies_bad_phi(); - double bad_phi_sigma = pest_scenario.get_pestpp_options().get_ies_bad_phi_sigma(); - vector idxs = ph.get_idxs_greater_than(bad_phi,bad_phi_sigma, _oe); - - if (pest_scenario.get_pestpp_options().get_ies_debug_bad_phi()) - idxs.push_back(0); - - if (idxs.size() > 0) - { - - message(0, "dropping realizations as bad: ", idxs.size()); - - vector par_real_names = _pe.get_real_names(), obs_real_names = _oe.get_real_names(); - stringstream ss; - string pname; - string oname; - int pidx; - vector full_onames, full_pnames; - // if a subset drop, then use the full oe index, otherwise, just use _oe index - /*if (_oe.shape().first != _pe.shape().first) - { - full_onames = oe.get_real_names(); - } - else - { - full_onames = _oe.get_real_names(); - }*/ - full_onames = oe.get_real_names(); - full_pnames = dv.get_real_names(); - vector pdrop, odrop; - for (auto i : idxs) - { - oname = obs_real_names[i]; - - /*if (is_subset) - { - pidx = find(full_onames.begin(), full_onames.end(), oname) - full_onames.begin(); - if (find(subset_idxs.begin(), subset_idxs.end(), pidx) == subset_idxs.end()) - { - ss.str(""); - ss << "drop_bad_phi() error: idx " << pidx << " not found in subset_idxs"; - throw_sqp_error(ss.str()); - } - pname = full_pnames[pidx]; - }*/ - //else - { - pidx = i; - pname = par_real_names[pidx]; - } - ss << pname << " : " << obs_real_names[i] << " , "; - pdrop.push_back(pname); - odrop.push_back(obs_real_names[i]); - } - - string s = "dropping par:obs realizations: "+ ss.str(); - message(1, s); - try - { - _pe.drop_rows(pdrop); - _oe.drop_rows(odrop); - } - catch (const exception &e) - { - stringstream ss; - ss << "drop_bad_phi() error : " << e.what(); - throw_sqp_error(ss.str()); - } - catch (...) - { - throw_sqp_error(string("drop_bad_phi() error")); - } - } -} void SeqQuadProgram::save_mat(string prefix, Eigen::MatrixXd &mat) { @@ -1540,6 +1431,8 @@ void SeqQuadProgram::iterate_2_solution() ofstream &frec = file_manager.rec_ofstream(); bool accept; + n_consec_infeas = 0; + n_consec_phiinc = 0; for (int i = 0; i < pest_scenario.get_control_info().noptmax; i++) { iter++; @@ -1551,187 +1444,136 @@ void SeqQuadProgram::iterate_2_solution() //solve for and test candidates accept = solve_new(); - //todo: save and write out the current phi grad vector (maybe save all of them???) - ss.str(""); - ss << "best phi sequence:"; - int ii = 0; - for (auto phi : best_phis) - { - ss << phi << " "; - ii++; - if (ii % 7 == 0) - ss << endl; - } - message(0, ss.str()); + //save some stuff... + if (use_ensemble_grad) + report_and_save_ensemble(); + + constraints.sqp_report(iter, current_ctl_dv_values, current_obs, true); + + //report dec var change stats - only for ensemble form + if (use_ensemble_grad) + { + ss.str(""); + ss << file_manager.get_base_filename() << "." << iter << ".pcs.csv"; + pcs.summarize(dv, ss.str()); + } + + //store the grad vector used for this iteration + grad_vector_map[iter] = current_grad_vector; //check to break here before making more runs if (should_terminate()) break; - //update the underlying runs - make_gradient_runs(current_ctl_dv_values,current_obs); - - //save some stuff... - if (use_ensemble_grad) - report_and_save_ensemble(); - + if (n_consec_infeas > MAX_CONSEC_INFEAS) + { + ss.str(""); + ss << "number of consecutive infeasible iterations > " << MAX_CONSEC_INFEAS << ", switching to IES to seek feasibility"; + message(0,ss.str()); + seek_feasible(); + n_consec_infeas = 0; + } + + //update the underlying runs + make_gradient_runs(current_ctl_dv_values,current_obs); //update the hessian update_hessian_and_grad_vector(); - - //todo: report some obj function stats //todo: report constraint stats //a la constraints.mou_report(); - constraints.sqp_report(iter, current_ctl_dv_values, current_obs, true); - //report dec var change stats - only for ensemble form - if (use_ensemble_grad) - { - ss.str(""); - ss << file_manager.get_base_filename() << "." << iter << ".pcs.csv"; - pcs.summarize(dv, ss.str()); - } - - //store the grad vector used for this iteration - grad_vector_map[iter] = current_grad_vector; + + } } bool SeqQuadProgram::should_terminate() { + stringstream ss; + //todo: use ies accept fac here? + double phiredstp = pest_scenario.get_control_info().phiredstp; + int nphistp = pest_scenario.get_control_info().nphistp; + int nphinored = pest_scenario.get_control_info().nphinored; + bool phiredstp_sat = false, nphinored_sat = false, consec_sat = false; + double phi, ratio; + int count = 0; + int nphired = 0; + //best_mean_phis = vector{ 1.0,0.8,0.81,0.755,1.1,0.75,0.75,1.2 }; + + //todo: save and write out the current phi grad vector (maybe save all of them???) + ss.str(""); + ss << "best phi sequence:" << endl; + ss << " "; + int ii = 0; + for (auto phi : best_phis) + { + ss << setw(10) << setprecision(4) << phi << " "; + ii++; + if (ii % 6 == 0) { + ss << endl; + ss << " "; + } + + } + ss << endl; + message(0, ss.str()); + + /*if ((!consec_sat )&& (best_mean_phis.size() == 0)) + return false;*/ + message(0, "phi-based termination criteria check"); + message(1, "phiredstp: ", phiredstp); + message(1, "nphistp: ", nphistp); + message(1, "nphinored: ", nphinored); + message(1, "best phi yet: ", best_phi_yet); + + for (auto& phi : best_phis) + { + ratio = (phi - best_phi_yet) / phi; + if (ratio <= phiredstp) + count++; + } + message(1, "number of iterations satisfying phiredstp criteria: ", count); + if (count >= nphistp) + { + message(1, "number iterations satisfying phiredstp criteria > nphistp"); + phiredstp_sat = true; + } + + message(1, "number of iterations since best yet mean phi: ", nphired); + if (nphired >= nphinored) + { + message(1, "number of iterations since best yet mean phi > nphinored"); + nphinored_sat = true; + } + if (best_phis[best_phis.size() - 1] == 0.0) + { + message(1, "phi is zero, all done"); + return true; + } + + if ((nphinored_sat) || (phiredstp_sat) || (consec_sat)) + { + message(1, "phi-based termination criteria satisfied, all done"); + return true; + } int q = pest_utils::quit_file_found(); if ((q == 1) || (q == 2)) { - cout << "pest.stp' found, quitting" << endl; - file_manager.rec_ofstream() << "pest.stp' found, quitting" << endl; + message(1,"'pest.stp' found, quitting"); return true; } - if (iter >= pest_scenario.get_control_info().noptmax) - return true; - - //todo: work out some SQP-based criteria here... - //double phiredstp = pest_scenario.get_control_info().phiredstp; - //int nphistp = pest_scenario.get_control_info().nphistp; - //int nphinored = pest_scenario.get_control_info().nphinored; - //bool phiredstp_sat = false, nphinored_sat = false, consec_sat = false; - //double phi, ratio; - //int count = 0; - //int nphired = 0; - - //message(0, "phi-based termination criteria check"); - //message(1, "phiredstp: ", phiredstp); - //message(1, "nphistp: ", nphistp); - //message(1, "nphinored (also used for consecutive bad lambda cycles): ", nphinored); - //if (best_mean_phis.size() > 0) - //{ - // vector::iterator idx = min_element(best_mean_phis.begin(), best_mean_phis.end()); - // nphired = (best_mean_phis.end() - idx) - 1; - // best_phi_yet = best_mean_phis[idx - best_mean_phis.begin()];// *pest_scenario.get_pestpp_options().get_ies_accept_phi_fac(); - // message(1, "best mean phi sequence: ", best_mean_phis); - // message(1, "best phi yet: ", best_phi_yet); - //} - //message(1, "number of consecutive bad lambda testing cycles: ", consec_bad_lambda_cycles); - //if (consec_bad_lambda_cycles >= nphinored) - //{ - // message(1, "number of consecutive bad lambda testing cycles > nphinored"); - // consec_sat = true; - //} - - //for (auto &phi : best_mean_phis) - //{ - // ratio = (phi - best_phi_yet) / phi; - // if (ratio <= phiredstp) - // count++; - //} - //message(1, "number of iterations satisfying phiredstp criteria: ", count); - //if (count >= nphistp) - //{ - // message(1, "number iterations satisfying phiredstp criteria > nphistp"); - // phiredstp_sat = true; - //} - - //message(1, "number of iterations since best yet mean phi: ", nphired); - //if (nphired >= nphinored) - //{ - // message(1, "number of iterations since best yet mean phi > nphinored"); - // nphinored_sat = true; - //} - - //if ((nphinored_sat) || (phiredstp_sat) || (consec_sat)) - //{ - // message(1, "phi-based termination criteria satisfied, all done"); - // return true; - //} - return false; + else if (q == 4) + { + message(0,"pest.stp found with '4'. run mgr has returned control, removing file."); + if (!pest_utils::try_remove_quit_file()) + { + message(0,"error removing pest.stp file, bad times ahead..."); + } + } + return false; } - - - - - -void SeqQuadProgram::update_reals_by_phi(ParameterEnsemble &_pe, ObservationEnsemble &_oe) -{ - - vector oe_names = _oe.get_real_names(); - vector pe_names = _pe.get_real_names(); - vector oe_base_names = oe.get_real_names(); - vector pe_base_names = dv.get_real_names(); - - //if (pe_names.size() != oe_base_names.size()) - // throw runtime_error("SeqQuadProgram::update_reals_by_phi() error: pe_names != oe_base_names"); - map oe_name_to_idx; - map pe_idx_to_name; - - for (int i = 0; i < oe_base_names.size(); i++) - oe_name_to_idx[oe_base_names[i]] = i; - - for (int i = 0; i < pe_base_names.size(); i++) - pe_idx_to_name[i] = pe_base_names[i]; - //store map of current phi values - ph.update(oe, dv); - L2PhiHandler::phiType pt = L2PhiHandler::phiType::COMPOSITE; - map phi_map = ph.get_phi_map(pt); - map cur_phi_map; - for (auto p : phi_map) - cur_phi_map[p.first] = p.second; - - //now get a phi map of the new phi values - ph.update(_oe, _pe); - phi_map = ph.get_phi_map(pt); - - double acc_fac = pest_scenario.get_pestpp_options().get_ies_accept_phi_fac(); - double cur_phi, new_phi; - string oname, pname; - Eigen::VectorXd real; - stringstream ss; - for (int i=0;i<_oe.shape().first;i++) - { - oname = oe_names[i]; - new_phi = phi_map.at(oname); - cur_phi = cur_phi_map.at(oname); - if (new_phi < cur_phi * acc_fac) - { - //pname = pe_names[i]; - //pname = pe_names[oe_name_to_idx[oname]]; - pname = pe_idx_to_name[oe_name_to_idx[oname]]; - if (find(pe_names.begin(), pe_names.end(), pname) == pe_names.end()) - throw runtime_error("SeqQuadProgram::update_reals_by_phi() error: pname not in pe_names: " + pname); - ss.str(""); - ss << "updating dv:oe real =" << pname << ":" << oname << ", current phi: new phi =" << cur_phi << ":" << new_phi; - message(3, ss.str()); - - real = _pe.get_real_vector(pname); - dv.update_real_ip(pname, real); - real = _oe.get_real_vector(oname); - oe.update_real_ip(oname, real); - } - } - ph.update(oe, dv); - -} - Eigen::VectorXd SeqQuadProgram::calc_gradient_vector_from_coeffs(const Parameters& _current_dv_values) { Eigen::VectorXd grad(dv_names.size()); @@ -1774,6 +1616,7 @@ Parameters SeqQuadProgram::calc_gradient_vector(const Parameters& _current_dv_va { stringstream ss; Eigen::VectorXd grad(dv_names.size()); + //TODO: should this be optional? string center_on = pest_scenario.get_pestpp_options().get_ies_center_on(); //if don't already have or if already have and exit @@ -1791,9 +1634,6 @@ Parameters SeqQuadProgram::calc_gradient_vector(const Parameters& _current_dv_va //ensemble stuff here if (use_obj_obs) { - - //todo: need to make sqp specific or refactor ies arg names - // compute sample dec var cov matrix and its pseudo inverse // see eq (8) of Dehdari and Oliver 2012 SPE and Fonseca et al 2015 SPE // TODO: so can pseudo inverse: Covariance dv_cov_matrix; @@ -1820,8 +1660,9 @@ Parameters SeqQuadProgram::calc_gradient_vector(const Parameters& _current_dv_va Covariance shrunk_cov = dv.get_empirical_cov_matrices(&file_manager).second; shrunk_cov.inv_ip(); parcov_inv = shrunk_cov.e_ptr()->toDense(); + //cout << "parcov inv: " << endl << parcov_inv << endl; //TODO: Matt to check consistency being sample cov forms - message(1, "empirical parcov inv:", parcov_inv); // tmp + //message(1, "empirical parcov inv:", parcov_inv); // tmp // try pseudo_inv_ip() //Covariance x; @@ -1837,7 +1678,7 @@ Parameters SeqQuadProgram::calc_gradient_vector(const Parameters& _current_dv_va Eigen::MatrixXd obj_anoms = oe.get_eigen_anomalies(vector(), vector{obj_func_str}, center_on); Eigen::MatrixXd cross_cov_vector; // or Eigen::VectorXd? cross_cov_vector = 1.0 / (dv.shape().first - 1.0) * (dv_anoms.transpose() * obj_anoms); - cout << "dv-obj_cross_cov:" << cross_cov_vector << endl; + //cout << "dv-obj_cross_cov:" << endl << cross_cov_vector << endl; // now compute grad vector // this is a matrix-vector product; the matrix being the pseudo inv of diag empirical dec var cov matrix and the vector being the dec var-phi cross-cov vector\ @@ -1980,8 +1821,15 @@ pair SeqQuadProgram::_kkt_null_space(Eigen::Ma message(1, "coeff matrix for p_range_space component", coeff); // tmp rhs = (-1. * constraint_diff); message(1, "rhs", rhs); + cout << "starting SVD..." << endl; rsvd.solve_ip(coeff, s, U, V, pest_scenario.get_svd_info().eigthresh, pest_scenario.get_svd_info().maxsing); - coeff = V * s.inverse() * U.transpose(); + cout << "...done..." << endl; + S_ = s.asDiagonal().inverse(); + cout << "s:" << S_.rows() << "," << S_.cols() << ", U:" << U.rows() << "," << U.cols() << ", V:" << V.rows() << "," << V.cols() << endl; + cout << endl << endl; + + coeff = V * S_ * U.transpose(); + message(1, "coeff inv", coeff); p_y = coeff * rhs; message(1, "p_y", p_y); // tmp @@ -2057,7 +1905,8 @@ pair SeqQuadProgram::_kkt_null_space(Eigen::Ma // combine to make total direction message(1, "combining range and null space components of search direction"); // tmp search_d = Y * p_y + Z * p_z; - message(1, "SD", search_d); // tmp + //message(1, "SD", search_d); // tmp + if (search_d.size() != curved_grad.size()) { throw_sqp_error("search direction vector computation error (in null space KKT solve method)!"); @@ -2138,10 +1987,10 @@ pair SeqQuadProgram::calc_search_direction_vec pair x; - message(1, "hessian:", hessian); // tmp + //message(1, "hessian:", hessian); // tmp Mat constraint_mat; if (use_ensemble_grad) { - message(2, "getting ensemble-based working set constraint matrix"); + message(1, "getting ensemble-based working set constraint matrix"); constraint_mat = constraints.get_working_set_constraint_matrix(current_ctl_dv_values, current_obs, dv, oe,true); } else @@ -2170,8 +2019,6 @@ pair SeqQuadProgram::calc_search_direction_vec message(0, "current working set:", cnames); Eigen::MatrixXd constraint_jco = constraint_mat.e_ptr()->toDense(); // or would you pref to slice and dice each time - this won't get too big but want to avoid replicates // and/or make Jacobian obj? - - //constraint_jco = jco.get_matrix(cnames, dv_names); message(1, "A:", constraint_jco); // tmp // add check here that A is full rank; warn that linearly dependent will be removed via factorization @@ -2187,9 +2034,11 @@ pair SeqQuadProgram::calc_search_direction_vec message(1, "constraint diff:", constraint_diff); // tmp // throw error here if not all on/near constraint - if ((constraint_diff.array() != 0.0).any()) // todo make some level of forgiveness with a tolerance parameter here + if ((constraint_diff.array().abs() > filter.get_viol_tol()).any()) // todo make some level of forgiveness with a tolerance parameter here { - throw_sqp_error("not on constraint"); // better to pick this up elsewhere (before) anyway + //throw_sqp_error("not on constraint"); // better to pick this up elsewhere (before) anyway + cout << "constraint diff vector: " << constraint_diff.array() << endl; + message(0,"WARNING: not on constraint but working set not empty, continuing..."); } // some transforms for solve @@ -2283,7 +2132,7 @@ pair SeqQuadProgram::calc_search_direction_vec string to_drop = cnames[idx]; cnames.erase(cnames.begin() + idx); - message(1, "cnames after dropping atttempting:", cnames); // tmp + message(1, "cnames after dropping attempting:", cnames); // tmp // todo JDub now we need to skip alpha testing for this iter and recalc search_d without this constraint... i moved to next iter in proto because it was cheap, probably want to go back to start of search_d computation? // jwhite: will this process happen multiple times? If so, we probably need to wrap this process in a "while true" loop @@ -2445,7 +2294,6 @@ bool SeqQuadProgram::solve_new() //Eigen::VectorXd vec = dv_num_candidate.get_data_eigen_vec(dv_names); Eigen::VectorXd vec = num_candidate.get_data_eigen_vec(dv_names); dv_candidates.update_real_ip(real_names[i], vec); - ss.str(""); message(1, "finished calcs for scaling factor:", scale_val); @@ -2529,20 +2377,19 @@ bool SeqQuadProgram::solve_new() throw_sqp_error("ies_debug_upgrade_only is true, exiting"); } - //TODO: add sqp option to save candidates - - //enforce bounds on candidates - TODO: report the shrinkage summary that enforce_bounds returns dv_candidates.enforce_bounds(performance_log,false); ss.str(""); - ss << file_manager.get_base_filename() << "." << iter << ".par.csv"; + ss << file_manager.get_base_filename() << "." << iter << ".dv_candidates.csv"; dv_candidates.to_csv(ss.str()); message(0, "running candidate decision variable batch"); vector passed_scale_vals = scale_vals; //passed_scale_vals will get amended in this function based on run fails... ObservationEnsemble oe_candidates = run_candidate_ensemble(dv_candidates, passed_scale_vals); - + ss.str(""); + ss << file_manager.get_base_filename() << "." << iter << ".oe_candidates.csv"; + oe_candidates.to_csv(ss.str()); //todo: decide which if any dv candidate to accept... bool success = pick_candidate_and_update_current(dv_candidates, oe_candidates, passed_scale_vals); if (!success) @@ -2562,8 +2409,6 @@ bool SeqQuadProgram::seek_feasible() { stringstream ss; message(1, "seeking feasibility with iterative ensemble smoother solution"); - - Pest ies_pest_scenario; string pst_filename = pest_scenario.get_pst_filename(); ifstream fin(pest_scenario.get_pst_filename()); @@ -2574,7 +2419,7 @@ bool SeqQuadProgram::seek_feasible() ParamTransformSeq pts = ies_pest_scenario.get_base_par_tran_seq_4_mod(); TranFixed* tf_ptr = pts.get_fixed_ptr_4_mod(); - Parameters ctl_pars = ies_pest_scenario.get_ctl_parameters_4_mod(); + Parameters& ctl_pars = ies_pest_scenario.get_ctl_parameters_4_mod(); for (auto& name : ies_pest_scenario.get_ctl_ordered_par_names()) { @@ -2607,45 +2452,78 @@ bool SeqQuadProgram::seek_feasible() { shifted = constraints.get_chance_shifted_constraints(current_obs); } - Observations ctl_obs = ies_pest_scenario.get_ctl_observations_4_mod(); + Observations& ctl_obs = ies_pest_scenario.get_ctl_observations_4_mod(); for (auto& name : ies_pest_scenario.get_ctl_ordered_obs_names()) { if (snames.find(name) == send) { oi->get_observation_rec_ptr_4_mod(name)->weight = 0.0; + } else { ctl_obs.update_rec(name, shifted.get_rec(name)); - //oi->get_observation_rec_ptr_4_mod(name)->group = "__nz_temp__"; + oi->get_observation_rec_ptr_4_mod(name)->group = "__eqconstraint__"+name; } } snames = ies_pest_scenario.get_pestpp_options().get_passed_args(); - if (snames.find("IES_NUM_REALS") == snames.end()) - ies_pest_scenario.get_pestpp_options_ptr()->set_ies_num_reals(10); - IterEnsembleSmoother ies(ies_pest_scenario, file_manager, output_file_writer, performance_log, run_mgr_ptr); - ies.initialize(); + if (snames.find("IES_BAD_PHI_SIGMA") == snames.end()) + { + ies_pest_scenario.get_pestpp_options_ptr()->set_ies_bad_phi_sigma(1.25); + } + ies_pest_scenario.get_pestpp_options_ptr()->set_ies_num_reals(pest_scenario.get_pestpp_options().get_sqp_num_reals()); + ies_pest_scenario.get_pestpp_options_ptr()->set_ies_no_noise(true); + ies_pest_scenario.get_pestpp_options_ptr()->set_ies_obs_csv(""); + ies_pest_scenario.get_pestpp_options_ptr()->set_ies_obs_restart_csv(""); + ies_pest_scenario.get_pestpp_options_ptr()->set_ies_par_csv(""); + ies_pest_scenario.get_pestpp_options_ptr()->set_ies_par_restart_csv(""); + ies_pest_scenario.get_control_info_4_mod().noptmax = 3; //TODO: make this an option some how? + + IterEnsembleSmoother ies(ies_pest_scenario, file_manager, output_file_writer, performance_log, run_mgr_ptr); + if (use_ensemble_grad) { + ies.set_pe(dv); + ies.set_oe(oe); + ies.set_noise_oe(oe_base); + ies.initialize(iter,true,true); + } + else{ + ies.initialize(); + } + + + ies.iterate_2_solution(); //what to do here? maybe we need to eval the kkt conditions to pick a new point that maintains the hessian? ParameterEnsemble* ies_pe_ptr = ies.get_pe_ptr(); + ObservationEnsemble* ies_oe_ptr = ies.get_oe_ptr(); + vector oreal_names = ies_oe_ptr->get_real_names(); + map aphi_map = ies.get_phi_handler().get_phi_map(L2PhiHandler::phiType::ACTUAL); + ies_pe_ptr->transform_ip(ParameterEnsemble::transStatus::CTL); names = ies_pe_ptr->get_var_names(); + Eigen::VectorXd cdv = current_ctl_dv_values.get_data_eigen_vec(dv_names); double mndiff = 1.0e+300; int mndiff_idx = -1; for (int i = 0; i < ies_pe_ptr->shape().first; i++) { - Eigen::VectorXd real = ies_pe_ptr->get_eigen_ptr()->row(i); - Eigen::VectorXd d = real - cdv; - double diff = (d.array() * d.array()).sum(); - if (diff < mndiff) + //cout << "real:" << oreal_names[i] << ", phi: " << aphi_map[oreal_names[i]] << endl; + //Eigen::VectorXd real = ies_pe_ptr->get_eigen_ptr()->row(i); + //Eigen::VectorXd d = real - cdv; + //double diff = (d.array() * d.array()).sum(); + //if (diff < mndiff) + if (aphi_map[oreal_names[i]] < mndiff) { - mndiff = diff; + mndiff = aphi_map[oreal_names[i]]; mndiff_idx = i; } } + ss.str(""); + ss << "updating current decision variable values with realization " << ies_pe_ptr->get_real_names()[mndiff_idx]; + ss << ", with minimum weighted constraint phi of " << mndiff; + message(1,ss.str()); cdv = ies_pe_ptr->get_real_vector(mndiff_idx); current_ctl_dv_values.update_without_clear(names, cdv); //update current obs @@ -2655,7 +2533,7 @@ bool SeqQuadProgram::seek_feasible() constraints.sqp_report(iter, current_ctl_dv_values, current_obs, true, "post feasible seek"); //todo: probably more algorithmic things here... last_best = get_obj_value(current_ctl_dv_values, current_obs); - message(1, "reset best phi value to ", last_best); + message(1, "finished seeking feasible, reset best phi value to ", last_best); return false; } @@ -2745,7 +2623,7 @@ bool SeqQuadProgram::pick_candidate_and_update_current(ParameterEnsemble& dv_can vector real_names = dv_candidates.get_real_names(); map obj_map = get_obj_map(dv_candidates, _oe); //todo make sure chances have been applied before now... - map> violations = constraints.get_ensemble_violations_map(dv_candidates,_oe); + map> violations = constraints.get_ensemble_violations_map(dv_candidates,_oe,filter.get_viol_tol(),true); Parameters cand_dv_values = current_ctl_dv_values; Observations cand_obs_values = current_obs; Eigen::VectorXd t; @@ -2753,6 +2631,10 @@ bool SeqQuadProgram::pick_candidate_and_update_current(ParameterEnsemble& dv_can ParamTransformSeq pts = pest_scenario.get_base_par_tran_seq(); bool filter_accept; string tag; + vector infeas_vec; + vector accept_idxs; + bool accept; + for (int i = 0; i < obj_vec.size(); i++) { ss.str(""); @@ -2766,28 +2648,22 @@ bool SeqQuadProgram::pick_candidate_and_update_current(ParameterEnsemble& dv_can infeas_sum += v.second; } ss << " infeasibilty total: " << infeas_sum << ", "; - - //not sure how to deal with filter here - liu and reynolds have a scheme about it... - //but maybe we want to just add all of these candidates to the filter? - //there is also a test method with the filter that doesnt add - //to the records... - filter_accept = filter.update(obj_vec[i], infeas_sum,iter,alpha_vals[i]); + infeas_vec.push_back(infeas_sum); + filter_accept = filter.accept(obj_vec[i], infeas_sum,iter,alpha_vals[i],true); if (filter_accept) ss << " filter accepted "; else ss << " filter rejected "; message(1, ss.str()); - if ((obj_sense=="minimize") && (obj_vec[i] < oext)) - { - idx = i; - oext = obj_vec[i]; - } - else if ((obj_sense == "maximize") && (obj_vec[i] > oext)) - { - idx = i; - oext = obj_vec[i]; - } - + if (filter_accept) + { + accept_idxs.push_back(i); +// idx = i; +// oext = obj_vec[i]; +// // now update the filter recs to remove any dominated pairs +// filter.update(obj_vec[i], infeas_sum, iter, alpha_vals[i]); +// accept = true; + } //report the constraint info for this candidate t = dv_candidates.get_real_vector(real_names[i]); cand_dv_values = current_ctl_dv_values; @@ -2796,14 +2672,86 @@ bool SeqQuadProgram::pick_candidate_and_update_current(ParameterEnsemble& dv_can t = _oe.get_real_vector(real_names[i]); cand_obs_values.update_without_clear(onames, t); - constraints.sqp_report(iter, cand_dv_values, cand_obs_values, true,tag); + constraints.sqp_report(iter, cand_dv_values, cand_obs_values, false,tag); } - message(0, "best phi this iteration: ", oext); + + + + + if (accept_idxs.size() > 0) + { + accept = true; + //since all of the these passed the filter, choose the one with lowest phi + double min_obj = 1.0e+300; + ss.str(""); + ss << "number of scale factors passing filter:" << accept_idxs.size(); + message(1,ss.str()); + for (auto iidx : accept_idxs) + { + if (obj_vec[iidx] < min_obj) + { + min_obj = obj_vec[iidx]; + idx = iidx; + oext = obj_vec[iidx]; + } + } + filter.update(min_obj,infeas_vec[idx],iter,alpha_vals[idx]); + } + else + { + message(0,"filter failed, checking for feasible solutions...."); + double viol_tol = filter.get_viol_tol(); + for (int i=0;i oext)) + { + idx = i; + oext = obj_vec[i]; + } + } + } + if (idx == -1) + { + message(0,"no feasible solutions, choosing lowest constraint violation..."); + //now what? + //how about least violation - probably going to hand off to ies now anyway... + double viol_min = 1e+300; + for (int i=0;i last_best))) + throw_sqp_error("shits busted"); + message(0, "best phi this iteration: ", oext); + t = dv_candidates.get_real_vector(real_names[idx]); + cand_dv_values = current_ctl_dv_values; + cand_dv_values.update_without_clear(dv_names, t); + pts.numeric2ctl_ip(cand_dv_values); + t = _oe.get_real_vector(real_names[idx]); + cand_obs_values.update_without_clear(onames, t); + ss.str(""); + ss << "best candidate (scale factor: " << setprecision(4) << scale_vals[idx] << ", phi: " << oext << ")"; + constraints.sqp_report(iter, cand_dv_values, cand_obs_values, true,ss.str()); + filter.report(file_manager.rec_ofstream(),iter); + //TODO: need more thinking here - do we accept only if filter accepts? I think so.... + //if (((obj_sense == "minimize") && (oext < last_best)) || ((obj_sense == "maximize") && (oext > last_best))) + if (accept) { //todo:update current_dv and current_obs message(0, "accepting upgrade", real_names[idx]); @@ -2819,13 +2767,17 @@ bool SeqQuadProgram::pick_candidate_and_update_current(ParameterEnsemble& dv_can current_obs.update_without_clear(onames, t); last_best = oext; message(0, "new best phi:", last_best); - + best_phis.push_back(oext); // todo add constraint (largest violating constraint not already in working set) to working set // is this the right place to do this? after accepting a particular candidate? // also can we adapt alpha_mult based on subset? using concept of blocking constraint here? // take diff between vector of strings of constraints in working set and constraints with non-zero violation (return constraint idx from filter?) + if (infeas_vec[idx] > filter.get_viol_tol()) + { + n_consec_infeas++; + } return true; @@ -2833,12 +2785,14 @@ bool SeqQuadProgram::pick_candidate_and_update_current(ParameterEnsemble& dv_can else { message(0, "not accepting upgrade #sad"); + best_phis.push_back(last_best); + if (infeas_vec[idx] > filter.get_viol_tol()) + { + n_consec_infeas++; + } + //TODO: something here to adapt to the failed iteration, otherwise I think we will continue failing... return false; } - - - - //return true; } void SeqQuadProgram::report_and_save_ensemble() diff --git a/src/libs/pestpp_common/SQP.h b/src/libs/pestpp_common/SQP.h index 7f94ed44f..27d28cfa7 100644 --- a/src/libs/pestpp_common/SQP.h +++ b/src/libs/pestpp_common/SQP.h @@ -50,17 +50,20 @@ class SqpFilter SqpFilter(bool _minimize=true,double _obj_tol = 0.01, double _viol_tol = 0.01) { minimize = _minimize; obj_tol = _obj_tol; viol_tol = _viol_tol; } - bool accept(double obj_val, double violation_val,int iter=0,double alpha=-1.0); + bool accept(double obj_val, double violation_val,int iter=0,double alpha=-1.0, bool keep=false); bool update(double obj_val, double violation_val, int iter=0,double alpha=-1.0); + void report(ofstream& frec,int iter); + double get_viol_tol() {return viol_tol;} private: bool minimize; double obj_tol; double viol_tol; - set obj_viol_pairs; + multiset obj_viol_pairs; - bool first_dominates_second(const FilterRec& first, const FilterRec& second); + bool first_partially_dominates_second(const FilterRec& first, const FilterRec& second); + bool first_strictly_dominates_second(const FilterRec& first, const FilterRec& second); }; @@ -86,7 +89,7 @@ class SeqQuadProgram OutputFileWriter &output_file_writer; PerformanceLog *performance_log; RunManagerAbstract* run_mgr_ptr; - L2PhiHandler ph; + //L2PhiHandler ph; ParChangeSummarizer pcs; Covariance parcov, obscov; double reg_factor; @@ -99,6 +102,11 @@ class SeqQuadProgram map obj_func_coef_map; int num_threads; + int n_consec_infeas; + int MAX_CONSEC_INFEAS = 3; + int MAX_CONSEC_PHIINC = 3; + + int n_consec_phiinc; double eigthresh; vector scale_vals; @@ -184,13 +192,9 @@ class SeqQuadProgram void save(ParameterEnsemble& _dv, ObservationEnsemble& _oe, bool save_base=true); void save_mat(string prefix, Eigen::MatrixXd &mat); bool initialize_dv(Covariance &cov); - //bool initialize_oe(Covariance &cov); bool initialize_restart(); void initialize_parcov(); - void initialize_obscov(); void initialize_objfunc(); - void drop_bad_phi(ParameterEnsemble &_pe, ObservationEnsemble &_oe, bool is_subset=false); - void queue_chance_runs(); template @@ -202,12 +206,8 @@ class SeqQuadProgram void sanity_checks(); - void add_bases(); + void add_current_as_bases(ParameterEnsemble& _dv, ObservationEnsemble& _oe); - void update_reals_by_phi(ParameterEnsemble &_pe, ObservationEnsemble &_oe); - - void set_subset_idx(int size); - }; #endif diff --git a/src/libs/pestpp_common/SVDSolver.cpp b/src/libs/pestpp_common/SVDSolver.cpp index 88ae66d01..f5cd44c67 100644 --- a/src/libs/pestpp_common/SVDSolver.cpp +++ b/src/libs/pestpp_common/SVDSolver.cpp @@ -1762,6 +1762,7 @@ PhiComponets SVDSolver::phi_estimate(const ModelRun &base_run, const Jacobian &j Parameters new_ctl_pars = par_transform.active_ctl2ctl_cp(new_pars); Parameters delta_par = par_transform.active_ctl2numeric_cp(new_pars) - par_transform.active_ctl2numeric_cp(base_run_active_ctl_par); + delta_par.erase(freeze_active_ctl_pars); vector numeric_par_names = delta_par.get_keys(); VectorXd delta_par_vec = transformable_2_eigen_vec(delta_par, numeric_par_names); Eigen::SparseMatrix jac = jacobian.get_matrix(obs_names_vec, numeric_par_names); diff --git a/src/libs/pestpp_common/constraints.cpp b/src/libs/pestpp_common/constraints.cpp index 841113132..8312167aa 100644 --- a/src/libs/pestpp_common/constraints.cpp +++ b/src/libs/pestpp_common/constraints.cpp @@ -2815,7 +2815,7 @@ map Constraints::get_unsatified_pi_constraints(Parameters& par_a return unsatisfied; } -map> Constraints::get_ensemble_violations_map(ParameterEnsemble& pe, ObservationEnsemble& oe) +map> Constraints::get_ensemble_violations_map(ParameterEnsemble& pe, ObservationEnsemble& oe, double tol, bool include_weight) { //make sure pe and oe share realizations vector pe_names = pe.get_real_names(); @@ -2835,7 +2835,7 @@ map> Constraints::get_ensemble_violations_map(Parame { v = oe.get_real_vector(name); obs.update_without_clear(vnames, v); - vmap = get_unsatified_obs_constraints(obs); + vmap = get_unsatified_obs_constraints(obs,tol,true,include_weight); violations[name] = vmap; } @@ -2855,7 +2855,7 @@ map> Constraints::get_ensemble_violations_map(Parame } -map Constraints::get_unsatified_obs_constraints(Observations& constraints_sim, double tol, bool do_shift) +map Constraints::get_unsatified_obs_constraints(Observations& constraints_sim, double tol, bool do_shift, bool include_weight) { /* get a map of name, distance for each of the obs-based (e.g. model-based) constraints that are not satisfied in the constraint_obs container. tol is a percent-based tolerance to accont for constraints that are very near their required (rhs) value @@ -2866,6 +2866,8 @@ map Constraints::get_unsatified_obs_constraints(Observations& co Observations _constraints_sim(constraints_sim); if ((do_shift) && (use_chance)) _constraints_sim = get_chance_shifted_constraints(constraints_sim); + ObservationInfo oi = pest_scenario.get_ctl_observation_info(); + double weight; for (int i = 0; i < num_obs_constraints(); ++i) { string name = ctl_ord_obs_constraint_names[i]; @@ -2880,14 +2882,16 @@ map Constraints::get_unsatified_obs_constraints(Observations& co scaled_diff = abs((obs_val - sim_val) / obs_val); else scaled_diff = abs(obs_val - sim_val); - + weight = 1.0; + if (include_weight) + weight = oi.get_weight(name); //check for invalid obs constraints (e.g. satified) if ((constraint_sense_map[name] == ConstraintSense::less_than) && (sim_val > obs_val) && (scaled_diff > tol)) - unsatisfied[name] = sim_val - obs_val; + unsatisfied[name] = weight * (sim_val - obs_val); else if ((constraint_sense_map[name] == ConstraintSense::greater_than) && (sim_val < obs_val) && (scaled_diff > tol)) - unsatisfied[name] = obs_val - sim_val; + unsatisfied[name] = weight * (obs_val - sim_val); else if ((constraint_sense_map[name] == ConstraintSense::equal_to) && (sim_val != obs_val) && (scaled_diff > tol)) - unsatisfied[name] = abs(sim_val - obs_val); + unsatisfied[name] = weight * abs(sim_val - obs_val); } return unsatisfied; diff --git a/src/libs/pestpp_common/constraints.h b/src/libs/pestpp_common/constraints.h index a9f942574..baa222c9c 100644 --- a/src/libs/pestpp_common/constraints.h +++ b/src/libs/pestpp_common/constraints.h @@ -108,13 +108,13 @@ class Constraints //get maps of obs and pi constraints that are not satified - the value is the distance to cosntraint RHS map get_unsatified_pi_constraints(Parameters& par_and_dec_vars, double tol=0.0); - map get_unsatified_obs_constraints(Observations& constraints_sim, double tol=0.0, bool do_shift = true); + map get_unsatified_obs_constraints(Observations& constraints_sim, double tol=0.0, bool do_shift = true, bool include_weight = false); map get_constraint_map(Parameters& par_and_dec_vars, Observations& constraints_sim, bool do_shift); Mat get_working_set_constraint_matrix(Parameters& par_and_dec_vars, Observations& constraints_sim, const Jacobian_1to1& jco, bool do_shift, double working_set_tol = 0.1); Mat get_working_set_constraint_matrix(Parameters& par_and_dec_vars, Observations& constraints_sim, ParameterEnsemble& dv, ObservationEnsemble& oe, bool do_shift, double working_set_tol = 0.1); - map> get_ensemble_violations_map(ParameterEnsemble& pe, ObservationEnsemble& oe); + map> get_ensemble_violations_map(ParameterEnsemble& pe, ObservationEnsemble& oe, double tol=0.0,bool include_weight=true); //get the number of non-zero Prior info constraint elements int get_num_nz_pi_constraint_elements(); diff --git a/src/libs/pestpp_common/pest_data_structs.cpp b/src/libs/pestpp_common/pest_data_structs.cpp index 2f880f53c..dec0cb5d8 100644 --- a/src/libs/pestpp_common/pest_data_structs.cpp +++ b/src/libs/pestpp_common/pest_data_structs.cpp @@ -856,10 +856,13 @@ bool PestppOptions::assign_ies_value_by_key(const string& key, const string& val return true; } - else if ((key == "IES_WEIGHTS_ENSEMBLE") || (key == "IES_WEIGHTS_EN")) + else if ((key == "IES_WEIGHTS_ENSEMBLE") || (key == "IES_WEIGHTS_EN") || + (key == "IES_WEIGHT_ENSEMBLE") || (key == "IES_WEIGHT_EN")) { passed_args.insert("IES_WEIGHTS_ENSEMBLE"); passed_args.insert("IES_WEIGHTS_EN"); + passed_args.insert("IES_WEIGHT_ENSEMBLE"); + passed_args.insert("IES_WEIGHT_EN"); //convert_ip(value, ies_obs_restart_csv); ies_weights_csv = org_value; return true; @@ -1480,6 +1483,7 @@ bool PestppOptions::assign_value_by_key_sqp(const string& key, const string& val convert_ip(t, v); sqp_scale_facs.push_back(v); } + return true; } return false; @@ -1736,7 +1740,7 @@ void PestppOptions::summary(ostream& os) const os << "gsa_morris_r: " << gsa_morris_r << endl; os << "gsa_morris_delta: " << gsa_morris_delta << endl; os << "gsa_sobol_samples: " << gsa_sobol_samples << endl; - os << "gsa_sobol_par_dist: " << gsa_sobol_par_dist << endl; + os << "gsa_sobol_par_dist: " << gsa_sobol_par_dist << endl << endl; os << "pestpp-da options (those not shared with pestpp-ies):" << endl; os << "da_parameter_cycle_table: " << da_par_cycle_table << endl; @@ -1826,7 +1830,7 @@ void PestppOptions::set_defaults() set_sqp_obs_restart_en(""); set_sqp_num_reals(-1); set_sqp_update_hessian(false); - set_sqp_scale_facs(vector{0.00001, 0.0001, 0.01, 0.1, 1.0}); + set_sqp_scale_facs(vector{0.00001, 0.0001,0.001, 0.005, 0.01, 0.05, 0.075, 0.1, 0.25,0.5, 1.0}); set_mou_generator("DE"); set_mou_population_size(100); @@ -1860,7 +1864,7 @@ void PestppOptions::set_defaults() set_ies_use_approx(true); set_ies_subset_size(4); set_ies_reg_factor(0.0); - set_ies_verbose_level(0); + set_ies_verbose_level(1); set_ies_use_prior_scaling(false); set_ies_num_reals(50); set_ies_bad_phi(1.0e+300);