Implementing multistart version of theta_est using multiple sampling methods #3575
@@ -0,0 +1,53 @@
# ___________________________________________________________________________
#
# Pyomo: Python Optimization Modeling Objects
# Copyright (c) 2008-2025
# National Technology and Engineering Solutions of Sandia, LLC
# Under the terms of Contract DE-NA0003525 with National Technology and
# Engineering Solutions of Sandia, LLC, the U.S. Government retains certain
# rights in this software.
# This software is distributed under the 3-clause BSD License.
# ___________________________________________________________________________

from pyomo.common.dependencies import pandas as pd
from os.path import join, abspath, dirname
import pyomo.contrib.parmest.parmest as parmest
from pyomo.contrib.parmest.examples.reactor_design.reactor_design import (
    ReactorDesignExperiment,
)


def main():

    # Read in data
    file_dirname = dirname(abspath(str(__file__)))
    file_name = abspath(join(file_dirname, "reactor_data.csv"))
    data = pd.read_csv(file_name)

    # Create an experiment list
    exp_list = []
    for i in range(data.shape[0]):
        exp_list.append(ReactorDesignExperiment(data, i))

    # View one model
    # exp0_model = exp_list[0].get_labeled_model()
    # exp0_model.pprint()

    pest = parmest.Estimator(exp_list, obj_function='SSE')

    # Parameter estimation
    obj, theta = pest.theta_est()
    # Parameter estimation with multistart to avoid local minima
    # (the draft implementation returns a DataFrame with one row per restart)
    multistart_results = pest.theta_est_multistart(
        n_restarts=10,
        multistart_sampling_method='uniform',
        seed=42,
    )

if __name__ == "__main__":
    main()
@@ -235,6 +235,9 @@ def SSE(model):
    return expr
'''Adding pseudocode for draft implementation of the estimator class,
incorporating multistart.
'''


class Estimator(object):
    """
    Parameter estimation class
@@ -275,6 +278,11 @@ def __init__(
        solver_options=None,
    ):

        '''The first theta would be provided by the user in the initialization of
        the Estimator class through the unknown parameter variables. Additional
        values would need to be generated using the sampling method provided by the user.
        '''

        # check that we have a (non-empty) list of experiments
        assert isinstance(experiment_list, list)
        self.exp_list = experiment_list
@@ -447,6 +455,145 @@ def TotalCost_rule(model):
        parmest_model = utils.convert_params_to_vars(model, theta_names, fix_vars=False)

        return parmest_model
    # New private method, _generate_initial_theta:
    # This method will be used to generate the initial theta values for multistart
    # optimization. It takes the theta names and the initial theta values and
    # returns a DataFrame of initial theta values for each restart.
    def _generate_initial_theta(
        self,
        parmest_model=None,
        seed=None,
        n_restarts=None,
        multistart_sampling_method=None,
        user_provided=None,
    ):
        """
        Generate initial theta values for multistart optimization using the selected sampling method.
        """
        if n_restarts == 1:
            # If only one restart, multistart is not needed
            return print("No multistart optimization needed. Please use normal theta_est()")

        # Get the theta names and initial theta values
        theta_names = self._return_theta_names()
        initial_theta = [parmest_model.find_component(name)() for name in theta_names]

        # Get the lower and upper bounds for the theta values
        lower_bound = np.array([parmest_model.find_component(name).lb for name in theta_names])
        upper_bound = np.array([parmest_model.find_component(name).ub for name in theta_names])
        # Check that the lower and upper bounds are defined; if any bound is
        # missing, the sampling methods below cannot scale the samples
        if any(bound is None for bound in lower_bound) or any(bound is None for bound in upper_bound):
            raise ValueError(
                "The lower and upper bounds for the theta values must be defined."
            )

Member: You probably already know this, but you will need to check all the errors are raised when expected.
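One way to exercise that check (a sketch only: the fixture name and test placement are assumptions, not part of this PR):

    import pytest

    def test_missing_theta_bounds_raises(estimator_with_unbounded_theta):
        # estimator_with_unbounded_theta is an assumed fixture that builds an
        # Estimator whose unknown parameter variables have no lb/ub set
        with pytest.raises(ValueError, match="lower and upper bounds"):
            estimator_with_unbounded_theta.theta_est_multistart(
                n_restarts=5, multistart_sampling_method="uniform"
            )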
        # Check the length of theta_names and initial_theta
        if len(theta_names) != len(initial_theta):
            raise ValueError(
                "The length of theta_names and initial_theta must be the same."
            )
        if multistart_sampling_method == "uniform":
            # Generate random theta values using a uniform distribution, with a set seed for reproducibility
            np.random.seed(seed)
            # Generate random theta values for each restart (n_restarts x len(theta_names))
            theta_vals_multistart = np.random.uniform(
                low=lower_bound, high=upper_bound, size=(n_restarts, len(theta_names))
            )

        elif multistart_sampling_method == "latin_hypercube":
            # Generate theta values using Latin hypercube sampling
            # Create a Latin hypercube sampler with the dimension of theta_names
            sampler = scipy.stats.qmc.LatinHypercube(d=len(theta_names), seed=seed)
            # Generate random samples in the range [0, 1] for the number of restarts
            samples = sampler.random(n=n_restarts)
            # Resulting samples have shape (n_restarts, len(theta_names))

        elif multistart_sampling_method == "sobol":
            # Generate theta values using Sobol sampling
            sampler = scipy.stats.qmc.Sobol(d=len(theta_names), seed=seed)
            # The first value of the Sobol sequence is 0, so we skip it
            samples = sampler.random(n=n_restarts + 1)[1:]
        elif multistart_sampling_method == "user_provided":
            # User provided numpy array or DataFrame option
            if user_provided is not None:

                if isinstance(user_provided, np.ndarray):
                    # Check if the user provided numpy array has the same number of rows as the number of restarts
                    if user_provided.shape[0] != n_restarts:
                        raise ValueError(
                            "The user provided numpy array must have the same number of rows as the number of restarts."
                        )
                    # Check if the user provided numpy array has the same number of columns as the number of theta names
                    if user_provided.shape[1] != len(theta_names):
                        raise ValueError(
                            "The user provided numpy array must have the same number of columns as the number of theta names."
                        )
                    # A numpy array carries no column names, so the theta-name
                    # check below only applies to DataFrames.
                    # If all checks pass, use the user provided numpy array
                    theta_vals_multistart = user_provided

                elif isinstance(user_provided, pd.DataFrame):
                    # Check if the user provided dataframe has the same number of rows as the number of restarts
                    if user_provided.shape[0] != n_restarts:
                        raise ValueError(
                            "The user provided dataframe must have the same number of rows as the number of restarts."
                        )
                    # Check if the user provided dataframe has the same number of columns as the number of theta names
                    if user_provided.shape[1] != len(theta_names):
                        raise ValueError(
                            "The user provided dataframe must have the same number of columns as the number of theta names."
                        )
                    # Check if the user provided dataframe has the same theta names as the model;
                    # if not, raise an error
                    if not all(theta in theta_names for theta in user_provided.columns):
                        raise ValueError(
                            "The user provided dataframe must have the same theta names as the model."
                        )
                    # If all checks pass, use the user provided dataframe values
                    theta_vals_multistart = user_provided.iloc[0:n_restarts].values
            else:
                raise ValueError(
                    "The user must provide a numpy array or pandas dataframe from a previous attempt to use the 'user_provided' method."
                )
| raise ValueError( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would probably be more consistent with other code (and other suggestions) if the options were using an Enum object. You can check the DoE code, or Shammah's PR. It just makes it so that the strings are attached to an object instead (safer).
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added Enum class above, working to implement here. Making note to talk to shammah about this |
||
| "Invalid sampling method. Choose 'uniform', 'latin_hypercube', 'sobol' or 'user_provided'." | ||
| ) | ||
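A minimal sketch of the Enum approach the reviewer suggests (the class and member names below are illustrative assumptions, not the names used in the DoE code or in the other PR):

    from enum import Enum

    class MultistartSamplingMethod(str, Enum):
        # Illustrative member names; the real Enum added in the PR may differ
        uniform = "uniform"
        latin_hypercube = "latin_hypercube"
        sobol = "sobol"
        user_provided = "user_provided"

    # Accept either the Enum member or its string value, then compare against members
    method = MultistartSamplingMethod(multistart_sampling_method)
    if method is MultistartSamplingMethod.uniform:
        ...

One advantage is that an unsupported string fails fast in the MultistartSamplingMethod(...) call with a clear ValueError, instead of falling through to the final else branch.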
        if multistart_sampling_method == "sobol" or multistart_sampling_method == "latin_hypercube":
            # Scale the samples to the range of the lower and upper bounds for each theta in theta_names
            theta_vals_multistart = np.array(
                [lower_bound + (upper_bound - lower_bound) * theta for theta in samples]
            )
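            # Note (suggestion, not part of the draft): scipy.stats.qmc also provides a
            # helper that performs this affine rescaling directly and pairs naturally
            # with the qmc samplers used above:
            #     theta_vals_multistart = scipy.stats.qmc.scale(samples, lower_bound, upper_bound)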
        # Create a DataFrame where each row is an initial theta vector for a restart,
        # columns are theta_names, and values are the initial theta values for each restart
        if multistart_sampling_method == "user_provided":
            # If user_provided is a DataFrame, use its columns and values directly
            if isinstance(user_provided, pd.DataFrame):
                df_multistart = user_provided.copy()
                df_multistart.columns = theta_names
            else:
                df_multistart = pd.DataFrame(theta_vals_multistart, columns=theta_names)
        else:
            # Ensure theta_vals_multistart is 2D (n_restarts, len(theta_names))
            arr = np.atleast_2d(theta_vals_multistart)
            if arr.shape[0] == 1 and n_restarts > 1:
                arr = np.tile(arr, (n_restarts, 1))
            df_multistart = pd.DataFrame(arr, columns=theta_names)
        # Add columns for output info, initialized as NaN
        for name in theta_names:
            df_multistart[f'converged_{name}'] = np.nan
        df_multistart["initial objective"] = np.nan
        df_multistart["final objective"] = np.nan
        df_multistart["solver termination"] = np.nan
        df_multistart["solve_time"] = np.nan

        # Debugging output
        # print(df_multistart)

        return df_multistart

    def _instance_creation_callback(self, experiment_number=None, cb_data=None):
        model = self._create_parmest_model(experiment_number)
@@ -921,6 +1068,171 @@ def theta_est(
            cov_n=cov_n,
        )

    def theta_est_multistart(
        self,
        n_restarts=20,
        buffer=10,
        multistart_sampling_method="uniform",
        user_provided=None,
        seed=None,
        save_results=False,
        theta_vals=None,
        solver="ef_ipopt",
        file_name="multistart_results.csv",
        return_values=[],
    ):
| """ | ||
| Parameter estimation using multistart optimization | ||
|
|
||
| Parameters | ||
| ---------- | ||
| n_restarts: int, optional | ||
| Number of restarts for multistart. Default is 1. | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| th_sampling_method: string, optional | ||
| Method used to sample theta values. Options are "uniform", "latin_hypercube", or "sobol". | ||
| Default is "uniform". | ||
| buffer: int, optional | ||
| Number of iterations to save results dynamically. Default is 10. | ||
| user_provided: pd.DataFrame, optional | ||
| User provided dataframe of theta values for multistart optimization. | ||
| solver: string, optional | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Currently only "ef_ipopt" is supported. Default is "ef_ipopt". | ||
| return_values: list, optional | ||
| List of Variable names, used to return values from the model for data reconciliation | ||
|
|
||
|
|
||
| Returns | ||
| ------- | ||
| objectiveval: float | ||
sscini marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| The objective function value | ||
| thetavals: pd.Series | ||
| Estimated values for theta | ||
| variable values: pd.DataFrame | ||
| Variable values for each variable name in return_values (only for solver='ef_ipopt') | ||
|
|
||
| """ | ||
        # check if we are using deprecated parmest
        if self.pest_deprecated is not None:
            return print(
                "Multistart is not supported in the deprecated parmest interface"
            )

        assert isinstance(n_restarts, int)
        assert isinstance(multistart_sampling_method, str)
        assert isinstance(solver, str)
        assert isinstance(return_values, list)
        if n_restarts > 1 and multistart_sampling_method is not None:

            # Find the initialized values of theta from the labeled parmest model
            # and the theta names from the estimator object

            # print statement to indicate multistart optimization is starting
            print(f"Starting multistart optimization with {n_restarts} restarts using {multistart_sampling_method} sampling method.")

            # @Reviewers, pyomo team: Use this or use instance creation callback?
            theta_names = self._return_theta_names()
            # Generate theta values using the sampling method
            parmest_model_for_bounds = self._create_parmest_model(experiment_number=0)
            results_df = self._generate_initial_theta(
                parmest_model_for_bounds,
                seed=seed,
                n_restarts=n_restarts,
                multistart_sampling_method=multistart_sampling_method,
                user_provided=user_provided,
            )
            results_df = pd.DataFrame(results_df)
            # Extract theta_vals from the dataframe
            theta_vals = results_df.iloc[:, :len(theta_names)]
            converged_theta_vals = np.zeros((n_restarts, len(theta_names)))
            # Each restart uses a fresh model instance
            for i in range(n_restarts):
                # Create a fresh model for each restart
                parmest_model = self._create_parmest_model(experiment_number=0)
                theta_vals_current = theta_vals.iloc[i, :].to_dict()

                # Set current theta values in the model
                for name, value in theta_vals_current.items():
                    parmest_model.find_component(name).set_value(value)
                    # Optional: Print the current theta values being set
                    print(f"Setting {name} to {value}")

                for name in theta_names:
                    current_value = parmest_model.find_component(name)()
                    print(f"Current value of {name} is {current_value}")

                # Call the _Q_opt method with the generated theta values
                qopt_result = self._Q_opt(
                    bootlist=None,
                    solver=solver,
                    return_values=return_values,
                )
                # Unpack results
                objectiveval, converged_theta = qopt_result

                # Since _Q_opt does not return the solver result object, we cannot check
                # the solver termination condition directly here. Instead, we can assume
                # that if converged_theta contains NaN, the solve failed.
                if converged_theta.isnull().any():
                    solver_termination = "not successful"
                    solve_time = np.nan
                    thetavals = np.nan
                    final_objectiveval = np.nan
                    init_objectiveval = np.nan
                else:
                    converged_theta_vals[i, :] = converged_theta.values
                    # Calculate the initial objective value using the current theta values
                    # Use the _Q_at_theta method to evaluate the objective at these theta values
                    init_objectiveval, _, _ = self._Q_at_theta(theta_vals_current)
                    final_objectiveval = objectiveval
                    solver_termination = "successful"
                    # Plan to add solve time if available. @Reviewers, recommendations on
                    # how to do this from current pyomo examples would be appreciated
                    solve_time = converged_theta.solve_time if hasattr(converged_theta, 'solve_time') else np.nan
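                    # One possible approach (a sketch, not part of this draft): if _Q_opt
                    # were extended to also return the Pyomo SolverResults object, the
                    # reported wall time and termination condition could be read from it,
                    # e.g. solver_results.solver.time and
                    # solver_results.solver.termination_condition (availability of these
                    # attributes depends on the solver interface used).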
                # # Check if the objective value is better than the best objective value
                # # Set a very high initial best objective value
                # best_objectiveval = np.inf
                # best_theta = np.inf
                # if final_objectiveval < best_objectiveval:
                #     best_objectiveval = objectiveval
                #     best_theta = thetavals

                print(f"Restart {i+1}/{n_restarts}: Objective Value = {final_objectiveval}, Theta = {converged_theta}")
                # Store the results in the DataFrame for this restart
                # Fill converged theta values
                for j, name in enumerate(theta_names):
                    results_df.at[i, f'converged_{name}'] = converged_theta[j] if not np.isnan(converged_theta_vals[i, j]) else np.nan
                # Fill initial and final objective values, solver termination, and solve time
                results_df.at[i, "initial objective"] = init_objectiveval if 'init_objectiveval' in locals() else np.nan
                results_df.at[i, "final objective"] = objectiveval if 'objectiveval' in locals() else np.nan
                results_df.at[i, "solver termination"] = solver_termination if 'solver_termination' in locals() else np.nan
                results_df.at[i, "solve_time"] = solve_time if 'solve_time' in locals() else np.nan

                # Diagnostic: print the table after each restart
                # print(results_df)
                # Buffer to save the dataframe dynamically, if save_results is True
                if save_results and (i + 1) % buffer == 0:
                    mode = 'w' if i + 1 == buffer else 'a'
                    header = i + 1 == buffer
                    # Write only the rows generated since the last save so rows are not duplicated on append
                    results_df.iloc[i + 1 - buffer : i + 1].to_csv(
                        file_name, mode=mode, header=header, index=False
                    )
                    print(f"Intermediate results saved after {i + 1} iterations.")
            # Final save after all iterations: append any rows not yet written by the buffer
            if save_results:
                already_saved = (n_restarts // buffer) * buffer
                results_df.iloc[already_saved:].to_csv(
                    file_name, mode='a' if already_saved else 'w', header=already_saved == 0, index=False
                )
                print("Final results saved.")
            return results_df  # just this for now, then best_theta, best_objectiveval


    def theta_est_bootstrap(
        self,
        bootstrap_samples,
Comment: I like just sending a warning, and not returning. For example, n_restarts might be 1 by default. You should check if n_restarts is an int as well. Then, if n_restarts is 1, you should send a warning that the tool is intended for this number to be greater than one and solve as normal.

Reply: Alex had initially said to just return a message, but I will ask him again.