Skip to content

Commit c29521d

Browse files
Authored commit: Use estimagic for NFXP (#42)
Use estimagic to wrap scipy optimizers for the likelihood optimization in the NFXP.
1 parent 5af454f commit c29521d

18 files changed

+110
-210
lines changed

.conda/meta.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ requirements:
2525
- pytest
2626
- pytest-xdist
2727
- scipy
28+
- estimagic >=0.0.30

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,5 @@ figures
2121
ruspy.rst
2222
modules.rst
2323
_generated
24+
*.db
2425

environment.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
name: ruspy
2-
2+
channels:
3+
- opensourceeconomics
4+
- conda-forge
35
dependencies:
46
- python=3.7
57
- numpy
@@ -16,9 +18,10 @@ dependencies:
1618
- numba
1719
- mpmath
1820
- pip
21+
- estimagic>=0.0.30
1922
- pip:
2023
- flake8
2124
- black
2225
- pre-commit
2326
- jupyter_contrib_nbextensions
24-
- sphinx_rtd_theme
27+
- sphinx_rtd_theme

ruspy/estimation/bootstrapping.py

Lines changed: 0 additions & 55 deletions
This file was deleted.

ruspy/estimation/est_cost_params.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def loglike_cost_params(
5858
5959
6060
"""
61+
params = params["value"].to_numpy()
6162
costs = calc_obs_costs(num_states, maint_func, params, scale)
6263

6364
ev = get_ev(params, trans_mat, costs, disc_fac, alg_details)
@@ -109,6 +110,7 @@ def derivative_loglike_cost_params(
109110
110111
111112
"""
113+
params = params["value"].to_numpy()
112114
dev = np.zeros_like(params)
113115
obs_costs = calc_obs_costs(num_states, maint_func, params, scale)
114116

ruspy/estimation/estimation.py

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22
This module contains the main function for the estimation process.
33
"""
44
import numpy as np
5-
import scipy.optimize as opt
5+
from estimagic.optimization.optimize import minimize
66

7-
from ruspy.estimation.bootstrapping import bootstrapp
87
from ruspy.estimation.est_cost_params import create_state_matrix
98
from ruspy.estimation.est_cost_params import loglike_cost_params
109
from ruspy.estimation.estimation_interface import select_model_parameters
@@ -60,32 +59,30 @@ def estimate(init_dict, df):
6059

6160
alg_details = {} if "alg_details" not in init_dict else init_dict["alg_details"]
6261

62+
kwargs = {
63+
"maint_func": maint_func,
64+
"maint_func_dev": maint_func_dev,
65+
"num_states": num_states,
66+
"trans_mat": trans_mat,
67+
"state_mat": state_mat,
68+
"decision_mat": decision_mat,
69+
"disc_fac": disc_fac,
70+
"scale": scale,
71+
"alg_details": alg_details,
72+
}
73+
6374
result_cost_params = {}
6475

65-
min_result = opt.minimize(
76+
min_result = minimize(
6677
loglike_cost_params,
67-
args=(
68-
maint_func,
69-
maint_func_dev,
70-
num_states,
71-
trans_mat,
72-
state_mat,
73-
decision_mat,
74-
disc_fac,
75-
scale,
76-
alg_details,
77-
),
78-
**optimizer_options
78+
criterion_kwargs=kwargs,
79+
gradient_kwargs=kwargs,
80+
**optimizer_options,
7981
)
80-
result_cost_params["x"] = min_result["x"]
81-
result_cost_params["fun"] = min_result["fun"]
82-
result_cost_params["message"] = min_result["message"]
83-
result_cost_params["jac"] = min_result["jac"]
84-
85-
if "hess_inv" in min_result:
86-
(
87-
result_cost_params["95_conf_interv"],
88-
result_cost_params["std_errors"],
89-
) = bootstrapp(min_result["x"], min_result["hess_inv"])
82+
result_cost_params["x"] = min_result[1]["value"].to_numpy()
83+
result_cost_params["fun"] = min_result[0]["fitness"]
84+
result_cost_params["status"] = min_result[0]["status"]
85+
result_cost_params["message"] = min_result[0]["message"]
86+
result_cost_params["jac"] = min_result[0]["jacobian"]
9087

9188
return transition_results, result_cost_params

ruspy/estimation/estimation_interface.py

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
import pandas as pd
23

34
from ruspy.estimation.est_cost_params import derivative_loglike_cost_params
45
from ruspy.model_code.cost_functions import cubic_costs
@@ -97,46 +98,32 @@ def select_optimizer_options(init_dict, num_params_costs):
9798
9899
"""
99100

100-
optimizer_dict = {} if "optimizer" not in init_dict else init_dict["optimizer"]
101-
optimizer_options = {}
101+
optimizer_options = {} if "optimizer" not in init_dict else init_dict["optimizer"]
102102

103-
if "optimizer_name" in optimizer_dict:
104-
optimizer_options["method"] = optimizer_dict["optimizer_name"]
103+
if "algorithm" not in optimizer_options:
104+
optimizer_options["algorithm"] = "scipy_L-BFGS-B"
105105
else:
106-
optimizer_options["method"] = "L-BFGS-B"
106+
pass
107107

108-
if "start_values" in optimizer_dict:
109-
optimizer_options["x0"] = np.array(optimizer_dict["start_values"])
110-
else:
111-
optimizer_options["x0"] = np.power(
112-
np.full(num_params_costs, 10, dtype=float),
113-
np.arange(1, -num_params_costs + 1, -1),
108+
if "params" not in optimizer_options:
109+
optimizer_options["params"] = pd.DataFrame(
110+
np.power(
111+
np.full(num_params_costs, 10, dtype=float),
112+
np.arange(1, -num_params_costs + 1, -1),
113+
),
114+
columns=["value"],
114115
)
115-
116-
if "use_search_bounds" in optimizer_dict:
117-
if optimizer_dict["use_search_bounds"] == "yes":
118-
optimizer_options["bounds"] = [(1e-6, None)] * num_params_costs
119-
else:
120-
pass
121116
else:
122117
pass
123118

124-
if "search_bounds" in optimizer_dict:
125-
optimizer_options["bounds"] = np.array(optimizer_dict["search_bounds"])
119+
if "gradient" not in optimizer_options:
120+
optimizer_options["gradient"] = derivative_loglike_cost_params
126121
else:
127122
pass
128123

129-
if "use_gradient" in optimizer_dict:
130-
if optimizer_dict["use_gradient"] == "yes":
131-
optimizer_options["jac"] = derivative_loglike_cost_params
132-
else:
133-
pass
124+
if "logging" not in optimizer_options:
125+
optimizer_options["logging"] = False
134126
else:
135127
pass
136128

137-
if "additional_options" in optimizer_dict:
138-
optimizer_options["options"] = optimizer_dict["additional_options"]
139-
else:
140-
optimizer_options["options"] = {}
141-
142129
return optimizer_options

ruspy/estimation/estimation_transitions.py

Lines changed: 25 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44
"""
55
import numba
66
import numpy as np
7-
import scipy.optimize as opt
8-
9-
from ruspy.estimation.bootstrapping import bootstrapp
7+
import pandas as pd
8+
from estimagic.optimization.optimize import minimize
109

1110

1211
def estimate_transitions(df):
@@ -29,25 +28,34 @@ def estimate_transitions(df):
2928
usage = df["usage"].to_numpy(dtype=float)
3029
usage = usage[~np.isnan(usage)].astype(int)
3130
result_transitions["trans_count"] = transition_count = np.bincount(usage)
32-
raw_result_trans = opt.minimize(
33-
loglike_trans,
34-
args=transition_count,
35-
x0=np.full(len(transition_count), 0.1),
36-
method="BFGS",
37-
)
38-
p_raw = raw_result_trans["x"]
39-
result_transitions["x"] = reparam_trans(p_raw)
4031

41-
result_transitions["95_conf_interv"], result_transitions["std_errors"] = bootstrapp(
42-
p_raw, raw_result_trans["hess_inv"], reparam=reparam_trans
32+
# Prepare DataFrame for estimagic
33+
name = ["trans_prob"]
34+
number = np.arange(1, len(transition_count) + 1)
35+
index = pd.MultiIndex.from_product([name, number], names=["name", "number"])
36+
params = pd.DataFrame(
37+
np.full(len(transition_count), 1 / len(transition_count)),
38+
columns=["value"],
39+
index=index,
40+
)
41+
constr = [{"loc": "trans_prob", "type": "probability"}]
42+
43+
raw_result_trans = minimize(
44+
criterion=loglike_trans,
45+
params=params,
46+
algorithm="scipy_L-BFGS-B",
47+
constraints=constr,
48+
criterion_kwargs={"transition_count": transition_count},
49+
logging=False,
4350
)
4451

45-
result_transitions["fun"] = loglike_trans(p_raw, transition_count)
52+
result_transitions["x"] = raw_result_trans[1]["value"].to_numpy()
53+
result_transitions["fun"] = raw_result_trans[0]["fitness"]
4654

4755
return result_transitions
4856

4957

50-
def loglike_trans(p_raw, transition_count):
58+
def loglike_trans(params, transition_count):
5159
"""
5260
Log-likelihood function of transition probability estimation.
5361
@@ -65,31 +73,11 @@ def loglike_trans(p_raw, transition_count):
6573
log_like : numpy.float
6674
The negative log-likelihood value of the transition probabilities
6775
"""
68-
trans_probs = reparam_trans(p_raw)
69-
log_like = -np.sum(np.multiply(transition_count, np.log(trans_probs)))
76+
p_raw = params.loc["trans_prob", "value"].to_numpy()
77+
log_like = -np.sum(np.multiply(transition_count, np.log(p_raw)))
7078
return log_like
7179

7280

73-
def reparam_trans(p_raw):
74-
"""
75-
The reparametrization function for transition probabilities.
76-
77-
Parameters
78-
----------
79-
p_raw : numpy.array
80-
The raw values before reparametrization, on which there are no constraints
81-
or bounds.
82-
83-
Returns
84-
-------
85-
trans_prob : numpy.array
86-
The probabilities of an state increase.
87-
88-
"""
89-
p = np.exp(p_raw) / np.sum(np.exp(p_raw))
90-
return p
91-
92-
9381
@numba.jit(nopython=True)
9482
def create_transition_matrix(num_states, trans_prob):
9583
"""

ruspy/test/estimation_tests/test_cubic_repl.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,7 @@ def inputs():
2828
"maint_cost_func": "cubic",
2929
"cost_scale": scale,
3030
},
31-
"optimizer": {
32-
"optimizer_name": "BFGS",
33-
"use_gradient": "yes",
34-
"use_search_bounds": "no",
35-
},
31+
"optimizer": {"algorithm": "scipy_L-BFGS-B"},
3632
}
3733
df = pd.read_pickle(TEST_FOLDER + "group_4.pkl")
3834
result_trans, result_fixp = estimate(init_dict, df)
@@ -45,7 +41,7 @@ def inputs():
4541
out["disc_fac"] = disc_fac
4642
out["num_states"] = num_states
4743
out["scale"] = scale
48-
out["message"] = result_fixp["message"]
44+
out["status"] = result_fixp["status"]
4945
return out
5046

5147

@@ -56,7 +52,7 @@ def outputs():
5652
out["params_base"] = np.loadtxt(TEST_FOLDER + "repl_params_cubic.txt")
5753
out["transition_count"] = np.loadtxt(TEST_FOLDER + "transition_count.txt")
5854
out["trans_ll"] = 3140.570557
59-
out["cost_ll"] = 163.624044 # 162.885
55+
out["cost_ll"] = 164.632939 # 162.885
6056
return out
6157

6258

@@ -85,7 +81,7 @@ def test_ll_params_derivative(inputs, outputs):
8581
disc_fac = inputs["disc_fac"]
8682
assert_array_almost_equal(
8783
derivative_loglike_cost_params(
88-
inputs["params_est"],
84+
pd.DataFrame(inputs["params_est"], columns=["value"]),
8985
cubic_costs,
9086
cubic_costs_dev,
9187
num_states,
@@ -102,4 +98,4 @@ def test_ll_params_derivative(inputs, outputs):
10298

10399

104100
def test_success(inputs):
105-
assert inputs["message"] == "Optimization terminated successfully."
101+
assert inputs["status"] == "success"

0 commit comments

Comments (0)