Skip to content

Commit 8de5e25

Browse files
authored
Merge pull request #4 from IBM/poi
Poi
2 parents f323e9e + a7a981e commit 8de5e25

File tree

14 files changed

+1960
-148
lines changed

14 files changed

+1960
-148
lines changed

configs/dir_configs.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
local:
2+
buckets:
3+
inputs: ''
4+
inputs_dest: ''
5+
objectives: ''
6+
objectives_dest: ''
7+
data: ''
8+
data_dest: ''
9+
solutions: ''
10+
solutions_dest: ''

doframework/api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -322,9 +322,9 @@ def run(generate_user_solution, configs_file, **kwargs):
322322
args = Args(objectives, datasets, feasibility_regions, run_mode, distribute, mcmc, logger, mock, after_idle_for, rayvens_logs, alg_num_cpus, data_num_cpus)
323323

324324
if args.run_mode == 'operator':
325-
ray.init(address='auto')
325+
ray.init(address='auto',ignore_reinit_error=True)
326326
else:
327-
ray.init()
327+
ray.init(ignore_reinit_error=True)
328328
rayvens.init(mode=args.run_mode ,release=(not args.rayvens_logs))
329329

330330
if args.logger: print('({}) INFO ... Running simulation with args objectives={o} datasets={s} feasibility_regions={r} distribute={d} run_mode={m} logger={l}'.format('root',

doframework/core/gp.py

Lines changed: 95 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,73 @@
1414
# limitations under the License.
1515
#
1616

17+
from typing import List
1718
import numpy as np
1819
from scipy.stats import gaussian_kde
1920
from scipy.integrate import quad
20-
from GPy.kern import RBF
21-
from GPy.models import GPRegression
22-
from GPy.core.parameterization.priors import Gamma
23-
from GPy.inference.mcmc import HMC
21+
import GPy
22+
23+
def plot_posteriors(samples: np.array, labels: List[str], **kwargs):
    '''
    Plot hyper-parameter posteriors following HMC. By convention, first column of samples is RBF kernel variance chain.
    The last column of samples is the RBF kernel noise. The middle columns of samples are ARD length scale parameters.
    Three stacked panels are drawn: kernel variance, all length scales together, and noise.

    Parameters:
        samples (np.array): hyper-parameter HMC chains, one column per hyper-parameter.
        labels (List[str]): sample column labels for graph legends (one per column of samples).
        fig_width (float): optional keyword, figure width (default: 10).
        fig_length (float): optional keyword, height of each panel (default: 3).
        kdes (dict): optional keyword, pre-computed KDEs keyed by column index. Missing columns are fitted here.
        modals (dict): optional keyword, pre-computed modes keyed by column index. Missing columns are estimated here.
    '''

    import matplotlib.pyplot as plt

    num_of_plots = 3
    num_of_posteriors = samples.shape[-1]
    dim = num_of_posteriors - num_of_plots + 1

    assert len(labels) == num_of_posteriors, \
        f'Expected {num_of_posteriors} plot labels, but received {len(labels)}.'

    fig_width = kwargs.get('fig_width', 10)
    fig_length = kwargs.get('fig_length', 3)
    given_kdes = kwargs.get('kdes', {})
    given_modals = kwargs.get('modals', {})

    _, axs = plt.subplots(num_of_plots,1,figsize=(fig_width,num_of_plots*fig_length))
    # plt.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap still accepts the lut resampling argument.
    cmap = plt.get_cmap('Accent',num_of_posteriors)

    # column indices drawn on each panel: [variance], [length scales], [noise]
    column_groups = [[0], list(range(1,dim+1)), [dim+1]]

    for ax, columns in zip(axs,column_groups):

        modals = []

        for j in columns:

            s = samples[:,j]
            xs = np.linspace(s.min()*0.9,s.max()*1.1,1000)

            kde = given_kdes[j] if j in given_kdes else gaussian_kde(s)
            density = kde.pdf(xs) # evaluated once, reused for the mode estimate and the curve

            modal = given_modals[j] if j in given_modals else xs[np.argmax(density)]

            modals.append(modal)
            modal_density = kde.pdf(modal)[0]

            # dashed vertical marker at the mode, then the posterior density itself
            ax.plot([modal,modal],[0,modal_density],ls='--',color=cmap(j))
            ax.plot(xs,density,label=labels[j],lw=3,color=cmap(j))

        # add the modes to the automatic ticks and drop the two outermost ticks
        ticks = np.sort(np.hstack([np.around(np.array(modals),2), ax.get_xticks()]))
        ax.set_xticks(ticks[1:-1])
        ax.tick_params(axis="x", rotation=90, labelsize=12)
        ax.legend()

    plt.tight_layout()
2484

2585
def find_modal(samples, linspace_num: int=1000):
2686

@@ -35,69 +95,78 @@ def find_modal(samples, linspace_num: int=1000):
3595
argmax = np.argmax(density)
3696
modal = xs[argmax]
3797

38-
return modal
98+
return modal, kde
3999

40100
except:
41101

42-
return None
102+
return None, None
43103

44104
def gp_model(X: np.array,
             y: np.array,
             is_mcmc: bool=False,
             num_samples: int=1000,
             hmc_iters: int=2,
             plot_kernel_posteriors: bool=False,
             linspace_num: int=1000) -> GPy.models.GPRegression:
    '''
    Fit a GP regression model with an ARD RBF kernel to the data.

    With is_mcmc=True, kernel hyper-parameters are sampled with HMC under Gamma priors, and the mode of each
    posterior chain is used as the hyper-parameter value. Otherwise, hyper-parameters are fitted with L-BFGS.

    Parameters:
        X (np.array): inputs, one row per data point.
        y (np.array): outputs.
        is_mcmc (bool): sample hyper-parameters with HMC instead of optimizing them (default: False).
        num_samples (int): number of HMC samples (default: 1000).
        hmc_iters (int): HMC iterations per sample, passed on to the sampler (default: 2).
        plot_kernel_posteriors (bool): plot hyper-parameter posteriors after HMC (default: False).
        linspace_num (int): grid size used to locate the mode of each posterior (default: 1000).

    Returns:
        GP regression model.

    Raises:
        ValueError: when no mode could be found for the kernel variance or the noise variance chains.
    '''

    dim = X.shape[-1]

    if is_mcmc:

        factor = 10.0 # inputs are scaled by this factor for numerical issues in HMC train

        kern = GPy.kern.RBF(input_dim=dim, ARD=True)
        model = GPy.models.GPRegression(factor*X,y,kernel=kern.copy())

        model.kern.variance.set_prior(GPy.priors.Gamma.from_EV(0.1,0.1),warning=False)

        # data standard deviation per input dimension serves as length scale prior mean and as fallback value
        lengthscales = {}
        for i in range(dim):
            kde = gaussian_kde(X[:,i])
            mean = quad(lambda x: x * kde.pdf(x), a=-np.inf, b=np.inf)[0]
            var = quad(lambda x: x**2 * kde.pdf(x), a=-np.inf, b=np.inf)[0] - mean**2
            lengthscales[i] = np.sqrt(var)
            model.kern.lengthscale[[i]].set_prior(GPy.priors.Gamma.from_EV(lengthscales[i],lengthscales[i]/2),warning=False)

        hmc = GPy.inference.mcmc.HMC(model)
        samples = hmc.sample(num_samples=num_samples,hmc_iters=hmc_iters)

        # chain columns: 0 is kernel variance, 1..dim are ARD length scales, dim+1 is noise variance
        modals = {}
        kdes = {}
        for i in range(samples.shape[-1]):
            modal, kde = find_modal(samples[:,i],linspace_num)
            if (modal is not None) and (kde is not None):
                modals[i] = modal
                kdes[i] = kde

        variance_col = 0
        noise_col = dim+1

        if (variance_col not in modals) or (noise_col not in modals):
            raise ValueError('HMC failed. Possible unsuitable priors on kernels parameters leading to repetative samples.')

        # the final model is built on the unscaled inputs, so sampled values are scaled back by factor
        kern = GPy.kern.RBF(input_dim=dim, ARD=True)
        model = GPy.models.GPRegression(X,y,kernel=kern.copy())

        model.rbf.variance = modals[variance_col]/factor**2
        model.Gaussian_noise.variance = modals[noise_col]/factor**2

        for i in range(dim):
            # length scale i lives in chain column 1+i
            if 1+i in modals:
                model.rbf.lengthscale[i] = modals[1+i]/factor
            else:
                model.rbf.lengthscale[i] = lengthscales[i]

        if plot_kernel_posteriors:
            labels = ['RBF kernel variance']+ [f'RBF kernel lengthscale[x{i}]' for i in range(dim)] + ['RBF kernel noise']
            plot_posteriors(samples, labels, modals=modals, kdes=kdes)

    else:

        kern = GPy.kern.RBF(input_dim=dim, ARD=True)
        model = GPy.models.GPRegression(X,y,kernel=kern.copy())

        # model.optimize_restarts(num_restarts=10,optimizer='lbfgs',verbose=False)
        model.optimize(optimizer='lbfgs',messages=False)

    return model

doframework/core/hit_and_run.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,12 @@ def in_domain(xs, A: np.array, R: Optional[float]=None, **kwargs) -> np.array:
174174
'''
175175
Check whether xs are inside the intersection of a convex polytope and a ball of radius R.
176176
The convex polytope is given by the matrix A such that Ax<=0 defines it.
177+
When the radius is not specified, this restriction is dropped.
177178
178179
Parameters:
179180
xs (np.array): points.
180181
A (np.array): matrix defining a convex polytope Ax <= 0.
181-
R (float): radius.
182+
R (float): radius (default: None).
182183
183184
Returns:
184185
a boolean numpy array that indicates whether the points are inside the polytope.

doframework/core/inputs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def parse_vertex_num(sim_input: dict) -> int:
7676

7777
return num
7878

79-
def get_configs(configs_file, is_logger: bool=True):
79+
def get_configs(configs_file, is_logger: bool=False):
8080

8181
with open(configs_file,'r') as file:
8282
try:
@@ -86,7 +86,7 @@ def get_configs(configs_file, is_logger: bool=True):
8686
if is_logger:
8787
print('({}) ERROR ... Could not load configs yaml. Check your path.'.format('root'))
8888
print(e)
89-
raise e
89+
raise e
9090

9191
def legit_configs(configs):
9292

doframework/core/poi.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#
2+
# Copyright IBM Corporation 2022
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
17+
from typing import List
18+
19+
import numpy as np
20+
import pandas as pd
21+
from scipy.stats import multivariate_normal
22+
from GPy.models import GPRegression
23+
24+
from doframework.core.utils import order_stats
25+
26+
def plot_joint_distribution(samples: np.array, **kwargs):
    '''
    Hex-bin joint plot of draws from a bivariate distribution, with a straight line drawn
    between the points (x_min,x_min) and (x_max,x_max).

    Parameters:
        samples (np.array): Nx2 array of draws.
        cols (List[str]): optional keyword, axis labels (default: ['x0','x1']).
        x_min (float): optional keyword, lower axis limit. Used only when x_max is given as well.
        x_max (float): optional keyword, upper axis limit. Used only when x_min is given as well.
    '''

    import seaborn as sns

    assert samples.shape[-1]==2, 'Array must be of dimension Nx2. Array is a draw from a bivariate distribution.'

    cols = kwargs.get('cols', ['x0','x1'])

    if 'x_min' in kwargs and 'x_max' in kwargs:
        x_min = kwargs['x_min']
        x_max = kwargs['x_max']
    else:
        # limits inferred from the data: half the overall minimum to one and a half times the overall maximum
        x_min = samples.min(axis=0).min()*0.5
        x_max = samples.max(axis=0).max()*1.5

    lims = np.array([x_min,x_max]).reshape(-1,1)

    draws_frame = pd.DataFrame(samples,columns=cols)
    line_frame = pd.DataFrame(np.hstack([lims,lims]),columns=cols) # rows (x_min,x_min) and (x_max,x_max)

    sns.set(style="white", color_codes=True)
    sns.jointplot(data=draws_frame, x=cols[0], y=cols[1], kind="hex", xlim=lims, ylim=lims)
    # NOTE(review): the line goes to whichever axes are current after jointplot - presumably the joint axes, confirm.
    sns.lineplot(data=line_frame, x=cols[0], y=cols[1])
49+
50+
class POI(object):
    '''
    Class for probability of improvement outcomes.

    Attributes set by the constructor:
        point: the point the outcome refers to.
        probability: value in [0,1].
        upper_bound: taken from keyword upper_bound (default: True).
        reference: taken from keyword reference (default: empty array).
        threshold: taken from keyword threshold (default: None).
    '''

    def __init__(self, point: np.array, probability: float, **kwargs):

        self.point = point
        assert 0.0 <= probability <= 1.0, f'Probability value should be in [0,1]. Received {probability:.2f}.'
        self.probability = probability

        self.upper_bound = kwargs.get('upper_bound', True)
        self.reference = kwargs.get('reference', np.array([]))
        self.threshold = kwargs.get('threshold', None)

    def __repr__(self):

        fields = [f'point={self.point}',
                  f'probability={self.probability}',
                  f'upper_bound={self.upper_bound}']

        # reference and threshold are shown only when they were provided
        if self.reference.size > 0:
            fields.append(f'reference={self.reference}')
        if self.threshold is not None:
            fields.append(f'threshold={self.threshold}')

        return 'POI(' + ', '.join(fields) + ')'
73+
74+
def probability_of_improvement(solutions: np.array, references: np.array, model: GPRegression,
                               sample_num: int=100000, is_constraint: bool=False, upper_bound: bool=True, plot_joint_gaussians: bool=False,
                               **kwargs) -> List[POI]:
    '''
    Estimate probability of improvement for every (solution, reference) pair by sampling the GP posterior.

    With is_constraint=False, each solution and reference point are evaluated jointly by the model, and draws
    from their bivariate posterior are compared. With is_constraint=True, references are scalar thresholds,
    and draws from the posterior at the solution are paired with the constant threshold.
    NOTE(review): the comparison itself is delegated to order_stats(samples,is_minimum) - presumably the
    fraction of draws where the first column is on the desired side of the second. Confirm against its definition.

    Parameters:
        solutions (np.array): solution points, one per row (a single point is accepted).
        references (np.array): reference points, one per row, or threshold values when is_constraint=True.
        model (GPRegression): trained GP model providing predict(...,full_cov=True).
        sample_num (int): number of posterior draws per pair (default: 100000).
        is_constraint (bool): treat references as scalar thresholds (default: False).
        upper_bound (bool): passed to order_stats as is_minimum = not upper_bound, and recorded on each POI (default: True).
        plot_joint_gaussians (bool): plot the joint draws for each pair, only when is_constraint=False (default: False).
        kwargs: accepted for compatibility, currently unused.

    Returns:
        list of POI objects, ordered by solution and then by reference.
    '''

    sols = np.atleast_2d(solutions)
    d = sols.shape[-1]
    is_minimum = not upper_bound

    if is_constraint:
        # thresholds are scalars: one per row
        refs = np.atleast_2d(references.flatten()).T
    else:
        refs = np.atleast_2d(references)

    ref_num = refs.shape[0]
    ref_dim = refs.shape[-1]

    assert ref_dim == d or ref_dim == 1, \
        'Input reference row dimension must either be:\n(1) identical to solution row dimension (POI for objective target, is_constraint=False)\n(2) or equal to 1 (POI for constraint satisfaction, is_constraint=True).\nYour input has inferred dimension {} for solution vectors and inferred dimension {} for reference vectors (is_constraint={}).'.format(d,ref_dim,is_constraint)

    # all (solution, reference) pairs: each solution repeated ref_num times against the cycled references
    sols_rep = np.repeat(sols, ref_num, axis=0)
    refs_rep = np.tile(refs, (sols.shape[0],1))

    pois = []

    for sol, ref in zip(sols_rep,refs_rep):

        if is_constraint:

            mu, cov = model.predict(np.atleast_2d(sol),full_cov=True)
            draws = multivariate_normal(mean=mu.flatten(),cov=cov).rvs(size=sample_num)
            draws = np.reshape(draws,(-1,1)) # rvs squeezes its output; restore a column, also for sample_num=1
            samples = np.hstack([draws,np.tile(ref,(draws.shape[0],1))])

            pois.append(POI(sol,order_stats(samples,is_minimum),upper_bound=upper_bound,threshold=ref))

        else:

            mu, cov = model.predict(np.vstack([sol,ref]),full_cov=True)
            samples = multivariate_normal(mean=mu.flatten(),cov=cov).rvs(size=sample_num)
            samples = np.reshape(samples,(-1,2)) # keeps an Nx2 shape for sample_num=1 as well

            pois.append(POI(sol,order_stats(samples,is_minimum),upper_bound=upper_bound,reference=ref))

            if plot_joint_gaussians:

                # both labels rounded to 2 decimals; a local dict keeps the function's own kwargs intact
                plot_kwargs = {'cols': ['f({})'.format(np.around(sol,2)),
                                        'f({})'.format(np.around(ref,2))]}

                plot_joint_distribution(samples=samples, **plot_kwargs)

    return pois

doframework/core/pwl.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
1+
#
2+
# Copyright IBM Corporation 2022
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
117
import itertools
218
import numpy as np
319
from numpy import linalg

0 commit comments

Comments
 (0)