
Commit 20e64ed

Dev lgbm (#147) (#152)
* initial restructure
* thresholds on unet output
* added gmean tta, experimented with thresholding (#125)
* feature extractor and lightgbm
* pipeline is running ok
* tmp commit
* lgbm ready for tests
* tmp
* faster nms and feature extraction
* small fix
* cleaning
* Dev repo cleanup (#138)
* initial restructure
* clean structure (#126)
* clean structure
* correct readme
* further cleaning
* Dev apply transformer (#131)
* clean structure
* correct readme
* further cleaning
* resizer docstring
* couple docstrings
* make apply transformer, memory cache
* fixes
* postprocessing docstrings
* fixes in PR
* Dev repo cleanup (#132)
* cleanup
* remove src.
* Dev clean tta (#134)
* added resize padding, refactored inference pipelines
* refactored pipelines
* added color shift augmentation
* reduced caching to just mask_resize
* updated config
* Dev-repo_cleanup models and losses docstrings (#135)
* models and losses docstrings
* small fixes in docstrings
* resolve conflicts with TTA PR (#137)
* refactor in stream mode (#139)
* hot fix of mask_postprocessing in tta with new make transformer
* finishing merge
* finishing merge v2
* finishing merge v3
* finishing merge v4
* tmp commit
* lgbm train and evaluate pipelines run correctly
* something is not right
* fix
* working lgbm training with ugly train_mode=True
* back to pipelines.py
* small fix
* preparing PR
* preparing PR v2
* preparing PR v2
* fix
* fix_2
* fix_3
* fix_4
1 parent 8e269de commit 20e64ed

12 files changed: +485 -51 lines


neptune.yaml

Lines changed: 17 additions & 5 deletions
@@ -1,7 +1,7 @@
 project: YOUR_PROJECT_NAME

 name: mapping_challenge_open_solution
-tags: [solution_1]
+tags: [solution_5]

 metric:
   channel: 'Final Validation Score'
@@ -41,7 +41,6 @@ parameters:
   loader_mode: resize
   stream_mode: 0

-
   # General parameters
   image_h: 256
   image_w: 256
@@ -86,11 +85,10 @@ parameters:

   # Postprocessing
   threshold: 0.5
-  min_nuclei_size: 20
-  erosion_percentages: '[10,20,30]'
   erode_selem_size: 0
   dilate_selem_size: 2
   tta_aggregation_method: gmean
+  nms__iou_threshold: 0.5

   # Inference padding
   crop_image_h: 300
@@ -100,4 +98,18 @@ parameters:
   pad_method: 'replicate'

   #Neptune monitor
-  unet_outputs_to_plot: '["multichannel_map",]'
+  unet_outputs_to_plot: '["multichannel_map",]'
+
+  #Scoring model
+  scoring_model: 'lgbm'
+  scoring_model__num_training_examples: 10000
+
+  #LightGBM
+  lgbm__learning_rate: 0.001
+  lgbm__num_leaves: 10
+  lgbm__min_data: 50
+  lgbm__max_depth: 10
+  lgbm__number_of_trees: 100
+  lgbm__early_stopping: 5
+  lgbm__train_size: 0.7
+  lgbm__target: 'iou'
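For orientation only, here is a rough sketch of how lgbm__* values like the ones above could drive a plain LightGBM run. The real wiring in this commit goes through src/pipeline_config.py and ScoringLightGBM in src/models.py; this sketch assumes a LightGBM version (>=3.3) where early stopping is passed as a callback, and X, y are placeholder feature/target arrays.

# Sketch only, not part of this commit: mapping the config values onto lightgbm.train.
import lightgbm as lgb
from sklearn.model_selection import train_test_split

def train_scoring_model(X, y):
    # lgbm__train_size: 0.7
    X_train, X_val, y_train, y_val = train_test_split(X, y, train_size=0.7)
    params = {
        'objective': 'regression',
        'metric': 'regression_l2',
        'learning_rate': 0.001,  # lgbm__learning_rate
        'num_leaves': 10,        # lgbm__num_leaves
        'min_data': 50,          # lgbm__min_data
        'max_depth': 10,         # lgbm__max_depth
    }
    train_set = lgb.Dataset(X_train, label=y_train)
    val_set = lgb.Dataset(X_val, label=y_val, reference=train_set)
    return lgb.train(params,
                     train_set,
                     num_boost_round=100,                 # lgbm__number_of_trees
                     valid_sets=[val_set],
                     callbacks=[lgb.early_stopping(5)])   # lgbm__early_stopping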

src/callbacks.py

Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@
 from .steps.utils import get_logger
 from .steps.pytorch.callbacks import NeptuneMonitor, ValidationMonitor
 from .utils import softmax, coco_evaluation, create_annotations, make_apply_transformer
-from .pipeline_config import CATEGORY_IDS, Y_COLUMNS_SCORING
+from .pipeline_config import CATEGORY_IDS, Y_COLUMNS_SCORING, CATEGORY_LAYERS

 logger = get_logger()

@@ -200,7 +200,7 @@ def _generate_prediction(self, cache_dirpath, outputs):
         output = pipeline.transform(data)
         y_pred = output['y_pred']

-        prediction = create_annotations(self.meta_valid, y_pred, logger, CATEGORY_IDS)
+        prediction = create_annotations(self.meta_valid, y_pred, logger, CATEGORY_IDS, CATEGORY_LAYERS)
         return prediction

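CATEGORY_LAYERS is introduced in src/pipeline_config.py below as [1, 19], paired positionally with CATEGORY_IDS = [None, 100]. How create_annotations consumes it is not shown in this diff; the following is only a hypothetical illustration of unrolling such a pairing into per-layer category ids (the helper name and semantics are assumptions, not taken from this commit).

# Hypothetical helper: repeat each category id by its layer count.
def unroll_category_layers(category_ids, category_layers):
    unrolled = []
    for category_id, n_layers in zip(category_ids, category_layers):
        unrolled.extend([category_id] * n_layers)
    return unrolled

# unroll_category_layers([None, 100], [1, 19])
# -> [None, 100, 100, ..., 100]  (1 background entry + 19 entries for category 100)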

src/loaders.py

Lines changed: 3 additions & 3 deletions
@@ -436,9 +436,9 @@ def _get_tta_data(self, i, row):


 class TestTimeAugmentationAggregator(BaseTransformer):
-    def __init__(self, method, nthreads):
+    def __init__(self, method, num_threads):
         self.method = method
-        self.nthreads = nthreads
+        self.num_threads = num_threads

     @property
     def agg_method(self):
@@ -456,7 +456,7 @@ def transform(self, images, tta_params, img_ids, **kwargs):
                                            img_ids=img_ids,
                                            agg_method=self.agg_method)
         unique_img_ids = set(img_ids)
-        threads = min(self.nthreads, len(unique_img_ids))
+        threads = min(self.num_threads, len(unique_img_ids))
         with mp.pool.ThreadPool(threads) as executor:
            averages_images = executor.map(_aggregate_augmentations, unique_img_ids)
         return {'aggregated_prediction': averages_images}
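The aggregator combines predictions from several test-time augmentations of the same image, using up to num_threads worker threads (one per unique image id). A minimal numpy sketch of the gmean aggregation itself, independent of the repo's BaseTransformer plumbing (array shapes and the epsilon are assumptions):

# Sketch only: geometric-mean aggregation of TTA probability maps.
import numpy as np

def gmean_aggregate(predictions, eps=1e-7):
    """Geometric mean over a list of equally shaped probability maps,
    one per test-time augmentation of the same image."""
    stacked = np.stack(predictions, axis=0)
    return np.exp(np.mean(np.log(stacked + eps), axis=0))

# Usage idea: preds = [predict(aug(img)) for aug in augmentations]
#             aggregated = gmean_aggregate(preds)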

src/models.py

Lines changed: 94 additions & 3 deletions
@@ -4,13 +4,18 @@
 import torch.nn as nn
 from torch.autograd import Variable
 from torch import optim
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.externals import joblib
+from sklearn.ensemble import RandomForestRegressor

 from .callbacks import NeptuneMonitorSegmentation, ValidationMonitorSegmentation
 from .steps.pytorch.architectures.unet import UNet
 from .steps.pytorch.callbacks import CallbackList, TrainingMonitor, ModelCheckpoint, \
     ExperimentTiming, ExponentialLRScheduler, EarlyStopping
 from .steps.pytorch.models import Model
 from .steps.pytorch.validation import multiclass_segmentation_loss, DiceLoss
+from .steps.sklearn.models import LightGBM, make_transformer, SklearnRegressor
 from .utils import softmax
 from .unet_models import AlbuNet, UNet11, UNetVGG16, UNetResNet

@@ -159,9 +164,12 @@ def __init__(self, architecture_config, training_config, callbacks_config):
 class PyTorchUNetWeightedStream(BasePyTorchUNet):
     def __init__(self, architecture_config, training_config, callbacks_config):
         super().__init__(architecture_config, training_config, callbacks_config)
-        weighted_loss = partial(multiclass_weighted_cross_entropy,
-                                **get_loss_variables(**architecture_config['weighted_cross_entropy']))
-        loss = partial(mixed_dice_cross_entropy_loss, dice_weight=architecture_config['loss_weights']['dice_mask'],
+        weights_function = partial(get_weights, **architecture_config['weighted_cross_entropy'])
+        weighted_loss = partial(multiclass_weighted_cross_entropy, weights_function=weights_function)
+        dice_loss = partial(multiclass_dice_loss, excluded_classes=[0])
+        loss = partial(mixed_dice_cross_entropy_loss,
+                       dice_loss=dice_loss,
+                       dice_weight=architecture_config['loss_weights']['dice_mask'],
                        cross_entropy_weight=architecture_config['loss_weights']['bce_mask'],
                        cross_entropy_loss=weighted_loss,
                        **architecture_config['dice'])
@@ -201,6 +209,81 @@ def _transform(self, datagen, validation_datagen=None):
         self.model.train()


+class ScoringLightGBM(LightGBM):
+    def __init__(self, model_params, training_params, train_size, target):
+        self.train_size = train_size
+        self.target = target
+        self.feature_names = []
+        self.estimator = None
+        super().__init__(model_params, training_params)
+
+    def fit(self, features, **kwargs):
+        df_features = _convert_features_to_df(features)
+        train_data, val_data = train_test_split(df_features, train_size=self.train_size)
+        self.feature_names = list(df_features.columns.drop(self.target))
+        super().fit(X=train_data[self.feature_names],
+                    y=train_data[self.target],
+                    X_valid=val_data[self.feature_names],
+                    y_valid=val_data[self.target],
+                    feature_names=self.feature_names,
+                    categorical_features=[])
+        return self
+
+    def transform(self, features, **kwargs):
+        scores = []
+        for image_features in features:
+            image_scores = []
+            for layer_features in image_features:
+                if len(layer_features) > 0:
+                    layer_scores = super().transform(layer_features[self.feature_names])
+                    image_scores.append(list(layer_scores['prediction']))
+                else:
+                    image_scores.append([])
+            scores.append(image_scores)
+        return {'scores': scores}
+
+    def save(self, filepath):
+        joblib.dump((self.estimator, self.feature_names), filepath)
+
+    def load(self, filepath):
+        self.estimator, self.feature_names = joblib.load(filepath)
+
+
+class ScoringRandomForest(SklearnRegressor):
+    def __init__(self, train_size, target, **kwargs):
+        self.train_size = train_size
+        self.target = target
+        self.feature_names = []
+        self.estimator = RandomForestRegressor()
+
+    def fit(self, features, **kwargs):
+        df_features = _convert_features_to_df(features)
+        train_data, val_data = train_test_split(df_features, train_size=self.train_size)
+        self.feature_names = list(df_features.columns.drop(self.target))
+        super().fit(X=train_data[self.feature_names],
+                    y=train_data[self.target])
+        return self
+
+    def transform(self, features, **kwargs):
+        scores = []
+        for image_features in features:
+            image_scores = []
+            for layer_features in image_features:
+                if len(layer_features) > 0:
+                    layer_scores = super().transform(layer_features[self.feature_names])
+                    image_scores.append(list(layer_scores['prediction']))
+                else:
+                    image_scores.append([])
+            scores.append(image_scores)
+        return {'scores': scores}
+
+    def save(self, filepath):
+        joblib.dump((self.estimator, self.feature_names), filepath)
+
+    def load(self, filepath):
+        self.estimator, self.feature_names = joblib.load(filepath)
+
+
 def weight_regularization_unet(model, regularize, weight_decay_conv2d):
     if regularize:
         parameter_list = [{'params': model.parameters(), 'weight_decay': weight_decay_conv2d}]
@@ -369,3 +452,11 @@ def multiclass_dice_loss(output, target, smooth=0, activation='softmax', exclude
             class_target.data = class_target.data.float()
         loss += dice(output[:, class_nr, :, :], class_target)
     return loss
+
+
+def _convert_features_to_df(features):
+    df_features = []
+    for image_features in features:
+        for layer_features in image_features[1:]:
+            df_features.append(layer_features)
+    return pd.concat(df_features)
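For context, the scoring models above receive features as a nested structure: one list per image, holding one feature DataFrame per category layer, with layer 0 (background) skipped by _convert_features_to_df. A small illustrative example of that flattening follows; only the 'iou' target name comes from the config (lgbm__target), the other column names are made-up assumptions.

# Sketch only: flattening the per-image, per-layer feature DataFrames.
import pandas as pd

features = [
    [pd.DataFrame(),  # layer 0: background, skipped via image_features[1:]
     pd.DataFrame({'area': [120, 45], 'solidity': [0.9, 0.7], 'iou': [0.8, 0.3]})],
    [pd.DataFrame(),
     pd.DataFrame({'area': [300], 'solidity': [0.95], 'iou': [0.9]})],
]

df = pd.concat([layer for image in features for layer in image[1:]])
# 'iou' is the regression target, the remaining columns are model inputs.
X, y = df.drop(columns='iou'), df['iou']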

src/pipeline_config.py

Lines changed: 21 additions & 3 deletions
@@ -12,8 +12,9 @@
 X_COLUMNS = ['file_path_image']
 Y_COLUMNS = ['file_path_mask_eroded_0_dilated_0']
 Y_COLUMNS_SCORING = ['ImageId']
-CATEGORY_IDS = [None, 100]
 SEED = 1234
+CATEGORY_IDS = [None, 100]
+CATEGORY_LAYERS = [1, 19]
 MEAN = [0.485, 0.456, 0.406]
 STD = [0.229, 0.224, 0.225]

@@ -121,15 +122,32 @@
                       'rotation': True,
                       'color_shift_runs': False},
     'tta_aggregator': {'method': params.tta_aggregation_method,
-                       'nthreads': params.num_threads
+                       'num_threads': params.num_threads
                        },
-    'dropper': {'min_size': params.min_nuclei_size},
     'postprocessor': {'mask_dilation': {'dilate_selem_size': params.dilate_selem_size
                                         },
                       'mask_erosion': {'erode_selem_size': params.erode_selem_size
                                        },
                       'prediction_crop': {'h_crop': params.crop_image_h,
                                           'w_crop': params.crop_image_w
                                           },
+                      'scoring_model': params.scoring_model,
+                      'lightGBM': {'model_params': {'learning_rate': params.lgbm__learning_rate,
+                                                    'boosting_type': 'gbdt',
+                                                    'objective': 'regression',
+                                                    'metric': 'regression_l2',
+                                                    'sub_feature': 1.0,
+                                                    'num_leaves': params.lgbm__num_leaves,
+                                                    'min_data': params.lgbm__min_data,
+                                                    'max_depth': params.lgbm__max_depth},
+                                   'training_params': {'number_boosting_rounds': params.lgbm__number_of_trees,
+                                                       'early_stopping_rounds': params.lgbm__early_stopping},
+                                   'train_size': params.lgbm__train_size,
+                                   'target': params.lgbm__target
+                                   },
+                      'random_forest': {'train_size': params.lgbm__train_size,
+                                        'target': params.lgbm__target},
+                      'nms': {'iou_threshold': params.nms__iou_threshold,
+                              'num_threads': params.num_threads},
                       },
     })
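The new nms entry parameterizes non-maximum suppression over the scored instance masks (the scores come from the scoring model above). The repo's own NMS implementation is not shown in this diff; below is a generic greedy-NMS sketch over binary masks under that interpretation (function names and the mask representation are assumptions).

# Sketch only: greedy NMS keyed on mask IoU.
import numpy as np

def mask_iou(a, b):
    """IoU between two binary masks of the same shape."""
    intersection = np.logical_and(a, b).sum()
    union = np.logical_or(a, b).sum()
    return intersection / union if union > 0 else 0.0

def greedy_nms(masks, scores, iou_threshold=0.5):
    """Keep highest-scoring masks; drop any mask whose IoU with an already
    kept mask exceeds iou_threshold (cf. nms__iou_threshold: 0.5)."""
    order = np.argsort(scores)[::-1]
    kept = []
    for idx in order:
        if all(mask_iou(masks[idx], masks[k]) <= iou_threshold for k in kept):
            kept.append(idx)
    return kept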
