From e40883b9e208201c97e5cba4f39c6bf14e715eab Mon Sep 17 00:00:00 2001 From: milancurcic Date: Wed, 22 Jan 2025 15:07:51 -0500 Subject: [PATCH 01/30] First stab at dropout; conflict with base type TODO --- src/nf/nf_dropout_layer.f90 | 87 ++++++++++++++++++++++ src/nf/nf_dropout_layer_submodule.f90 | 65 ++++++++++++++++ src/nf/nf_layer_constructors_submodule.f90 | 1 + test/test_dropout_layer.f90 | 20 +++++ 4 files changed, 173 insertions(+) create mode 100644 src/nf/nf_dropout_layer.f90 create mode 100644 src/nf/nf_dropout_layer_submodule.f90 create mode 100644 test/test_dropout_layer.f90 diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 new file mode 100644 index 00000000..cab1ac35 --- /dev/null +++ b/src/nf/nf_dropout_layer.f90 @@ -0,0 +1,87 @@ +module nf_dropout_layer + + !! This module provides the concrete dropout layer type. + !! It is used internally by the layer type. + !! It is not intended to be used directly by the user. + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + + implicit none + + private + public :: dropout_layer + + type, extends(base_layer) :: dropout_layer + + !! Concrete implementation of a dropout layer type + + integer :: input_size + integer :: output_size + + real, allocatable :: output(:) + real, allocatable :: gradient(:) + real :: dropout_rate ! probability of dropping a neuron + real, allocatable :: mask(:) ! binary mask for dropout + + class(activation_function), allocatable :: activation + + contains + + procedure :: backward + procedure :: forward + procedure :: init + + end type dropout_layer + + interface dropout_layer + module function dropout_layer_cons(rate) & + result(res) + !! This function returns the `dropout_layer` instance. + real, intent(in) :: rate + !! Dropout rate + type(dropout_layer) :: res + !! dropout_layer instance + end function dropout_layer_cons + end interface dropout_layer + + interface + + pure module subroutine backward(self, input, gradient) + !! Apply the backward gradient descent pass. + !! Only weight and bias gradients are updated in this subroutine, + !! while the weights and biases themselves are untouched. + class(dropout_layer), intent(in out) :: self + !! Dropout layer instance + real, intent(in) :: input(:) + !! Input from the previous layer + real, intent(in) :: gradient(:) + !! Gradient from the next layer + end subroutine backward + + pure module subroutine forward(self, input) + !! Propagate forward the layer. + !! Calling this subroutine updates the values of a few data components + !! of `dropout_layer` that are needed for the backward pass. + class(dropout_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:) + !! Input from the previous layer + end subroutine forward + + module subroutine init(self, input_shape, training) + !! Initialize the layer data structures. + !! + !! This is a deferred procedure from the `base_layer` abstract type. + class(dropout_layer), intent(in out) :: self + !! Dropout layer instance + integer, intent(in) :: input_shape(:) + !! Shape of the input layer + logical, intent(in) :: training + !! Whether the layer is in training mode (.true. == dropping out neurons) + !! or in inference mode (.false. 
== doing nothing) + end subroutine init + + end interface + +end module nf_dropout_layer diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 new file mode 100644 index 00000000..02610a68 --- /dev/null +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -0,0 +1,65 @@ +submodule (nf_dropout_layer) nf_dropout_layer_submodule + !! This submodule implements the procedures defined in the + !! nf_dropout_layer module. + +contains + + module function dropout_layer_cons(rate) result(res) + real, intent(in) :: rate + type(dropout_layer) :: res + + ! Initialize dropout rate + res % dropout_rate = rate + end function dropout_layer_cons + + module subroutine init(self, input_shape, training) + class(dropout_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + logical, intent(in) :: training + + ! Set input and output sizes (dropout preserves dimensions) + self % input_size = input_shape(1) + self % output_size = input_shape(1) + + ! Allocate arrays + if (allocated(self % output)) deallocate(self % output) + if (allocated(self % gradient)) deallocate(self % gradient) + if (allocated(self % mask)) deallocate(self % mask) + + allocate(self % output(self % output_size)) + allocate(self % gradient(self % input_size)) + allocate(self % mask(self % input_size)) + + ! Initialize arrays to zero + self % output = 0.0 + self % gradient = 0.0 + self % mask = 1.0 ! Default mask is all ones (no dropout) + end subroutine init + + pure module subroutine forward(self, input) + class(dropout_layer), intent(in out) :: self + real, intent(in) :: input(:) + real :: rand_vals(size(input)) + + ! Generate random mask for dropout + call random_number(rand_vals) + where (rand_vals < self % dropout_rate) + self % mask = 0 + elsewhere + self % mask = 1 / (1 - self % dropout_rate) ! Scale to preserve expected value + end where + + ! Apply dropout mask + self % output = input * self % mask + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(dropout_layer), intent(in out) :: self + real, intent(in) :: input(:) + real, intent(in) :: gradient(:) + + ! Backpropagate gradient through dropout mask + self % gradient = gradient * self % mask + end subroutine backward + +end submodule nf_dropout_layer_submodule \ No newline at end of file diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 234b20b1..86cef8ac 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -3,6 +3,7 @@ use nf_layer, only: layer use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer + use nf_dropout_layer, only: dropout_layer use nf_flatten_layer, only: flatten_layer use nf_input1d_layer, only: input1d_layer use nf_input3d_layer, only: input3d_layer diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 new file mode 100644 index 00000000..c0f37d8e --- /dev/null +++ b/test/test_dropout_layer.f90 @@ -0,0 +1,20 @@ +program test_dropout_layer + use iso_fortran_env, only: stderr => error_unit + use nf, only: dropout, layer + type(layer) :: layer1 + + layer1 = dropout(0.5) + + if (.not. layer1 % name == 'dropout') then + ok = .false. + write(stderr, '(a)') 'dropout layer has its name set correctly.. failed' + end if + + if (ok) then + print '(a)', 'test_dropout_layer: All tests passed.' + else + write(stderr, '(a)') 'test_dropout_layer: One or more tests failed.' 
+ stop 1 + end if + +end program test_dropout_layer From 37aa7a5db719c7cad1190653085469ab1ae700d5 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 23 Jan 2025 12:53:14 -0500 Subject: [PATCH 02/30] Partial dropout integration --- src/nf.f90 | 2 +- src/nf/nf_dropout_layer.f90 | 13 +++------- src/nf/nf_dropout_layer_submodule.f90 | 30 ++++++++++------------ src/nf/nf_layer_constructors.f90 | 21 ++++++++++++++- src/nf/nf_layer_constructors_submodule.f90 | 10 ++++++++ test/test_dropout_layer.f90 | 1 + 6 files changed, 49 insertions(+), 28 deletions(-) diff --git a/src/nf.f90 b/src/nf.f90 index b97d9e62..d477f1b5 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -3,7 +3,7 @@ module nf use nf_datasets_mnist, only: label_digits, load_mnist use nf_layer, only: layer use nf_layer_constructors, only: & - conv2d, dense, flatten, input, maxpool2d, reshape + conv2d, dense, dropout, flatten, input, maxpool2d, reshape use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index cab1ac35..9489ad60 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -4,7 +4,6 @@ module nf_dropout_layer !! It is used internally by the layer type. !! It is not intended to be used directly by the user. - use nf_activation, only: activation_function use nf_base_layer, only: base_layer implicit none @@ -17,14 +16,13 @@ module nf_dropout_layer !! Concrete implementation of a dropout layer type integer :: input_size - integer :: output_size real, allocatable :: output(:) real, allocatable :: gradient(:) - real :: dropout_rate ! probability of dropping a neuron real, allocatable :: mask(:) ! binary mask for dropout - class(activation_function), allocatable :: activation + real :: dropout_rate ! probability of dropping a neuron + logical :: training = .true. contains @@ -59,7 +57,7 @@ pure module subroutine backward(self, input, gradient) !! Gradient from the next layer end subroutine backward - pure module subroutine forward(self, input) + module subroutine forward(self, input) !! Propagate forward the layer. !! Calling this subroutine updates the values of a few data components !! of `dropout_layer` that are needed for the backward pass. @@ -69,7 +67,7 @@ pure module subroutine forward(self, input) !! Input from the previous layer end subroutine forward - module subroutine init(self, input_shape, training) + module subroutine init(self, input_shape) !! Initialize the layer data structures. !! !! This is a deferred procedure from the `base_layer` abstract type. @@ -77,9 +75,6 @@ module subroutine init(self, input_shape, training) !! Dropout layer instance integer, intent(in) :: input_shape(:) !! Shape of the input layer - logical, intent(in) :: training - !! Whether the layer is in training mode (.true. == dropping out neurons) - !! or in inference mode (.false. == doing nothing) end subroutine init end interface diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 02610a68..e3a3cf21 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -12,38 +12,33 @@ module function dropout_layer_cons(rate) result(res) res % dropout_rate = rate end function dropout_layer_cons - module subroutine init(self, input_shape, training) + + module subroutine init(self, input_shape) class(dropout_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) - logical, intent(in) :: training - ! 
Set input and output sizes (dropout preserves dimensions) self % input_size = input_shape(1) - self % output_size = input_shape(1) ! Allocate arrays - if (allocated(self % output)) deallocate(self % output) - if (allocated(self % gradient)) deallocate(self % gradient) - if (allocated(self % mask)) deallocate(self % mask) - - allocate(self % output(self % output_size)) + allocate(self % output(self % input_size)) allocate(self % gradient(self % input_size)) allocate(self % mask(self % input_size)) - ! Initialize arrays to zero - self % output = 0.0 - self % gradient = 0.0 - self % mask = 1.0 ! Default mask is all ones (no dropout) + ! Initialize arrays + self % output = 0 + self % gradient = 0 + self % mask = 1 ! Default mask is all ones (no dropout) + end subroutine init - pure module subroutine forward(self, input) + + module subroutine forward(self, input) class(dropout_layer), intent(in out) :: self real, intent(in) :: input(:) - real :: rand_vals(size(input)) ! Generate random mask for dropout - call random_number(rand_vals) - where (rand_vals < self % dropout_rate) + call random_number(self % mask) + where (self % mask < self % dropout_rate) self % mask = 0 elsewhere self % mask = 1 / (1 - self % dropout_rate) ! Scale to preserve expected value @@ -53,6 +48,7 @@ pure module subroutine forward(self, input) self % output = input * self % mask end subroutine forward + pure module subroutine backward(self, input, gradient) class(dropout_layer), intent(in out) :: self real, intent(in) :: input(:) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index 309be6e4..24fc7e63 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -8,7 +8,7 @@ module nf_layer_constructors implicit none private - public :: conv2d, dense, flatten, input, maxpool2d, reshape + public :: conv2d, dense, flatten, input, maxpool2d, reshape, dropout interface input @@ -85,6 +85,24 @@ module function dense(layer_size, activation) result(res) !! Resulting layer instance end function dense + module function dropout(rate) result(res) + !! Create a dropout layer with a given dropout rate. + !! + !! This layer is for randomly disabling neurons during training. + !! + !! Example: + !! + !! ``` + !! use nf, only :: dropout, layer + !! type(layer) :: dropout_layer + !! dropout_layer = dropout(rate=0.5) + !! ``` + real, intent(in) :: rate + !! Dropout rate - fraction of neurons to randomly disable during training + type(layer) :: res + !! Resulting layer instance + end function dropout + module function flatten() result(res) !! Flatten (3-d -> 1-d) layer constructor. !! @@ -166,6 +184,7 @@ module function reshape(output_shape) result(res) !! Resulting layer instance end function reshape + end interface end module nf_layer_constructors diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 86cef8ac..09c79e90 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -64,6 +64,14 @@ module function dense(layer_size, activation) result(res) end function dense + module function dropout(rate) result(res) + real, intent(in) :: rate + type(layer) :: res + res % name = 'dropout' + allocate(res % p, source=dropout_layer(rate)) + end function dropout + + module function flatten() result(res) type(layer) :: res res % name = 'flatten' @@ -92,6 +100,7 @@ module function input3d(layer_shape) result(res) res % initialized = .true. 
end function input3d + module function maxpool2d(pool_size, stride) result(res) integer, intent(in) :: pool_size integer, intent(in), optional :: stride @@ -120,6 +129,7 @@ module function maxpool2d(pool_size, stride) result(res) end function maxpool2d + module function reshape(output_shape) result(res) integer, intent(in) :: output_shape(:) type(layer) :: res diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index c0f37d8e..3424730e 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -2,6 +2,7 @@ program test_dropout_layer use iso_fortran_env, only: stderr => error_unit use nf, only: dropout, layer type(layer) :: layer1 + logical :: ok = .true. layer1 = dropout(0.5) From 820b081cb8af5e2cfa078283863025bbeaee8574 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 23 Jan 2025 13:10:28 -0500 Subject: [PATCH 03/30] Test uninitialized dropout layer --- src/nf/nf_dropout_layer.f90 | 3 +-- src/nf/nf_layer_submodule.f90 | 7 +++++-- test/test_dropout_layer.f90 | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index 9489ad60..67613271 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -12,10 +12,9 @@ module nf_dropout_layer public :: dropout_layer type, extends(base_layer) :: dropout_layer - !! Concrete implementation of a dropout layer type - integer :: input_size + integer :: input_size = 0 real, allocatable :: output(:) real, allocatable :: gradient(:) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index c672581a..80647972 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -3,6 +3,7 @@ use iso_fortran_env, only: stderr => error_unit use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer + use nf_dropout_layer, only: dropout_layer use nf_flatten_layer, only: flatten_layer use nf_input1d_layer, only: input1d_layer use nf_input3d_layer, only: input3d_layer @@ -240,15 +241,17 @@ impure elemental module subroutine init(self, input) call this_layer % init(input % layer_shape) end select - ! The shape of conv2d, maxpool2d, or flatten layers is not known + ! The shape of conv2d, dropout, flatten, or maxpool2d layers is not known ! until we receive an input layer. select type(this_layer => self % p) type is(conv2d_layer) self % layer_shape = shape(this_layer % output) - type is(maxpool2d_layer) + type is(dropout_layer) self % layer_shape = shape(this_layer % output) type is(flatten_layer) self % layer_shape = shape(this_layer % output) + type is(maxpool2d_layer) + self % layer_shape = shape(this_layer % output) end select self % input_layer_shape = input % layer_shape diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 3424730e..b46bd30a 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -1,6 +1,7 @@ program test_dropout_layer use iso_fortran_env, only: stderr => error_unit use nf, only: dropout, layer + use nf_dropout_layer, only: dropout_layer type(layer) :: layer1 logical :: ok = .true. @@ -11,6 +12,23 @@ program test_dropout_layer write(stderr, '(a)') 'dropout layer has its name set correctly.. failed' end if + ! Dropout on its own is not initialized and its arrays not allocated. + select type(layer1_p => layer1 % p) + type is(dropout_layer) + + if (layer1_p % input_size /= 0) then + print *, 'input_size: ', layer1_p % input_size + ok = .false. 
+ write(stderr, '(a)') 'dropout layer size should be zero.. failed' + end if + + if (allocated(layer1_p % output)) then + ok = .false. + write(stderr, '(a)') 'dropout layer output array should not be allocated.. failed' + end if + + end select + if (ok) then print '(a)', 'test_dropout_layer: All tests passed.' else From 75ef184c73c7d659df72b8cfd063d608f673bb19 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 23 Jan 2025 13:16:05 -0500 Subject: [PATCH 04/30] Test dropout state that follows an input layer --- test/test_dropout_layer.f90 | 38 ++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index b46bd30a..5d092cb4 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -1,8 +1,11 @@ program test_dropout_layer use iso_fortran_env, only: stderr => error_unit - use nf, only: dropout, layer + use nf, only: dropout, input, layer, network use nf_dropout_layer, only: dropout_layer type(layer) :: layer1 + type(network) :: net + integer :: input_size + logical :: ok = .true. layer1 = dropout(0.5) @@ -29,6 +32,39 @@ program test_dropout_layer end select + ! Now we're gonna initialize a minimal network with an input layer and a + ! dropout that follows and we'll check that the dropout layer has expected + ! state. + input_size = 10 + net = network([ & + input(input_size), & + dropout(0.5) & + ]) + + select type(layer1_p => net % layers(1) % p) + type is(dropout_layer) + if (layer1_p % input_size /= input_size) then + ok = .false. + write(stderr, '(a)') 'dropout layer input size should be the same as the input layer.. failed' + end if + + if (.not. allocated(layer1_p % output)) then + ok = .false. + write(stderr, '(a)') 'dropout layer output array should be allocated.. failed' + end if + + if (.not. allocated(layer1_p % gradient)) then + ok = .false. + write(stderr, '(a)') 'dropout layer gradient array should be allocated.. failed' + end if + + if (.not. allocated(layer1_p % mask)) then + ok = .false. + write(stderr, '(a)') 'dropout layer mask array should be allocated.. failed' + end if + + end select + if (ok) then print '(a)', 'test_dropout_layer: All tests passed.' else From 796ae74bd308b74c4231772c9f0ab505862c006f Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 23 Jan 2025 14:29:21 -0500 Subject: [PATCH 05/30] Enable forward pass for dropout; backward pass TODO --- src/nf/nf_dropout_layer_submodule.f90 | 9 ++++++++- src/nf/nf_layer.f90 | 2 +- src/nf/nf_layer_submodule.f90 | 14 +++++++++++++- src/nf/nf_network_submodule.f90 | 3 +++ test/test_dropout_layer.f90 | 26 ++++++++++++++++++++++++++ 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index e3a3cf21..5a022a29 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -35,17 +35,24 @@ end subroutine init module subroutine forward(self, input) class(dropout_layer), intent(in out) :: self real, intent(in) :: input(:) + real :: scale ! Generate random mask for dropout call random_number(self % mask) where (self % mask < self % dropout_rate) self % mask = 0 elsewhere - self % mask = 1 / (1 - self % dropout_rate) ! Scale to preserve expected value + self % mask = 1 end where ! Apply dropout mask self % output = input * self % mask + + ! 
Scale output and mask to preserve the input sum + scale = sum(input) / sum(self % output) + self % output = self % output * scale + self % mask = self % mask * scale + end subroutine forward diff --git a/src/nf/nf_layer.f90 b/src/nf/nf_layer.f90 index ca5e9606..18e8f76a 100644 --- a/src/nf/nf_layer.f90 +++ b/src/nf/nf_layer.f90 @@ -76,7 +76,7 @@ end subroutine backward_3d interface - pure module subroutine forward(self, input) + module subroutine forward(self, input) !! Apply a forward pass on the layer. !! This changes the internal state of the layer. !! This is normally called internally by the `network % forward` diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 80647972..d44ef179 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -107,7 +107,7 @@ pure module subroutine backward_3d(self, previous, gradient) end subroutine backward_3d - pure module subroutine forward(self, input) + module subroutine forward(self, input) implicit none class(layer), intent(in out) :: self class(layer), intent(in) :: input @@ -126,6 +126,18 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) end select + type is(dropout_layer) + + ! Upstream layers permitted: input1d, dense, flatten + select type(prev_layer => input % p) + type is(input1d_layer) + call this_layer % forward(prev_layer % output) + type is(dense_layer) + call this_layer % forward(prev_layer % output) + type is(flatten_layer) + call this_layer % forward(prev_layer % output) + end select + type is(conv2d_layer) ! Upstream layers permitted: input3d, conv2d, maxpool2d, reshape3d diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 140c9226..6aaaec38 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -2,6 +2,7 @@ use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer + use nf_dropout_layer, only: dropout_layer use nf_flatten_layer, only: flatten_layer use nf_input1d_layer, only: input1d_layer use nf_input3d_layer, only: input3d_layer @@ -227,6 +228,8 @@ module function predict_1d(self, input) result(res) select type(output_layer => self % layers(num_layers) % p) type is(dense_layer) res = output_layer % output + type is(dropout_layer) + res = output_layer % output type is(flatten_layer) res = output_layer % output class default diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 5d092cb4..b9b4b2a2 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -65,6 +65,32 @@ program test_dropout_layer end select + ! Now we're gonna run the forward pass and check that the dropout indeed + ! drops according to the requested dropout rate. + forward_pass: block + real :: input_data(5) + real :: output_data(size(input_data)) + integer :: n + + net = network([ & + input(size(input_data)), & + dropout(0.5) & + ]) + + call random_number(input_data) + do n = 1, 10000 + output_data = net % predict(input_data) + ! Check that sum of output matches sum of input within small tolerance + if (abs(sum(output_data) - sum(input_data)) > 1e-5) then + ok = .false. + exit + end if + end do + if (.not. ok) then + write(stderr, '(a)') 'dropout layer output sum should match input sum within 1% tolerance.. failed' + end if + end block forward_pass + if (ok) then print '(a)', 'test_dropout_layer: All tests passed.' 
else From b04d44725a329158b24ebe4363302583308dc77b Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 23 Jan 2025 14:31:20 -0500 Subject: [PATCH 06/30] Version bump and add dropout to the features table --- README.md | 1 + fpm.toml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7e3a4445..75a66491 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). |------------|------------------|------------------------|----------------------|--------------|---------------| | Input | `input` | n/a | 1, 3 | n/a | n/a | | Dense (fully-connected) | `dense` | `input1d`, `flatten` | 1 | ✅ | ✅ | +| Dropout | `dropout` | Any | 1 | ✅ | ✅ | | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) | | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ | | Flatten | `flatten` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ | diff --git a/fpm.toml b/fpm.toml index 5f68f8f6..368812c8 100644 --- a/fpm.toml +++ b/fpm.toml @@ -1,6 +1,6 @@ name = "neural-fortran" -version = "0.18.0" +version = "0.19.0" license = "MIT" author = "Milan Curcic" maintainer = "milancurcic@hey.com" -copyright = "Copyright 2018-2024, neural-fortran contributors" +copyright = "Copyright 2018-2025, neural-fortran contributors" From 544b23a2911cdaccae87f15fed75a6b9cf2037d8 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 23 Jan 2025 17:11:18 -0500 Subject: [PATCH 07/30] Add dropout to CMake --- CMakeLists.txt | 2 ++ test/CMakeLists.txt | 1 + test/test_dropout_layer.f90 | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 490f7ff1..50a0f208 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,8 @@ add_library(neural-fortran src/nf/nf_reshape_layer_submodule.f90 src/nf/io/nf_io_binary.f90 src/nf/io/nf_io_binary_submodule.f90 + src/nf/nf_dropout_layer.f90 + src/nf/nf_dropout_layer_submodule.f90 ) target_link_libraries(neural-fortran PRIVATE) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bfd3538a..108dee66 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,7 @@ foreach(execid input1d_layer input3d_layer + dropout_layer parametric_activation dense_layer conv2d_layer diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index b9b4b2a2..9ed7b864 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -87,7 +87,7 @@ program test_dropout_layer end if end do if (.not. ok) then - write(stderr, '(a)') 'dropout layer output sum should match input sum within 1% tolerance.. failed' + write(stderr, '(a)') 'dropout layer output sum should match input sum within tolerance.. 
failed' end if end block forward_pass From 56dbd52377b96622c0caf53fe2a9e79d14c7ef84 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 24 Jan 2025 10:49:00 -0500 Subject: [PATCH 08/30] Enable preprocessing in fpm.toml (needed with recent versions of fpm) --- fpm.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fpm.toml b/fpm.toml index 368812c8..3df459fb 100644 --- a/fpm.toml +++ b/fpm.toml @@ -4,3 +4,6 @@ license = "MIT" author = "Milan Curcic" maintainer = "milancurcic@hey.com" copyright = "Copyright 2018-2025, neural-fortran contributors" + +[preprocess] +[preprocess.cpp] From 3b5cc27f04867e24f64aa3df9aa0bbf494b1e85e Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 24 Jan 2025 10:57:27 -0500 Subject: [PATCH 09/30] Small change in scale implementation --- src/nf/nf_dropout_layer.f90 | 1 + src/nf/nf_dropout_layer_submodule.f90 | 13 +++++-------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index 67613271..0f557d63 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -21,6 +21,7 @@ module nf_dropout_layer real, allocatable :: mask(:) ! binary mask for dropout real :: dropout_rate ! probability of dropping a neuron + real :: scale ! scale factor to preserve the input sum logical :: training = .true. contains diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 5a022a29..568cbf21 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -35,7 +35,6 @@ end subroutine init module subroutine forward(self, input) class(dropout_layer), intent(in out) :: self real, intent(in) :: input(:) - real :: scale ! Generate random mask for dropout call random_number(self % mask) @@ -45,13 +44,11 @@ module subroutine forward(self, input) self % mask = 1 end where - ! Apply dropout mask - self % output = input * self % mask + ! Scale factor to preserve the input sum + self % scale = sum(input) / sum(self % output) ! scale == 1/P(keep) - ! Scale output and mask to preserve the input sum - scale = sum(input) / sum(self % output) - self % output = self % output * scale - self % mask = self % mask * scale + ! Apply dropout mask + self % output = input * self % mask * self % scale end subroutine forward @@ -62,7 +59,7 @@ pure module subroutine backward(self, input, gradient) real, intent(in) :: gradient(:) ! Backpropagate gradient through dropout mask - self % gradient = gradient * self % mask + self % gradient = gradient * self % mask * self % scale end subroutine backward end submodule nf_dropout_layer_submodule \ No newline at end of file From 703f8023a175a584005105c3be9a6051a695edad Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 24 Jan 2025 11:14:42 -0500 Subject: [PATCH 10/30] Integration of backward pass for dropout --- src/nf/nf_layer_submodule.f90 | 19 +++++++++++++++++-- src/nf/nf_network_submodule.f90 | 2 ++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index d44ef179..69b40d22 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -25,12 +25,14 @@ pure module subroutine backward_1d(self, previous, gradient) type is(dense_layer) - ! Upstream layers permitted: input1d, dense, flatten + ! 
Upstream layers permitted: input1d, dense, dropout, flatten select type(prev_layer => previous % p) type is(input1d_layer) call this_layer % backward(prev_layer % output, gradient) type is(dense_layer) call this_layer % backward(prev_layer % output, gradient) + type is(dropout_layer) + call this_layer % backward(prev_layer % output, gradient) type is(flatten_layer) call this_layer % backward(prev_layer % output, gradient) end select @@ -116,12 +118,14 @@ module subroutine forward(self, input) type is(dense_layer) - ! Upstream layers permitted: input1d, dense, flatten + ! Upstream layers permitted: input1d, dense, dropout, flatten select type(prev_layer => input % p) type is(input1d_layer) call this_layer % forward(prev_layer % output) type is(dense_layer) call this_layer % forward(prev_layer % output) + type is(dropout_layer) + call this_layer % forward(prev_layer % output) type is(flatten_layer) call this_layer % forward(prev_layer % output) end select @@ -299,6 +303,8 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (dense_layer) num_params = this_layer % get_num_params() + type is (dropout_layer) + num_params = size(this_layer % mask) type is (conv2d_layer) num_params = this_layer % get_num_params() type is (maxpool2d_layer) @@ -324,6 +330,8 @@ module function get_params(self) result(params) ! No parameters to get. type is (dense_layer) params = this_layer % get_params() + type is (dropout_layer) + ! No parameters to get. type is (conv2d_layer) params = this_layer % get_params() type is (maxpool2d_layer) @@ -349,6 +357,8 @@ module function get_gradients(self) result(gradients) ! No gradients to get. type is (dense_layer) gradients = this_layer % get_gradients() + type is (dropout_layer) + ! No gradients to get. type is (conv2d_layer) gradients = this_layer % get_gradients() type is (maxpool2d_layer) @@ -396,6 +406,11 @@ module subroutine set_params(self, params) type is (dense_layer) call this_layer % set_params(params) + type is (dropout_layer) + ! No parameters to set. + write(stderr, '(a)') 'Warning: calling set_params() ' & + // 'on a zero-parameter layer; nothing to do.' 
+ type is (conv2d_layer) call this_layer % set_params(params) diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 6aaaec38..0b076b9f 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -135,6 +135,8 @@ module subroutine backward(self, output, loss) select type(next_layer => self % layers(n + 1) % p) type is(dense_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(dropout_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) type is(conv2d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) type is(flatten_layer) From 1dfe6b39dd075391c1d56adb6ef816be0c1bab57 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 6 Feb 2025 12:52:56 -0500 Subject: [PATCH 11/30] Reduce tolerance in conv2d convergence tests --- test/test_conv2d_network.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_conv2d_network.f90 b/test/test_conv2d_network.f90 index 47c9a819..28dce100 100644 --- a/test/test_conv2d_network.f90 +++ b/test/test_conv2d_network.f90 @@ -39,7 +39,7 @@ program test_conv2d_network type(network) :: cnn real :: y(1) - real :: tolerance = 1e-5 + real :: tolerance = 1e-4 integer :: n integer, parameter :: num_iterations = 1000 @@ -76,7 +76,7 @@ program test_conv2d_network type(network) :: cnn real :: x(1, 8, 8) real :: y(1) - real :: tolerance = 1e-5 + real :: tolerance = 1e-4 integer :: n integer, parameter :: num_iterations = 1000 @@ -111,7 +111,7 @@ program test_conv2d_network type(network) :: cnn real :: x(1, 12, 12) real :: y(9) - real :: tolerance = 1e-5 + real :: tolerance = 1e-4 integer :: n integer, parameter :: num_iterations = 5000 From 59cc7e1a98434ca3a55236734cf399c6fb9ecda6 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 6 Feb 2025 12:53:54 -0500 Subject: [PATCH 12/30] Fix bug in dropout scaling Co-authored-by: Ricardo Orsi <@ricor07> --- src/nf/nf_dropout_layer_submodule.f90 | 2 +- test/test_dropout_layer.f90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 568cbf21..d24ed34a 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -45,7 +45,7 @@ module subroutine forward(self, input) end where ! Scale factor to preserve the input sum - self % scale = sum(input) / sum(self % output) ! scale == 1/P(keep) + self % scale = sum(input) / sum(input * self % mask) ! Apply dropout mask self % output = input * self % mask * self % scale diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 9ed7b864..23dd0209 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -81,7 +81,7 @@ program test_dropout_layer do n = 1, 10000 output_data = net % predict(input_data) ! Check that sum of output matches sum of input within small tolerance - if (abs(sum(output_data) - sum(input_data)) > 1e-5) then + if (abs(sum(output_data) - sum(input_data)) > 1e-6) then ok = .false. 
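        ! Editor's note (not part of the original patch): the test can demand that the
        ! sums match to within 1e-6 because the corrected scale factor,
        ! scale = sum(input) / sum(input * mask), preserves the input sum exactly up
        ! to roundoff. For example, for input [1, 2, 3, 4] and mask [0, 1, 1, 0] the
        ! scale is (1+2+3+4) / (2+3) = 2, giving output [0, 4, 6, 0] whose sum is
        ! again 10.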
exit end if From c984b15f8e750e4aa52cf0f9b88bcf5cc35f17da Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 6 Feb 2025 13:58:38 -0500 Subject: [PATCH 13/30] disable dropout in inference mode (net % predict); TODO enable in net % train --- src/nf/nf_dropout_layer.f90 | 6 ++-- src/nf/nf_dropout_layer_submodule.f90 | 36 +++++++++++++--------- src/nf/nf_layer_constructors.f90 | 4 ++- src/nf/nf_layer_constructors_submodule.f90 | 5 +-- src/nf/nf_layer_submodule.f90 | 2 +- src/nf/nf_network_submodule.f90 | 22 +++++++++++-- 6 files changed, 53 insertions(+), 22 deletions(-) diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index 0f557d63..bffca5f0 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -22,7 +22,7 @@ module nf_dropout_layer real :: dropout_rate ! probability of dropping a neuron real :: scale ! scale factor to preserve the input sum - logical :: training = .true. + logical :: training = .false. ! set to .true. in training mode contains @@ -33,11 +33,13 @@ module nf_dropout_layer end type dropout_layer interface dropout_layer - module function dropout_layer_cons(rate) & + module function dropout_layer_cons(rate, training) & result(res) !! This function returns the `dropout_layer` instance. real, intent(in) :: rate !! Dropout rate + logical, intent(in), optional :: training + !! Training mode (default .false.) type(dropout_layer) :: res !! dropout_layer instance end function dropout_layer_cons diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index d24ed34a..fb787699 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -4,12 +4,12 @@ contains - module function dropout_layer_cons(rate) result(res) + module function dropout_layer_cons(rate, training) result(res) real, intent(in) :: rate + logical, intent(in), optional :: training type(dropout_layer) :: res - - ! Initialize dropout rate res % dropout_rate = rate + if (present(training)) res % training = training end function dropout_layer_cons @@ -36,19 +36,27 @@ module subroutine forward(self, input) class(dropout_layer), intent(in out) :: self real, intent(in) :: input(:) - ! Generate random mask for dropout - call random_number(self % mask) - where (self % mask < self % dropout_rate) - self % mask = 0 - elsewhere - self % mask = 1 - end where + ! Generate random mask for dropout, training mode only + if (self % training) then + + call random_number(self % mask) + where (self % mask < self % dropout_rate) + self % mask = 0 + elsewhere + self % mask = 1 + end where + + ! Scale factor to preserve the input sum + self % scale = sum(input) / sum(input * self % mask) + + ! Apply dropout mask + self % output = input * self % mask * self % scale - ! Scale factor to preserve the input sum - self % scale = sum(input) / sum(input * self % mask) + else + ! In inference mode, we don't apply dropout; simply pass through the input + self % output = input - ! Apply dropout mask - self % output = input * self % mask * self % scale + end if end subroutine forward diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index 24fc7e63..fcc49342 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -85,7 +85,7 @@ module function dense(layer_size, activation) result(res) !! Resulting layer instance end function dense - module function dropout(rate) result(res) + module function dropout(rate, training) result(res) !! Create a dropout layer with a given dropout rate. 
!! !! This layer is for randomly disabling neurons during training. @@ -99,6 +99,8 @@ module function dropout(rate) result(res) !! ``` real, intent(in) :: rate !! Dropout rate - fraction of neurons to randomly disable during training + logical, intent(in), optional :: training + !! Training mode (default .false.) type(layer) :: res !! Resulting layer instance end function dropout diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 09c79e90..5203497d 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -64,11 +64,12 @@ module function dense(layer_size, activation) result(res) end function dense - module function dropout(rate) result(res) + module function dropout(rate, training) result(res) real, intent(in) :: rate + logical, intent(in), optional :: training type(layer) :: res res % name = 'dropout' - allocate(res % p, source=dropout_layer(rate)) + allocate(res % p, source=dropout_layer(rate, training)) end function dropout diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 69b40d22..8bf94ea5 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -304,7 +304,7 @@ elemental module function get_num_params(self) result(num_params) type is (dense_layer) num_params = this_layer % get_num_params() type is (dropout_layer) - num_params = size(this_layer % mask) + num_params = 0 type is (conv2d_layer) num_params = this_layer % get_num_params() type is (maxpool2d_layer) diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 0b076b9f..ee9792aa 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -221,10 +221,19 @@ module function predict_1d(self, input) result(res) class(network), intent(in out) :: self real, intent(in) :: input(:) real, allocatable :: res(:) - integer :: num_layers + integer :: n, num_layers num_layers = size(self % layers) + ! predict is run in inference mode only; + ! set all dropout layers' training mode to false. + do n = 2, num_layers + select type(this_layer => self % layers(n) % p) + type is(dropout_layer) + this_layer % training = .false. + end select + end do + call self % forward(input) select type(output_layer => self % layers(num_layers) % p) @@ -245,10 +254,19 @@ module function predict_3d(self, input) result(res) class(network), intent(in out) :: self real, intent(in) :: input(:,:,:) real, allocatable :: res(:) - integer :: num_layers + integer :: n, num_layers num_layers = size(self % layers) + ! predict is run in inference mode only; + ! set all dropout layers' training mode to false. + do n = 2, num_layers + select type(this_layer => self % layers(n) % p) + type is(dropout_layer) + this_layer % training = .false. 
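      ! Editor's note (not part of the original patch): this per-predict loop that
      ! switches every dropout layer to inference mode is repeated in the other
      ! predict variants; a later commit in this series consolidates it into a
      ! single network % set_training_mode(.true./.false.) helper and restores
      ! training mode after inference.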
+ end select + end do + call self % forward(input) select type(output_layer => self % layers(num_layers) % p) From e9772a0535fbdb46ed914fe719385a2475bbde75 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 6 Feb 2025 15:00:42 -0500 Subject: [PATCH 14/30] Set dropout's training mode to true in net % train(); add tests --- src/nf/nf_dropout_layer_submodule.f90 | 9 ++++++-- src/nf/nf_network_submodule.f90 | 30 +++++++++++++++++++++++++-- test/test_dropout_layer.f90 | 29 ++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 4 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index fb787699..6e7e35a0 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -66,8 +66,13 @@ pure module subroutine backward(self, input, gradient) real, intent(in) :: input(:) real, intent(in) :: gradient(:) - ! Backpropagate gradient through dropout mask - self % gradient = gradient * self % mask * self % scale + if (self % training) then + ! Backpropagate gradient through dropout mask + self % gradient = gradient * self % mask * self % scale + else + ! In inference mode, pass through the gradient unchanged + self % gradient = gradient + end if end subroutine backward end submodule nf_dropout_layer_submodule \ No newline at end of file diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index ee9792aa..f28a98e9 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -288,12 +288,21 @@ module function predict_batch_1d(self, input) result(res) class(network), intent(in out) :: self real, intent(in) :: input(:,:) real, allocatable :: res(:,:) - integer :: i, batch_size, num_layers, output_size + integer :: i, n, batch_size, num_layers, output_size num_layers = size(self % layers) batch_size = size(input, dim=rank(input)) output_size = product(self % layers(num_layers) % layer_shape) + ! predict is run in inference mode only; + ! set all dropout layers' training mode to false. + do n = 2, num_layers + select type(this_layer => self % layers(n) % p) + type is(dropout_layer) + this_layer % training = .false. + end select + end do + allocate(res(output_size, batch_size)) batch: do i = 1, size(res, dim=2) @@ -318,12 +327,21 @@ module function predict_batch_3d(self, input) result(res) class(network), intent(in out) :: self real, intent(in) :: input(:,:,:,:) real, allocatable :: res(:,:) - integer :: i, batch_size, num_layers, output_size + integer :: i, n, batch_size, num_layers, output_size num_layers = size(self % layers) batch_size = size(input, dim=rank(input)) output_size = product(self % layers(num_layers) % layer_shape) + ! predict is run in inference mode only; + ! set all dropout layers' training mode to false. + do n = 2, num_layers + select type(this_layer => self % layers(n) % p) + type is(dropout_layer) + this_layer % training = .false. + end select + end do + allocate(res(output_size, batch_size)) batch: do i = 1, batch_size @@ -457,6 +475,14 @@ module subroutine train(self, input_data, output_data, batch_size, & self % loss = quadratic() end if + ! Set all dropout layers' training mode to true. + do n = 2, size(self % layers) + select type(this_layer => self % layers(n) % p) + type is(dropout_layer) + this_layer % training = .true. 
+ end select + end do + dataset_size = size(output_data, dim=2) epoch_loop: do n = 1, epochs diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 23dd0209..b0ad0664 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -19,6 +19,16 @@ program test_dropout_layer select type(layer1_p => layer1 % p) type is(dropout_layer) + if (layer1_p % dropout_rate /= 0.5) then + ok = .false. + write(stderr, '(a)') 'dropout layer dropout rate should be 0.5.. failed' + end if + + if (layer1_p % training) then + ok = .false. + write(stderr, '(a)') 'dropout layer default training mode should be false.. failed' + end if + if (layer1_p % input_size /= 0) then print *, 'input_size: ', layer1_p % input_size ok = .false. @@ -32,6 +42,25 @@ program test_dropout_layer end select + ! Test setting training mode explicitly. + layer1 = dropout(0.5, training=.true.) + select type(layer1_p => layer1 % p) + type is(dropout_layer) + if (.not. layer1_p % training) then + ok = .false. + write(stderr, '(a)') 'dropout layer training mode should be true.. failed' + end if + end select + + layer1 = dropout(0.5, training=.false.) + select type(layer1_p => layer1 % p) + type is(dropout_layer) + if (layer1_p % training) then + ok = .false. + write(stderr, '(a)') 'dropout layer training mode should be false.. failed' + end if + end select + ! Now we're gonna initialize a minimal network with an input layer and a ! dropout that follows and we'll check that the dropout layer has expected ! state. From 5ae7e9dffeff6072c0a18da4de3ba3052e3062cc Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sat, 15 Feb 2025 21:43:09 -0500 Subject: [PATCH 15/30] WIP dropout tests --- test/test_dropout_layer.f90 | 40 ++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index b0ad0664..2f52c715 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -1,6 +1,6 @@ program test_dropout_layer use iso_fortran_env, only: stderr => error_unit - use nf, only: dropout, input, layer, network + use nf, only: dense, dropout, input, layer, network use nf_dropout_layer, only: dropout_layer type(layer) :: layer1 type(network) :: net @@ -120,6 +120,44 @@ program test_dropout_layer end if end block forward_pass + + training: block + real :: x(10), y(5) + real :: tolerance = 1e-3 + integer :: n + integer, parameter :: num_iterations = 100000 + + call random_number(x) + y = [0.1234, 0.2345, 0.3456, 0.4567, 0.5678] + + net = network([ & + input(10), & + dropout(0.5, training=.true.), & + dense(5) & + ]) + + do n = 1, num_iterations + !select type(dropout_l => net % layers(2) % p) + ! type is(dropout_layer) + ! print *, dropout_l % training, dropout_l % mask + !end select + call net % forward(x) + call net % backward(y) + call net % update() + !print *, n, net % predict(x) + + if (all(abs(net % predict(x) - y) < tolerance)) exit + end do + + if (.not. n <= num_iterations) then + write(stderr, '(a)') & + 'dense network should converge in simple training.. failed' + ok = .false. + end if + + end block training + + if (ok) then print '(a)', 'test_dropout_layer: All tests passed.' 
else From 0934f7f526c5b109a1cfed465a9ead5755f4a18f Mon Sep 17 00:00:00 2001 From: milancurcic Date: Mon, 17 Feb 2025 12:42:27 -0500 Subject: [PATCH 16/30] Dropout layers always in training mode; except when is called, when they are in inference mode --- src/nf/nf_dropout_layer.f90 | 6 +- src/nf/nf_dropout_layer_submodule.f90 | 4 +- src/nf/nf_layer_constructors.f90 | 4 +- src/nf/nf_layer_constructors_submodule.f90 | 5 +- src/nf/nf_network.f90 | 10 +++ src/nf/nf_network_submodule.f90 | 88 +++++++++++++--------- test/test_dropout_layer.f90 | 39 ++-------- 7 files changed, 77 insertions(+), 79 deletions(-) diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index bffca5f0..77999abb 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -22,7 +22,7 @@ module nf_dropout_layer real :: dropout_rate ! probability of dropping a neuron real :: scale ! scale factor to preserve the input sum - logical :: training = .false. ! set to .true. in training mode + logical :: training = .true. ! set to .false. for inference contains @@ -33,13 +33,11 @@ module nf_dropout_layer end type dropout_layer interface dropout_layer - module function dropout_layer_cons(rate, training) & + module function dropout_layer_cons(rate) & result(res) !! This function returns the `dropout_layer` instance. real, intent(in) :: rate !! Dropout rate - logical, intent(in), optional :: training - !! Training mode (default .false.) type(dropout_layer) :: res !! dropout_layer instance end function dropout_layer_cons diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 6e7e35a0..5dc4ef07 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -4,12 +4,10 @@ contains - module function dropout_layer_cons(rate, training) result(res) + module function dropout_layer_cons(rate) result(res) real, intent(in) :: rate - logical, intent(in), optional :: training type(dropout_layer) :: res res % dropout_rate = rate - if (present(training)) res % training = training end function dropout_layer_cons diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index c63027a1..770e3b8d 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -104,7 +104,7 @@ module function dense(layer_size, activation) result(res) !! Resulting layer instance end function dense - module function dropout(rate, training) result(res) + module function dropout(rate) result(res) !! Create a dropout layer with a given dropout rate. !! !! This layer is for randomly disabling neurons during training. @@ -118,8 +118,6 @@ module function dropout(rate, training) result(res) !! ``` real, intent(in) :: rate !! Dropout rate - fraction of neurons to randomly disable during training - logical, intent(in), optional :: training - !! Training mode (default .false.) type(layer) :: res !! 
Resulting layer instance end function dropout diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 57bd682a..e9ac30ae 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -65,12 +65,11 @@ module function dense(layer_size, activation) result(res) end function dense - module function dropout(rate, training) result(res) + module function dropout(rate) result(res) real, intent(in) :: rate - logical, intent(in), optional :: training type(layer) :: res res % name = 'dropout' - allocate(res % p, source=dropout_layer(rate, training)) + allocate(res % p, source=dropout_layer(rate)) end function dropout diff --git a/src/nf/nf_network.f90 b/src/nf/nf_network.f90 index fa7ea4eb..5916924e 100644 --- a/src/nf/nf_network.f90 +++ b/src/nf/nf_network.f90 @@ -26,6 +26,7 @@ module nf_network procedure :: get_params procedure :: print_info procedure :: set_params + procedure :: set_training_mode procedure :: train procedure :: update @@ -223,6 +224,15 @@ module subroutine set_params(self, params) !! Network parameters to set end subroutine set_params + module subroutine set_training_mode(self, training) + !! Set the mode to training (.true.) or inference (.false.). + !! Used internally to enable/disable the dropout layers in the network. + class(network), intent(in out) :: self + !! Network instance + logical, intent(in) :: training + !! .true. for training mode, .false. for inference. + end subroutine set_training_mode + module subroutine print_info(self) !! Prints a brief summary of the network and its layers to the screen. class(network), intent(in) :: self diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 5cf66fe2..5d530ee4 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -251,15 +251,11 @@ module function predict_1d(self, input) result(res) num_layers = size(self % layers) ! predict is run in inference mode only; - ! set all dropout layers' training mode to false. - do n = 2, num_layers - select type(this_layer => self % layers(n) % p) - type is(dropout_layer) - this_layer % training = .false. - end select - end do - + ! set all dropout layers' training mode to false, and + ! return to training mode after inference. + call self % set_training_mode(.false.) call self % forward(input) + call self % set_training_mode(.true.) select type(output_layer => self % layers(num_layers) % p) type is(dense_layer) @@ -269,7 +265,8 @@ module function predict_1d(self, input) result(res) type is(flatten_layer) res = output_layer % output class default - error stop 'network % output not implemented for this output layer' + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' end select end function predict_1d @@ -279,15 +276,25 @@ module function predict_2d(self, input) result(res) class(network), intent(in out) :: self real, intent(in) :: input(:,:) real, allocatable :: res(:) - integer :: num_layers + integer :: n, num_layers num_layers = size(self % layers) + ! predict is run in inference mode only; + ! set all dropout layers' training mode to false, and + ! return to training mode after inference. + call self % set_training_mode(.false.) call self % forward(input) + call self % set_training_mode(.true.) 
select type(output_layer => self % layers(num_layers) % p) type is(dense_layer) res = output_layer % output + type is(flatten_layer) + res = output_layer % output + class default + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' end select end function predict_2d @@ -302,15 +309,11 @@ module function predict_3d(self, input) result(res) num_layers = size(self % layers) ! predict is run in inference mode only; - ! set all dropout layers' training mode to false. - do n = 2, num_layers - select type(this_layer => self % layers(n) % p) - type is(dropout_layer) - this_layer % training = .false. - end select - end do - + ! set all dropout layers' training mode to false, and + ! return to training mode after inference. + call self % set_training_mode(.false.) call self % forward(input) + call self % set_training_mode(.true.) select type(output_layer => self % layers(num_layers) % p) type is(conv2d_layer) @@ -321,7 +324,8 @@ module function predict_3d(self, input) result(res) type is(flatten_layer) res = output_layer % output class default - error stop 'network % output not implemented for this output layer' + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' end select end function predict_3d @@ -338,13 +342,9 @@ module function predict_batch_1d(self, input) result(res) output_size = product(self % layers(num_layers) % layer_shape) ! predict is run in inference mode only; - ! set all dropout layers' training mode to false. - do n = 2, num_layers - select type(this_layer => self % layers(n) % p) - type is(dropout_layer) - this_layer % training = .false. - end select - end do + ! set all dropout layers' training mode to false, and + ! return to training mode after inference. + call self % set_training_mode(.false.) allocate(res(output_size, batch_size)) @@ -358,11 +358,16 @@ module function predict_batch_1d(self, input) result(res) type is(flatten_layer) res(:,i) = output_layer % output class default - error stop 'network % output not implemented for this output layer' + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' end select end do batch + ! We are now done with inference; + ! return to training mode for dropout layers. + call self % set_training_mode(.true.) + end function predict_batch_1d @@ -377,13 +382,9 @@ module function predict_batch_3d(self, input) result(res) output_size = product(self % layers(num_layers) % layer_shape) ! predict is run in inference mode only; - ! set all dropout layers' training mode to false. - do n = 2, num_layers - select type(this_layer => self % layers(n) % p) - type is(dropout_layer) - this_layer % training = .false. - end select - end do + ! set all dropout layers' training mode to false, and + ! return to training mode after inference. + call self % set_training_mode(.false.) allocate(res(output_size, batch_size)) @@ -400,11 +401,16 @@ module function predict_batch_3d(self, input) result(res) type is(flatten_layer) res(:,i) = output_layer % output class default - error stop 'network % output not implemented for this output layer' + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' end select end do batch + ! We are now done with inference; + ! return to training mode for dropout layers. + call self % set_training_mode(.true.) 
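    ! Editor's note (not part of the original patch): a minimal usage sketch of the
    ! behaviour introduced in this series, using only constructors and methods that
    ! appear in these patches (net, x, and y are illustrative names):
    !
    !   net = network([input(100), dropout(0.5), dense(5)])
    !   call net % forward(x)   ! training mode: roughly half of the inputs are
    !                           ! zeroed and the rest rescaled to preserve the sum
    !   y = net % predict(x)    ! inference mode: dropout layers pass the input
    !                           ! through unchanged (the training flag is switched
    !                           ! off and back on around the forward pass)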
+ end function predict_batch_3d @@ -484,6 +490,18 @@ module subroutine set_params(self, params) end subroutine set_params + module subroutine set_training_mode(self, training) + class(network), intent(in out) :: self + logical, intent(in) :: training + integer :: n + do n = 2, size(self % layers) + select type(this_layer => self % layers(n) % p); type is(dropout_layer) + this_layer % training = training + end select + end do + end subroutine set_training_mode + + module subroutine train(self, input_data, output_data, batch_size, & epochs, optimizer, loss) class(network), intent(in out) :: self diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 2f52c715..a4aa4b6b 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -24,9 +24,9 @@ program test_dropout_layer write(stderr, '(a)') 'dropout layer dropout rate should be 0.5.. failed' end if - if (layer1_p % training) then + if (.not. layer1_p % training) then ok = .false. - write(stderr, '(a)') 'dropout layer default training mode should be false.. failed' + write(stderr, '(a)') 'dropout layer default training mode should be true.. failed' end if if (layer1_p % input_size /= 0) then @@ -42,25 +42,6 @@ program test_dropout_layer end select - ! Test setting training mode explicitly. - layer1 = dropout(0.5, training=.true.) - select type(layer1_p => layer1 % p) - type is(dropout_layer) - if (.not. layer1_p % training) then - ok = .false. - write(stderr, '(a)') 'dropout layer training mode should be true.. failed' - end if - end select - - layer1 = dropout(0.5, training=.false.) - select type(layer1_p => layer1 % p) - type is(dropout_layer) - if (layer1_p % training) then - ok = .false. - write(stderr, '(a)') 'dropout layer training mode should be false.. failed' - end if - end select - ! Now we're gonna initialize a minimal network with an input layer and a ! dropout that follows and we'll check that the dropout layer has expected ! state. @@ -122,31 +103,27 @@ program test_dropout_layer training: block - real :: x(10), y(5) + real :: x(100), y(5) real :: tolerance = 1e-3 integer :: n - integer, parameter :: num_iterations = 100000 + integer, parameter :: num_iterations = 10000 call random_number(x) - y = [0.1234, 0.2345, 0.3456, 0.4567, 0.5678] + y = [0.12345, 0.23456, 0.34567, 0.45678, 0.56789] net = network([ & - input(10), & - dropout(0.5, training=.true.), & + input(100), & + dropout(0.5), & dense(5) & ]) do n = 1, num_iterations - !select type(dropout_l => net % layers(2) % p) - ! type is(dropout_layer) - ! print *, dropout_l % training, dropout_l % mask - !end select call net % forward(x) call net % backward(y) call net % update() - !print *, n, net % predict(x) if (all(abs(net % predict(x) - y) < tolerance)) exit + end do if (.not. n <= num_iterations) then From 0f640445c492525bcac050cfa58294ff2d5ea555 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Mon, 17 Feb 2025 12:52:32 -0500 Subject: [PATCH 17/30] Update the layers table --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 14f944fb..c8e2b5d1 100644 --- a/README.md +++ b/README.md @@ -30,8 +30,8 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). 
| Layer type | Constructor name | Supported input layers | Rank of output array | Forward pass | Backward pass | |------------|------------------|------------------------|----------------------|--------------|---------------| | Input | `input` | n/a | 1, 2, 3 | n/a | n/a | -| Dense (fully-connected) | `dense` | `input1d`, `flatten` | 1 | ✅ | ✅ | -| Dropout | `dropout` | Any | 1 | ✅ | ✅ | +| Dense (fully-connected) | `dense` | `input1d`, `dense`, `dropout`, `flatten` | 1 | ✅ | ✅ | +| Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ | | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) | | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ | | Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ | From aa19f69b10213d2fd81704bb6a745808d57d82a4 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Tue, 18 Feb 2025 12:13:42 -0500 Subject: [PATCH 18/30] Ensure the actual dropout rate == requested dropout rate in most cases --- src/nf/nf_dropout_layer_submodule.f90 | 14 ++++++----- src/nf/nf_random.f90 | 24 ++++++++++++++++--- test/test_dropout_layer.f90 | 34 +++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 5dc4ef07..d2f50e15 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -1,4 +1,5 @@ submodule (nf_dropout_layer) nf_dropout_layer_submodule + use nf_random, only: shuffle !! This submodule implements the procedures defined in the !! nf_dropout_layer module. @@ -37,12 +38,13 @@ module subroutine forward(self, input) ! Generate random mask for dropout, training mode only if (self % training) then - call random_number(self % mask) - where (self % mask < self % dropout_rate) - self % mask = 0 - elsewhere - self % mask = 1 - end where + ! Set the first dropout_rate number of elements to 0, the rest to 1, + ! and shuffle. Note that the selection of the elements rounds down to + ! the nearest integer, so in cases where size(input) * dropout_rate is + ! not an integer, the actual dropout rate will be slightly lower. + self % mask = 1 + self % mask(:int(size(self % mask) * self % dropout_rate)) = 0 + call shuffle(self % mask) ! Scale factor to preserve the input sum self % scale = sum(input) / sum(input * self % mask) diff --git a/src/nf/nf_random.f90 b/src/nf/nf_random.f90 index 57c5d11f..5160bc13 100644 --- a/src/nf/nf_random.f90 +++ b/src/nf/nf_random.f90 @@ -1,12 +1,12 @@ module nf_random - !! Provides a random number generator with - !! normal distribution, centered on zero. + !! Provides a random number generator with normal distribution, + !! centered on zero, and a Fisher-Yates shuffle. implicit none private - public :: random_normal + public :: random_normal, shuffle real, parameter :: pi = 4 * atan(1.d0) @@ -23,4 +23,22 @@ impure elemental subroutine random_normal(x) x = sqrt(- 2 * log(u(1))) * cos(2 * pi * u(2)) end subroutine random_normal + + subroutine shuffle(x) + !! Fisher-Yates shuffle. + real, intent(in out) :: x(:) + !! 
Array to shuffle + integer :: i, j + real :: r, temp + + do i = size(x), 2, -1 + call random_number(r) + j = floor(r * i) + 1 + temp = x(i) + x(i) = x(j) + x(j) = temp + end do + + end subroutine shuffle + end module nf_random diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index a4aa4b6b..262df3ee 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -75,6 +75,40 @@ program test_dropout_layer end select + ! Test that the generated dropout mask matches the requested dropout rate. + test_mask: block + integer, parameter :: input_sizes(3) = [10, 100, 1000] + real, parameter :: dropout_rates(5) = [0., 0.2, 0.5, 0.8, 1.] + real, allocatable :: input_data(:) + integer :: i, j + + do i = 1, size(input_sizes) + do j = 1, size(dropout_rates) + + net = network([ & + input(input_sizes(i)), & + dropout(dropout_rates(j)) & + ]) + + if (allocated(input_data)) deallocate(input_data) + allocate(input_data(input_sizes(i))) + call random_number(input_data) + + call net % forward(input_data) + + select type(layer1_p => net % layers(2) % p) + type is(dropout_layer) + if (abs(sum(layer1_p % mask) / size(layer1_p % mask) - (1 - dropout_rates(j))) > 1e-6) then + ok = .false. + write(stderr, '(a)') 'actual dropout rate is equal to requested.. failed' + end if + end select + end do + end do + + end block test_mask + + ! Now we're gonna run the forward pass and check that the dropout indeed ! drops according to the requested dropout rate. forward_pass: block From a99d80009aefde7af274bf416fce517790ae4091 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 14:49:32 -0500 Subject: [PATCH 19/30] Accumulate the gradient in dropout % backward and flush in network % update --- src/nf/nf_dropout_layer_submodule.f90 | 7 ++++--- src/nf/nf_network_submodule.f90 | 10 ++-------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index d2f50e15..dcdc620b 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -47,7 +47,8 @@ module subroutine forward(self, input) call shuffle(self % mask) ! Scale factor to preserve the input sum - self % scale = sum(input) / sum(input * self % mask) + self % scale = sum(input) / sum(input * self % mask) ! input conservative approach + !self % scale = 1 / (1 - self % dropout_rate) ! reference approach ! Apply dropout mask self % output = input * self % mask * self % scale @@ -68,10 +69,10 @@ pure module subroutine backward(self, input, gradient) if (self % training) then ! Backpropagate gradient through dropout mask - self % gradient = gradient * self % mask * self % scale + self % gradient = self % gradient + gradient * self % mask * self % scale else ! In inference mode, pass through the gradient unchanged - self % gradient = gradient + self % gradient = self % gradient + gradient end if end subroutine backward diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index a8b5a7bc..fae3476c 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -543,14 +543,6 @@ module subroutine train(self, input_data, output_data, batch_size, & self % loss = quadratic() end if - ! Set all dropout layers' training mode to true. - do n = 2, size(self % layers) - select type(this_layer => self % layers(n) % p) - type is(dropout_layer) - this_layer % training = .true. 
- end select - end do - dataset_size = size(output_data, dim=2) epoch_loop: do n = 1, epochs @@ -640,6 +632,8 @@ module subroutine update(self, optimizer, batch_size) type is(conv2d_layer) this_layer % dw = 0 this_layer % db = 0 + type is(dropout_layer) + this_layer % gradient = 0 end select end do From ea0012a256cc79db3b5691e5f861fec018dbc47c Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 14:50:12 -0500 Subject: [PATCH 20/30] Guard against bad dropout rate --- src/nf/nf_layer_constructors_submodule.f90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index b2f8e462..9558a0bc 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -69,6 +69,8 @@ end function dense module function dropout(rate) result(res) real, intent(in) :: rate type(layer) :: res + if (rate < 0 .or. rate > 1) & + error stop 'rate must be between 0 and 1 in a dropout layer' res % name = 'dropout' allocate(res % p, source=dropout_layer(rate)) end function dropout @@ -81,7 +83,6 @@ module function flatten() result(res) end function flatten - module function input1d(layer_size) result(res) integer, intent(in) :: layer_size type(layer) :: res From 0350c7d0a66daa6a2ba82150b8b0fd341c1a916c Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 14:51:16 -0500 Subject: [PATCH 21/30] Connect the backward pass; expand tests --- src/nf/nf_layer_submodule.f90 | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 3cbdf036..ca18f9c7 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -39,6 +39,18 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) end select + type is(dropout_layer) + + ! Upstream layers permitted: input1d, dense, dropout, flatten + select type(prev_layer => previous % p) + type is(input1d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(dense_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(flatten_layer) + call this_layer % backward(prev_layer % output, gradient) + end select + type is(flatten_layer) ! Upstream layers permitted: input2d, input3d, conv2d, maxpool2d From 183e82f9ba56b629d361541dd6542c44467cd900 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 14:51:29 -0500 Subject: [PATCH 22/30] Expand tests --- test/test_dropout_layer.f90 | 40 +++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 262df3ee..dc18789d 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -112,7 +112,7 @@ program test_dropout_layer ! Now we're gonna run the forward pass and check that the dropout indeed ! drops according to the requested dropout rate. forward_pass: block - real :: input_data(5) + real :: input_data(4) real :: output_data(size(input_data)) integer :: n @@ -121,33 +121,41 @@ program test_dropout_layer dropout(0.5) & ]) - call random_number(input_data) do n = 1, 10000 - output_data = net % predict(input_data) + + call random_number(input_data) + call net % forward(input_data) + ! Check that sum of output matches sum of input within small tolerance - if (abs(sum(output_data) - sum(input_data)) > 1e-6) then - ok = .false. 
- exit - end if + select type(layer1_p => net % layers(2) % p) + type is(dropout_layer) + if (abs(sum(layer1_p % output) - sum(input_data)) > 1e-6) then + ok = .false. + exit + end if + end select + end do - if (.not. ok) then - write(stderr, '(a)') 'dropout layer output sum should match input sum within tolerance.. failed' - end if + + if (.not. ok) write(stderr, '(a)') & + 'dropout layer output sum should match input sum within tolerance.. failed' + end block forward_pass training: block - real :: x(100), y(5) - real :: tolerance = 1e-3 + real :: x(20), y(5) + real :: tolerance = 1e-4 integer :: n - integer, parameter :: num_iterations = 10000 + integer, parameter :: num_iterations = 100000 call random_number(x) y = [0.12345, 0.23456, 0.34567, 0.45678, 0.56789] net = network([ & - input(100), & - dropout(0.5), & + input(20), & + dense(20), & + dropout(0.2), & dense(5) & ]) @@ -155,9 +163,7 @@ program test_dropout_layer call net % forward(x) call net % backward(y) call net % update() - if (all(abs(net % predict(x) - y) < tolerance)) exit - end do if (.not. n <= num_iterations) then From 6c07cd7133a6e0f23c50bfba532e6ff070587076 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 15:09:51 -0500 Subject: [PATCH 23/30] Use the reference scaling in dropout; don't accumulate gradients because it's not needed --- src/nf/nf_dropout_layer_submodule.f90 | 7 +++---- src/nf/nf_network_submodule.f90 | 2 -- test/test_dropout_layer.f90 | 13 ++++++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index dcdc620b..7e2610c4 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -47,8 +47,7 @@ module subroutine forward(self, input) call shuffle(self % mask) ! Scale factor to preserve the input sum - self % scale = sum(input) / sum(input * self % mask) ! input conservative approach - !self % scale = 1 / (1 - self % dropout_rate) ! reference approach + self % scale = 1 / (1 - self % dropout_rate) ! Apply dropout mask self % output = input * self % mask * self % scale @@ -69,10 +68,10 @@ pure module subroutine backward(self, input, gradient) if (self % training) then ! Backpropagate gradient through dropout mask - self % gradient = self % gradient + gradient * self % mask * self % scale + self % gradient = gradient * self % mask * self % scale else ! In inference mode, pass through the gradient unchanged - self % gradient = self % gradient + gradient + self % gradient = gradient end if end subroutine backward diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index fae3476c..dd632d96 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -632,8 +632,6 @@ module subroutine update(self, optimizer, batch_size) type is(conv2d_layer) this_layer % dw = 0 this_layer % db = 0 - type is(dropout_layer) - this_layer % gradient = 0 end select end do diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index dc18789d..3d144138 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -112,16 +112,18 @@ program test_dropout_layer ! Now we're gonna run the forward pass and check that the dropout indeed ! drops according to the requested dropout rate. 
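  ! With the reference (inverted) scaling introduced in this patch, the forward
  ! pass computes output = input * mask / (1 - rate), so
  ! sum(input * mask) / sum(output) = 1 - rate; the realized dropout rate
  ! checked in the block below follows directly from that identity.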
forward_pass: block - real :: input_data(4) + real :: input_data(10) real :: output_data(size(input_data)) + real, parameter :: dropout_rate = 0.2 + real :: realized_dropout_rate integer :: n net = network([ & input(size(input_data)), & - dropout(0.5) & + dropout(dropout_rate) & ]) - do n = 1, 10000 + do n = 1, 100 call random_number(input_data) call net % forward(input_data) @@ -129,9 +131,10 @@ program test_dropout_layer ! Check that sum of output matches sum of input within small tolerance select type(layer1_p => net % layers(2) % p) type is(dropout_layer) - if (abs(sum(layer1_p % output) - sum(input_data)) > 1e-6) then + realized_dropout_rate = 1 - sum(input_data * layer1_p % mask) / sum(layer1_p % output) + if (abs(realized_dropout_rate - dropout_rate) > 1e-6) then ok = .false. - exit + write(stderr, '(a)') 'realized dropout rate does not match requested dropout rate.. failed' end if end select From a904c6e0f33a430f3f4165c12b63c867e6c37fe7 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 15:13:03 -0500 Subject: [PATCH 24/30] Add dropout to MNIST example; small model changes --- example/dense_mnist.f90 | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/example/dense_mnist.f90 b/example/dense_mnist.f90 index c26d0ced..c1db2da4 100644 --- a/example/dense_mnist.f90 +++ b/example/dense_mnist.f90 @@ -1,6 +1,6 @@ program dense_mnist - use nf, only: dense, input, network, sgd, label_digits, load_mnist, corr + use nf, only: dense, input, network, sgd, label_digits, load_mnist, corr, relu, softmax, dropout implicit none @@ -17,8 +17,9 @@ program dense_mnist net = network([ & input(784), & - dense(30), & - dense(10) & + dense(64, relu()), & + dropout(0.2), & + dense(10, softmax()) & ]) num_epochs = 10 @@ -32,7 +33,7 @@ program dense_mnist call net % train( & training_images, & label_digits(training_labels), & - batch_size=100, & + batch_size=128, & epochs=1, & optimizer=sgd(learning_rate=3.) & ) From 35671dd14c55fdc2a0f3672258964dbe6aa57242 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 15:15:32 -0500 Subject: [PATCH 25/30] Add reference --- src/nf/nf_dropout_layer.f90 | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index 77999abb..570426b1 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -1,8 +1,11 @@ module nf_dropout_layer - !! This module provides the concrete dropout layer type. - !! It is used internally by the layer type. - !! It is not intended to be used directly by the user. + !! Dropout layer by Srivastava et al. (2014). + !! + !! Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I. and + !! Salakhutdinov, R., 2014. Dropout: a simple way to prevent neural networks + !! from overfitting. The Journal of Machine Learning Research, 16(1), + !! pp.1929-1958. 
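+  !!
+  !! The implementation uses "inverted" dropout: at training time each element
+  !! is zeroed with probability `dropout_rate` and the surviving elements are
+  !! scaled by 1 / (1 - dropout_rate), so the output keeps the same expected
+  !! value as the input and no extra scaling is required in inference mode.
+  !! A sketch of the intended end use, mirroring example/dense_mnist.f90
+  !! (layer sizes and the 0.2 rate are taken from that example):
+  !!
+  !!   net = network([ &
+  !!     input(784), &
+  !!     dense(64, relu()), &
+  !!     dropout(0.2), &
+  !!     dense(10, softmax()) &
+  !!   ])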
use nf_base_layer, only: base_layer From 31ebd69938e37494f255bf9c1c9434ca569f3fa1 Mon Sep 17 00:00:00 2001 From: "Vandenplas, Jeremie" Date: Fri, 21 Feb 2025 11:34:04 +0100 Subject: [PATCH 26/30] Update print_info dropout --- src/nf/nf_layer_submodule.f90 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index ca18f9c7..13903855 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -360,8 +360,10 @@ impure elemental module subroutine print_info(self) print '("Input shape: ", *(i0, 1x))', self % input_layer_shape print '("Output shape: ", *(i0, 1x))', self % layer_shape print '("Parameters: ", i0)', self % get_num_params() - if (.not. self % name == 'input') & + if (.not. (self % name == 'input' .or. self % name == 'dropout')) & print '("Activation: ", a)', self % activation + if (self % name == 'dropout') & + print '("Dropout rate: ", f0.2)', self % dropout_rate print * end subroutine print_info From 1cd9e2cd58c9602bd0a8775378502a764e5e27f6 Mon Sep 17 00:00:00 2001 From: "Vandenplas, Jeremie" Date: Fri, 21 Feb 2025 11:44:29 +0100 Subject: [PATCH 27/30] Update print_info --- src/nf/nf_layer_submodule.f90 | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 13903855..3f281411 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -359,11 +359,14 @@ impure elemental module subroutine print_info(self) if (.not. self % name == 'input') & print '("Input shape: ", *(i0, 1x))', self % input_layer_shape print '("Output shape: ", *(i0, 1x))', self % layer_shape - print '("Parameters: ", i0)', self % get_num_params() + if (.not. self % name == 'dropout') & + print '("Parameters: ", i0)', self % get_num_params() if (.not. (self % name == 'input' .or. self % name == 'dropout')) & print '("Activation: ", a)', self % activation - if (self % name == 'dropout') & - print '("Dropout rate: ", f0.2)', self % dropout_rate + select type (this_layer => self % p) + type is (dropout_layer) + print '("Dropout rate: ", f0.2)', this_layer % dropout_rate + end select print * end subroutine print_info From 8961f75c12121df06bf895c441c95fd57053c19a Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 21 Feb 2025 11:57:39 -0500 Subject: [PATCH 28/30] Compute scale once in dropout constructor --- src/nf/nf_dropout_layer_submodule.f90 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 7e2610c4..7ae24472 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -9,6 +9,7 @@ module function dropout_layer_cons(rate) result(res) real, intent(in) :: rate type(dropout_layer) :: res res % dropout_rate = rate + res % scale = 1 / (1 - rate) end function dropout_layer_cons @@ -46,9 +47,6 @@ module subroutine forward(self, input) self % mask(:int(size(self % mask) * self % dropout_rate)) = 0 call shuffle(self % mask) - ! Scale factor to preserve the input sum - self % scale = 1 / (1 - self % dropout_rate) - ! 
Apply dropout mask self % output = input * self % mask * self % scale From ee7fdc95645407fa7fe34696ba30f45647563bab Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 21 Feb 2025 12:04:21 -0500 Subject: [PATCH 29/30] dropout % backward() doesn't need input from the previous layer --- src/nf/nf_dropout_layer.f90 | 4 +--- src/nf/nf_dropout_layer_submodule.f90 | 12 ++---------- src/nf/nf_layer_submodule.f90 | 10 +--------- 3 files changed, 4 insertions(+), 22 deletions(-) diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index 570426b1..f7165aa0 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -48,14 +48,12 @@ end function dropout_layer_cons interface - pure module subroutine backward(self, input, gradient) + pure module subroutine backward(self, gradient) !! Apply the backward gradient descent pass. !! Only weight and bias gradients are updated in this subroutine, !! while the weights and biases themselves are untouched. class(dropout_layer), intent(in out) :: self !! Dropout layer instance - real, intent(in) :: input(:) - !! Input from the previous layer real, intent(in) :: gradient(:) !! Gradient from the next layer end subroutine backward diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 7ae24472..3fe07b1a 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -59,18 +59,10 @@ module subroutine forward(self, input) end subroutine forward - pure module subroutine backward(self, input, gradient) + pure module subroutine backward(self, gradient) class(dropout_layer), intent(in out) :: self - real, intent(in) :: input(:) real, intent(in) :: gradient(:) - - if (self % training) then - ! Backpropagate gradient through dropout mask - self % gradient = gradient * self % mask * self % scale - else - ! In inference mode, pass through the gradient unchanged - self % gradient = gradient - end if + self % gradient = gradient * self % mask * self % scale end subroutine backward end submodule nf_dropout_layer_submodule \ No newline at end of file diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index ca18f9c7..7dd5cafe 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -40,16 +40,8 @@ pure module subroutine backward_1d(self, previous, gradient) end select type is(dropout_layer) - ! Upstream layers permitted: input1d, dense, dropout, flatten - select type(prev_layer => previous % p) - type is(input1d_layer) - call this_layer % backward(prev_layer % output, gradient) - type is(dense_layer) - call this_layer % backward(prev_layer % output, gradient) - type is(flatten_layer) - call this_layer % backward(prev_layer % output, gradient) - end select + call this_layer % backward(gradient) type is(flatten_layer) From a2726340521f7c1e6b8a8e10b400a414ede4c486 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 21 Feb 2025 12:23:20 -0500 Subject: [PATCH 30/30] Timing info of dropout --- test/test_dropout_layer.f90 | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 3d144138..a79d0de5 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -177,6 +177,61 @@ program test_dropout_layer end block training + ! The following timing test is not part of the unit tests, but it's a good + ! way to see the performance difference between a network with and without + ! dropout. 
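+  ! The two networks below differ only by the dropout layer between the two
+  ! dense layers; each accumulates the cpu_time spent in forward, backward,
+  ! and update over the same number of iterations, and the totals can be
+  ! compared by uncommenting the print statements at the end of the block.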
+ timing: block + integer, parameter :: layer_size = 100 + integer, parameter :: num_iterations = 1000 + real :: x(layer_size), y(layer_size) + integer :: n + type(network) :: net1, net2 + real :: t1, t2 + real :: accumulated_time1 = 0 + real :: accumulated_time2 = 0 + + net1 = network([ & + input(layer_size), & + dense(layer_size), & + dense(layer_size) & + ]) + + net2 = network([ & + input(layer_size), & + dense(layer_size), & + dropout(0.5), & + dense(layer_size) & + ]) + + call random_number(y) + + ! Network without dropout + do n = 1, num_iterations + call random_number(x) + call cpu_time(t1) + call net1 % forward(x) + call net1 % backward(y) + call net1 % update() + call cpu_time(t2) + accumulated_time1 = accumulated_time1 + (t2 - t1) + end do + + ! Network with dropout + do n = 1, num_iterations + call random_number(x) + call cpu_time(t1) + call net2 % forward(x) + call net2 % backward(y) + call net2 % update() + call cpu_time(t2) + accumulated_time2 = accumulated_time2 + (t2 - t1) + end do + + ! Uncomment the following prints to see the timing results. + !print '(a, f9.6, a, f9.6, a)', 'No dropout time: ', accumulated_time1, ' seconds' + !print '(a, f9.6, a, f9.6, a)', 'Dropout time: ', accumulated_time2, ' seconds' + + end block timing if (ok) then print '(a)', 'test_dropout_layer: All tests passed.'