From e40883b9e208201c97e5cba4f39c6bf14e715eab Mon Sep 17 00:00:00 2001 From: milancurcic Date: Wed, 22 Jan 2025 15:07:51 -0500 Subject: [PATCH 01/30] First stab at dropout; conflict with base type TODO --- src/nf/nf_dropout_layer.f90 | 87 ++++++++++++++++++++++ src/nf/nf_dropout_layer_submodule.f90 | 65 ++++++++++++++++ src/nf/nf_layer_constructors_submodule.f90 | 1 + test/test_dropout_layer.f90 | 20 +++++ 4 files changed, 173 insertions(+) create mode 100644 src/nf/nf_dropout_layer.f90 create mode 100644 src/nf/nf_dropout_layer_submodule.f90 create mode 100644 test/test_dropout_layer.f90 diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 new file mode 100644 index 00000000..cab1ac35 --- /dev/null +++ b/src/nf/nf_dropout_layer.f90 @@ -0,0 +1,87 @@ +module nf_dropout_layer + + !! This module provides the concrete dropout layer type. + !! It is used internally by the layer type. + !! It is not intended to be used directly by the user. + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + + implicit none + + private + public :: dropout_layer + + type, extends(base_layer) :: dropout_layer + + !! Concrete implementation of a dropout layer type + + integer :: input_size + integer :: output_size + + real, allocatable :: output(:) + real, allocatable :: gradient(:) + real :: dropout_rate ! probability of dropping a neuron + real, allocatable :: mask(:) ! binary mask for dropout + + class(activation_function), allocatable :: activation + + contains + + procedure :: backward + procedure :: forward + procedure :: init + + end type dropout_layer + + interface dropout_layer + module function dropout_layer_cons(rate) & + result(res) + !! This function returns the `dropout_layer` instance. + real, intent(in) :: rate + !! Dropout rate + type(dropout_layer) :: res + !! dropout_layer instance + end function dropout_layer_cons + end interface dropout_layer + + interface + + pure module subroutine backward(self, input, gradient) + !! Apply the backward gradient descent pass. + !! Only weight and bias gradients are updated in this subroutine, + !! while the weights and biases themselves are untouched. + class(dropout_layer), intent(in out) :: self + !! Dropout layer instance + real, intent(in) :: input(:) + !! Input from the previous layer + real, intent(in) :: gradient(:) + !! Gradient from the next layer + end subroutine backward + + pure module subroutine forward(self, input) + !! Propagate forward the layer. + !! Calling this subroutine updates the values of a few data components + !! of `dropout_layer` that are needed for the backward pass. + class(dropout_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:) + !! Input from the previous layer + end subroutine forward + + module subroutine init(self, input_shape, training) + !! Initialize the layer data structures. + !! + !! This is a deferred procedure from the `base_layer` abstract type. + class(dropout_layer), intent(in out) :: self + !! Dropout layer instance + integer, intent(in) :: input_shape(:) + !! Shape of the input layer + logical, intent(in) :: training + !! Whether the layer is in training mode (.true. == dropping out neurons) + !! or in inference mode (.false. 
== doing nothing) + end subroutine init + + end interface + +end module nf_dropout_layer diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 new file mode 100644 index 00000000..02610a68 --- /dev/null +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -0,0 +1,65 @@ +submodule (nf_dropout_layer) nf_dropout_layer_submodule + !! This submodule implements the procedures defined in the + !! nf_dropout_layer module. + +contains + + module function dropout_layer_cons(rate) result(res) + real, intent(in) :: rate + type(dropout_layer) :: res + + ! Initialize dropout rate + res % dropout_rate = rate + end function dropout_layer_cons + + module subroutine init(self, input_shape, training) + class(dropout_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + logical, intent(in) :: training + + ! Set input and output sizes (dropout preserves dimensions) + self % input_size = input_shape(1) + self % output_size = input_shape(1) + + ! Allocate arrays + if (allocated(self % output)) deallocate(self % output) + if (allocated(self % gradient)) deallocate(self % gradient) + if (allocated(self % mask)) deallocate(self % mask) + + allocate(self % output(self % output_size)) + allocate(self % gradient(self % input_size)) + allocate(self % mask(self % input_size)) + + ! Initialize arrays to zero + self % output = 0.0 + self % gradient = 0.0 + self % mask = 1.0 ! Default mask is all ones (no dropout) + end subroutine init + + pure module subroutine forward(self, input) + class(dropout_layer), intent(in out) :: self + real, intent(in) :: input(:) + real :: rand_vals(size(input)) + + ! Generate random mask for dropout + call random_number(rand_vals) + where (rand_vals < self % dropout_rate) + self % mask = 0 + elsewhere + self % mask = 1 / (1 - self % dropout_rate) ! Scale to preserve expected value + end where + + ! Apply dropout mask + self % output = input * self % mask + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(dropout_layer), intent(in out) :: self + real, intent(in) :: input(:) + real, intent(in) :: gradient(:) + + ! Backpropagate gradient through dropout mask + self % gradient = gradient * self % mask + end subroutine backward + +end submodule nf_dropout_layer_submodule \ No newline at end of file diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 234b20b1..86cef8ac 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -3,6 +3,7 @@ use nf_layer, only: layer use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer + use nf_dropout_layer, only: dropout_layer use nf_flatten_layer, only: flatten_layer use nf_input1d_layer, only: input1d_layer use nf_input3d_layer, only: input3d_layer diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 new file mode 100644 index 00000000..c0f37d8e --- /dev/null +++ b/test/test_dropout_layer.f90 @@ -0,0 +1,20 @@ +program test_dropout_layer + use iso_fortran_env, only: stderr => error_unit + use nf, only: dropout, layer + type(layer) :: layer1 + + layer1 = dropout(0.5) + + if (.not. layer1 % name == 'dropout') then + ok = .false. + write(stderr, '(a)') 'dropout layer has its name set correctly.. failed' + end if + + if (ok) then + print '(a)', 'test_dropout_layer: All tests passed.' + else + write(stderr, '(a)') 'test_dropout_layer: One or more tests failed.' 
+ stop 1 + end if + +end program test_dropout_layer From 37aa7a5db719c7cad1190653085469ab1ae700d5 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 23 Jan 2025 12:53:14 -0500 Subject: [PATCH 02/30] Partial dropout integration --- src/nf.f90 | 2 +- src/nf/nf_dropout_layer.f90 | 13 +++------- src/nf/nf_dropout_layer_submodule.f90 | 30 ++++++++++------------ src/nf/nf_layer_constructors.f90 | 21 ++++++++++++++- src/nf/nf_layer_constructors_submodule.f90 | 10 ++++++++ test/test_dropout_layer.f90 | 1 + 6 files changed, 49 insertions(+), 28 deletions(-) diff --git a/src/nf.f90 b/src/nf.f90 index b97d9e62..d477f1b5 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -3,7 +3,7 @@ module nf use nf_datasets_mnist, only: label_digits, load_mnist use nf_layer, only: layer use nf_layer_constructors, only: & - conv2d, dense, flatten, input, maxpool2d, reshape + conv2d, dense, dropout, flatten, input, maxpool2d, reshape use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index cab1ac35..9489ad60 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -4,7 +4,6 @@ module nf_dropout_layer !! It is used internally by the layer type. !! It is not intended to be used directly by the user. - use nf_activation, only: activation_function use nf_base_layer, only: base_layer implicit none @@ -17,14 +16,13 @@ module nf_dropout_layer !! Concrete implementation of a dropout layer type integer :: input_size - integer :: output_size real, allocatable :: output(:) real, allocatable :: gradient(:) - real :: dropout_rate ! probability of dropping a neuron real, allocatable :: mask(:) ! binary mask for dropout - class(activation_function), allocatable :: activation + real :: dropout_rate ! probability of dropping a neuron + logical :: training = .true. contains @@ -59,7 +57,7 @@ pure module subroutine backward(self, input, gradient) !! Gradient from the next layer end subroutine backward - pure module subroutine forward(self, input) + module subroutine forward(self, input) !! Propagate forward the layer. !! Calling this subroutine updates the values of a few data components !! of `dropout_layer` that are needed for the backward pass. @@ -69,7 +67,7 @@ pure module subroutine forward(self, input) !! Input from the previous layer end subroutine forward - module subroutine init(self, input_shape, training) + module subroutine init(self, input_shape) !! Initialize the layer data structures. !! !! This is a deferred procedure from the `base_layer` abstract type. @@ -77,9 +75,6 @@ module subroutine init(self, input_shape, training) !! Dropout layer instance integer, intent(in) :: input_shape(:) !! Shape of the input layer - logical, intent(in) :: training - !! Whether the layer is in training mode (.true. == dropping out neurons) - !! or in inference mode (.false. == doing nothing) end subroutine init end interface diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 02610a68..e3a3cf21 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -12,38 +12,33 @@ module function dropout_layer_cons(rate) result(res) res % dropout_rate = rate end function dropout_layer_cons - module subroutine init(self, input_shape, training) + + module subroutine init(self, input_shape) class(dropout_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) - logical, intent(in) :: training - ! 
Set input and output sizes (dropout preserves dimensions) self % input_size = input_shape(1) - self % output_size = input_shape(1) ! Allocate arrays - if (allocated(self % output)) deallocate(self % output) - if (allocated(self % gradient)) deallocate(self % gradient) - if (allocated(self % mask)) deallocate(self % mask) - - allocate(self % output(self % output_size)) + allocate(self % output(self % input_size)) allocate(self % gradient(self % input_size)) allocate(self % mask(self % input_size)) - ! Initialize arrays to zero - self % output = 0.0 - self % gradient = 0.0 - self % mask = 1.0 ! Default mask is all ones (no dropout) + ! Initialize arrays + self % output = 0 + self % gradient = 0 + self % mask = 1 ! Default mask is all ones (no dropout) + end subroutine init - pure module subroutine forward(self, input) + + module subroutine forward(self, input) class(dropout_layer), intent(in out) :: self real, intent(in) :: input(:) - real :: rand_vals(size(input)) ! Generate random mask for dropout - call random_number(rand_vals) - where (rand_vals < self % dropout_rate) + call random_number(self % mask) + where (self % mask < self % dropout_rate) self % mask = 0 elsewhere self % mask = 1 / (1 - self % dropout_rate) ! Scale to preserve expected value @@ -53,6 +48,7 @@ pure module subroutine forward(self, input) self % output = input * self % mask end subroutine forward + pure module subroutine backward(self, input, gradient) class(dropout_layer), intent(in out) :: self real, intent(in) :: input(:) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index 309be6e4..24fc7e63 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -8,7 +8,7 @@ module nf_layer_constructors implicit none private - public :: conv2d, dense, flatten, input, maxpool2d, reshape + public :: conv2d, dense, flatten, input, maxpool2d, reshape, dropout interface input @@ -85,6 +85,24 @@ module function dense(layer_size, activation) result(res) !! Resulting layer instance end function dense + module function dropout(rate) result(res) + !! Create a dropout layer with a given dropout rate. + !! + !! This layer is for randomly disabling neurons during training. + !! + !! Example: + !! + !! ``` + !! use nf, only :: dropout, layer + !! type(layer) :: dropout_layer + !! dropout_layer = dropout(rate=0.5) + !! ``` + real, intent(in) :: rate + !! Dropout rate - fraction of neurons to randomly disable during training + type(layer) :: res + !! Resulting layer instance + end function dropout + module function flatten() result(res) !! Flatten (3-d -> 1-d) layer constructor. !! @@ -166,6 +184,7 @@ module function reshape(output_shape) result(res) !! Resulting layer instance end function reshape + end interface end module nf_layer_constructors diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 86cef8ac..09c79e90 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -64,6 +64,14 @@ module function dense(layer_size, activation) result(res) end function dense + module function dropout(rate) result(res) + real, intent(in) :: rate + type(layer) :: res + res % name = 'dropout' + allocate(res % p, source=dropout_layer(rate)) + end function dropout + + module function flatten() result(res) type(layer) :: res res % name = 'flatten' @@ -92,6 +100,7 @@ module function input3d(layer_shape) result(res) res % initialized = .true. 
end function input3d + module function maxpool2d(pool_size, stride) result(res) integer, intent(in) :: pool_size integer, intent(in), optional :: stride @@ -120,6 +129,7 @@ module function maxpool2d(pool_size, stride) result(res) end function maxpool2d + module function reshape(output_shape) result(res) integer, intent(in) :: output_shape(:) type(layer) :: res diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index c0f37d8e..3424730e 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -2,6 +2,7 @@ program test_dropout_layer use iso_fortran_env, only: stderr => error_unit use nf, only: dropout, layer type(layer) :: layer1 + logical :: ok = .true. layer1 = dropout(0.5) From 820b081cb8af5e2cfa078283863025bbeaee8574 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 23 Jan 2025 13:10:28 -0500 Subject: [PATCH 03/30] Test uninitialized dropout layer --- src/nf/nf_dropout_layer.f90 | 3 +-- src/nf/nf_layer_submodule.f90 | 7 +++++-- test/test_dropout_layer.f90 | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index 9489ad60..67613271 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -12,10 +12,9 @@ module nf_dropout_layer public :: dropout_layer type, extends(base_layer) :: dropout_layer - !! Concrete implementation of a dropout layer type - integer :: input_size + integer :: input_size = 0 real, allocatable :: output(:) real, allocatable :: gradient(:) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index c672581a..80647972 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -3,6 +3,7 @@ use iso_fortran_env, only: stderr => error_unit use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer + use nf_dropout_layer, only: dropout_layer use nf_flatten_layer, only: flatten_layer use nf_input1d_layer, only: input1d_layer use nf_input3d_layer, only: input3d_layer @@ -240,15 +241,17 @@ impure elemental module subroutine init(self, input) call this_layer % init(input % layer_shape) end select - ! The shape of conv2d, maxpool2d, or flatten layers is not known + ! The shape of conv2d, dropout, flatten, or maxpool2d layers is not known ! until we receive an input layer. select type(this_layer => self % p) type is(conv2d_layer) self % layer_shape = shape(this_layer % output) - type is(maxpool2d_layer) + type is(dropout_layer) self % layer_shape = shape(this_layer % output) type is(flatten_layer) self % layer_shape = shape(this_layer % output) + type is(maxpool2d_layer) + self % layer_shape = shape(this_layer % output) end select self % input_layer_shape = input % layer_shape diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 3424730e..b46bd30a 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -1,6 +1,7 @@ program test_dropout_layer use iso_fortran_env, only: stderr => error_unit use nf, only: dropout, layer + use nf_dropout_layer, only: dropout_layer type(layer) :: layer1 logical :: ok = .true. @@ -11,6 +12,23 @@ program test_dropout_layer write(stderr, '(a)') 'dropout layer has its name set correctly.. failed' end if + ! Dropout on its own is not initialized and its arrays not allocated. + select type(layer1_p => layer1 % p) + type is(dropout_layer) + + if (layer1_p % input_size /= 0) then + print *, 'input_size: ', layer1_p % input_size + ok = .false. 
+ write(stderr, '(a)') 'dropout layer size should be zero.. failed' + end if + + if (allocated(layer1_p % output)) then + ok = .false. + write(stderr, '(a)') 'dropout layer output array should not be allocated.. failed' + end if + + end select + if (ok) then print '(a)', 'test_dropout_layer: All tests passed.' else From 75ef184c73c7d659df72b8cfd063d608f673bb19 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 23 Jan 2025 13:16:05 -0500 Subject: [PATCH 04/30] Test dropout state that follows an input layer --- test/test_dropout_layer.f90 | 38 ++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index b46bd30a..5d092cb4 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -1,8 +1,11 @@ program test_dropout_layer use iso_fortran_env, only: stderr => error_unit - use nf, only: dropout, layer + use nf, only: dropout, input, layer, network use nf_dropout_layer, only: dropout_layer type(layer) :: layer1 + type(network) :: net + integer :: input_size + logical :: ok = .true. layer1 = dropout(0.5) @@ -29,6 +32,39 @@ program test_dropout_layer end select + ! Now we're gonna initialize a minimal network with an input layer and a + ! dropout that follows and we'll check that the dropout layer has expected + ! state. + input_size = 10 + net = network([ & + input(input_size), & + dropout(0.5) & + ]) + + select type(layer1_p => net % layers(1) % p) + type is(dropout_layer) + if (layer1_p % input_size /= input_size) then + ok = .false. + write(stderr, '(a)') 'dropout layer input size should be the same as the input layer.. failed' + end if + + if (.not. allocated(layer1_p % output)) then + ok = .false. + write(stderr, '(a)') 'dropout layer output array should be allocated.. failed' + end if + + if (.not. allocated(layer1_p % gradient)) then + ok = .false. + write(stderr, '(a)') 'dropout layer gradient array should be allocated.. failed' + end if + + if (.not. allocated(layer1_p % mask)) then + ok = .false. + write(stderr, '(a)') 'dropout layer mask array should be allocated.. failed' + end if + + end select + if (ok) then print '(a)', 'test_dropout_layer: All tests passed.' else From 796ae74bd308b74c4231772c9f0ab505862c006f Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 23 Jan 2025 14:29:21 -0500 Subject: [PATCH 05/30] Enable forward pass for dropout; backward pass TODO --- src/nf/nf_dropout_layer_submodule.f90 | 9 ++++++++- src/nf/nf_layer.f90 | 2 +- src/nf/nf_layer_submodule.f90 | 14 +++++++++++++- src/nf/nf_network_submodule.f90 | 3 +++ test/test_dropout_layer.f90 | 26 ++++++++++++++++++++++++++ 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index e3a3cf21..5a022a29 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -35,17 +35,24 @@ end subroutine init module subroutine forward(self, input) class(dropout_layer), intent(in out) :: self real, intent(in) :: input(:) + real :: scale ! Generate random mask for dropout call random_number(self % mask) where (self % mask < self % dropout_rate) self % mask = 0 elsewhere - self % mask = 1 / (1 - self % dropout_rate) ! Scale to preserve expected value + self % mask = 1 end where ! Apply dropout mask self % output = input * self % mask + + ! 
Scale output and mask to preserve the input sum + scale = sum(input) / sum(self % output) + self % output = self % output * scale + self % mask = self % mask * scale + end subroutine forward diff --git a/src/nf/nf_layer.f90 b/src/nf/nf_layer.f90 index ca5e9606..18e8f76a 100644 --- a/src/nf/nf_layer.f90 +++ b/src/nf/nf_layer.f90 @@ -76,7 +76,7 @@ end subroutine backward_3d interface - pure module subroutine forward(self, input) + module subroutine forward(self, input) !! Apply a forward pass on the layer. !! This changes the internal state of the layer. !! This is normally called internally by the `network % forward` diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 80647972..d44ef179 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -107,7 +107,7 @@ pure module subroutine backward_3d(self, previous, gradient) end subroutine backward_3d - pure module subroutine forward(self, input) + module subroutine forward(self, input) implicit none class(layer), intent(in out) :: self class(layer), intent(in) :: input @@ -126,6 +126,18 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) end select + type is(dropout_layer) + + ! Upstream layers permitted: input1d, dense, flatten + select type(prev_layer => input % p) + type is(input1d_layer) + call this_layer % forward(prev_layer % output) + type is(dense_layer) + call this_layer % forward(prev_layer % output) + type is(flatten_layer) + call this_layer % forward(prev_layer % output) + end select + type is(conv2d_layer) ! Upstream layers permitted: input3d, conv2d, maxpool2d, reshape3d diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 140c9226..6aaaec38 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -2,6 +2,7 @@ use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer + use nf_dropout_layer, only: dropout_layer use nf_flatten_layer, only: flatten_layer use nf_input1d_layer, only: input1d_layer use nf_input3d_layer, only: input3d_layer @@ -227,6 +228,8 @@ module function predict_1d(self, input) result(res) select type(output_layer => self % layers(num_layers) % p) type is(dense_layer) res = output_layer % output + type is(dropout_layer) + res = output_layer % output type is(flatten_layer) res = output_layer % output class default diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 5d092cb4..b9b4b2a2 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -65,6 +65,32 @@ program test_dropout_layer end select + ! Now we're gonna run the forward pass and check that the dropout indeed + ! drops according to the requested dropout rate. + forward_pass: block + real :: input_data(5) + real :: output_data(size(input_data)) + integer :: n + + net = network([ & + input(size(input_data)), & + dropout(0.5) & + ]) + + call random_number(input_data) + do n = 1, 10000 + output_data = net % predict(input_data) + ! Check that sum of output matches sum of input within small tolerance + if (abs(sum(output_data) - sum(input_data)) > 1e-5) then + ok = .false. + exit + end if + end do + if (.not. ok) then + write(stderr, '(a)') 'dropout layer output sum should match input sum within 1% tolerance.. failed' + end if + end block forward_pass + if (ok) then print '(a)', 'test_dropout_layer: All tests passed.' 
else From b04d44725a329158b24ebe4363302583308dc77b Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 23 Jan 2025 14:31:20 -0500 Subject: [PATCH 06/30] Version bump and add dropout to the features table --- README.md | 1 + fpm.toml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7e3a4445..75a66491 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). |------------|------------------|------------------------|----------------------|--------------|---------------| | Input | `input` | n/a | 1, 3 | n/a | n/a | | Dense (fully-connected) | `dense` | `input1d`, `flatten` | 1 | ✅ | ✅ | +| Dropout | `dropout` | Any | 1 | ✅ | ✅ | | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) | | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ | | Flatten | `flatten` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ | diff --git a/fpm.toml b/fpm.toml index 5f68f8f6..368812c8 100644 --- a/fpm.toml +++ b/fpm.toml @@ -1,6 +1,6 @@ name = "neural-fortran" -version = "0.18.0" +version = "0.19.0" license = "MIT" author = "Milan Curcic" maintainer = "milancurcic@hey.com" -copyright = "Copyright 2018-2024, neural-fortran contributors" +copyright = "Copyright 2018-2025, neural-fortran contributors" From 544b23a2911cdaccae87f15fed75a6b9cf2037d8 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 23 Jan 2025 17:11:18 -0500 Subject: [PATCH 07/30] Add dropout to CMake --- CMakeLists.txt | 2 ++ test/CMakeLists.txt | 1 + test/test_dropout_layer.f90 | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 490f7ff1..50a0f208 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,8 @@ add_library(neural-fortran src/nf/nf_reshape_layer_submodule.f90 src/nf/io/nf_io_binary.f90 src/nf/io/nf_io_binary_submodule.f90 + src/nf/nf_dropout_layer.f90 + src/nf/nf_dropout_layer_submodule.f90 ) target_link_libraries(neural-fortran PRIVATE) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bfd3538a..108dee66 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,7 @@ foreach(execid input1d_layer input3d_layer + dropout_layer parametric_activation dense_layer conv2d_layer diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index b9b4b2a2..9ed7b864 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -87,7 +87,7 @@ program test_dropout_layer end if end do if (.not. ok) then - write(stderr, '(a)') 'dropout layer output sum should match input sum within 1% tolerance.. failed' + write(stderr, '(a)') 'dropout layer output sum should match input sum within tolerance.. 
failed' end if end block forward_pass From 56dbd52377b96622c0caf53fe2a9e79d14c7ef84 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 24 Jan 2025 10:49:00 -0500 Subject: [PATCH 08/30] Enable preprocessing in fpm.toml (needed with recent versions of fpm) --- fpm.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fpm.toml b/fpm.toml index 368812c8..3df459fb 100644 --- a/fpm.toml +++ b/fpm.toml @@ -4,3 +4,6 @@ license = "MIT" author = "Milan Curcic" maintainer = "milancurcic@hey.com" copyright = "Copyright 2018-2025, neural-fortran contributors" + +[preprocess] +[preprocess.cpp] From 3b5cc27f04867e24f64aa3df9aa0bbf494b1e85e Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 24 Jan 2025 10:57:27 -0500 Subject: [PATCH 09/30] Small change in scale implementation --- src/nf/nf_dropout_layer.f90 | 1 + src/nf/nf_dropout_layer_submodule.f90 | 13 +++++-------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index 67613271..0f557d63 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -21,6 +21,7 @@ module nf_dropout_layer real, allocatable :: mask(:) ! binary mask for dropout real :: dropout_rate ! probability of dropping a neuron + real :: scale ! scale factor to preserve the input sum logical :: training = .true. contains diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 5a022a29..568cbf21 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -35,7 +35,6 @@ end subroutine init module subroutine forward(self, input) class(dropout_layer), intent(in out) :: self real, intent(in) :: input(:) - real :: scale ! Generate random mask for dropout call random_number(self % mask) @@ -45,13 +44,11 @@ module subroutine forward(self, input) self % mask = 1 end where - ! Apply dropout mask - self % output = input * self % mask + ! Scale factor to preserve the input sum + self % scale = sum(input) / sum(self % output) ! scale == 1/P(keep) - ! Scale output and mask to preserve the input sum - scale = sum(input) / sum(self % output) - self % output = self % output * scale - self % mask = self % mask * scale + ! Apply dropout mask + self % output = input * self % mask * self % scale end subroutine forward @@ -62,7 +59,7 @@ pure module subroutine backward(self, input, gradient) real, intent(in) :: gradient(:) ! Backpropagate gradient through dropout mask - self % gradient = gradient * self % mask + self % gradient = gradient * self % mask * self % scale end subroutine backward end submodule nf_dropout_layer_submodule \ No newline at end of file From 703f8023a175a584005105c3be9a6051a695edad Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 24 Jan 2025 11:14:42 -0500 Subject: [PATCH 10/30] Integration of backward pass for dropout --- src/nf/nf_layer_submodule.f90 | 19 +++++++++++++++++-- src/nf/nf_network_submodule.f90 | 2 ++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index d44ef179..69b40d22 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -25,12 +25,14 @@ pure module subroutine backward_1d(self, previous, gradient) type is(dense_layer) - ! Upstream layers permitted: input1d, dense, flatten + ! 
Upstream layers permitted: input1d, dense, dropout, flatten select type(prev_layer => previous % p) type is(input1d_layer) call this_layer % backward(prev_layer % output, gradient) type is(dense_layer) call this_layer % backward(prev_layer % output, gradient) + type is(dropout_layer) + call this_layer % backward(prev_layer % output, gradient) type is(flatten_layer) call this_layer % backward(prev_layer % output, gradient) end select @@ -116,12 +118,14 @@ module subroutine forward(self, input) type is(dense_layer) - ! Upstream layers permitted: input1d, dense, flatten + ! Upstream layers permitted: input1d, dense, dropout, flatten select type(prev_layer => input % p) type is(input1d_layer) call this_layer % forward(prev_layer % output) type is(dense_layer) call this_layer % forward(prev_layer % output) + type is(dropout_layer) + call this_layer % forward(prev_layer % output) type is(flatten_layer) call this_layer % forward(prev_layer % output) end select @@ -299,6 +303,8 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (dense_layer) num_params = this_layer % get_num_params() + type is (dropout_layer) + num_params = size(this_layer % mask) type is (conv2d_layer) num_params = this_layer % get_num_params() type is (maxpool2d_layer) @@ -324,6 +330,8 @@ module function get_params(self) result(params) ! No parameters to get. type is (dense_layer) params = this_layer % get_params() + type is (dropout_layer) + ! No parameters to get. type is (conv2d_layer) params = this_layer % get_params() type is (maxpool2d_layer) @@ -349,6 +357,8 @@ module function get_gradients(self) result(gradients) ! No gradients to get. type is (dense_layer) gradients = this_layer % get_gradients() + type is (dropout_layer) + ! No gradients to get. type is (conv2d_layer) gradients = this_layer % get_gradients() type is (maxpool2d_layer) @@ -396,6 +406,11 @@ module subroutine set_params(self, params) type is (dense_layer) call this_layer % set_params(params) + type is (dropout_layer) + ! No parameters to set. + write(stderr, '(a)') 'Warning: calling set_params() ' & + // 'on a zero-parameter layer; nothing to do.' 
+ type is (conv2d_layer) call this_layer % set_params(params) diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 6aaaec38..0b076b9f 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -135,6 +135,8 @@ module subroutine backward(self, output, loss) select type(next_layer => self % layers(n + 1) % p) type is(dense_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(dropout_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) type is(conv2d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) type is(flatten_layer) From 1dfe6b39dd075391c1d56adb6ef816be0c1bab57 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 6 Feb 2025 12:52:56 -0500 Subject: [PATCH 11/30] Reduce tolerance in conv2d convergence tests --- test/test_conv2d_network.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_conv2d_network.f90 b/test/test_conv2d_network.f90 index 47c9a819..28dce100 100644 --- a/test/test_conv2d_network.f90 +++ b/test/test_conv2d_network.f90 @@ -39,7 +39,7 @@ program test_conv2d_network type(network) :: cnn real :: y(1) - real :: tolerance = 1e-5 + real :: tolerance = 1e-4 integer :: n integer, parameter :: num_iterations = 1000 @@ -76,7 +76,7 @@ program test_conv2d_network type(network) :: cnn real :: x(1, 8, 8) real :: y(1) - real :: tolerance = 1e-5 + real :: tolerance = 1e-4 integer :: n integer, parameter :: num_iterations = 1000 @@ -111,7 +111,7 @@ program test_conv2d_network type(network) :: cnn real :: x(1, 12, 12) real :: y(9) - real :: tolerance = 1e-5 + real :: tolerance = 1e-4 integer :: n integer, parameter :: num_iterations = 5000 From 59cc7e1a98434ca3a55236734cf399c6fb9ecda6 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 6 Feb 2025 12:53:54 -0500 Subject: [PATCH 12/30] Fix bug in dropout scaling Co-authored-by: Ricardo Orsi <@ricor07> --- src/nf/nf_dropout_layer_submodule.f90 | 2 +- test/test_dropout_layer.f90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 568cbf21..d24ed34a 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -45,7 +45,7 @@ module subroutine forward(self, input) end where ! Scale factor to preserve the input sum - self % scale = sum(input) / sum(self % output) ! scale == 1/P(keep) + self % scale = sum(input) / sum(input * self % mask) ! Apply dropout mask self % output = input * self % mask * self % scale diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 9ed7b864..23dd0209 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -81,7 +81,7 @@ program test_dropout_layer do n = 1, 10000 output_data = net % predict(input_data) ! Check that sum of output matches sum of input within small tolerance - if (abs(sum(output_data) - sum(input_data)) > 1e-5) then + if (abs(sum(output_data) - sum(input_data)) > 1e-6) then ok = .false. 
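        ! Editor's note (not part of the original patch): the test can demand that the
        ! sums match to within 1e-6 because the corrected scale factor,
        ! scale = sum(input) / sum(input * mask), preserves the input sum exactly up
        ! to roundoff. For example, for input [1, 2, 3, 4] and mask [0, 1, 1, 0] the
        ! scale is (1+2+3+4) / (2+3) = 2, giving output [0, 4, 6, 0] whose sum is
        ! again 10.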
exit end if From c984b15f8e750e4aa52cf0f9b88bcf5cc35f17da Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 6 Feb 2025 13:58:38 -0500 Subject: [PATCH 13/30] disable dropout in inference mode (net % predict); TODO enable in net % train --- src/nf/nf_dropout_layer.f90 | 6 ++-- src/nf/nf_dropout_layer_submodule.f90 | 36 +++++++++++++--------- src/nf/nf_layer_constructors.f90 | 4 ++- src/nf/nf_layer_constructors_submodule.f90 | 5 +-- src/nf/nf_layer_submodule.f90 | 2 +- src/nf/nf_network_submodule.f90 | 22 +++++++++++-- 6 files changed, 53 insertions(+), 22 deletions(-) diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index 0f557d63..bffca5f0 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -22,7 +22,7 @@ module nf_dropout_layer real :: dropout_rate ! probability of dropping a neuron real :: scale ! scale factor to preserve the input sum - logical :: training = .true. + logical :: training = .false. ! set to .true. in training mode contains @@ -33,11 +33,13 @@ module nf_dropout_layer end type dropout_layer interface dropout_layer - module function dropout_layer_cons(rate) & + module function dropout_layer_cons(rate, training) & result(res) !! This function returns the `dropout_layer` instance. real, intent(in) :: rate !! Dropout rate + logical, intent(in), optional :: training + !! Training mode (default .false.) type(dropout_layer) :: res !! dropout_layer instance end function dropout_layer_cons diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index d24ed34a..fb787699 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -4,12 +4,12 @@ contains - module function dropout_layer_cons(rate) result(res) + module function dropout_layer_cons(rate, training) result(res) real, intent(in) :: rate + logical, intent(in), optional :: training type(dropout_layer) :: res - - ! Initialize dropout rate res % dropout_rate = rate + if (present(training)) res % training = training end function dropout_layer_cons @@ -36,19 +36,27 @@ module subroutine forward(self, input) class(dropout_layer), intent(in out) :: self real, intent(in) :: input(:) - ! Generate random mask for dropout - call random_number(self % mask) - where (self % mask < self % dropout_rate) - self % mask = 0 - elsewhere - self % mask = 1 - end where + ! Generate random mask for dropout, training mode only + if (self % training) then + + call random_number(self % mask) + where (self % mask < self % dropout_rate) + self % mask = 0 + elsewhere + self % mask = 1 + end where + + ! Scale factor to preserve the input sum + self % scale = sum(input) / sum(input * self % mask) + + ! Apply dropout mask + self % output = input * self % mask * self % scale - ! Scale factor to preserve the input sum - self % scale = sum(input) / sum(input * self % mask) + else + ! In inference mode, we don't apply dropout; simply pass through the input + self % output = input - ! Apply dropout mask - self % output = input * self % mask * self % scale + end if end subroutine forward diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index 24fc7e63..fcc49342 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -85,7 +85,7 @@ module function dense(layer_size, activation) result(res) !! Resulting layer instance end function dense - module function dropout(rate) result(res) + module function dropout(rate, training) result(res) !! Create a dropout layer with a given dropout rate. 
!! !! This layer is for randomly disabling neurons during training. @@ -99,6 +99,8 @@ module function dropout(rate) result(res) !! ``` real, intent(in) :: rate !! Dropout rate - fraction of neurons to randomly disable during training + logical, intent(in), optional :: training + !! Training mode (default .false.) type(layer) :: res !! Resulting layer instance end function dropout diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 09c79e90..5203497d 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -64,11 +64,12 @@ module function dense(layer_size, activation) result(res) end function dense - module function dropout(rate) result(res) + module function dropout(rate, training) result(res) real, intent(in) :: rate + logical, intent(in), optional :: training type(layer) :: res res % name = 'dropout' - allocate(res % p, source=dropout_layer(rate)) + allocate(res % p, source=dropout_layer(rate, training)) end function dropout diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 69b40d22..8bf94ea5 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -304,7 +304,7 @@ elemental module function get_num_params(self) result(num_params) type is (dense_layer) num_params = this_layer % get_num_params() type is (dropout_layer) - num_params = size(this_layer % mask) + num_params = 0 type is (conv2d_layer) num_params = this_layer % get_num_params() type is (maxpool2d_layer) diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 0b076b9f..ee9792aa 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -221,10 +221,19 @@ module function predict_1d(self, input) result(res) class(network), intent(in out) :: self real, intent(in) :: input(:) real, allocatable :: res(:) - integer :: num_layers + integer :: n, num_layers num_layers = size(self % layers) + ! predict is run in inference mode only; + ! set all dropout layers' training mode to false. + do n = 2, num_layers + select type(this_layer => self % layers(n) % p) + type is(dropout_layer) + this_layer % training = .false. + end select + end do + call self % forward(input) select type(output_layer => self % layers(num_layers) % p) @@ -245,10 +254,19 @@ module function predict_3d(self, input) result(res) class(network), intent(in out) :: self real, intent(in) :: input(:,:,:) real, allocatable :: res(:) - integer :: num_layers + integer :: n, num_layers num_layers = size(self % layers) + ! predict is run in inference mode only; + ! set all dropout layers' training mode to false. + do n = 2, num_layers + select type(this_layer => self % layers(n) % p) + type is(dropout_layer) + this_layer % training = .false. 
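      ! Editor's note (not part of the original patch): this per-predict loop that
      ! switches every dropout layer to inference mode is repeated in the other
      ! predict variants; a later commit in this series consolidates it into a
      ! single network % set_training_mode(.true./.false.) helper and restores
      ! training mode after inference.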
+ end select + end do + call self % forward(input) select type(output_layer => self % layers(num_layers) % p) From e9772a0535fbdb46ed914fe719385a2475bbde75 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 6 Feb 2025 15:00:42 -0500 Subject: [PATCH 14/30] Set dropout's training mode to true in net % train(); add tests --- src/nf/nf_dropout_layer_submodule.f90 | 9 ++++++-- src/nf/nf_network_submodule.f90 | 30 +++++++++++++++++++++++++-- test/test_dropout_layer.f90 | 29 ++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 4 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index fb787699..6e7e35a0 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -66,8 +66,13 @@ pure module subroutine backward(self, input, gradient) real, intent(in) :: input(:) real, intent(in) :: gradient(:) - ! Backpropagate gradient through dropout mask - self % gradient = gradient * self % mask * self % scale + if (self % training) then + ! Backpropagate gradient through dropout mask + self % gradient = gradient * self % mask * self % scale + else + ! In inference mode, pass through the gradient unchanged + self % gradient = gradient + end if end subroutine backward end submodule nf_dropout_layer_submodule \ No newline at end of file diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index ee9792aa..f28a98e9 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -288,12 +288,21 @@ module function predict_batch_1d(self, input) result(res) class(network), intent(in out) :: self real, intent(in) :: input(:,:) real, allocatable :: res(:,:) - integer :: i, batch_size, num_layers, output_size + integer :: i, n, batch_size, num_layers, output_size num_layers = size(self % layers) batch_size = size(input, dim=rank(input)) output_size = product(self % layers(num_layers) % layer_shape) + ! predict is run in inference mode only; + ! set all dropout layers' training mode to false. + do n = 2, num_layers + select type(this_layer => self % layers(n) % p) + type is(dropout_layer) + this_layer % training = .false. + end select + end do + allocate(res(output_size, batch_size)) batch: do i = 1, size(res, dim=2) @@ -318,12 +327,21 @@ module function predict_batch_3d(self, input) result(res) class(network), intent(in out) :: self real, intent(in) :: input(:,:,:,:) real, allocatable :: res(:,:) - integer :: i, batch_size, num_layers, output_size + integer :: i, n, batch_size, num_layers, output_size num_layers = size(self % layers) batch_size = size(input, dim=rank(input)) output_size = product(self % layers(num_layers) % layer_shape) + ! predict is run in inference mode only; + ! set all dropout layers' training mode to false. + do n = 2, num_layers + select type(this_layer => self % layers(n) % p) + type is(dropout_layer) + this_layer % training = .false. + end select + end do + allocate(res(output_size, batch_size)) batch: do i = 1, batch_size @@ -457,6 +475,14 @@ module subroutine train(self, input_data, output_data, batch_size, & self % loss = quadratic() end if + ! Set all dropout layers' training mode to true. + do n = 2, size(self % layers) + select type(this_layer => self % layers(n) % p) + type is(dropout_layer) + this_layer % training = .true. 
+ end select + end do + dataset_size = size(output_data, dim=2) epoch_loop: do n = 1, epochs diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 23dd0209..b0ad0664 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -19,6 +19,16 @@ program test_dropout_layer select type(layer1_p => layer1 % p) type is(dropout_layer) + if (layer1_p % dropout_rate /= 0.5) then + ok = .false. + write(stderr, '(a)') 'dropout layer dropout rate should be 0.5.. failed' + end if + + if (layer1_p % training) then + ok = .false. + write(stderr, '(a)') 'dropout layer default training mode should be false.. failed' + end if + if (layer1_p % input_size /= 0) then print *, 'input_size: ', layer1_p % input_size ok = .false. @@ -32,6 +42,25 @@ program test_dropout_layer end select + ! Test setting training mode explicitly. + layer1 = dropout(0.5, training=.true.) + select type(layer1_p => layer1 % p) + type is(dropout_layer) + if (.not. layer1_p % training) then + ok = .false. + write(stderr, '(a)') 'dropout layer training mode should be true.. failed' + end if + end select + + layer1 = dropout(0.5, training=.false.) + select type(layer1_p => layer1 % p) + type is(dropout_layer) + if (layer1_p % training) then + ok = .false. + write(stderr, '(a)') 'dropout layer training mode should be false.. failed' + end if + end select + ! Now we're gonna initialize a minimal network with an input layer and a ! dropout that follows and we'll check that the dropout layer has expected ! state. From 5ae7e9dffeff6072c0a18da4de3ba3052e3062cc Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sat, 15 Feb 2025 21:43:09 -0500 Subject: [PATCH 15/30] WIP dropout tests --- test/test_dropout_layer.f90 | 40 ++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index b0ad0664..2f52c715 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -1,6 +1,6 @@ program test_dropout_layer use iso_fortran_env, only: stderr => error_unit - use nf, only: dropout, input, layer, network + use nf, only: dense, dropout, input, layer, network use nf_dropout_layer, only: dropout_layer type(layer) :: layer1 type(network) :: net @@ -120,6 +120,44 @@ program test_dropout_layer end if end block forward_pass + + training: block + real :: x(10), y(5) + real :: tolerance = 1e-3 + integer :: n + integer, parameter :: num_iterations = 100000 + + call random_number(x) + y = [0.1234, 0.2345, 0.3456, 0.4567, 0.5678] + + net = network([ & + input(10), & + dropout(0.5, training=.true.), & + dense(5) & + ]) + + do n = 1, num_iterations + !select type(dropout_l => net % layers(2) % p) + ! type is(dropout_layer) + ! print *, dropout_l % training, dropout_l % mask + !end select + call net % forward(x) + call net % backward(y) + call net % update() + !print *, n, net % predict(x) + + if (all(abs(net % predict(x) - y) < tolerance)) exit + end do + + if (.not. n <= num_iterations) then + write(stderr, '(a)') & + 'dense network should converge in simple training.. failed' + ok = .false. + end if + + end block training + + if (ok) then print '(a)', 'test_dropout_layer: All tests passed.' 
else From 0934f7f526c5b109a1cfed465a9ead5755f4a18f Mon Sep 17 00:00:00 2001 From: milancurcic Date: Mon, 17 Feb 2025 12:42:27 -0500 Subject: [PATCH 16/30] Dropout layers always in training mode; except when is called, when they are in inference mode --- src/nf/nf_dropout_layer.f90 | 6 +- src/nf/nf_dropout_layer_submodule.f90 | 4 +- src/nf/nf_layer_constructors.f90 | 4 +- src/nf/nf_layer_constructors_submodule.f90 | 5 +- src/nf/nf_network.f90 | 10 +++ src/nf/nf_network_submodule.f90 | 88 +++++++++++++--------- test/test_dropout_layer.f90 | 39 ++-------- 7 files changed, 77 insertions(+), 79 deletions(-) diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index bffca5f0..77999abb 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -22,7 +22,7 @@ module nf_dropout_layer real :: dropout_rate ! probability of dropping a neuron real :: scale ! scale factor to preserve the input sum - logical :: training = .false. ! set to .true. in training mode + logical :: training = .true. ! set to .false. for inference contains @@ -33,13 +33,11 @@ module nf_dropout_layer end type dropout_layer interface dropout_layer - module function dropout_layer_cons(rate, training) & + module function dropout_layer_cons(rate) & result(res) !! This function returns the `dropout_layer` instance. real, intent(in) :: rate !! Dropout rate - logical, intent(in), optional :: training - !! Training mode (default .false.) type(dropout_layer) :: res !! dropout_layer instance end function dropout_layer_cons diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 6e7e35a0..5dc4ef07 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -4,12 +4,10 @@ contains - module function dropout_layer_cons(rate, training) result(res) + module function dropout_layer_cons(rate) result(res) real, intent(in) :: rate - logical, intent(in), optional :: training type(dropout_layer) :: res res % dropout_rate = rate - if (present(training)) res % training = training end function dropout_layer_cons diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index c63027a1..770e3b8d 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -104,7 +104,7 @@ module function dense(layer_size, activation) result(res) !! Resulting layer instance end function dense - module function dropout(rate, training) result(res) + module function dropout(rate) result(res) !! Create a dropout layer with a given dropout rate. !! !! This layer is for randomly disabling neurons during training. @@ -118,8 +118,6 @@ module function dropout(rate, training) result(res) !! ``` real, intent(in) :: rate !! Dropout rate - fraction of neurons to randomly disable during training - logical, intent(in), optional :: training - !! Training mode (default .false.) type(layer) :: res !! 
Resulting layer instance end function dropout diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 57bd682a..e9ac30ae 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -65,12 +65,11 @@ module function dense(layer_size, activation) result(res) end function dense - module function dropout(rate, training) result(res) + module function dropout(rate) result(res) real, intent(in) :: rate - logical, intent(in), optional :: training type(layer) :: res res % name = 'dropout' - allocate(res % p, source=dropout_layer(rate, training)) + allocate(res % p, source=dropout_layer(rate)) end function dropout diff --git a/src/nf/nf_network.f90 b/src/nf/nf_network.f90 index fa7ea4eb..5916924e 100644 --- a/src/nf/nf_network.f90 +++ b/src/nf/nf_network.f90 @@ -26,6 +26,7 @@ module nf_network procedure :: get_params procedure :: print_info procedure :: set_params + procedure :: set_training_mode procedure :: train procedure :: update @@ -223,6 +224,15 @@ module subroutine set_params(self, params) !! Network parameters to set end subroutine set_params + module subroutine set_training_mode(self, training) + !! Set the mode to training (.true.) or inference (.false.). + !! Used internally to enable/disable the dropout layers in the network. + class(network), intent(in out) :: self + !! Network instance + logical, intent(in) :: training + !! .true. for training mode, .false. for inference. + end subroutine set_training_mode + module subroutine print_info(self) !! Prints a brief summary of the network and its layers to the screen. class(network), intent(in) :: self diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 5cf66fe2..5d530ee4 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -251,15 +251,11 @@ module function predict_1d(self, input) result(res) num_layers = size(self % layers) ! predict is run in inference mode only; - ! set all dropout layers' training mode to false. - do n = 2, num_layers - select type(this_layer => self % layers(n) % p) - type is(dropout_layer) - this_layer % training = .false. - end select - end do - + ! set all dropout layers' training mode to false, and + ! return to training mode after inference. + call self % set_training_mode(.false.) call self % forward(input) + call self % set_training_mode(.true.) select type(output_layer => self % layers(num_layers) % p) type is(dense_layer) @@ -269,7 +265,8 @@ module function predict_1d(self, input) result(res) type is(flatten_layer) res = output_layer % output class default - error stop 'network % output not implemented for this output layer' + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' end select end function predict_1d @@ -279,15 +276,25 @@ module function predict_2d(self, input) result(res) class(network), intent(in out) :: self real, intent(in) :: input(:,:) real, allocatable :: res(:) - integer :: num_layers + integer :: n, num_layers num_layers = size(self % layers) + ! predict is run in inference mode only; + ! set all dropout layers' training mode to false, and + ! return to training mode after inference. + call self % set_training_mode(.false.) call self % forward(input) + call self % set_training_mode(.true.) 
select type(output_layer => self % layers(num_layers) % p) type is(dense_layer) res = output_layer % output + type is(flatten_layer) + res = output_layer % output + class default + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' end select end function predict_2d @@ -302,15 +309,11 @@ module function predict_3d(self, input) result(res) num_layers = size(self % layers) ! predict is run in inference mode only; - ! set all dropout layers' training mode to false. - do n = 2, num_layers - select type(this_layer => self % layers(n) % p) - type is(dropout_layer) - this_layer % training = .false. - end select - end do - + ! set all dropout layers' training mode to false, and + ! return to training mode after inference. + call self % set_training_mode(.false.) call self % forward(input) + call self % set_training_mode(.true.) select type(output_layer => self % layers(num_layers) % p) type is(conv2d_layer) @@ -321,7 +324,8 @@ module function predict_3d(self, input) result(res) type is(flatten_layer) res = output_layer % output class default - error stop 'network % output not implemented for this output layer' + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' end select end function predict_3d @@ -338,13 +342,9 @@ module function predict_batch_1d(self, input) result(res) output_size = product(self % layers(num_layers) % layer_shape) ! predict is run in inference mode only; - ! set all dropout layers' training mode to false. - do n = 2, num_layers - select type(this_layer => self % layers(n) % p) - type is(dropout_layer) - this_layer % training = .false. - end select - end do + ! set all dropout layers' training mode to false, and + ! return to training mode after inference. + call self % set_training_mode(.false.) allocate(res(output_size, batch_size)) @@ -358,11 +358,16 @@ module function predict_batch_1d(self, input) result(res) type is(flatten_layer) res(:,i) = output_layer % output class default - error stop 'network % output not implemented for this output layer' + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' end select end do batch + ! We are now done with inference; + ! return to training mode for dropout layers. + call self % set_training_mode(.true.) + end function predict_batch_1d @@ -377,13 +382,9 @@ module function predict_batch_3d(self, input) result(res) output_size = product(self % layers(num_layers) % layer_shape) ! predict is run in inference mode only; - ! set all dropout layers' training mode to false. - do n = 2, num_layers - select type(this_layer => self % layers(n) % p) - type is(dropout_layer) - this_layer % training = .false. - end select - end do + ! set all dropout layers' training mode to false, and + ! return to training mode after inference. + call self % set_training_mode(.false.) allocate(res(output_size, batch_size)) @@ -400,11 +401,16 @@ module function predict_batch_3d(self, input) result(res) type is(flatten_layer) res(:,i) = output_layer % output class default - error stop 'network % output not implemented for this output layer' + error stop 'network % output not implemented for ' // & + trim(self % layers(num_layers) % name) // ' layer' end select end do batch + ! We are now done with inference; + ! return to training mode for dropout layers. + call self % set_training_mode(.true.) 
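    ! Editor's note (not part of the original patch): a minimal usage sketch of the
    ! behaviour introduced in this series, using only constructors and methods that
    ! appear in these patches (net, x, and y are illustrative names):
    !
    !   net = network([input(100), dropout(0.5), dense(5)])
    !   call net % forward(x)   ! training mode: roughly half of the inputs are
    !                           ! zeroed and the rest rescaled to preserve the sum
    !   y = net % predict(x)    ! inference mode: dropout layers pass the input
    !                           ! through unchanged (the training flag is switched
    !                           ! off and back on around the forward pass)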
+ end function predict_batch_3d @@ -484,6 +490,18 @@ module subroutine set_params(self, params) end subroutine set_params + module subroutine set_training_mode(self, training) + class(network), intent(in out) :: self + logical, intent(in) :: training + integer :: n + do n = 2, size(self % layers) + select type(this_layer => self % layers(n) % p); type is(dropout_layer) + this_layer % training = training + end select + end do + end subroutine set_training_mode + + module subroutine train(self, input_data, output_data, batch_size, & epochs, optimizer, loss) class(network), intent(in out) :: self diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 2f52c715..a4aa4b6b 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -24,9 +24,9 @@ program test_dropout_layer write(stderr, '(a)') 'dropout layer dropout rate should be 0.5.. failed' end if - if (layer1_p % training) then + if (.not. layer1_p % training) then ok = .false. - write(stderr, '(a)') 'dropout layer default training mode should be false.. failed' + write(stderr, '(a)') 'dropout layer default training mode should be true.. failed' end if if (layer1_p % input_size /= 0) then @@ -42,25 +42,6 @@ program test_dropout_layer end select - ! Test setting training mode explicitly. - layer1 = dropout(0.5, training=.true.) - select type(layer1_p => layer1 % p) - type is(dropout_layer) - if (.not. layer1_p % training) then - ok = .false. - write(stderr, '(a)') 'dropout layer training mode should be true.. failed' - end if - end select - - layer1 = dropout(0.5, training=.false.) - select type(layer1_p => layer1 % p) - type is(dropout_layer) - if (layer1_p % training) then - ok = .false. - write(stderr, '(a)') 'dropout layer training mode should be false.. failed' - end if - end select - ! Now we're gonna initialize a minimal network with an input layer and a ! dropout that follows and we'll check that the dropout layer has expected ! state. @@ -122,31 +103,27 @@ program test_dropout_layer training: block - real :: x(10), y(5) + real :: x(100), y(5) real :: tolerance = 1e-3 integer :: n - integer, parameter :: num_iterations = 100000 + integer, parameter :: num_iterations = 10000 call random_number(x) - y = [0.1234, 0.2345, 0.3456, 0.4567, 0.5678] + y = [0.12345, 0.23456, 0.34567, 0.45678, 0.56789] net = network([ & - input(10), & - dropout(0.5, training=.true.), & + input(100), & + dropout(0.5), & dense(5) & ]) do n = 1, num_iterations - !select type(dropout_l => net % layers(2) % p) - ! type is(dropout_layer) - ! print *, dropout_l % training, dropout_l % mask - !end select call net % forward(x) call net % backward(y) call net % update() - !print *, n, net % predict(x) if (all(abs(net % predict(x) - y) < tolerance)) exit + end do if (.not. n <= num_iterations) then From 0f640445c492525bcac050cfa58294ff2d5ea555 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Mon, 17 Feb 2025 12:52:32 -0500 Subject: [PATCH 17/30] Update the layers table --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 14f944fb..c8e2b5d1 100644 --- a/README.md +++ b/README.md @@ -30,8 +30,8 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). 
| Layer type | Constructor name | Supported input layers | Rank of output array | Forward pass | Backward pass | |------------|------------------|------------------------|----------------------|--------------|---------------| | Input | `input` | n/a | 1, 2, 3 | n/a | n/a | -| Dense (fully-connected) | `dense` | `input1d`, `flatten` | 1 | ✅ | ✅ | -| Dropout | `dropout` | Any | 1 | ✅ | ✅ | +| Dense (fully-connected) | `dense` | `input1d`, `dense`, `dropout`, `flatten` | 1 | ✅ | ✅ | +| Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ | | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) | | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ | | Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ | From aa19f69b10213d2fd81704bb6a745808d57d82a4 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Tue, 18 Feb 2025 12:13:42 -0500 Subject: [PATCH 18/30] Ensure the actual dropout rate == requested dropout rate in most cases --- src/nf/nf_dropout_layer_submodule.f90 | 14 ++++++----- src/nf/nf_random.f90 | 24 ++++++++++++++++--- test/test_dropout_layer.f90 | 34 +++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 5dc4ef07..d2f50e15 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -1,4 +1,5 @@ submodule (nf_dropout_layer) nf_dropout_layer_submodule + use nf_random, only: shuffle !! This submodule implements the procedures defined in the !! nf_dropout_layer module. @@ -37,12 +38,13 @@ module subroutine forward(self, input) ! Generate random mask for dropout, training mode only if (self % training) then - call random_number(self % mask) - where (self % mask < self % dropout_rate) - self % mask = 0 - elsewhere - self % mask = 1 - end where + ! Set the first dropout_rate number of elements to 0, the rest to 1, + ! and shuffle. Note that the selection of the elements rounds down to + ! the nearest integer, so in cases where size(input) * dropout_rate is + ! not an integer, the actual dropout rate will be slightly lower. + self % mask = 1 + self % mask(:int(size(self % mask) * self % dropout_rate)) = 0 + call shuffle(self % mask) ! Scale factor to preserve the input sum self % scale = sum(input) / sum(input * self % mask) diff --git a/src/nf/nf_random.f90 b/src/nf/nf_random.f90 index 57c5d11f..5160bc13 100644 --- a/src/nf/nf_random.f90 +++ b/src/nf/nf_random.f90 @@ -1,12 +1,12 @@ module nf_random - !! Provides a random number generator with - !! normal distribution, centered on zero. + !! Provides a random number generator with normal distribution, + !! centered on zero, and a Fisher-Yates shuffle. implicit none private - public :: random_normal + public :: random_normal, shuffle real, parameter :: pi = 4 * atan(1.d0) @@ -23,4 +23,22 @@ impure elemental subroutine random_normal(x) x = sqrt(- 2 * log(u(1))) * cos(2 * pi * u(2)) end subroutine random_normal + + subroutine shuffle(x) + !! Fisher-Yates shuffle. + real, intent(in out) :: x(:) + !! 
Array to shuffle + integer :: i, j + real :: r, temp + + do i = size(x), 2, -1 + call random_number(r) + j = floor(r * i) + 1 + temp = x(i) + x(i) = x(j) + x(j) = temp + end do + + end subroutine shuffle + end module nf_random diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index a4aa4b6b..262df3ee 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -75,6 +75,40 @@ program test_dropout_layer end select + ! Test that the generated dropout mask matches the requested dropout rate. + test_mask: block + integer, parameter :: input_sizes(3) = [10, 100, 1000] + real, parameter :: dropout_rates(5) = [0., 0.2, 0.5, 0.8, 1.] + real, allocatable :: input_data(:) + integer :: i, j + + do i = 1, size(input_sizes) + do j = 1, size(dropout_rates) + + net = network([ & + input(input_sizes(i)), & + dropout(dropout_rates(j)) & + ]) + + if (allocated(input_data)) deallocate(input_data) + allocate(input_data(input_sizes(i))) + call random_number(input_data) + + call net % forward(input_data) + + select type(layer1_p => net % layers(2) % p) + type is(dropout_layer) + if (abs(sum(layer1_p % mask) / size(layer1_p % mask) - (1 - dropout_rates(j))) > 1e-6) then + ok = .false. + write(stderr, '(a)') 'actual dropout rate is equal to requested.. failed' + end if + end select + end do + end do + + end block test_mask + + ! Now we're gonna run the forward pass and check that the dropout indeed ! drops according to the requested dropout rate. forward_pass: block From a99d80009aefde7af274bf416fce517790ae4091 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 14:49:32 -0500 Subject: [PATCH 19/30] Accumulate the gradient in dropout % backward and flush in network % update --- src/nf/nf_dropout_layer_submodule.f90 | 7 ++++--- src/nf/nf_network_submodule.f90 | 10 ++-------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index d2f50e15..dcdc620b 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -47,7 +47,8 @@ module subroutine forward(self, input) call shuffle(self % mask) ! Scale factor to preserve the input sum - self % scale = sum(input) / sum(input * self % mask) + self % scale = sum(input) / sum(input * self % mask) ! input conservative approach + !self % scale = 1 / (1 - self % dropout_rate) ! reference approach ! Apply dropout mask self % output = input * self % mask * self % scale @@ -68,10 +69,10 @@ pure module subroutine backward(self, input, gradient) if (self % training) then ! Backpropagate gradient through dropout mask - self % gradient = gradient * self % mask * self % scale + self % gradient = self % gradient + gradient * self % mask * self % scale else ! In inference mode, pass through the gradient unchanged - self % gradient = gradient + self % gradient = self % gradient + gradient end if end subroutine backward diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index a8b5a7bc..fae3476c 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -543,14 +543,6 @@ module subroutine train(self, input_data, output_data, batch_size, & self % loss = quadratic() end if - ! Set all dropout layers' training mode to true. - do n = 2, size(self % layers) - select type(this_layer => self % layers(n) % p) - type is(dropout_layer) - this_layer % training = .true. 
- end select - end do - dataset_size = size(output_data, dim=2) epoch_loop: do n = 1, epochs @@ -640,6 +632,8 @@ module subroutine update(self, optimizer, batch_size) type is(conv2d_layer) this_layer % dw = 0 this_layer % db = 0 + type is(dropout_layer) + this_layer % gradient = 0 end select end do From ea0012a256cc79db3b5691e5f861fec018dbc47c Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 14:50:12 -0500 Subject: [PATCH 20/30] Guard against bad dropout rate --- src/nf/nf_layer_constructors_submodule.f90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index b2f8e462..9558a0bc 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -69,6 +69,8 @@ end function dense module function dropout(rate) result(res) real, intent(in) :: rate type(layer) :: res + if (rate < 0 .or. rate > 1) & + error stop 'rate must be between 0 and 1 in a dropout layer' res % name = 'dropout' allocate(res % p, source=dropout_layer(rate)) end function dropout @@ -81,7 +83,6 @@ module function flatten() result(res) end function flatten - module function input1d(layer_size) result(res) integer, intent(in) :: layer_size type(layer) :: res From 0350c7d0a66daa6a2ba82150b8b0fd341c1a916c Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 14:51:16 -0500 Subject: [PATCH 21/30] Connect the backward pass; expand tests --- src/nf/nf_layer_submodule.f90 | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 3cbdf036..ca18f9c7 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -39,6 +39,18 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) end select + type is(dropout_layer) + + ! Upstream layers permitted: input1d, dense, dropout, flatten + select type(prev_layer => previous % p) + type is(input1d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(dense_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(flatten_layer) + call this_layer % backward(prev_layer % output, gradient) + end select + type is(flatten_layer) ! Upstream layers permitted: input2d, input3d, conv2d, maxpool2d From 183e82f9ba56b629d361541dd6542c44467cd900 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 14:51:29 -0500 Subject: [PATCH 22/30] Expand tests --- test/test_dropout_layer.f90 | 40 +++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 262df3ee..dc18789d 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -112,7 +112,7 @@ program test_dropout_layer ! Now we're gonna run the forward pass and check that the dropout indeed ! drops according to the requested dropout rate. forward_pass: block - real :: input_data(5) + real :: input_data(4) real :: output_data(size(input_data)) integer :: n @@ -121,33 +121,41 @@ program test_dropout_layer dropout(0.5) & ]) - call random_number(input_data) do n = 1, 10000 - output_data = net % predict(input_data) + + call random_number(input_data) + call net % forward(input_data) + ! Check that sum of output matches sum of input within small tolerance - if (abs(sum(output_data) - sum(input_data)) > 1e-6) then - ok = .false. 
- exit - end if + select type(layer1_p => net % layers(2) % p) + type is(dropout_layer) + if (abs(sum(layer1_p % output) - sum(input_data)) > 1e-6) then + ok = .false. + exit + end if + end select + end do - if (.not. ok) then - write(stderr, '(a)') 'dropout layer output sum should match input sum within tolerance.. failed' - end if + + if (.not. ok) write(stderr, '(a)') & + 'dropout layer output sum should match input sum within tolerance.. failed' + end block forward_pass training: block - real :: x(100), y(5) - real :: tolerance = 1e-3 + real :: x(20), y(5) + real :: tolerance = 1e-4 integer :: n - integer, parameter :: num_iterations = 10000 + integer, parameter :: num_iterations = 100000 call random_number(x) y = [0.12345, 0.23456, 0.34567, 0.45678, 0.56789] net = network([ & - input(100), & - dropout(0.5), & + input(20), & + dense(20), & + dropout(0.2), & dense(5) & ]) @@ -155,9 +163,7 @@ program test_dropout_layer call net % forward(x) call net % backward(y) call net % update() - if (all(abs(net % predict(x) - y) < tolerance)) exit - end do if (.not. n <= num_iterations) then From 6c07cd7133a6e0f23c50bfba532e6ff070587076 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 15:09:51 -0500 Subject: [PATCH 23/30] Use the reference scaling in dropout; don't accumulate gradients because it's not needed --- src/nf/nf_dropout_layer_submodule.f90 | 7 +++---- src/nf/nf_network_submodule.f90 | 2 -- test/test_dropout_layer.f90 | 13 ++++++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index dcdc620b..7e2610c4 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -47,8 +47,7 @@ module subroutine forward(self, input) call shuffle(self % mask) ! Scale factor to preserve the input sum - self % scale = sum(input) / sum(input * self % mask) ! input conservative approach - !self % scale = 1 / (1 - self % dropout_rate) ! reference approach + self % scale = 1 / (1 - self % dropout_rate) ! Apply dropout mask self % output = input * self % mask * self % scale @@ -69,10 +68,10 @@ pure module subroutine backward(self, input, gradient) if (self % training) then ! Backpropagate gradient through dropout mask - self % gradient = self % gradient + gradient * self % mask * self % scale + self % gradient = gradient * self % mask * self % scale else ! In inference mode, pass through the gradient unchanged - self % gradient = self % gradient + gradient + self % gradient = gradient end if end subroutine backward diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index fae3476c..dd632d96 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -632,8 +632,6 @@ module subroutine update(self, optimizer, batch_size) type is(conv2d_layer) this_layer % dw = 0 this_layer % db = 0 - type is(dropout_layer) - this_layer % gradient = 0 end select end do diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index dc18789d..3d144138 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -112,16 +112,18 @@ program test_dropout_layer ! Now we're gonna run the forward pass and check that the dropout indeed ! drops according to the requested dropout rate. 
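  ! With the reference (inverted) scaling introduced in this patch, the forward
  ! pass computes output = input * mask / (1 - rate), so
  ! sum(input * mask) / sum(output) = 1 - rate; the realized dropout rate
  ! checked in the block below follows directly from that identity.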
forward_pass: block - real :: input_data(4) + real :: input_data(10) real :: output_data(size(input_data)) + real, parameter :: dropout_rate = 0.2 + real :: realized_dropout_rate integer :: n net = network([ & input(size(input_data)), & - dropout(0.5) & + dropout(dropout_rate) & ]) - do n = 1, 10000 + do n = 1, 100 call random_number(input_data) call net % forward(input_data) @@ -129,9 +131,10 @@ program test_dropout_layer ! Check that sum of output matches sum of input within small tolerance select type(layer1_p => net % layers(2) % p) type is(dropout_layer) - if (abs(sum(layer1_p % output) - sum(input_data)) > 1e-6) then + realized_dropout_rate = 1 - sum(input_data * layer1_p % mask) / sum(layer1_p % output) + if (abs(realized_dropout_rate - dropout_rate) > 1e-6) then ok = .false. - exit + write(stderr, '(a)') 'realized dropout rate does not match requested dropout rate.. failed' end if end select From a904c6e0f33a430f3f4165c12b63c867e6c37fe7 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 15:13:03 -0500 Subject: [PATCH 24/30] Add dropout to MNIST example; small model changes --- example/dense_mnist.f90 | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/example/dense_mnist.f90 b/example/dense_mnist.f90 index c26d0ced..c1db2da4 100644 --- a/example/dense_mnist.f90 +++ b/example/dense_mnist.f90 @@ -1,6 +1,6 @@ program dense_mnist - use nf, only: dense, input, network, sgd, label_digits, load_mnist, corr + use nf, only: dense, input, network, sgd, label_digits, load_mnist, corr, relu, softmax, dropout implicit none @@ -17,8 +17,9 @@ program dense_mnist net = network([ & input(784), & - dense(30), & - dense(10) & + dense(64, relu()), & + dropout(0.2), & + dense(10, softmax()) & ]) num_epochs = 10 @@ -32,7 +33,7 @@ program dense_mnist call net % train( & training_images, & label_digits(training_labels), & - batch_size=100, & + batch_size=128, & epochs=1, & optimizer=sgd(learning_rate=3.) & ) From 35671dd14c55fdc2a0f3672258964dbe6aa57242 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Thu, 20 Feb 2025 15:15:32 -0500 Subject: [PATCH 25/30] Add reference --- src/nf/nf_dropout_layer.f90 | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index 77999abb..570426b1 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -1,8 +1,11 @@ module nf_dropout_layer - !! This module provides the concrete dropout layer type. - !! It is used internally by the layer type. - !! It is not intended to be used directly by the user. + !! Dropout layer by Srivastava et al. (2014). + !! + !! Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I. and + !! Salakhutdinov, R., 2014. Dropout: a simple way to prevent neural networks + !! from overfitting. The Journal of Machine Learning Research, 16(1), + !! pp.1929-1958. 
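+  !!
+  !! The implementation uses "inverted" dropout: at training time each element
+  !! is zeroed with probability `dropout_rate` and the surviving elements are
+  !! scaled by 1 / (1 - dropout_rate), so the output keeps the same expected
+  !! value as the input and no extra scaling is required in inference mode.
+  !! A sketch of the intended end use, mirroring example/dense_mnist.f90
+  !! (layer sizes and the 0.2 rate are taken from that example):
+  !!
+  !!   net = network([ &
+  !!     input(784), &
+  !!     dense(64, relu()), &
+  !!     dropout(0.2), &
+  !!     dense(10, softmax()) &
+  !!   ])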
use nf_base_layer, only: base_layer From 31ebd69938e37494f255bf9c1c9434ca569f3fa1 Mon Sep 17 00:00:00 2001 From: "Vandenplas, Jeremie" Date: Fri, 21 Feb 2025 11:34:04 +0100 Subject: [PATCH 26/30] Update print_info dropout --- src/nf/nf_layer_submodule.f90 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index ca18f9c7..13903855 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -360,8 +360,10 @@ impure elemental module subroutine print_info(self) print '("Input shape: ", *(i0, 1x))', self % input_layer_shape print '("Output shape: ", *(i0, 1x))', self % layer_shape print '("Parameters: ", i0)', self % get_num_params() - if (.not. self % name == 'input') & + if (.not. (self % name == 'input' .or. self % name == 'dropout')) & print '("Activation: ", a)', self % activation + if (self % name == 'dropout') & + print '("Dropout rate: ", f0.2)', self % dropout_rate print * end subroutine print_info From 1cd9e2cd58c9602bd0a8775378502a764e5e27f6 Mon Sep 17 00:00:00 2001 From: "Vandenplas, Jeremie" Date: Fri, 21 Feb 2025 11:44:29 +0100 Subject: [PATCH 27/30] Update print_info --- src/nf/nf_layer_submodule.f90 | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 13903855..3f281411 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -359,11 +359,14 @@ impure elemental module subroutine print_info(self) if (.not. self % name == 'input') & print '("Input shape: ", *(i0, 1x))', self % input_layer_shape print '("Output shape: ", *(i0, 1x))', self % layer_shape - print '("Parameters: ", i0)', self % get_num_params() + if (.not. self % name == 'dropout') & + print '("Parameters: ", i0)', self % get_num_params() if (.not. (self % name == 'input' .or. self % name == 'dropout')) & print '("Activation: ", a)', self % activation - if (self % name == 'dropout') & - print '("Dropout rate: ", f0.2)', self % dropout_rate + select type (this_layer => self % p) + type is (dropout_layer) + print '("Dropout rate: ", f0.2)', this_layer % dropout_rate + end select print * end subroutine print_info From 8961f75c12121df06bf895c441c95fd57053c19a Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 21 Feb 2025 11:57:39 -0500 Subject: [PATCH 28/30] Compute scale once in dropout constructor --- src/nf/nf_dropout_layer_submodule.f90 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 7e2610c4..7ae24472 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -9,6 +9,7 @@ module function dropout_layer_cons(rate) result(res) real, intent(in) :: rate type(dropout_layer) :: res res % dropout_rate = rate + res % scale = 1 / (1 - rate) end function dropout_layer_cons @@ -46,9 +47,6 @@ module subroutine forward(self, input) self % mask(:int(size(self % mask) * self % dropout_rate)) = 0 call shuffle(self % mask) - ! Scale factor to preserve the input sum - self % scale = 1 / (1 - self % dropout_rate) - ! 
Apply dropout mask self % output = input * self % mask * self % scale From ee7fdc95645407fa7fe34696ba30f45647563bab Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 21 Feb 2025 12:04:21 -0500 Subject: [PATCH 29/30] dropout % backward() doesn't need input from the previous layer --- src/nf/nf_dropout_layer.f90 | 4 +--- src/nf/nf_dropout_layer_submodule.f90 | 12 ++---------- src/nf/nf_layer_submodule.f90 | 10 +--------- 3 files changed, 4 insertions(+), 22 deletions(-) diff --git a/src/nf/nf_dropout_layer.f90 b/src/nf/nf_dropout_layer.f90 index 570426b1..f7165aa0 100644 --- a/src/nf/nf_dropout_layer.f90 +++ b/src/nf/nf_dropout_layer.f90 @@ -48,14 +48,12 @@ end function dropout_layer_cons interface - pure module subroutine backward(self, input, gradient) + pure module subroutine backward(self, gradient) !! Apply the backward gradient descent pass. !! Only weight and bias gradients are updated in this subroutine, !! while the weights and biases themselves are untouched. class(dropout_layer), intent(in out) :: self !! Dropout layer instance - real, intent(in) :: input(:) - !! Input from the previous layer real, intent(in) :: gradient(:) !! Gradient from the next layer end subroutine backward diff --git a/src/nf/nf_dropout_layer_submodule.f90 b/src/nf/nf_dropout_layer_submodule.f90 index 7ae24472..3fe07b1a 100644 --- a/src/nf/nf_dropout_layer_submodule.f90 +++ b/src/nf/nf_dropout_layer_submodule.f90 @@ -59,18 +59,10 @@ module subroutine forward(self, input) end subroutine forward - pure module subroutine backward(self, input, gradient) + pure module subroutine backward(self, gradient) class(dropout_layer), intent(in out) :: self - real, intent(in) :: input(:) real, intent(in) :: gradient(:) - - if (self % training) then - ! Backpropagate gradient through dropout mask - self % gradient = gradient * self % mask * self % scale - else - ! In inference mode, pass through the gradient unchanged - self % gradient = gradient - end if + self % gradient = gradient * self % mask * self % scale end subroutine backward end submodule nf_dropout_layer_submodule \ No newline at end of file diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index ca18f9c7..7dd5cafe 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -40,16 +40,8 @@ pure module subroutine backward_1d(self, previous, gradient) end select type is(dropout_layer) - ! Upstream layers permitted: input1d, dense, dropout, flatten - select type(prev_layer => previous % p) - type is(input1d_layer) - call this_layer % backward(prev_layer % output, gradient) - type is(dense_layer) - call this_layer % backward(prev_layer % output, gradient) - type is(flatten_layer) - call this_layer % backward(prev_layer % output, gradient) - end select + call this_layer % backward(gradient) type is(flatten_layer) From a2726340521f7c1e6b8a8e10b400a414ede4c486 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 21 Feb 2025 12:23:20 -0500 Subject: [PATCH 30/30] Timing info of dropout --- test/test_dropout_layer.f90 | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/test/test_dropout_layer.f90 b/test/test_dropout_layer.f90 index 3d144138..a79d0de5 100644 --- a/test/test_dropout_layer.f90 +++ b/test/test_dropout_layer.f90 @@ -177,6 +177,61 @@ program test_dropout_layer end block training + ! The following timing test is not part of the unit tests, but it's a good + ! way to see the performance difference between a network with and without + ! dropout. 
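+  ! The two networks below differ only by the dropout layer between the two
+  ! dense layers; each accumulates the cpu_time spent in forward, backward,
+  ! and update over the same number of iterations, and the totals can be
+  ! compared by uncommenting the print statements at the end of the block.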
+ timing: block + integer, parameter :: layer_size = 100 + integer, parameter :: num_iterations = 1000 + real :: x(layer_size), y(layer_size) + integer :: n + type(network) :: net1, net2 + real :: t1, t2 + real :: accumulated_time1 = 0 + real :: accumulated_time2 = 0 + + net1 = network([ & + input(layer_size), & + dense(layer_size), & + dense(layer_size) & + ]) + + net2 = network([ & + input(layer_size), & + dense(layer_size), & + dropout(0.5), & + dense(layer_size) & + ]) + + call random_number(y) + + ! Network without dropout + do n = 1, num_iterations + call random_number(x) + call cpu_time(t1) + call net1 % forward(x) + call net1 % backward(y) + call net1 % update() + call cpu_time(t2) + accumulated_time1 = accumulated_time1 + (t2 - t1) + end do + + ! Network with dropout + do n = 1, num_iterations + call random_number(x) + call cpu_time(t1) + call net2 % forward(x) + call net2 % backward(y) + call net2 % update() + call cpu_time(t2) + accumulated_time2 = accumulated_time2 + (t2 - t1) + end do + + ! Uncomment the following prints to see the timing results. + !print '(a, f9.6, a, f9.6, a)', 'No dropout time: ', accumulated_time1, ' seconds' + !print '(a, f9.6, a, f9.6, a)', 'Dropout time: ', accumulated_time2, ' seconds' + + end block timing if (ok) then print '(a)', 'test_dropout_layer: All tests passed.'