Skip to content

Commit cedd259

Browse files
committed
Add batchesPerIteration
1 parent f59afea commit cedd259

File tree

5 files changed

+71
-34
lines changed

5 files changed

+71
-34
lines changed

algo/src/main/java/org/neo4j/gds/embeddings/graphsage/GraphSageModelTrainer.java

Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -70,19 +70,12 @@
7070
public class GraphSageModelTrainer {
7171
private final long randomSeed;
7272
private final boolean useWeights;
73-
private final double learningRate;
74-
private final double tolerance;
75-
private final int negativeSampleWeight;
76-
private final int concurrency;
77-
private final int epochs;
78-
private final int maxIterations;
79-
private final int maxSearchDepth;
8073
private final Function<Graph, List<LayerConfig>> layerConfigsFunction;
8174
private final FeatureFunction featureFunction;
8275
private final Collection<Weights<Matrix>> labelProjectionWeights;
8376
private final ExecutorService executor;
8477
private final ProgressTracker progressTracker;
85-
private final int batchSize;
78+
private final GraphSageTrainConfig config;
8679

8780
public GraphSageModelTrainer(GraphSageTrainConfig config, ExecutorService executor, ProgressTracker progressTracker) {
8881
this(config, executor, progressTracker, new SingleLabelFeatureFunction(), Collections.emptyList());
@@ -96,14 +89,7 @@ public GraphSageModelTrainer(
9689
Collection<Weights<Matrix>> labelProjectionWeights
9790
) {
9891
this.layerConfigsFunction = graph -> config.layerConfigs(firstLayerColumns(config, graph));
99-
this.batchSize = config.batchSize();
100-
this.learningRate = config.learningRate();
101-
this.tolerance = config.tolerance();
102-
this.negativeSampleWeight = config.negativeSampleWeight();
103-
this.concurrency = config.concurrency();
104-
this.epochs = config.epochs();
105-
this.maxIterations = config.maxIterations();
106-
this.maxSearchDepth = config.searchDepth();
92+
this.config = config;
10793
this.featureFunction = featureFunction;
10894
this.labelProjectionWeights = labelProjectionWeights;
10995
this.executor = executor;
@@ -141,7 +127,7 @@ public ModelTrainResult train(Graph graph, HugeObjectArray<double[]> features) {
141127

142128
var batchTasks = PartitionUtils.rangePartitionWithBatchSize(
143129
graph.nodeCount(),
144-
batchSize,
130+
config.batchSize(),
145131
batch -> createBatchTask(graph, features, layers, weights, batch)
146132
);
147133

@@ -155,6 +141,7 @@ public ModelTrainResult train(Graph graph, HugeObjectArray<double[]> features) {
155141

156142
progressTracker.beginSubTask("Train model");
157143

144+
int epochs = config.epochs();
158145
for (int epoch = 1; epoch <= epochs && !converged; epoch++) {
159146
progressTracker.beginSubTask("Epoch");
160147
var epochResult = trainEpoch(() -> batchTasks.get(random.nextInt(batchTasks.size())), weights, prevEpochLoss);
@@ -195,36 +182,36 @@ private BatchTask createBatchTask(
195182
useWeights ? localGraph::relationshipProperty : UNWEIGHTED,
196183
embeddingVariable,
197184
totalBatch,
198-
negativeSampleWeight
185+
config.negativeSampleWeight()
199186
);
200187

201-
return new BatchTask(lossFunction, weights, tolerance, progressTracker);
188+
return new BatchTask(lossFunction, weights, progressTracker);
202189
}
203190

204191
private EpochResult trainEpoch(Supplier<BatchTask> batchTaskSupplier, List<Weights<? extends Tensor<?>>> weights, double prevEpochLoss) {
205-
var updater = new AdamOptimizer(weights, learningRate);
192+
var updater = new AdamOptimizer(weights, config.learningRate());
206193

207194
int iteration = 1;
208195
var iterationLosses = new ArrayList<Double>();
209196
double prevLoss = prevEpochLoss;
210197
var converged = false;
211198

212-
for (;iteration <= maxIterations; iteration++) {
199+
int maxIterations = config.maxIterations();
200+
for (; iteration <= maxIterations; iteration++) {
213201
progressTracker.beginSubTask("Iteration");
214202

215-
// TODO let the user configure the number of batches per iteration
216203
var batchTasks = IntStream
217-
.range(0, concurrency)
204+
.range(0, config.batchesPerIteration())
218205
.mapToObj(__ -> batchTaskSupplier.get())
219206
.collect(Collectors.toList());
220207

221208
// run forward + maybe backward for each Batch
222-
ParallelUtil.runWithConcurrency(concurrency, batchTasks, executor);
209+
ParallelUtil.runWithConcurrency(config.concurrency(), batchTasks, executor);
223210
var avgLoss = batchTasks.stream().mapToDouble(BatchTask::loss).average().orElseThrow();
224211
iterationLosses.add(avgLoss);
225212
progressTracker.logMessage(formatWithLocale("LOSS: %.10f", avgLoss));
226213

227-
if (Math.abs(prevLoss - avgLoss) < tolerance) {
214+
if (Math.abs(prevLoss - avgLoss) < config.tolerance()) {
228215
converged = true;
229216
progressTracker.endSubTask("Iteration");
230217
break;
@@ -258,19 +245,16 @@ static class BatchTask implements Runnable {
258245
private final Variable<Scalar> lossFunction;
259246
private final List<Weights<? extends Tensor<?>>> weightVariables;
260247
private List<? extends Tensor<?>> weightGradients;
261-
private final double tolerance;
262248
private final ProgressTracker progressTracker;
263249
private double prevLoss;
264250

265251
BatchTask(
266252
Variable<Scalar> lossFunction,
267253
List<Weights<? extends Tensor<?>>> weightVariables,
268-
double tolerance,
269254
ProgressTracker progressTracker
270255
) {
271256
this.lossFunction = lossFunction;
272257
this.weightVariables = weightVariables;
273-
this.tolerance = tolerance;
274258
this.progressTracker = progressTracker;
275259
}
276260

@@ -321,7 +305,7 @@ LongStream neighborBatch(Graph graph, Partition batch, long batchLocalSeed) {
321305
// sample a neighbor for each batchNode
322306
batch.consume(nodeId -> {
323307
// randomWalk with at most maxSearchDepth steps and only save last node
324-
int searchDepth = localRandom.nextInt(maxSearchDepth) + 1;
308+
int searchDepth = localRandom.nextInt(config.searchDepth()) + 1;
325309
AtomicLong currentNode = new AtomicLong(nodeId);
326310
while (searchDepth > 0) {
327311
NeighborhoodSampler neighborhoodSampler = new NeighborhoodSampler(currentNode.get() + searchDepth);

algo/src/main/java/org/neo4j/gds/embeddings/graphsage/algo/GraphSageTrainConfig.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,15 @@ default int maxIterations() {
120120
return 10;
121121
}
122122

123+
@Configuration.Key("batchesPerIteration")
124+
Optional<Integer> maybeBatchesPerIteration();
125+
126+
@Configuration.Ignore
127+
@Value.Derived
128+
default int batchesPerIteration() {
129+
return maybeBatchesPerIteration().orElse(concurrency());
130+
}
131+
123132
@Value.Default
124133
default int searchDepth() {
125134
return 5;

algo/src/test/java/org/neo4j/gds/embeddings/graphsage/GraphSageModelTrainerTest.java

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.junit.jupiter.api.BeforeEach;
2727
import org.junit.jupiter.api.Test;
2828
import org.junit.jupiter.params.ParameterizedTest;
29+
import org.junit.jupiter.params.provider.CsvSource;
2930
import org.junit.jupiter.params.provider.ValueSource;
3031
import org.neo4j.gds.Orientation;
3132
import org.neo4j.gds.api.Graph;
@@ -34,7 +35,7 @@
3435
import org.neo4j.gds.core.utils.partition.PartitionUtils;
3536
import org.neo4j.gds.core.utils.progress.tasks.ProgressTracker;
3637
import org.neo4j.gds.embeddings.graphsage.algo.GraphSageTrainConfig;
37-
import org.neo4j.gds.embeddings.graphsage.algo.ImmutableGraphSageTrainConfig;
38+
import org.neo4j.gds.embeddings.graphsage.algo.GraphSageTrainConfigImpl;
3839
import org.neo4j.gds.extension.GdlExtension;
3940
import org.neo4j.gds.extension.GdlGraph;
4041
import org.neo4j.gds.extension.Inject;
@@ -77,7 +78,7 @@ class GraphSageModelTrainerTest {
7778
@Inject
7879
private Graph arrayGraph;
7980
private HugeObjectArray<double[]> features;
80-
private ImmutableGraphSageTrainConfig.Builder configBuilder;
81+
private GraphSageTrainConfigImpl.Builder configBuilder;
8182

8283

8384
@BeforeEach
@@ -87,7 +88,8 @@ void setUp() {
8788

8889
Random random = new Random(19L);
8990
LongStream.range(0, nodeCount).forEach(n -> features.set(n, random.doubles(FEATURES_COUNT).toArray()));
90-
configBuilder = ImmutableGraphSageTrainConfig.builder()
91+
configBuilder = GraphSageTrainConfigImpl.builder()
92+
.username("DUMMY")
9193
.featureProperties(Collections.nCopies(FEATURES_COUNT, "dummyProp"))
9294
.embeddingDimension(EMBEDDING_DIMENSION);
9395
}
@@ -202,7 +204,7 @@ void testLosses() {
202204
.embeddingDimension(12)
203205
.epochs(10)
204206
.tolerance(1e-10)
205-
.addSampleSizes(5, 3)
207+
.sampleSizes(List.of(5, 3))
206208
.batchSize(5)
207209
.maxIterations(100)
208210
.randomSeed(42L)
@@ -250,7 +252,7 @@ void testLossesWithPoolAggregator() {
250252
.aggregator(AggregatorType.POOL)
251253
.epochs(10)
252254
.tolerance(1e-10)
253-
.addSampleSizes(5, 3)
255+
.sampleSizes(List.of(5, 3))
254256
.batchSize(5)
255257
.maxIterations(100)
256258
.randomSeed(42L)
@@ -306,6 +308,35 @@ void testConvergence() {
306308
assertThat(trainMetrics.ranIterationsPerEpoch()).containsExactly(2);
307309
}
308310

311+
@ParameterizedTest
312+
@CsvSource({
313+
"1, true, 8",
314+
"5, false, 10"
315+
})
316+
void batchesPerIteration(int batchesPerIteration, boolean expectedConvergence, int expectedRanEpochs) {
317+
var trainer = new GraphSageModelTrainer(
318+
configBuilder.modelName("convergingModel:)")
319+
.maybeBatchesPerIteration(batchesPerIteration)
320+
.embeddingDimension(12)
321+
.aggregator(AggregatorType.POOL)
322+
.epochs(10)
323+
.tolerance(1e-10)
324+
.sampleSizes(List.of(5, 3))
325+
.batchSize(5)
326+
.maxIterations(100)
327+
.randomSeed(42L)
328+
.build(),
329+
Pools.DEFAULT,
330+
ProgressTracker.NULL_TRACKER
331+
);
332+
333+
var trainResult = trainer.train(graph, features);
334+
335+
var trainMetrics = trainResult.metrics();
336+
assertThat(trainMetrics.didConverge()).isEqualTo(expectedConvergence);
337+
assertThat(trainMetrics.ranEpochs()).isEqualTo(expectedRanEpochs);
338+
}
339+
309340
@ParameterizedTest
310341
@ValueSource(longs = {20L, -100L, 30L})
311342
void seededSingleBatch(long seed) {

algo/src/test/java/org/neo4j/gds/embeddings/graphsage/algo/GraphSageConfigTest.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import java.util.Map;
3131
import java.util.stream.Stream;
3232

33+
import static org.assertj.core.api.Assertions.assertThat;
3334
import static org.assertj.core.api.Assertions.assertThatThrownBy;
3435
import static org.junit.jupiter.api.Assertions.assertFalse;
3536
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -50,6 +51,17 @@ private static Stream<Arguments> invalidAggregator() {
5051
);
5152
}
5253

54+
@Test
55+
void specifyBatchesPerIteration() {
56+
var mapWrapper = CypherMapWrapper.create(Map.of(
57+
"modelName", "foo",
58+
"featureProperties", List.of("a"),
59+
"batchesPerIteration", 42
60+
));
61+
62+
assertThat(GraphSageTrainConfig.of("user", mapWrapper).batchesPerIteration()).isEqualTo(42);
63+
}
64+
5365
@Test
5466
void shouldThrowIfNoPropertiesProvided() {
5567
var mapWrapper = CypherMapWrapper.create(Map.of("modelName", "foo"));

doc/asciidoc/machine-learning/node-embeddings/graph-sage/specific-train-configuration.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
| learningRate | Float | 0.1 | yes | The learning rate determines the step size at each iteration while moving toward a minimum of a loss function.
1515
| epochs | Integer | 1 | yes | Number of times to traverse the graph.
1616
| <<common-configuration-max-iterations,maxIterations>> | Integer | 10 | yes | Maximum number of iterations per epoch. Each iteration the weights are updated.
17+
| batchesPerIteration | Integer | `concurrency` | yes | Number of batches to consider per weight update.
1718
| searchDepth | Integer | 5 | yes | Maximum depth of the RandomWalks to sample nearby nodes for the training.
1819
| negativeSampleWeight | Integer | 20 | yes | The weight of the negative samples. Higher values increase the impact of negative samples in the loss.
1920
| <<common-configuration-relationship-weight-property,relationshipWeightProperty>> | String | null | yes | Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted.

0 commit comments

Comments
 (0)