Memory prediction and task scaling #10

Merged

Commits (101 total; changes shown from 85 commits)
60a58ee
collect task execution results and store them in the memory optimizer
friederici Sep 27, 2023
dda273a
restructured TaskScaler
friederici Oct 18, 2023
a846aa2
cleanup debug logs
friederici Oct 18, 2023
a87afc1
cleanup Scheduler
friederici Oct 19, 2023
ffcddd4
cleanup Task
friederici Oct 19, 2023
4b7640d
cleanup Task
friederici Oct 19, 2023
8eadf40
add hook after workflow is completed
friederici Oct 19, 2023
537df5d
change MemoryOptimizer to be an interface, add two different Optimizers
friederici Oct 19, 2023
82aaf7f
round suggestions to ceiling
friederici Oct 20, 2023
0826b91
introduced LinearPredictor
friederici Oct 20, 2023
440add6
changed indentation to 4 spaces, like rest of the project uses
friederici Oct 20, 2023
cd5a66a
fix mis-formatting
friederici Oct 20, 2023
43f07e7
fix typos
friederici Oct 20, 2023
445a84b
initial implementation linearPredictor
friederici Oct 20, 2023
02fee25
builder for observations
friederici Oct 24, 2023
8755f00
added test for constant predictor
friederici Oct 24, 2023
fde2665
remove wasted calculation from observation
friederici Oct 24, 2023
daecba1
remove wasted calculation from observation
friederici Oct 24, 2023
01185df
add NonePredictorTest
friederici Oct 24, 2023
827e2b4
sanity checks for observations
friederici Oct 24, 2023
cbcee82
add negative case for ConstantPredictor
friederici Oct 24, 2023
66ab197
assert rise and fall of suggestions
friederici Oct 24, 2023
9f8b576
added LinearPredictorTest
friederici Oct 24, 2023
61bd311
avoid negative preditions
friederici Oct 24, 2023
14ef86c
use SimpleRegression for LinearPredictor
friederici Oct 24, 2023
4098e74
fix naming to always be prediction, instead of suggestion
friederici Oct 24, 2023
a081142
fix naming
friederici Oct 24, 2023
dceab80
fix some minor issues
friederici Oct 25, 2023
f705f61
collect statistics
friederici Oct 25, 2023
c126281
removed Limits, rely solely on Requests instead
friederici Oct 26, 2023
2e8d14f
added new CombiPredictor
friederici Oct 26, 2023
d9e578b
remove solved fixme
friederici Oct 26, 2023
b738ac4
csv export
friederici Oct 26, 2023
9b61cfc
Merge pull request #1 from CommonWorkflowScheduler/master
friederici Oct 26, 2023
10ceba7
save statistics summary and csv into file in workflow baseDir
friederici Oct 26, 2023
47222c1
added Tasks realtime to statistic, moved NfTrace reader to own utilit…
friederici Oct 27, 2023
b503166
add test if trace file is missing and handling for that
friederici Oct 27, 2023
c3cc4f2
fix no resize when request was 0
friederici Oct 29, 2023
f61f99c
apply config only in dev profile
friederici Oct 29, 2023
a732d2f
statistics log execution and predictor
friederici Oct 29, 2023
6bc2bc0
log makespan
friederici Oct 29, 2023
a38cadc
add peak_vmem for sanity checks
friederici Oct 29, 2023
2e860d5
add unit tests for statistics
friederici Oct 30, 2023
61e9f96
added todos for missing testcases
friederici Oct 30, 2023
1ec1417
fix for-loop should continue, not break
friederici Nov 1, 2023
db4e822
only invoke TaskScaler when config was given
friederici Nov 1, 2023
2ede083
get memory predictor from config, not from environment
friederici Nov 1, 2023
2bd9a00
removed double code
friederici Nov 1, 2023
160dd71
prepare application.yml for merge
friederici Nov 3, 2023
1da54a8
prepare application.yml for merge
friederici Nov 3, 2023
5b21105
fixed decimal seperator
friederici Nov 3, 2023
0ceb17a
Merge pull request #4 from CommonWorkflowScheduler/master
friederici Nov 3, 2023
ed71e1d
fix decimal seperator
friederici Nov 3, 2023
a352a4b
changed logging in dev profile
friederici Nov 3, 2023
838faee
improved predictor selection order
friederici Nov 3, 2023
5f6d2a1
added template for square predictor
friederici Nov 4, 2023
1864d9d
collect wasted in summary
friederici Nov 4, 2023
9373302
add wasted to statistics
friederici Nov 4, 2023
b07c526
avoid updating tasks when no new model is available
friederici Nov 4, 2023
6e5a35a
added new testcases
friederici Nov 4, 2023
c8caf00
change return value for missing file to -1
friederici Nov 6, 2023
c6ab571
changed sanity check
friederici Nov 6, 2023
3896718
fix constant predictor
friederici Nov 6, 2023
af35f16
faster overprovisioning
friederici Nov 6, 2023
9a99b48
add wary predictor
friederici Nov 7, 2023
ae72d0c
fix imports
friederici Nov 7, 2023
bd1a1bc
wary predictor
friederici Nov 7, 2023
66730f9
filter realtime 0
friederici Nov 8, 2023
d7f093e
use vmem instead of rss
friederici Nov 8, 2023
54e907e
correct tests
friederici Nov 8, 2023
c3c2ebb
require 4 successful observations
friederici Nov 8, 2023
496b82a
ignore list feature
friederici Nov 8, 2023
8478ee4
never provide predictions lower than the lowest successful value was
friederici Nov 8, 2023
cd70d2d
prevent cws from get stuck
friederici Nov 11, 2023
6f01832
blacklist failed tasks
friederici Nov 16, 2023
c878d7f
removed flawed wasted from csv, added assigned node
friederici Nov 18, 2023
a74c20e
lower limit for request size 256MiB
friederici Nov 19, 2023
e32c74a
fix bad naming
friederici Nov 23, 2023
98b64c2
junit test for TaskScaler
friederici Dec 6, 2023
34117eb
add remark for TaskScalerTest
friederici Dec 6, 2023
a96a151
fix used predictor
friederici Dec 6, 2023
8f4ca6f
removed unimplemended square predictor
friederici Feb 20, 2024
0fb4ea2
removed unused generation feature from constant predictor
friederici Feb 20, 2024
d513125
removed unused generation feature
friederici Feb 20, 2024
1791a1a
cleanup classname
friederici Feb 20, 2024
aa6f7ea
removed testcase that is no longer in line with desired behaviour
friederici Feb 20, 2024
18fa249
fixed comments
friederici Feb 20, 2024
f698ce6
add description to README
friederici Feb 20, 2024
e34a4e5
add description to README
friederici Feb 20, 2024
5c67ec1
catch exception that is thrown when InPlacePodVerticalScaling is not …
friederici Feb 27, 2024
e6e3f63
add note on profiles in README
friederici Feb 27, 2024
9a1064f
always write log to file
friederici Feb 27, 2024
bdf74ce
check reason for exception and improve error message, then disable ta…
friederici Feb 28, 2024
fadd42d
fix comment
friederici Mar 2, 2024
6bd513e
fix formatting
friederici Mar 2, 2024
89f1360
moved patchTaskMemory method
friederici Mar 2, 2024
647da6d
add tracing note in README
friederici Mar 2, 2024
49bebe7
reduce loglevel
friederici Mar 2, 2024
7ba49db
change predictor interface to return BigDecimal
friederici Mar 3, 2024
fc76c77
extracted constant for lowest memory request value
friederici Mar 3, 2024
5f6fb5e
add o.taskName to log, when available
friederici Mar 3, 2024
6 changes: 6 additions & 0 deletions pom.xml
@@ -134,6 +134,12 @@
<artifactId>jackson-annotations</artifactId>
</dependency>

<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.6.1</version>
</dependency>

</dependencies>

<build>
85 changes: 85 additions & 0 deletions src/main/java/cws/k8s/scheduler/memory/CombiPredictor.java
@@ -0,0 +1,85 @@
/*
* Copyright (c) 2023, Florian Friederici. All rights reserved.
*
* This code is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* This code is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this work. If not, see <https://www.gnu.org/licenses/>.
*/

package cws.k8s.scheduler.memory;

import java.math.BigDecimal;

import cws.k8s.scheduler.model.Task;
import lombok.extern.slf4j.Slf4j;

//@formatter:off
/**
 * CombiPredictor combines the predictions made by ConstantPredictor and
 * LinearPredictor.
 *
 * LinearPredictor fails when the observed tasks show no differences in
 * inputSize; ConstantPredictor can handle that case. CombiPredictor
 * therefore runs both and decides dynamically which prediction to apply.
*
* @author Florian Friederici
*
*/
//@formatter:on
@Slf4j
public class CombiPredictor implements MemoryPredictor {

ConstantPredictor constantPredictor;
LinearPredictor linearPredictor;

public CombiPredictor() {
this.constantPredictor = new ConstantPredictor();
this.linearPredictor = new LinearPredictor();
}

@Override
public void addObservation(Observation o) {
log.debug("CombiPredictor.addObservation({})", o);
constantPredictor.addObservation(o);
linearPredictor.addObservation(o);
}

@Override
public String queryPrediction(Task task) {
String taskName = task.getConfig().getTask();
log.debug("CombiPredictor.queryPrediction({},{})", taskName, task.getInputSize());

String constantPrediction = constantPredictor.queryPrediction(task);
String linearPrediction = linearPredictor.queryPrediction(task);

if (constantPrediction==null && linearPrediction==null) {
// no prediction available at all
return null;
}

if (constantPrediction!=null && linearPrediction==null) {
// only the constantPrediction is available
return constantPrediction;
}

if (constantPrediction==null && linearPrediction!=null) {
// only the linearPrediction is available (unusual case)
return linearPrediction;
}

log.debug("constantPrediction={}, linearPrediction={}, difference={}", constantPrediction, linearPrediction, new BigDecimal(constantPrediction).subtract(new BigDecimal(linearPrediction)));

// prefer linearPrediction if both would be available
return linearPrediction;
}

}
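The decision rule above can be sketched in isolation. Task and Observation are scheduler types, so plain strings stand in for predictions here; this is an illustrative sketch, not code from the repository:

```java
// Sketch of CombiPredictor's selection rule, isolated from the scheduler:
// prefer the linear prediction whenever it exists, otherwise fall back to
// the constant one; null means "no prediction available at all".
public class CombiSelectionSketch {

    static String select(String constantPrediction, String linearPrediction) {
        if (linearPrediction != null) {
            return linearPrediction; // preferred when both are available
        }
        return constantPrediction;   // may itself be null
    }

    public static void main(String[] args) {
        System.out.println(select("1024", "900"));  // linear wins: 900
        System.out.println(select("1024", null));   // constant fallback: 1024
        System.out.println(select(null, null));     // no prediction: null
    }
}
```

Because the constant prediction is only returned when the linear one is null, the four if-branches in the method above collapse to this single preference rule.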
98 changes: 98 additions & 0 deletions src/main/java/cws/k8s/scheduler/memory/ConstantPredictor.java
@@ -0,0 +1,98 @@
/*
* Copyright (c) 2023, Florian Friederici. All rights reserved.
*
* This code is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* This code is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this work. If not, see <https://www.gnu.org/licenses/>.
*/

package cws.k8s.scheduler.memory;

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.HashMap;
import java.util.Map;

import cws.k8s.scheduler.model.Task;
import lombok.extern.slf4j.Slf4j;

// @formatter:off
/**
 * ConstantPredictor uses the following strategy:
 *
 * - In case the task was successful:
 *   - let the next prediction be 10% higher than the peakRss was
 *
 * - In case the task has failed:
 *   - reset to the initial value
 *
 * I.e. the suggestions from ConstantPredictor do not depend on the input
 * size of the tasks.
*
* @author Florian Friederici
*
*/
// @formatter:on
@Slf4j
class ConstantPredictor implements MemoryPredictor {

Map<String, BigDecimal> model;
Map<String, BigDecimal> initialValue;

public ConstantPredictor() {
model = new HashMap<>();
initialValue = new HashMap<>();
}

@Override
public void addObservation(Observation o) {
log.debug("ConstantPredictor.addObservation({})", o);
if (!TaskScaler.checkObservationSanity(o)) {
log.warn("dismiss observation {}", o);
return;
}

// store initial ramRequest value per task
if (!initialValue.containsKey(o.task)) {
initialValue.put(o.task, o.getRamRequest());
}

if (Boolean.TRUE.equals(o.success)) {
// set model to peakRss + 10%
if (model.containsKey(o.task)) {
model.replace(o.task, o.peakRss.multiply(new BigDecimal("1.1")).setScale(0, RoundingMode.CEILING));
[Review comment — Member]
Shouldn't the constant predictor always use the highest value ever seen? This replaces it with the most recent one.

[Reply — Contributor Author]
The current behaviour is:
  - In case the task was successful: let the next prediction be 10% higher than the peakRss was
  - In case the task has failed: reset to the initial value
If the scheduler provides the tasks in order, with the tasks that have the biggest inputs first, the prediction will follow and always shrink. But I agree that different "constant" strategies could be taken, e.g.:
  - constant, biggest value seen
  - constant, lowest value seen
  - constant, latest value seen  <- current approach

[Reply — Member]
Ordering by size descending is only one of many possible scheduling strategies; ordering could also be ascending, random, or FIFO. We should use the maximum with an x% offset.

} else {
model.put(o.task, o.peakRss.multiply(new BigDecimal("1.1")).setScale(0, RoundingMode.CEILING));
}
} else {
// reset to initialValue
if (model.containsKey(o.task)) {
model.replace(o.task, this.initialValue.get(o.task));
} else {
model.put(o.task, o.ramRequest.multiply(new BigDecimal(2)).setScale(0, RoundingMode.CEILING));
}
}

}

@Override
public String queryPrediction(Task task) {
String taskName = task.getConfig().getTask();
log.debug("ConstantPredictor.queryPrediction({})", taskName);

if (model.containsKey(taskName)) {
return model.get(taskName).toPlainString();
} else {
return null;
}
}
}
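The update arithmetic, together with the "biggest value seen" variant discussed in the review thread above, can be sketched stand-alone. The class and method names here are illustrative, not from the repository:

```java
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.HashMap;
import java.util.Map;

// Stand-alone sketch of ConstantPredictor's arithmetic. nextPrediction()
// is the rule used above (latest peakRss + 10%, rounded up to a whole
// value); maxSeenPrediction() is the reviewer's suggested variant that
// keeps the running maximum peakRss per task instead of the latest one.
public class ConstantRuleSketch {

    static final Map<String, BigDecimal> maxPeak = new HashMap<>();

    // current rule: latest peakRss + 10%, ceiling to an integer value
    static BigDecimal nextPrediction(BigDecimal peakRss) {
        return peakRss.multiply(new BigDecimal("1.1"))
                      .setScale(0, RoundingMode.CEILING);
    }

    // suggested variant: remember the maximum peakRss ever seen per task
    static BigDecimal maxSeenPrediction(String task, BigDecimal peakRss) {
        maxPeak.merge(task, peakRss, BigDecimal::max);
        return nextPrediction(maxPeak.get(task));
    }

    public static void main(String[] args) {
        // 1001 * 1.1 = 1101.1, rounded up to 1102
        System.out.println(nextPrediction(new BigDecimal("1001")));
        maxSeenPrediction("foo", new BigDecimal("2000"));
        // a smaller later observation does not lower the max-seen variant
        System.out.println(maxSeenPrediction("foo", new BigDecimal("1500"))); // 2200
    }
}
```

The ceiling rounding matters for memory requests: rounding down could produce a value just below the observed peak, whereas rounding up only ever costs a single byte of extra headroom.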
108 changes: 108 additions & 0 deletions src/main/java/cws/k8s/scheduler/memory/LinearPredictor.java
@@ -0,0 +1,108 @@
/*
* Copyright (c) 2023, Florian Friederici. All rights reserved.
*
* This code is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* This code is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this work. If not, see <https://www.gnu.org/licenses/>.
*/

package cws.k8s.scheduler.memory;

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.math3.stat.regression.SimpleRegression;

import cws.k8s.scheduler.model.Task;
import lombok.extern.slf4j.Slf4j;

//@formatter:off
/**
 * LinearPredictor uses the following strategy:
 *
 * If there are fewer than 2 observations, give no prediction; otherwise,
 * calculate a linear regression model and provide predictions.
 *
 * Predictions start with 10% over-provisioning. If tasks fail, this
 * increases automatically.
*
* @author Florian Friederici
*
*/
//@formatter:on
@Slf4j
public class LinearPredictor implements MemoryPredictor {

Map<String, SimpleRegression> model;
Map<String, Double> overprovisioning;

public LinearPredictor() {
model = new HashMap<>();
overprovisioning = new HashMap<>();
}

@Override
public void addObservation(Observation o) {
log.debug("LinearPredictor.addObservation({})", o);
if (!TaskScaler.checkObservationSanity(o)) {
log.warn("dismiss observation {}", o);
return;
}

if (!overprovisioning.containsKey(o.task)) {
overprovisioning.put(o.task, 1.1);
}

if (Boolean.TRUE.equals(o.success)) {
if (!model.containsKey(o.task)) {
model.put(o.task, new SimpleRegression());
}

double x = o.getInputSize();
double y = o.getPeakRss().doubleValue();
model.get(o.task).addData(x,y);
} else {
log.debug("overprovisioning value will increase due to task failure");
Double old = overprovisioning.get(o.task);
overprovisioning.put(o.task, old+0.05);
[Review comment — Member]
I assume this will increase at the beginning, where we might make a few wrong predictions. However, the overprovisioning is never decreased once we have more observations and maybe better predictions. We can also leave this for the future, as this is a very cautious approach.

[Reply — Contributor Author]
The overprovisioning was my first attempt at solving the problem of too-low estimates. Later I learned that errorStrategy is set to terminate by default and maxRetries is very low (I believe 1 by default), so in practice this value will rarely grow very much.

[Reply — Member]
How about a strategy that checks the predictions for each task in the training data and determines the highest offset needed to fit all/95%/99% of the values? The percentage could be a user-defined hyperparameter.

}
}

@Override
public String queryPrediction(Task task) {
String taskName = task.getConfig().getTask();
log.debug("LinearPredictor.queryPrediction({},{})", taskName, task.getInputSize());

if (!model.containsKey(taskName)) {
log.debug("LinearPredictor has no model for {}", taskName);
return null;
}

SimpleRegression simpleRegression = model.get(taskName);
double prediction = simpleRegression.predict(task.getInputSize());

if (Double.isNaN(prediction)) {
log.debug("No prediction possible for {}", taskName);
return null;
}

if (prediction < 0) {
log.warn("prediction would be negative: {}", prediction);
return null;
}

return BigDecimal.valueOf(prediction).multiply(BigDecimal.valueOf(overprovisioning.get(taskName))).setScale(0, RoundingMode.CEILING).toPlainString();
}

}
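The regression strategy can be sketched without the commons-math3 dependency: a hand-rolled ordinary-least-squares fit stands in for SimpleRegression, while the overprovisioning scaling and ceiling rounding mirror the code above. This is a dependency-free sketch, not the repository's implementation:

```java
import java.math.BigDecimal;
import java.math.RoundingMode;

// Miniature LinearPredictor: fit y = intercept + slope * x over
// (inputSize, peakRss) observations, then scale the prediction by an
// overprovisioning factor and round up, with the same NaN/negative
// guards as the class above.
public class LinearSketch {
    static double slope, intercept;

    // ordinary least squares over the observed (x, y) pairs
    static void fit(double[] x, double[] y) {
        double mx = 0, my = 0;
        for (int i = 0; i < x.length; i++) { mx += x[i]; my += y[i]; }
        mx /= x.length; my /= y.length;
        double num = 0, den = 0;
        for (int i = 0; i < x.length; i++) {
            num += (x[i] - mx) * (y[i] - my);
            den += (x[i] - mx) * (x[i] - mx);
        }
        slope = num / den;
        intercept = my - slope * mx;
    }

    static String predict(double inputSize, double overprovisioning) {
        double p = intercept + slope * inputSize;
        if (Double.isNaN(p) || p < 0) return null; // same guards as above
        return BigDecimal.valueOf(p)
                .multiply(BigDecimal.valueOf(overprovisioning))
                .setScale(0, RoundingMode.CEILING).toPlainString();
    }

    public static void main(String[] args) {
        // two observations: input 100 -> peak 200, input 200 -> peak 300
        fit(new double[] {100, 200}, new double[] {200, 300});
        // input 150 -> raw prediction 250, * 1.1 overprovisioning = 275
        System.out.println(predict(150, 1.1));
    }
}
```

Note that `den` is zero when all observed input sizes are identical, making the prediction NaN; that is exactly the case the Javadoc of CombiPredictor describes, where ConstantPredictor has to take over.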
61 changes: 61 additions & 0 deletions src/main/java/cws/k8s/scheduler/memory/MemoryPredictor.java
@@ -0,0 +1,61 @@
/*
* Copyright (c) 2023, Florian Friederici. All rights reserved.
*
* This code is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* This code is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this work. If not, see <https://www.gnu.org/licenses/>.
*/

package cws.k8s.scheduler.memory;

import cws.k8s.scheduler.model.Task;

// @formatter:off
/**
* The MemoryPredictor has two important interfaces:
*
 * 1) addObservation()
 *    - "add a new observation": after a workflow task has finished, the
 *      observation result is collected in the MemoryPredictor
 *
 * 2) queryPrediction()
 *    - "ask for a suggestion": at any time, the MemoryPredictor can be
 *      asked what its guess is for the resource requirements of a task
*
* Different strategies can be tried and exchanged easily, they just have to
* implement those two interfaces. See ConstantPredictor and LinearPredictor
* for concrete strategies.
*
* @author Florian Friederici
*
*/
// @formatter:on
interface MemoryPredictor {

/**
 * feed an observation into the MemoryPredictor; it is used to learn the
 * memory usage of tasks and to create suggestions
*
* @param o the observation that was made
*/
void addObservation(Observation o);

/**
* ask the MemoryPredictor for a suggestion on how much memory should be
* assigned to the task.
*
 * @param task the task to get a suggestion for
 * @return null if no suggestion is possible, otherwise the value to be used
*/
String queryPrediction(Task task);

}
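The simplest strategy satisfying this contract is a predictor that never makes a suggestion (the commit history mentions a NonePredictor, which presumably behaves like this). In this sketch the scheduler's Task and Observation types are replaced by plain strings so it stands alone:

```java
// Minimal shape of a MemoryPredictor implementation. Ignoring every
// observation and always answering null is a valid strategy: null means
// "no suggestion", so the scheduler keeps the workflow's own requests.
public class NonePredictorSketch {

    public void addObservation(String observation) {
        // intentionally ignore all observations
    }

    public String queryPrediction(String taskName) {
        return null; // null means "no suggestion", per the interface contract
    }

    public static void main(String[] args) {
        NonePredictorSketch p = new NonePredictorSketch();
        p.addObservation("task=foo peakRss=1024");
        System.out.println(p.queryPrediction("foo")); // null
    }
}
```

Because callers must handle the null return anyway, a no-op predictor needs no special-casing in the scheduler: it simply never triggers a resize.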