feat: setup elasticsearch, kibana and ingest data
teleivo committed Nov 13, 2021
1 parent ffbdc5b commit 20950e2
Showing 6 changed files with 273 additions and 6 deletions.
9 changes: 9 additions & 0 deletions TODO.md
@@ -49,6 +49,15 @@ retryAfter: 16
- ingest data
- create a first visualization, for example: how did the "Run integration tests" step evolve over time?

## Elastic

- automate kibana index pattern creation? (see the curl sketch after this diff)
- and format the runtime field `duration` so it is human readable
- can I automatically index the steps array separately? Or at least not let ES
  merge the array into one single field?
- get rid of the security warning in kibana
  https://www.elastic.co/guide/en/elasticsearch/reference/7.15/security-minimal-setup.html

## Wishlist

- can I fetch the workflowId so the workflow can be passed by name in the CLI?
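
One way to tackle the "automate kibana index pattern creation" item above: Kibana 7.x ships a saved objects HTTP API that can create an index pattern from a script. A minimal sketch, assuming the Kibana instance from the compose file below and `started_at` as the time field for the steps index (pattern title, object id and time field are assumptions, not taken from this commit):

#!/bin/sh
# Sketch: create an index pattern for the steps index via Kibana's
# saved objects API; requires the kbn-xsrf header.
curl -X POST 'http://localhost:5601/api/saved_objects/index-pattern/steps' \
  -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d'
{
  "attributes": {
    "title": "steps*",
    "timeFieldName": "started_at"
  }
}
'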
41 changes: 41 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,41 @@
version: "3"
services:
  es01:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.15.1
    container_name: es01
    environment:
      - node.name=es01
      - cluster.name=es-docker-cluster
      - discovery.type=single-node
      - bootstrap.memory_lock=true
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - data01:/usr/share/elasticsearch/data
    ports:
      - 127.0.0.1:9200:9200
      - 127.0.0.1:9300:9300
    networks:
      - elastic
  kib01:
    image: docker.elastic.co/kibana/kibana:7.15.1
    container_name: kib01
    environment:
      ELASTICSEARCH_URL: http://es01:9200
      ELASTICSEARCH_HOSTS: http://es01:9200
    ports:
      - 127.0.0.1:5601:5601
    depends_on:
      - es01
    networks:
      - elastic

volumes:
  data01:
    driver: local

networks:
  elastic:
    driver: bridge
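
A quick smoke test for this setup, as a sketch (assumes the Docker Compose v2 CLI and the ports published above):

#!/bin/sh
docker compose up -d
# Elasticsearch is ready once the single-node cluster reports yellow or green.
curl -s 'http://localhost:9200/_cluster/health?pretty'
# Kibana takes longer to boot; its status endpoint returns 200 once it is up.
curl -s -o /dev/null -w '%{http_code}\n' 'http://localhost:5601/api/status'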
54 changes: 48 additions & 6 deletions package-lock.json

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions package.json
@@ -23,6 +23,7 @@
  },
  "homepage": "https://github.com/teleivo/github-action-metrics#readme",
  "dependencies": {
    "@elastic/elasticsearch": "^7.15.0",
    "commander": "^8.3.0",
    "octokit": "^1.7.0"
  },
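
For reference, the new client dependency above was presumably added with something like:

npm install '@elastic/elasticsearch@^7.15.0'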
146 changes: 146 additions & 0 deletions src/elastic.ts
@@ -0,0 +1,146 @@
import fs from "fs";
import path from "path";

import { Client } from "@elastic/elasticsearch";

// TODO expose this in the CLI
// gm ingest runs
// gm ingest jobs

// rename others into subcommands?
// gm fetch runs
// gm fetch jobs

// TODO ingest jobs
// note jobs have a total_count field with potentially multiple jobs per run.
// Do I want to ingest it as is? I feel like I would rather want them to be
// ingested separately so the job will have an appropriate mapping in ES as
// well :) each job already has the run_id as a field so I can tie it back
// together with its run :)
// Yields one workflow run document per JSON file found under
// <srcDir>/workflows/<workflowId>/runs/.
async function* generatorRuns(workflowId: number, srcDir: string) {
  const runsDir = path.join(srcDir, `/workflows/${workflowId}/runs/`);
  const dir = fs.opendirSync(runsDir);
  for await (const dirent of dir) {
    if (!dirent.isFile()) {
      continue;
    }
    // files are named <runId>.json, so the basename must parse as a number
    const runId = Number(path.parse(dirent.name).name);
    if (Number.isNaN(runId)) {
      console.log(`failed to parse runId from file ${dirent.name}`);
      continue;
    }
    const data = fs.readFileSync(path.join(runsDir, dirent.name), "utf8");
    yield JSON.parse(data);
  }
}

// Yields each job of every run found under
// <srcDir>/workflows/<workflowId>/jobs/. The jobs API response wraps the
// jobs in an array, so they are unwrapped here to index them one by one.
async function* generatorJobs(workflowId: number, srcDir: string) {
  const jobsDir = path.join(srcDir, `/workflows/${workflowId}/jobs/`);
  const dir = fs.opendirSync(jobsDir);
  for await (const dirent of dir) {
    if (!dirent.isFile()) {
      continue;
    }
    const runId = Number(path.parse(dirent.name).name);
    if (Number.isNaN(runId)) {
      console.log(`failed to parse runId from file ${dirent.name}`);
      continue;
    }
    const data = fs.readFileSync(path.join(jobsDir, dirent.name), "utf8");
    const jobs = JSON.parse(data);
    for (const job of jobs.jobs) {
      yield job;
    }
  }
}

// Yields each step of every job, flattened into its own document and
// enriched with enough job/run context to tie it back to its job and run.
async function* generatorSteps(workflowId: number, srcDir: string) {
  const jobsDir = path.join(srcDir, `/workflows/${workflowId}/jobs/`);
  const dir = fs.opendirSync(jobsDir);
  for await (const dirent of dir) {
    if (!dirent.isFile()) {
      continue;
    }
    const runId = Number(path.parse(dirent.name).name);
    if (Number.isNaN(runId)) {
      console.log(`failed to parse runId from file ${dirent.name}`);
      continue;
    }
    const data = fs.readFileSync(path.join(jobsDir, dirent.name), "utf8");
    const jobs = JSON.parse(data);
    for (const job of jobs.jobs) {
      for (const step of job.steps) {
        yield {
          ...step,
          job_id: job.id,
          job_name: job.name,
          run_id: job.run_id,
          run_url: job.run_url,
          // job.html_url points at the job, not the run, so label it as such
          job_html_url: job.html_url,
          run_attempt: job.run_attempt,
          head_sha: job.head_sha,
        };
      }
    }
  }
}

async function ingestRuns(node: string, workflowId: number, srcDir: string) {
  const client = new Client({ node });
  const result = await client.helpers.bulk({
    datasource: generatorRuns(workflowId, srcDir),
    // TODO add a type for the doc?
    onDocument(doc: any) {
      // use the run id as the ES _id so re-ingesting overwrites instead of duplicating
      return {
        index: { _index: "runs", _id: doc.id },
      };
    },
  });

  console.log(result);
}

async function ingestJobs(node: string, workflowId: number, srcDir: string) {
  const client = new Client({ node });
  const result = await client.helpers.bulk({
    datasource: generatorJobs(workflowId, srcDir),
    // TODO add a type for the doc?
    onDocument(doc: any) {
      // use the job id as the ES _id so re-ingesting overwrites instead of duplicating
      return {
        index: { _index: "jobs", _id: doc.id },
      };
    },
  });

  console.log(result);
}

async function ingestSteps(node: string, workflowId: number, srcDir: string) {
  const client = new Client({ node });
  const result = await client.helpers.bulk({
    datasource: generatorSteps(workflowId, srcDir),
    // TODO add a type for the doc?
    onDocument(doc: any) {
      // steps have no id of their own; job id plus step number is unique
      return {
        index: { _index: "steps", _id: `${doc.job_id}-${doc.number}` },
      };
    },
  });

  console.log(result);
}

// TODO expose node, workflowId and srcDir as CLI arguments (gm ingest ...)
const node = "http://localhost:9200";
const workflowId = 10954;
const srcDir = "/home/ivo/code/dhis2/dhis2-github-action-metrics/data";

(async () => {
  await ingestRuns(node, workflowId, srcDir);
  await ingestJobs(node, workflowId, srcDir);
  await ingestSteps(node, workflowId, srcDir);
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
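
After the script has run, a quick way to verify that the bulk helpers filled the three indices, assuming the local node hardcoded above:

#!/bin/sh
# Count the documents ingested into each index.
for index in runs jobs steps; do
  curl -s "http://localhost:9200/${index}/_count?pretty"
done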
28 changes: 28 additions & 0 deletions step_duration.sh
@@ -0,0 +1,28 @@
#!/bin/sh
curl -X PUT 'http://localhost:9200/steps/_mapping/?pretty' \
  -H 'Content-Type: application/json' -d'
{
  "runtime": {
    "duration": {
      "type": "long",
      "script": {
        "source": "emit(doc[\u0027completed_at\u0027].value.millis - doc[\u0027started_at\u0027].value.millis)"
      }
    }
  }
}
'

# Reference: runtime field example from the Elasticsearch docs
# curl -X PUT "localhost:9200/seats/_mapping?pretty" -H 'Content-Type: application/json' -d'
# {
#   "runtime": {
#     "day_of_week": {
#       "type": "keyword",
#       "script": {
#         "source": "emit(doc[\u0027datetime\u0027].value.getDayOfWeekEnum().toString())"
#       }
#     }
#   }
# }
# '
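
With the mapping above in place, `duration` behaves like a regular field at search time. A sketch of a query for the slowest "Run integration tests" steps (the step name is an assumption taken from the TODO):

#!/bin/sh
curl -X GET 'http://localhost:9200/steps/_search?pretty' \
  -H 'Content-Type: application/json' -d'
{
  "size": 3,
  "query": { "match": { "name": "Run integration tests" } },
  "fields": ["name", "duration"],
  "_source": false,
  "sort": [{ "duration": "desc" }]
}
'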
