feat: setup elasticsearch, kibana and ingest data
teleivo committed Nov 13, 2021
1 parent ffbdc5b commit 20950e2
Showing 6 changed files with 273 additions and 6 deletions.
9 changes: 9 additions & 0 deletions TODO.md
@@ -49,6 +49,15 @@ retryAfter: 16
- ingest data
- create a first visualization, for example: how did the "Run integration tests" step evolve over time?

## Elastic

- automate kibana index pattern creation? (see the curl sketch after this diff)
- and format the runtime field `duration` so it is human readable
- can I automatically index the steps array separately? Or at least not let ES
  merge the array into one single field?
- get rid of the security warning in kibana
  https://www.elastic.co/guide/en/elasticsearch/reference/7.15/security-minimal-setup.html

## Wishlist

- can I fetch the workflowId so the workflow can be passed by name in the CLI?
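
One way to tackle the "automate kibana index pattern creation" item above: Kibana 7.x ships a saved objects HTTP API that can create an index pattern from a script. A minimal sketch, assuming the Kibana instance from the compose file below and `started_at` as the time field for the steps index (pattern title, object id and time field are assumptions, not taken from this commit):

#!/bin/sh
# Sketch: create an index pattern for the steps index via Kibana's
# saved objects API; requires the kbn-xsrf header.
curl -X POST 'http://localhost:5601/api/saved_objects/index-pattern/steps' \
  -H 'kbn-xsrf: true' -H 'Content-Type: application/json' -d'
{
  "attributes": {
    "title": "steps*",
    "timeFieldName": "started_at"
  }
}
'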
41 changes: 41 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,41 @@
version: "3"
services:
  es01:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.15.1
    container_name: es01
    environment:
      - node.name=es01
      - cluster.name=es-docker-cluster
      - discovery.type=single-node
      - bootstrap.memory_lock=true
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - data01:/usr/share/elasticsearch/data
    ports:
      - 127.0.0.1:9200:9200
      - 127.0.0.1:9300:9300
    networks:
      - elastic
  kib01:
    image: docker.elastic.co/kibana/kibana:7.15.1
    container_name: kib01
    environment:
      ELASTICSEARCH_URL: http://es01:9200
      ELASTICSEARCH_HOSTS: http://es01:9200
    ports:
      - 127.0.0.1:5601:5601
    depends_on:
      - es01
    networks:
      - elastic

volumes:
  data01:
    driver: local

networks:
  elastic:
    driver: bridge
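
A quick smoke test for this setup, as a sketch (assumes the Docker Compose v2 CLI and the ports published above):

#!/bin/sh
docker compose up -d
# Elasticsearch is ready once the single-node cluster reports yellow or green.
curl -s 'http://localhost:9200/_cluster/health?pretty'
# Kibana takes longer to boot; its status endpoint returns 200 once it is up.
curl -s -o /dev/null -w '%{http_code}\n' 'http://localhost:5601/api/status'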
54 changes: 48 additions & 6 deletions package-lock.json

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions package.json
@@ -23,6 +23,7 @@
  },
  "homepage": "https://github.com/teleivo/github-action-metrics#readme",
  "dependencies": {
    "@elastic/elasticsearch": "^7.15.0",
    "commander": "^8.3.0",
    "octokit": "^1.7.0"
  },
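
For reference, the new client dependency above was presumably added with something like:

npm install '@elastic/elasticsearch@^7.15.0'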
146 changes: 146 additions & 0 deletions src/elastic.ts
@@ -0,0 +1,146 @@
import fs from "fs";
import path from "path";

import { Client } from "@elastic/elasticsearch";

// TODO expose this in the CLI
// gm ingest runs
// gm ingest jobs

// rename others into subcommands?
// gm fetch runs
// gm fetch jobs

// TODO ingest jobs
// note jobs have a total_count field with potentially multiple jobs per run.
// Do I want to ingest it as is? I feel like I would rather want them to be
// ingested separately so the job will have an appropriate mapping in ES as
// well :) each job already has the run_id as a field so I can tie it back
// together with its run :)
// Yields one workflow run document per JSON file found under
// <srcDir>/workflows/<workflowId>/runs/.
async function* generatorRuns(workflowId: number, srcDir: string) {
  const runsDir = path.join(srcDir, `/workflows/${workflowId}/runs/`);
  const dir = fs.opendirSync(runsDir);
  for await (const dirent of dir) {
    if (!dirent.isFile()) {
      continue;
    }
    // files are named <runId>.json, so the basename must parse as a number
    const runId = Number(path.parse(dirent.name).name);
    if (Number.isNaN(runId)) {
      console.log(`failed to parse runId from file ${dirent.name}`);
      continue;
    }
    const data = fs.readFileSync(path.join(runsDir, dirent.name), "utf8");
    yield JSON.parse(data);
  }
}

// Yields each job of every run found under
// <srcDir>/workflows/<workflowId>/jobs/. The jobs API response wraps the
// jobs in an array, so they are unwrapped here to index them one by one.
async function* generatorJobs(workflowId: number, srcDir: string) {
  const jobsDir = path.join(srcDir, `/workflows/${workflowId}/jobs/`);
  const dir = fs.opendirSync(jobsDir);
  for await (const dirent of dir) {
    if (!dirent.isFile()) {
      continue;
    }
    const runId = Number(path.parse(dirent.name).name);
    if (Number.isNaN(runId)) {
      console.log(`failed to parse runId from file ${dirent.name}`);
      continue;
    }
    const data = fs.readFileSync(path.join(jobsDir, dirent.name), "utf8");
    const jobs = JSON.parse(data);
    for (const job of jobs.jobs) {
      yield job;
    }
  }
}

// Yields each step of every job, flattened into its own document and
// enriched with enough job/run context to tie it back to its job and run.
async function* generatorSteps(workflowId: number, srcDir: string) {
  const jobsDir = path.join(srcDir, `/workflows/${workflowId}/jobs/`);
  const dir = fs.opendirSync(jobsDir);
  for await (const dirent of dir) {
    if (!dirent.isFile()) {
      continue;
    }
    const runId = Number(path.parse(dirent.name).name);
    if (Number.isNaN(runId)) {
      console.log(`failed to parse runId from file ${dirent.name}`);
      continue;
    }
    const data = fs.readFileSync(path.join(jobsDir, dirent.name), "utf8");
    const jobs = JSON.parse(data);
    for (const job of jobs.jobs) {
      for (const step of job.steps) {
        yield {
          ...step,
          job_id: job.id,
          job_name: job.name,
          run_id: job.run_id,
          run_url: job.run_url,
          // job.html_url points at the job, not the run, so label it as such
          job_html_url: job.html_url,
          run_attempt: job.run_attempt,
          head_sha: job.head_sha,
        };
      }
    }
  }
}

async function ingestRuns(node: string, workflowId: number, srcDir: string) {
  const client = new Client({ node });
  const result = await client.helpers.bulk({
    datasource: generatorRuns(workflowId, srcDir),
    // TODO add a type for the doc?
    onDocument(doc: any) {
      // use the run id as the ES _id so re-ingesting overwrites instead of duplicating
      return {
        index: { _index: "runs", _id: doc.id },
      };
    },
  });

  console.log(result);
}

async function ingestJobs(node: string, workflowId: number, srcDir: string) {
  const client = new Client({ node });
  const result = await client.helpers.bulk({
    datasource: generatorJobs(workflowId, srcDir),
    // TODO add a type for the doc?
    onDocument(doc: any) {
      // use the job id as the ES _id so re-ingesting overwrites instead of duplicating
      return {
        index: { _index: "jobs", _id: doc.id },
      };
    },
  });

  console.log(result);
}

async function ingestSteps(node: string, workflowId: number, srcDir: string) {
  const client = new Client({ node });
  const result = await client.helpers.bulk({
    datasource: generatorSteps(workflowId, srcDir),
    // TODO add a type for the doc?
    onDocument(doc: any) {
      // steps have no id of their own; job id plus step number is unique
      return {
        index: { _index: "steps", _id: `${doc.job_id}-${doc.number}` },
      };
    },
  });

  console.log(result);
}

// TODO expose node, workflowId and srcDir as CLI arguments (gm ingest ...)
const node = "http://localhost:9200";
const workflowId = 10954;
const srcDir = "/home/ivo/code/dhis2/dhis2-github-action-metrics/data";

(async () => {
  await ingestRuns(node, workflowId, srcDir);
  await ingestJobs(node, workflowId, srcDir);
  await ingestSteps(node, workflowId, srcDir);
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
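
After the script has run, a quick way to verify that the bulk helpers filled the three indices, assuming the local node hardcoded above:

#!/bin/sh
# Count the documents ingested into each index.
for index in runs jobs steps; do
  curl -s "http://localhost:9200/${index}/_count?pretty"
done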
28 changes: 28 additions & 0 deletions step_duration.sh
@@ -0,0 +1,28 @@
#!/bin/sh
curl -X PUT 'http://localhost:9200/steps/_mapping/?pretty' \
  -H 'Content-Type: application/json' -d'
{
  "runtime": {
    "duration": {
      "type": "long",
      "script": {
        "source": "emit(doc[\u0027completed_at\u0027].value.millis - doc[\u0027started_at\u0027].value.millis)"
      }
    }
  }
}
'

# Reference: runtime field example from the Elasticsearch docs
# curl -X PUT "localhost:9200/seats/_mapping?pretty" -H 'Content-Type: application/json' -d'
# {
#   "runtime": {
#     "day_of_week": {
#       "type": "keyword",
#       "script": {
#         "source": "emit(doc[\u0027datetime\u0027].value.getDayOfWeekEnum().toString())"
#       }
#     }
#   }
# }
# '
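
With the mapping above in place, `duration` behaves like a regular field at search time. A sketch of a query for the slowest "Run integration tests" steps (the step name is an assumption taken from the TODO):

#!/bin/sh
curl -X GET 'http://localhost:9200/steps/_search?pretty' \
  -H 'Content-Type: application/json' -d'
{
  "size": 3,
  "query": { "match": { "name": "Run integration tests" } },
  "fields": ["name", "duration"],
  "_source": false,
  "sort": [{ "duration": "desc" }]
}
'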
