Merge pull request #1 from bcgov/feature/DBC22-2113
DBC22-2113: Update logging process
wmuldergov authored May 10, 2024
2 parents a2eedf1 + e746007 commit 8b0f2d5
Showing 16 changed files with 621 additions and 26 deletions.
21 changes: 21 additions & 0 deletions compose/caching/openshiftjobs/DockerFile
@@ -0,0 +1,21 @@
FROM alpine:3
RUN apk update && apk upgrade
# Need goaccess 1.9.2 for a timezone fix. Once that version is in the regular branch, we can pull it from there.
RUN apk add goaccess --repository=https://dl-cdn.alpinelinux.org/alpine/edge/main

RUN apk add --no-cache \
    aws-cli \
    bash \
    coreutils \
    tzdata

COPY ./compose/openshiftjobs/entrypoint.sh /
COPY ./compose/openshiftjobs/scripts/analyzeexportlogs.sh /scripts/
COPY ./compose/openshiftjobs/scripts/ziplogs.sh /scripts/

RUN sed -i 's/\r$//g' /entrypoint.sh && chmod +x /entrypoint.sh
RUN sed -i 's/\r$//g' /scripts/analyzeexportlogs.sh && chmod +x /scripts/analyzeexportlogs.sh
RUN sed -i 's/\r$//g' /scripts/ziplogs.sh && chmod +x /scripts/ziplogs.sh


ENTRYPOINT ["/entrypoint.sh"]
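
A hypothetical local smoke test of this image (the tag and mounted directory are placeholders, and the build context may need adjusting so the COPY paths above resolve):

```bash
# Build from the repo and run the ziplogs job against a local logs directory
docker build -t openshiftjobs -f compose/caching/openshiftjobs/DockerFile .
docker run --rm -v "$(pwd)/logs:/logs" openshiftjobs ziplogs
```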
24 changes: 24 additions & 0 deletions compose/caching/openshiftjobs/entrypoint.sh
@@ -0,0 +1,24 @@
#!/bin/bash

# Check the number of arguments
if [ "$#" -lt 1 ]; then
    echo "Usage: $0 <script_name> [args...]"
    exit 1
fi

# Determine which script to run based on the first argument
case "$1" in
    ziplogs)
        # Run ziplogs.sh, which zips all files created in the previous hour or older in the nginx log storage PVC
        /scripts/ziplogs.sh
        ;;
    analyzeexportlogs)
        # Run analyzeexportlogs.sh with additional arguments; it sends the specified day's logs through goaccess and then uploads them to S3
        shift # Remove the first argument (script name)
        /scripts/analyzeexportlogs.sh "$@"
        ;;
    *)
        echo "Invalid script"
        exit 1
        ;;
esac
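
For reference, this is how the cron jobs below invoke the dispatcher; the container args become `"$@"` here (the last line is just a hypothetical backfill run):

```bash
/entrypoint.sh ziplogs                # gzip logs from the previous hour or older
/entrypoint.sh analyzeexportlogs 1    # analyze and export yesterday's logs (the default)
/entrypoint.sh analyzeexportlogs 3    # hypothetical backfill for logs from 3 days ago
```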
107 changes: 107 additions & 0 deletions compose/caching/openshiftjobs/scripts/analyzeexportlogs.sh
@@ -0,0 +1,107 @@
#!/bin/bash

# Directory where the logs are stored
log_dir="/logs"
cd $log_dir || { echo "Failed to change directory to $log_dir. Exiting..."; exit 1; }

# Initialize an empty array
zipped_files=()

# Get the number of days ago as a positional parameter
days_ago="$1"

# If no argument is provided, default to 1 day ago
if [ -z "$days_ago" ]; then
days_ago=1
fi

# Get the start and end time for $days_ago days ago in the America/Vancouver timezone
start_time=$(TZ=America/Vancouver date -d "$days_ago days ago 00:00")
end_time=$(TZ=America/Vancouver date -d "$days_ago days ago 23:59")

# Convert the start and end time to UTC
start_time_utc=$(date -u -d "$start_time" +%Y%m%d%H)
end_time_utc=$(date -u -d "$end_time" +%Y%m%d%H)

echo "Will analyze and archive logs between $start_time_utc and $end_time_utc"

# Loop through all files in the current directory ending with ".gz"
for file in *.gz; do
    # Check if the file exists and is a regular file
    if [[ -f $file ]]; then
        # Extract the date and time part from the filename (assuming UTC timezone)
        file_datetime_utc=$(echo "$file" | grep -oE '[0-9]{10}')
        # Check if the file date and time in UTC falls within days_ago's start and end time in UTC
        if [[ $file_datetime_utc -ge $start_time_utc && $file_datetime_utc -le $end_time_utc ]]; then
            zipped_files+=("$file")
        fi
    fi
done

# Print the elements of the array
echo "Log files that will be processed:"
printf '%s\n' "${zipped_files[@]}"

# Get the start date formatted for the goaccess report name
start_time_formatted=$(date -d "$start_time" +%Y%m%d)

# Run the following commands only if the zipped_files array is not empty
if [ ${#zipped_files[@]} -gt 0 ]; then
    # Define the file URL and destination directory to pull down the latest GeoIP data
    FILE_URL="https://download.db-ip.com/free/dbip-city-lite-$(date -u +'%Y-%m').mmdb.gz"
    OLD_FILE="dbip-city-lite-$(date -d 'last month' +'%Y-%m').mmdb"

    # Check if the file exists
    if [ ! -f "dbip-city-lite-$(date -u +'%Y-%m').mmdb" ]; then
        # If the file doesn't exist, download it
        echo "Downloading file..."
        if wget -q --spider "$FILE_URL"; then
            wget "$FILE_URL" -P "$log_dir"
            echo "Download complete."
            gzip -d "dbip-city-lite-$(date -u +'%Y-%m').mmdb.gz"
            # Delete the old file if it exists
            if [ -f "$OLD_FILE" ]; then
                echo "Deleting old file ($OLD_FILE)..."
                rm "$OLD_FILE"
                echo "Old file deleted."
            fi
        else
            echo "Failed to download file. URL is unreachable."
        fi
    else
        echo "MMDB file already exists."
    fi
    mmdb_file=$(find . -maxdepth 1 -type f -name "*.mmdb")

    # Run goaccess on all the log files from the date entered
    goaccess_report_name=$start_time_formatted-goaccess_report.html
    zcat "${zipped_files[@]}" | goaccess - -o "$goaccess_report_name" --log-format='%h %e %^[%x] "%r" %s %b "%R" "%u" %C "%M" %T' --datetime-format='%d/%b/%Y:%H:%M:%S %z' --ignore-panel=REMOTE_USER --ignore-panel=ASN --tz=America/Vancouver --jobs=2 --geoip-database="$mmdb_file"
    echo "GoAccess report generated successfully at $goaccess_report_name"

    # Get the start date formatted in YYYY/MM/DD format
    start_time_formatted_s3=$(date -d "$start_time" +"%Y/%m/%d")

    # Create the folder structure in the S3 bucket
    s3_path="s3://$AWS_BUCKET/$ENVIRONMENT/logs/$start_time_formatted_s3/"

    # Upload zipped files to S3
    for file in "${zipped_files[@]}"; do
        aws --endpoint-url "$AWS_ENDPOINT" s3 cp "$file" "$s3_path" || { echo "Failed to upload $file to S3. Exiting..."; exit 1; }
        echo "File $file copied to S3 bucket under $s3_path"
    done

    echo "All files copied to S3 bucket under $s3_path"

    # Upload the HTML report to S3
    aws --endpoint-url "$AWS_ENDPOINT" s3 cp "$goaccess_report_name" "$s3_path" || { echo "Failed to upload HTML report to S3. Exiting..."; exit 1; }
    echo "HTML report copied to S3 bucket under $s3_path"

    # Delete the zipped files and HTML report
    rm "${zipped_files[@]}" "$goaccess_report_name"

    echo "Zipped files and HTML report deleted from PVC successfully"

else
    echo "No files to process for $start_time_formatted"
fi
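
As a worked example of the selection window above (assuming the job runs during PDT, UTC-7, and that log filenames embed a YYYYMMDDHH hour stamp; the filename below is a placeholder):

```bash
# days_ago=1 run on 2024-05-10:
#   start_time_utc=2024050907   # 2024-05-09 00:00 America/Vancouver
#   end_time_utc=2024051006     # 2024-05-09 23:59 America/Vancouver
start_time_utc=2024050907; end_time_utc=2024051006
file="access-2024050918.log.gz"                          # hypothetical log file
file_datetime_utc=$(echo "$file" | grep -oE '[0-9]{10}') # -> 2024050918
[[ $file_datetime_utc -ge $start_time_utc && $file_datetime_utc -le $end_time_utc ]] \
  && echo "selected for analysis and upload"
```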
21 changes: 21 additions & 0 deletions compose/caching/openshiftjobs/scripts/ziplogs.sh
@@ -0,0 +1,21 @@
#!/bin/bash

# Calculate the hour one hour ago in UTC
previous_hour=$(TZ=UTC date -d '1 hour ago' +"%Y%m%d%H")
echo "Previous Hour in UTC: $previous_hour"

# Directory where the logs are stored
log_dir="./logs"

# Iterate over log files
find "$log_dir" -type f -name '*.log' | while read -r file; do
# Extract timestamp from filename
timestamp=$(echo "$file" | grep -oE '[0-9]{10}')

# Check if timestamp is less than or equal to previous hour
if [[ $timestamp -le $previous_hour ]]; then
# gzip the file
gzip "$file"
echo "File $file with timestamp $timestamp gzipped."
fi
done
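
A quick sketch of the comparison the loop relies on (the filename is a placeholder; the script only assumes a YYYYMMDDHH stamp somewhere in the name):

```bash
previous_hour=$(TZ=UTC date -d '1 hour ago' +"%Y%m%d%H")   # e.g. 2024051017
file="./logs/access-2024051016.log"                        # hypothetical log file
timestamp=$(echo "$file" | grep -oE '[0-9]{10}')           # -> 2024051016
[[ $timestamp -le $previous_hour ]] && echo "would gzip $file"
```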
80 changes: 60 additions & 20 deletions helm/README.md
@@ -1,30 +1,67 @@
# Image Caching Chart

A chart to provision an nginx image caching instance
A chart to provision an instance of an nginx proxy cache that caches content from images.drivebc.ca to reduce load on those servers. It also includes components to zip and process the logs before uploading them to S3 storage.

## Configuration

### Image Caching Options

| Parameter | Description | Default |
| ----------------------- | --------------------------------------- | -------------------------------------- |
| `fullnameOverride` | Instance Name if other than default | `image-caching` |
| `nameOverride` | Instance Name if other than default | `image-caching` |
| `replicaCount` | Amount of replicas to run | `1` |
| `repository` | Image Source | `ghcr.io/bcgov/drivebc-image-caching` |
| `tag` | Image Tag | `latest` |
| `CPU Request` | CPU Request Amount | `50` |
| `CPU Limit` | CPU Limit Amount | `250` |
| `Memory Request` | Memory Requests Amount | `50` |
| `Memory Limit` | Memory Limit Amount | `100` |
| `Autoscaling` | Autoscaling enabled? | `true` |
| `min replicas` | Minimum amount of replicas | `1` |
| `max replicas`          | Maximum amount of replicas               | `2`                                     |
| `networkPolicyRequired` | Do you require default network policies  | `false`                                 |
| `route enabled`         | Do you want it to create a route         | `true`                                  |
| `route host`            | What hostname do you want                | `drivebc.apps.silver.devops.gov.bc.ca`  |
| `iprestricted`          | Should it be IP Restricted?              | `false`                                 |
| `ipallowlist`           | What IPs are allowed to connect?         |                                         |
| Parameter | Description | Default |
| -------------------------------- | ----------------------------------------------------------------- | ------------------------ |
| fullnameOverride: | The full name override for the deployment | `drivebc-cache` |
| nameOverride: | The name override for the deployment | `drivebc-cache` |
| replicaCount: | The number of replicas for the deployment | `1` |
| image: | | |
| repository: | The repository containing the Docker image for the deployment | `ghcr.io/bcgov/drivebc.ca-caching` |
| tag: | The tag of the Docker image used for the deployment | `latest` |
| deployment: | | |
| resources: | | |
| requests: | The resource requests (CPU and Memory) for the deployment | CPU: `50m`, Memory: `50Mi` |
| limits: | The resource limits (CPU and Memory) for the deployment | CPU: `250m`, Memory: `100Mi`|
| env: | | |
| DRIVEBC_IMAGE_BASE_URL: | The base URL for images used by the deployment | `https://tst-images.drivebc.ca/` |
| autoscaling: | | |
| enabled: | Specifies whether autoscaling is enabled for the deployment | `true` |
| minReplicas: | The minimum number of replicas when autoscaling is enabled | `1` |
| maxReplicas: | The maximum number of replicas when autoscaling is enabled | `2` |
| targetCPUUtilizationPercentage: | The target CPU utilization percentage for autoscaling | `80` |
| networkPolicyRequired: | Set to true if you need to allow traffic between pods and internet ingress setup | `true` |
| route: | | |
| enabled: | Specifies whether the route is enabled | `true` |
| host: | The host for the route | `drivebc-cache.apps.silver.devops.gov.bc.ca` |
| iprestricted: | Set to true if you want to limit IPs in the ipallowlist | `false` |
| ipallowlist: | The list of allowed IP addresses | `142.34.53.0/24 142.22.0.0/15 142.24.0.0/13 142.32.0.0/13 208.181.128.46/32` |
| logpvc: | | |
| storage: | The storage size for logs | `1Gi` |
| cronjobs: | | |
| analyzeuploadlogs: | | |
| name: | The name of the cronjob | `analyzeuploadlogs` |
| schedule: | The cron schedule for the job (in UTC) | `0 9 * * *` |
| deployment: | | |
| resources: | | |
| requests: | The resource requests (CPU and Memory) for the job | CPU: `50m`, Memory: `1Gi` |
| limits: | The resource limits (CPU and Memory) for the job | CPU: `2000m`, Memory: `2Gi` |
| env: | | |
| s3Secret: | The secret for accessing the S3 bucket | `drivebc-cronjob-s3bucket` |
| environment: | The environment for the job | `dev` |
| volumes: | | |
| logs: | The volume mount for logs | `static-log-storage` |
| s3secret: | | |
| name: | The name of the S3 secret | `drivebc-cronjob-s3bucket` |
| access_key_id: | The access key ID for the S3 bucket (Do not commit to GitHub) | `""` |
| bucket: | The bucket name for the S3 bucket (Do not commit to GitHub) | `""` |
| endpoint: | The endpoint for the S3 bucket (Do not commit to GitHub) | `""` |
| secret_access_key: | The secret access key for the S3 bucket (Do not commit to GitHub) | `""` |
| ziplogs: | | |
| name: | The name of the cronjob | `ziplogs` |
| schedule: | The cron schedule for the job | `30 * * * *` |
| deployment: | | |
| resources: | | |
| requests: | The resource requests (CPU and Memory) for the job | CPU: `50m`, Memory: `100Mi` |
| limits: | The resource limits (CPU and Memory) for the job | CPU: `150m`, Memory: `200Mi`|
| volumes: | | |
| logs: | The volume mount for logs | `static-log-storage` |




## Components
@@ -34,3 +71,6 @@ A chart to provision an nginx image caching instance
- Deployment
- HPA
- Network Policy
- Cronjob
- Analyze Upload Logs
- Zip Logs
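
For completeness, a hypothetical install of the chart using the values documented above (release name, namespace, and secret values are placeholders; real credentials should come from a secure source rather than the command line):

```bash
helm upgrade --install drivebc-cache ./helm -n <namespace> \
  --set cronjobs.analyzeuploadlogs.s3secret.access_key_id='<key-id>' \
  --set cronjobs.analyzeuploadlogs.s3secret.secret_access_key='<secret>' \
  --set cronjobs.analyzeuploadlogs.s3secret.bucket='<bucket>' \
  --set cronjobs.analyzeuploadlogs.s3secret.endpoint='<endpoint>'
```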
57 changes: 57 additions & 0 deletions helm/templates/analyzeuploadlogs-cronjob.yaml
@@ -0,0 +1,57 @@
{{- $deploymentTag := .Values.image.tag | default .Chart.AppVersion -}}
{{- $deploymentTime := now | date "2006-01-02 15:04:05.000000" -}}

apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ template "app.fullname" . }}-{{ .Values.cronjobs.analyzeuploadlogs.name }}
  labels: {{ include "app.labels" . | nindent 4 }}

spec:
  schedule: {{ .Values.cronjobs.analyzeuploadlogs.schedule }}
  concurrencyPolicy: Replace
  suspend: false
  jobTemplate:
    spec:
      template:
        spec:
          volumes:
            - name: log-storage
              persistentVolumeClaim:
                claimName: {{ .Values.cronjobs.analyzeuploadlogs.deployment.volumes.logs }}
          containers:
            - resources:
                limits:
                  cpu: {{ .Values.cronjobs.analyzeuploadlogs.deployment.resources.limits.cpu }}
                  memory: {{ .Values.cronjobs.analyzeuploadlogs.deployment.resources.limits.memory }}
                requests:
                  cpu: {{ .Values.cronjobs.analyzeuploadlogs.deployment.resources.requests.cpu }}
                  memory: {{ .Values.cronjobs.analyzeuploadlogs.deployment.resources.requests.memory }}
              name: {{ include "app.fullname" . }}-{{ .Values.cronjobs.analyzeuploadlogs.name }}
              args:
                - "analyzeexportlogs"
                - "1"
              volumeMounts:
                - name: log-storage
                  mountPath: /logs
              env:
                - name: DEPLOYMENT_TAG
                  value: {{ $deploymentTag | quote }}
                - name: DEPLOYMENT_TIME
                  value: {{ $deploymentTime | quote }}
                - name: ENVIRONMENT
                  value: {{ .Values.cronjobs.analyzeuploadlogs.deployment.env.environment }}
              envFrom:
                - secretRef:
                    name: {{ .Values.cronjobs.analyzeuploadlogs.deployment.env.s3Secret }}
              imagePullPolicy: IfNotPresent
              image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
              securityContext:
                seccompProfile:
                  type: 'RuntimeDefault'
                capabilities:
                  drop:
                    - all
                  add:
                    - NET_BIND_SERVICE
          restartPolicy: Never
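
To exercise this job outside its schedule, one option (assuming the OpenShift CLI and the chart's default names; the job name and namespace are placeholders) is to spawn a one-off Job from the CronJob:

```bash
oc create job analyze-manual --from=cronjob/drivebc-cache-analyzeuploadlogs -n <namespace>
oc logs -f job/analyze-manual -n <namespace>
```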
14 changes: 14 additions & 0 deletions helm/templates/analyzeuploadlogs-secret.yaml
@@ -0,0 +1,14 @@
{{- if not (lookup "v1" "Secret" .Release.Namespace .Values.cronjobs.analyzeuploadlogs.s3secret.name) }}
apiVersion: v1
kind: Secret
metadata:
  name: {{ .Values.cronjobs.analyzeuploadlogs.s3secret.name }}
  annotations:
    "helm.sh/resource-policy": "keep"
type: Opaque
data:
  AWS_ACCESS_KEY_ID: {{ .Values.cronjobs.analyzeuploadlogs.s3secret.access_key_id | b64enc }}
  AWS_BUCKET: {{ .Values.cronjobs.analyzeuploadlogs.s3secret.bucket | b64enc }}
  AWS_ENDPOINT: {{ .Values.cronjobs.analyzeuploadlogs.s3secret.endpoint | b64enc }}
  AWS_SECRET_ACCESS_KEY: {{ .Values.cronjobs.analyzeuploadlogs.s3secret.secret_access_key | b64enc }}
{{- end }}
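
Because of the lookup guard above, the Secret is only templated when it does not already exist, so it can equally be pre-created out of band (the name follows the chart default; values are placeholders):

```bash
oc create secret generic drivebc-cronjob-s3bucket -n <namespace> \
  --from-literal=AWS_ACCESS_KEY_ID='<key-id>' \
  --from-literal=AWS_BUCKET='<bucket>' \
  --from-literal=AWS_ENDPOINT='<endpoint>' \
  --from-literal=AWS_SECRET_ACCESS_KEY='<secret>'
```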