Skip to content

Commit

Permalink
feat: Add Prometheus metrics support to PingPanda for enhanced monitoring
Browse files Browse the repository at this point in the history
  • Loading branch information
KingPin committed Mar 4, 2025
1 parent ac6af3b commit 6c60f4b
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 4 deletions.
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,8 @@ COPY pingpanda.py .
# Make the script executable
RUN chmod +x pingpanda.py

# Expose the Prometheus metrics port (PROMETHEUS_PORT defaults to 9090)
EXPOSE 9090

# Set the entrypoint to run the Python script
ENTRYPOINT ["python", "/app/pingpanda.py"]
4 changes: 4 additions & 0 deletions compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ services:
- DISCORD_WEBHOOK_URL=
- ALERT_THRESHOLD=3
- NOTIFY_RECOVERY=true

# Prometheus settings
- ENABLE_PROMETHEUS=true
- PROMETHEUS_PORT=9090
volumes:
- ./logs:/logs
restart: unless-stopped
Expand Down
59 changes: 56 additions & 3 deletions pingpanda.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import pythonping
import requests
from slack_sdk import WebClient
from prometheus_client import start_http_server, Gauge, Counter, Summary


class PingPanda:
Expand All @@ -27,6 +28,7 @@ def __init__(self, config: Optional[Dict[str, Union[str, int, bool]]] = None):
self._setup_logging()
self._load_config()
self._initialize_status_tracking()
self._setup_prometheus() # Add this line
self.logger.info(f"PingPanda started on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

def _setup_logging(self):
Expand Down Expand Up @@ -85,6 +87,39 @@ def _load_config(self):

# Initialize Slack client if webhook URL is provided
self.slack_client = WebClient(token=self.slack_webhook_url) if self.slack_webhook_url else None

# Prometheus settings: exporter on/off switch and listening port
self.enable_prometheus = self.config.get("ENABLE_PROMETHEUS", "false").lower() == "true"
self.prometheus_port = int(self.config.get("PROMETHEUS_PORT", "9090"))

def _setup_prometheus(self):
    """Register the Prometheus metrics and start the exporter, if enabled.

    Returns immediately when ``self.enable_prometheus`` is false. Otherwise
    every metric listed in the table below is created with a single label
    and stored on the instance under the given attribute name, after which
    the metrics HTTP server is started on ``self.prometheus_port``.
    """
    if not self.enable_prometheus:
        return

    # One row per metric:
    #   (instance attribute, metric class, metric name, help text, label).
    # Rows are kept in the original creation order so metrics are
    # registered in the same sequence as before.
    metric_specs = (
        # Status gauges (1 = OK, 0 = error)
        ("dns_status", Gauge, 'pingpanda_dns_status', 'DNS resolution status', 'domain'),
        ("ping_status", Gauge, 'pingpanda_ping_status', 'Ping status', 'target'),
        ("website_status", Gauge, 'pingpanda_website_status', 'Website check status', 'url'),
        ("ssl_status", Gauge, 'pingpanda_ssl_status', 'SSL certificate status', 'domain'),
        # Response time summaries
        ("dns_response_time", Summary, 'pingpanda_dns_response_seconds', 'DNS resolution time', 'domain'),
        ("ping_response_time", Summary, 'pingpanda_ping_response_seconds', 'Ping response time', 'target'),
        ("website_response_time", Summary, 'pingpanda_website_response_seconds', 'Website response time', 'url'),
        # SSL certificate lifetime
        ("ssl_days_remaining", Gauge, 'pingpanda_ssl_days_remaining', 'Days until SSL certificate expiry', 'domain'),
        # Error counters
        ("dns_errors", Counter, 'pingpanda_dns_errors_total', 'Total DNS resolution errors', 'domain'),
        ("ping_errors", Counter, 'pingpanda_ping_errors_total', 'Total ping errors', 'target'),
        ("website_errors", Counter, 'pingpanda_website_errors_total', 'Total website check errors', 'url'),
        ("ssl_errors", Counter, 'pingpanda_ssl_errors_total', 'Total SSL check errors', 'domain'),
    )
    for attr_name, metric_cls, metric_name, help_text, label_name in metric_specs:
        setattr(self, attr_name, metric_cls(metric_name, help_text, [label_name]))

    # Serve the registered metrics over HTTP on the configured port.
    port = self.prometheus_port
    start_http_server(port)
    self.logger.info(f"Prometheus metrics server started on port {port}")

def _initialize_status_tracking(self):
"""Initialize status tracking for alert thresholds and recovery notifications."""
Expand Down Expand Up @@ -218,10 +253,18 @@ def check_dns(self):
try:
socket.gethostbyname(domain)
end_time = time.perf_counter()
duration = (end_time - start_time) * 1000 # Convert to milliseconds
self.logger.info(f"DNS Resolution for {domain}: PASS (Time: {duration:.2f}ms)")
duration = end_time - start_time # In seconds for Prometheus
duration_ms = duration * 1000 # In milliseconds for logging

self.logger.info(f"DNS Resolution for {domain}: PASS (Time: {duration_ms:.2f}ms)")

# Update Prometheus metrics
if self.enable_prometheus:
self.dns_status.labels(domain=domain).set(1) # 1 = OK
self.dns_response_time.labels(domain=domain).observe(duration)

self.send_notification(
f"DNS resolution successful in {duration:.2f}ms",
f"DNS resolution successful in {duration_ms:.2f}ms",
status="ok",
check_type="DNS",
target=domain
Expand All @@ -235,6 +278,12 @@ def check_dns(self):

if not success:
self.logger.error(f"DNS Resolution for {domain}: FAIL")

# Update Prometheus metrics for failure
if self.enable_prometheus:
self.dns_status.labels(domain=domain).set(0) # 0 = ERROR
self.dns_errors.labels(domain=domain).inc()

self.send_notification(
f"Failed to resolve domain after {self.retry_count} attempts",
status="error",
Expand Down Expand Up @@ -346,6 +395,10 @@ def check_ssl_expiry(self):
)
days_left = (expiry_date - datetime.now()).days

# Update Prometheus metrics
if self.enable_prometheus:
self.ssl_days_remaining.labels(domain=domain).set(days_left)

if days_left <= self.ssl_critical_days:
self.logger.error(
f"SSL certificate for {domain} critically expiring in {days_left} days"
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pythonping>=1.1.4
requests>=2.32.3
slack-sdk>=3.34.0
slack-sdk>=3.34.0
prometheus-client>=0.17.0

0 comments on commit 6c60f4b

Please sign in to comment.