From 2a52baf4b031d5fcc30b9842a7b389341570c039 Mon Sep 17 00:00:00 2001
From: Ruslan Mstoi
Date: Sat, 28 Dec 2024 18:33:51 +0200
Subject: [PATCH] Add website monitor

---
 .github/workflows/python.yml |  36 +++++++
 .github/workflows/reuse.yml  |  17 +++
 LICENSES/MIT.txt             |  22 ++++
 README.md                    |  45 ++++++++
 config.yaml                  |  39 +++++++
 qacheck.sh                   |  13 +++
 requirements.txt             |   7 ++
 test_wsmon.py                |  32 ++++++
 wsmon.py                     | 203 +++++++++++++++++++++++++++++++++++
 9 files changed, 414 insertions(+)
 create mode 100644 .github/workflows/python.yml
 create mode 100644 .github/workflows/reuse.yml
 create mode 100644 LICENSES/MIT.txt
 create mode 100644 README.md
 create mode 100644 config.yaml
 create mode 100755 qacheck.sh
 create mode 100644 requirements.txt
 create mode 100644 test_wsmon.py
 create mode 100755 wsmon.py

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
new file mode 100644
index 0000000..494f790
--- /dev/null
+++ b/.github/workflows/python.yml
@@ -0,0 +1,36 @@
+# SPDX-FileCopyrightText: 2025 Ruslan Mstoi
+#
+# SPDX-License-Identifier: MIT
+
+name: Python Lint and Test
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.13
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.13'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8 pytest isort
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: Lint
+      run: |
+        flake8 . --count --show-source --statistics
+        isort --check-only --diff *.py
+    - name: Test with pytest
+      run: |
+        pytest
diff --git a/.github/workflows/reuse.yml b/.github/workflows/reuse.yml
new file mode 100644
index 0000000..bde3bba
--- /dev/null
+++ b/.github/workflows/reuse.yml
@@ -0,0 +1,17 @@
+# SPDX-FileCopyrightText: 2025 Ruslan Mstoi
+#
+# SPDX-License-Identifier: MIT
+
+name: REUSE Compliance Check
+
+on: [push, pull_request]
+
+jobs:
+  reuse-compliance-check:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v4
+
+    - name: REUSE Compliance Check
+      uses: fsfe/reuse-action@v5
diff --git a/LICENSES/MIT.txt b/LICENSES/MIT.txt
new file mode 100644
index 0000000..70fc593
--- /dev/null
+++ b/LICENSES/MIT.txt
@@ -0,0 +1,22 @@
+MIT License
+
+Copyright (c) 2025 Ruslan Mstoi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..890b0fe
--- /dev/null
+++ b/README.md
@@ -0,0 +1,45 @@
+
+
+A program that monitors the availability of many websites over the
+network, produces metrics about these and stores the metrics into a
+PostgreSQL database.
+
+# Install dependencies
+
+To install needed Python packages
+
+`python3 -m pip install -r requirements.txt`
+
+# Usage
+
+All of the default configuration settings are in `config.yaml` file. To
+use different configuration settings yaml file pass it as an argument of
+-c option.
+
+To change PostgreSQL database settings modify db.conninfo and
+db.table\_name attributes.
+
+To add new sites to monitor add them to the `sites` list. Set as needed
+check interval and regular expression pattern to check in the returned
+page.
+
+To start site monitor
+
+`./wsmon.py`
+
+Press Ctrl-C to stop `wsmon.py`
+
+## Site check failure
+
+If website check fails the error info will be saved to the database.
+Failing site will be checked again max\_retry times.
+
+## Unit tests
+
+Use `pytest` to run unit tests:
+
+`pytest .`
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..c323a22
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,39 @@
+# SPDX-FileCopyrightText: 2025 Ruslan Mstoi
+#
+# SPDX-License-Identifier: MIT
+
+# database related variables
+db:
+  # name of table used for site monitor results
+  table_name: "wsmon_results"
+
+  # connection info, parameter to psycopg.AsyncConnection.connect
+  conninfo:
+    dbname: "wsmon"
+    host: "localhost"
+    port: 5432
+    # "user": "user",
+    # "password": "password",
+
+# websites to monitor
+sites:
+  - url: "https://www.brokenwebsitedoesnotexist.com"
+    check_interval: 5
+  - url: "https://www.google.com"
+    check_interval: 10
+    pattern: ".*ogle"
+  - url: "http://python.org"
+    check_interval: 15
+    pattern: "\\d+"
+  - url: "https://www.yahoo.com"
+    check_interval: 30
+    pattern: "zzz"
+  - url: "https://www.github.com"
+    check_interval: 10
+  - url: "https://www.stackoverflow.com"
+    check_interval: 300
+  - url: "https://www.linkedin.com"
+    check_interval: 100
+
+# max number of consecutive retries of a failing website monitor
+max_retry: 2
diff --git a/qacheck.sh b/qacheck.sh
new file mode 100755
index 0000000..5f8b4bd
--- /dev/null
+++ b/qacheck.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+# SPDX-FileCopyrightText: 2025 Ruslan Mstoi
+#
+# SPDX-License-Identifier: MIT
+
+set -x
+
+files="wsmon.py test_wsmon.py"
+
+isort $files
+flake8 $files
+reuse lint
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ffba94c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+# SPDX-FileCopyrightText: 2025 Ruslan Mstoi
+#
+# SPDX-License-Identifier: MIT
+
+aiohttp
+psycopg
+pyyaml
diff --git a/test_wsmon.py b/test_wsmon.py
new file mode 100644
index 0000000..04eee75
--- /dev/null
+++ b/test_wsmon.py
@@ -0,0 +1,32 @@
+# SPDX-FileCopyrightText: 2025 Ruslan Mstoi
+#
+# SPDX-License-Identifier: MIT
+
+from wsmon import WSData
+
+# WSData default test arguments
+url = "http://testsite.io"
+check_interval = 111
+pattern = r'(.*) example'
+
+
+def test_wsdata_init():
+    site = WSData(url, check_interval, pattern)
+    assert site.url == url
+    assert site.check_interval == check_interval
+    assert site.pattern == pattern
+
+
+def test_wsdata_init_default():
+    """Test default arguments"""
+    site = WSData(url)
+    assert site.url == url
+    assert site.check_interval == 5
+    assert site.pattern is None
+
+
+def test_wsdata_str():
+    site = WSData(url, check_interval, pattern)
+    expect_str = ("url=%s check_interval=%s pattern=%r" %
+                  (url, check_interval, pattern))
+    assert str(site) == expect_str
diff --git a/wsmon.py b/wsmon.py
new file mode 100755
index 0000000..e3e5e5f
--- /dev/null
+++ b/wsmon.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+
+# SPDX-FileCopyrightText: 2025 Ruslan Mstoi
+#
+# SPDX-License-Identifier: MIT
+
+"""Website monitor
+
+A program that monitors the availability of many websites over the network,
+produces metrics about these and stores the metrics into a PostgreSQL database.
+
+"""
+
+import argparse
+import asyncio
+import re
+import sys
+from datetime import datetime
+
+import aiohttp
+import psycopg
+import yaml
+
+
+class WSData(object):
+    """Monitor website data attributes"""
+
+    # max number of consecutive retries of a failing website monitor
+    max_retry = 2
+
+    def __init__(self, url, check_interval=5, pattern=None):
+        """url -- the website url to monitor
+        check_interval -- Each URL should be checked periodically, with the
+                          ability to configure the interval (between 5 and 300
+                          seconds)
+        pattern -- Optional for checking the returned page contents for a regex
+                   pattern that is expected to be found on the page.
+
+        Raises ValueError if check_interval is outside of the allowed range.
+
+        """
+        interval_min = 5
+        interval_max = 300
+        # explicit check instead of assert: asserts are stripped by python -O
+        if not interval_min <= check_interval <= interval_max:
+            raise ValueError(
+                f"interval set to {check_interval} but should be in range of "
+                f"{interval_min} and {interval_max} seconds")
+
+        self.url = url
+        self.check_interval = check_interval
+        self.pattern = pattern
+
+    def __str__(self):
+        return ("url=%s check_interval=%s pattern=%r" %
+                (self.url, self.check_interval, self.pattern))
+
+
+class Database(object):
+    """Database related functions and variables"""
+
+    def __init__(self, conninfo, table_name):
+        """
+        conninfo -- connection info, parameter to
+                    psycopg.AsyncConnection.connect
+        table_name -- name of table used for site monitor results
+        """
+        self.conninfo = conninfo
+        self.table_name = table_name
+
+    async def create_table(self):
+        """Create a database table to store website monitor results."""
+        async with await psycopg.AsyncConnection.connect(**self.conninfo) \
+                as conn:
+            async with conn.cursor() as cursor:
+                await cursor.execute(
+                    f"""CREATE TABLE IF NOT EXISTS {self.table_name} (
+                    id SERIAL PRIMARY KEY,
+                    url TEXT NOT NULL,
+                    status_code INTEGER NOT NULL,
+                    request_time TIMESTAMP NOT NULL,
+                    response_time TIMESTAMP NOT NULL,
+                    time_diff FLOAT NOT NULL,
+                    regex_check TEXT NOT NULL
+                    );
+                    """)
+                await conn.commit()
+
+    async def insert(self, url, status_code, request_time, response_time,
+                     time_diff, regex_check):
+        """Insert row into a table"""
+        async with await psycopg.AsyncConnection.connect(**self.conninfo) \
+                as conn:
+            async with conn.cursor() as cursor:
+                await cursor.execute(
+                    f"""INSERT INTO {self.table_name} (
+                    url, status_code, request_time, response_time,
+                    time_diff, regex_check)
+                    VALUES (%s, %s, %s, %s, %s, %s);""",
+                    (url, status_code, request_time, response_time, time_diff,
+                     regex_check))
+                await conn.commit()
+
+
+def msg(text):
+    """Print text to stdout prefixed with the current local time"""
+    now = datetime.now().time().replace(microsecond=0).isoformat()
+    print(now + 4 * " " + text)
+
+
+async def monitor_website(session, site, db):
+    """Monitor the availability of website
+
+    Also produce metrics and store the metrics into a PostgreSQL database.
+
+    """
+    msg(f"Start {site}")
+
+    request_time = datetime.now()
+    async with session.get(site.url) as resp:
+        text = await resp.text()
+        status = resp.status
+    response_time = datetime.now()
+    time_diff = (response_time - request_time).total_seconds()
+
+    if not site.pattern:
+        regexp_check = "not used"
+    elif re.search(site.pattern, text):
+        regexp_check = f"{site.pattern} found"
+    else:
+        regexp_check = "no match found"
+
+    await db.insert(site.url, status, request_time, response_time,
+                    time_diff, regexp_check)
+
+    msg(f"End site={site} status={status} request_time={request_time} "
+        f"response_time={response_time} time_diff={time_diff} "
+        f"regex_check={regexp_check}")
+
+
+async def wrapper(session, site, db):
+    """Wrapper that handles retries and adds interval sleep after website
+    monitor
+
+    The retry count is reset after every successful check, so the monitor of
+    a site stops only after more than max_retry consecutive failures.
+
+    """
+    retry = 0
+    while True:
+        try:
+            await monitor_website(session, site, db)
+        except Exception as err:
+            msg(f"ERROR url={site} error={err!r}")
+            now = datetime.now()
+            await db.insert(site.url, 444, now, now, 0, "ERROR")
+            if retry == WSData.max_retry:
+                msg(f"ERROR too many retries {retry}, stop monitor url={site}")
+                break
+            retry += 1
+        else:
+            retry = 0
+        await asyncio.sleep(site.check_interval)
+
+
+def parse_args():
+    """Parse command line arguments and options"""
+
+    arg_parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
+
+    default_conf_file = "config.yaml"
+    arg_parser.add_argument("-c", "--conf-file",
+                            help="YAML config file. Default "
+                            f"{default_conf_file}",
+                            default=default_conf_file)
+    args = arg_parser.parse_args()
+
+    try:
+        with open(args.conf_file, 'r') as conf_file:
+            config = yaml.safe_load(conf_file)
+    except Exception as err:
+        print("Error reading config file:", err)
+        raise
+    return config
+
+
+async def main():
+    """Read the config and run a monitor for every configured site"""
+    config = parse_args()
+    db = Database(config["db"]["conninfo"], config["db"]["table_name"])
+    WSData.max_retry = config['max_retry']
+    sites = [WSData(**site) for site in config['sites']]
+
+    await db.create_table()
+    async with aiohttp.ClientSession() as session:
+        await asyncio.gather(*(wrapper(session, site, db) for site in sites))
+
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        sys.exit("Bye!")