Skip to content

Commit

Permalink
Bookmark cleanup (#1193)
Browse files Browse the repository at this point in the history
* reformatting

* reformatting

* adding thread bookmark cleanup

* Update cleanup.py

* Update cleanup.py

* Update packages/slycat/web/server/cleanup.py

Co-authored-by: Spurs20 <[email protected]>

* Update packages/slycat/web/server/cleanup.py

Co-authored-by: Spurs20 <[email protected]>

---------

Co-authored-by: Spurs20 <[email protected]>
  • Loading branch information
Mletter1 and Spurs20 authored Dec 6, 2024
1 parent 727ee35 commit 6a7b0a5
Show file tree
Hide file tree
Showing 3 changed files with 1,527 additions and 731 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,53 @@
import time

# Command-line options for the loader. Each option is registered exactly once:
# argparse rejects a second registration of the same option string.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--input-dir",
    default="/usr/src/slycat/slycat/docker/compose/slycat-compose/DB",
    help="Directory containing data dumped with slycat-dump.py.",
)
parser.add_argument(
    "--couchdb-database",
    default="slycat",
    help="CouchDB database. Default: %(default)s",
)
parser.add_argument(
    "--couchdb-host", default="couchdb", help="CouchDB host. Default: %(default)s"
)
parser.add_argument(
    "--couchdb-port", type=int, default=5984, help="CouchDB port. Default: %(default)s"
)
# NOTE(review): --port duplicates --couchdb-port but is kept as a string; it is
# the value concatenated into the server URL later in this script.
parser.add_argument(
    "--port", default="5984", help="CouchDB port. Default: %(default)s"
)
parser.add_argument(
    "--admin", default="admin", help="CouchDB admin user. Default: %(default)s"
)
parser.add_argument(
    "--password",
    default="password",
    help="CouchDB admin password. Default: %(default)s",
)
parser.add_argument(
    "--data-store",
    default="/var/lib/slycat/data-store",
    help="Path to the hdf5 data storage directory. Default: %(default)s",
)
parser.add_argument("--force", action="store_true", help="Overwrite existing data.")
parser.add_argument(
    "--marking",
    default=[],
    nargs="+",
    help="Use --marking='<source>:<target>' to map <source> markings to <target> markings. You may specify multiple maps, separated by whitespace.",
)
arguments = parser.parse_args()

# Log INFO and above through a stream handler, prefixing every record with the
# script name. The formatter is set on the handler created here rather than on
# handlers[0], which would be some other handler if one were already installed.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
log_handler = logging.StreamHandler()
log_handler.setFormatter(
    logging.Formatter("{} - %(levelname)s - %(message)s".format(sys.argv[0]))
)
root_logger.addHandler(log_handler)

# Sanity check input arguments ...
markings = [marking.split(":") for marking in arguments.marking]
Expand All @@ -41,20 +70,20 @@
# assuming CouchDB initialization from local process to local server
creds = ""
if arguments.admin != "":
    creds = arguments.admin + ":" + arguments.password + "@"

serverURL = "http://" + creds + arguments.couchdb_host + ":" + arguments.port + "/"
# NOTE(review): this logs the admin password embedded in the URL - consider redacting.
logging.error("couch serverURL:%s" % serverURL)

# Retry every two seconds until the server answers and the database can be opened.
while True:
    try:
        couchdb_server = couchdb.Server(serverURL)
        # Calling version() forces a round trip, so an unreachable server fails here.
        version = couchdb_server.version()
        # Rebinds the name `couchdb` from the module to the database object; this
        # only happens on success, immediately before leaving the loop.
        couchdb = couchdb_server[arguments.couchdb_database]
        break
    except Exception as e:
        logging.error("Waiting for couchdb for data load.")
        # Log the exception itself: generic exceptions have no `.msg` attribute,
        # so reading it here would raise AttributeError and abort the retry loop.
        logging.error(e)
        time.sleep(2)


# --host couchdb --admin admin --password password
Expand Down Expand Up @@ -122,5 +151,8 @@
del couchdb[reference["_id"]]
couchdb.save(reference)
logging.info("Loading references Done")

except Exception:
logging.error("Not loading data resource conflict encountered data is probably already loaded")
logging.error(
"Not loading data resource conflict encountered data is probably already loaded"
)
173 changes: 129 additions & 44 deletions packages/slycat/web/server/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,66 +12,151 @@
import time
import sys


def _array_cleanup_worker():
    """Run one array cleanup pass for every request placed on ``arrays.queue``.

    Each pass walks the ``slycat/hdf5-file-counts`` view (grouped) and, for every
    row whose value is 0, deletes the HDF5 file and then the matching database
    document. If anything in the pass raises, the pass is retried after two
    seconds until it completes. Never returns.
    """
    cherrypy.log.error("Started array cleanup worker.")
    while True:
        # Block until a cleanup pass is requested.
        arrays.queue.get()
        while True:
            try:
                database = slycat.web.server.database.couchdb.connect()
                for row in database.view("slycat/hdf5-file-counts", group=True):
                    if row.value == 0:
                        slycat.web.server.hdf5.delete(row.key)
                        database.delete(database[row.key])
                break
            except Exception as e:
                # Include the cause so failures other than a CouchDB outage are visible.
                cherrypy.log.error(
                    "Array cleanup worker waiting for couchdb. %s" % str(e)
                )
                time.sleep(2)


# Daemon thread, so it never blocks interpreter shutdown; started by start().
_array_cleanup_worker.thread = threading.Thread(
    name="array-cleanup", target=_array_cleanup_worker, daemon=True
)


def _login_session_cleanup_worker():
    """Delete expired login sessions from the database every 15 minutes.

    A session is deleted when its ``created`` value compares less than the
    cutoff, which is the current UTC time minus the configured
    ``session-timeout``, rendered as an ISO-8601 string. If a pass raises, the
    error is logged and the pass is retried after two seconds. Never returns.
    """
    cherrypy.log.error("Started login session cleanup worker.")
    while True:
        try:
            database = slycat.web.server.database.couchdb.connect()
            cherrypy.log.error("Login session cleanup worker running.")
            # NOTE(review): the comparison below is a plain string comparison;
            # it assumes "created" is stored as an ISO-8601 UTC string - confirm.
            cutoff = (
                datetime.datetime.now(datetime.timezone.utc)
                - cherrypy.request.app.config["slycat"]["session-timeout"]
            ).isoformat()
            for session in database.view("slycat/sessions", include_docs=True):
                if session.doc["created"] < cutoff:
                    database.delete(session.doc)
            cherrypy.log.error("Login session cleanup worker finished.")
            time.sleep(datetime.timedelta(minutes=15).total_seconds())
        except Exception as e:
            cherrypy.log.error(
                "Login session cleanup worker waiting for couchdb. %s" % str(e)
            )
            time.sleep(2)


# Daemon thread, so it never blocks interpreter shutdown; started by start().
_login_session_cleanup_worker.thread = threading.Thread(
    name="session-cleanup", target=_login_session_cleanup_worker, daemon=True
)


def _cache_cleanup_worker():
    """Call ``cache_it.clean()`` every 15 minutes. Never returns.

    The first call happens 15 minutes after the thread starts, because the
    sleep precedes the call.
    """
    # Imported inside the function rather than at module level.
    # NOTE(review): presumably to avoid an import cycle with slycat.web.server - confirm.
    import cherrypy
    from slycat.web.server import cache_it

    cherrypy.log.error("Started server cache cleanup worker.")
    while True:
        time.sleep(datetime.timedelta(minutes=15).total_seconds())
        cache_it.clean()


# Daemon thread, so it never blocks interpreter shutdown; started by start().
_cache_cleanup_worker.thread = threading.Thread(
    name="cache-cleanup", target=_cache_cleanup_worker, daemon=True
)


def _bookmark_cleanup_worker():
    """
    Once a day, delete bookmarks that are unreferenced and stale.

    A bookmark is kept if its ``_id`` appears as the ``bid`` of any document
    returned by scanning ``slycat/references``. Every other bookmark is deleted
    when it has no ``last_accessed`` field, or when ``last_accessed`` compares
    less than the cutoff. The cutoff is the current UTC time minus the
    ``bookmark-expiration`` setting (56 weeks when the setting is absent),
    rendered as an ISO-8601 string.

    A failed pass is logged and the worker waits for the next daily run, so a
    single error does not end the thread. Never returns.
    """
    # on a thread so we need to import this
    import cherrypy

    cherrypy.log.error("Started bookmark cleanup worker.")
    while True:
        # set the run frequency
        time.sleep(datetime.timedelta(days=1).total_seconds())
        try:
            server_config = cherrypy.request.app.config["slycat-web-server"]
            if "bookmark-expiration" in server_config:
                expiration = server_config["bookmark-expiration"]
            else:
                expiration = datetime.timedelta(weeks=56)
            cutoff = (
                datetime.datetime.now(datetime.timezone.utc) - expiration
            ).isoformat()

            # lets make this a locked operation
            with slycat.web.server.database.couchdb.db_lock:
                database = slycat.web.server.database.couchdb.connect()
                # Ids of bookmarks that must not be deleted; a set keeps the
                # membership test below constant-time per bookmark.
                referenced_ids = {
                    reference["bid"]
                    for reference in database.scan("slycat/references")
                    if "bid" in reference
                }
                # Materialize the candidates before deleting anything, so
                # deletions never happen while the scan is still in progress.
                unreferenced_bookmarks = [
                    bookmark
                    for bookmark in database.scan("slycat/project-bookmarks")
                    if bookmark["_id"] not in referenced_ids
                ]
                count = 0
                for bookmark in unreferenced_bookmarks:
                    # no last_accessed field means its way too old so lets delete it
                    if (
                        "last_accessed" not in bookmark
                        or bookmark["last_accessed"] < cutoff
                    ):
                        database.delete(bookmark)
                        count += 1
            if count > 0:
                cherrypy.log.error(
                    "[BOOKMARK-CLEANUP] bookmark-cleanup thread deleted %s bookmarks"
                    % str(count)
                )
        except Exception as e:
            # Keep the daemon alive; the next attempt is the next daily run.
            cherrypy.log.error(
                "[BOOKMARK-CLEANUP] bookmark-cleanup pass failed. %s" % str(e)
            )


# Daemon thread, so it never blocks interpreter shutdown; started by start().
_bookmark_cleanup_worker.thread = threading.Thread(
    name="bookmark-cleanup", target=_bookmark_cleanup_worker, daemon=True
)


def start():
    """Called to start all of the cleanup worker threads."""
    # Start order: array, login session, cache, bookmark.
    for worker in (
        _array_cleanup_worker,
        _login_session_cleanup_worker,
        _cache_cleanup_worker,
        _bookmark_cleanup_worker,
    ):
        worker.thread.start()


def arrays():
    """Request a cleanup pass for unused arrays."""
    arrays.queue.put("cleanup")


# Request queue consumed by the array cleanup worker. One request is queued at
# import time, so the worker runs a pass as soon as it is started.
arrays.queue = Queue()
arrays.queue.put("cleanup")

Loading

0 comments on commit 6a7b0a5

Please sign in to comment.