Skip to content

Commit

Permalink
feat: add cleanup command to remove expired db entries
Browse files Browse the repository at this point in the history
  • Loading branch information
bjoern-m committed Jan 29, 2025
1 parent b8c4771 commit ba1f9db
Show file tree
Hide file tree
Showing 12 changed files with 321 additions and 26 deletions.
203 changes: 203 additions & 0 deletions backend/cmd/cleanup/cleanup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
package cleanup

import (
"fmt"
"github.com/spf13/cobra"
"github.com/teamhanko/hanko/backend/config"
"github.com/teamhanko/hanko/backend/persistence"
"github.com/teamhanko/hanko/backend/persistence/models"
"log"
"sort"
"strings"
"time"
)

// options holds user-provided CLI options
type options struct {
tables []string // List of tables to clean up
configFile string // Path to configuration file
pageSize int // The number of entities to query at once
run bool // Whether to execute cleanup or simulate
}

// handlerParam holds the necessary parameters for cleanup operations
type handlerParam struct {
table string
config *config.Config
storage persistence.Storage
options *options
}

// handlerFunc defines the function signature for cleanup handlers
type handlerFunc func(handlerParam) error

// Table names used for cleanup operations
const (
tableAuditLogs = "audit_logs"
tableFlows = "flows"
tableWebauthnSessionData = "webauthn_session_data"
)

// Map of table names to their respective cleanup handlers
var handler = map[string]handlerFunc{
tableFlows: func(param handlerParam) error {
return cleanup[models.Flow](param, param.storage.GetFlowPersister(), time.Now().UTC())
},
tableAuditLogs: func(param handlerParam) error {
duration, err := time.ParseDuration(param.config.AuditLog.Retention)
if err != nil {
return fmt.Errorf("failed to parse the retention duration: %w", err)
}

return cleanup[models.AuditLog](param, param.storage.GetAuditLogPersister(), time.Now().Add(-duration).UTC())
},
tableWebauthnSessionData: func(param handlerParam) error {
return cleanup[models.WebauthnSessionData](param, param.storage.GetWebauthnSessionDataPersister(), time.Now().UTC())
},
}

// allowedTables is a list of table names that can be cleaned up
var allowedTables = func() []string {
keys := make([]string, 0, len(handler))
for key := range handler {
keys = append(keys, key)
}

sort.Strings(keys)

return keys
}()

// isTableAllowed checks if a given table name exists in the allowed list
func isTableAllowed(table string) bool {
for _, allowed := range allowedTables {
if table == allowed {
return true
}
}
return false
}

// validateTables checks if the specified table names exist in the allowed list
func validateTables(tables []string) error {
var invalidTables []string

for _, table := range tables {
if !isTableAllowed(table) {
invalidTables = append(invalidTables, table)
}
}

if len(invalidTables) > 0 {
return fmt.Errorf("invalid table name(s): %s - allowed values: %s",
strings.Join(invalidTables, ", "), strings.Join(allowedTables, ", "))
}

return nil
}

// newCleanupCommand creates the Cobra command for database cleanup
func newCleanupCommand() *cobra.Command {
opts := &options{}

cmd := &cobra.Command{
Use: "cleanup",
Short: "Cleanup the database.",
Long: `Cleans up the database by deleting expired entities.`,
PreRunE: func(cmd *cobra.Command, args []string) error {
if len(opts.tables) == 0 {
opts.tables = allowedTables
return nil
}

return validateTables(opts.tables)
},
RunE: func(cmd *cobra.Command, args []string) error {
cfg, err := config.Load(&opts.configFile)
if err != nil {
log.Fatal(err)
}

storage, err := persistence.New(cfg.Database)
if err != nil {
log.Fatal(err)
}

log.Printf("Cleaning up table(s): %s...\n", strings.Join(opts.tables, ", "))

for _, table := range opts.tables {
param := handlerParam{
table: table,
config: cfg,
storage: storage,
options: opts,
}
err = handler[table](param)
if err != nil {
log.Fatal(err)
}
}

log.Println("Cleanup completed.")

if !opts.run {
log.Println("This was a dry-run; add --run to the command to really delete the data.")
}

return nil
},
}

cmd.Flags().StringVarP(&opts.configFile, "config", "c", config.DefaultConfigFilePath, "path to config file")
cmd.Flags().StringSliceVarP(&opts.tables, "tables", "t", []string{}, fmt.Sprintf("specify individual tables to clean up (comma-separated) - allowed values: %s", strings.Join(allowedTables, ", ")))
cmd.Flags().IntVarP(&opts.pageSize, "page-size", "s", 512, "the number of entities to query at once")
cmd.Flags().BoolVar(&opts.run, "run", false, "execute the cleanup process instead of simulating")

return cmd
}

// cleanup performs the cleanup operation for a given table and persister
func cleanup[T any](param handlerParam, persister persistence.Cleanup[T], cutoffTime time.Time) error {
var (
page = 1
deleted = 0
)

for {
items, err := persister.FindExpired(cutoffTime, page, param.options.pageSize)
if err != nil {
return err
}

if len(items) > 0 {
for _, item := range items {
if param.options.run {
err = persister.Delete(item)
if err != nil {
return err
}
}

deleted++
}

log.Printf("Deleted %d %s in total.", deleted, param.table)

if !param.options.run {
page++
}
}

if len(items) < param.options.pageSize {
break
}
}

return nil
}

// RegisterCommands registers the cleanup command with the parent command
func RegisterCommands(parent *cobra.Command) {
cmd := newCleanupCommand()
parent.AddCommand(cmd)
}
2 changes: 2 additions & 0 deletions backend/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package cmd

import (
"github.com/spf13/cobra"
"github.com/teamhanko/hanko/backend/cmd/cleanup"
"github.com/teamhanko/hanko/backend/cmd/isready"
"github.com/teamhanko/hanko/backend/cmd/jwk"
"github.com/teamhanko/hanko/backend/cmd/jwt"
Expand All @@ -31,6 +32,7 @@ func NewRootCmd() *cobra.Command {
user.RegisterCommands(cmd)
siwa.RegisterCommands(cmd)
schema.RegisterCommands(cmd)
cleanup.RegisterCommands(cmd)

return cmd
}
Expand Down
4 changes: 4 additions & 0 deletions backend/config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
audit_log:
storage:
enabled: false
retention: 720h
account:
allow_deletion: true
allow_signup: true
Expand Down
15 changes: 15 additions & 0 deletions backend/config/config_audit_log.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
package config

import (
"errors"
"time"
)

type AuditLog struct {
// `console_output` controls audit log console output.
ConsoleOutput AuditLogConsole `yaml:"console_output" json:"console_output,omitempty" koanf:"console_output" split_words:"true" jsonschema:"title=console_output"`
Expand All @@ -9,6 +14,16 @@ type AuditLog struct {
Mask bool `yaml:"mask" json:"mask,omitempty" koanf:"mask" jsonschema:"default=true"`
// `storage` controls audit log retention.
Storage AuditLogStorage `yaml:"storage" json:"storage,omitempty" koanf:"storage"`
// `retention` specifies the time duration after which log audit entries may be deleted.
Retention string `yaml:"retention" json:"retention,omitempty" koanf:"retention" jsonschema:"default=720h"`
}

func (al *AuditLog) Validate() error {
_, err := time.ParseDuration(al.Retention)
if err != nil {
return errors.New("failed to parse retention_duration")
}
return nil
}

type AuditLogStorage struct {
Expand Down
3 changes: 2 additions & 1 deletion backend/config/config_default.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ func DefaultConfig() *Config {
Enabled: true,
OutputStream: OutputStreamStdOut,
},
Mask: true,
Mask: true,
Retention: "720h",
},
Emails: Emails{
RequireVerification: true,
Expand Down
3 changes: 1 addition & 2 deletions backend/flow_api/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"github.com/teamhanko/hanko/backend/flowpilot"
"github.com/teamhanko/hanko/backend/mapper"
"github.com/teamhanko/hanko/backend/persistence"
"github.com/teamhanko/hanko/backend/persistence/models"
"github.com/teamhanko/hanko/backend/session"
"strconv"
"time"
Expand Down Expand Up @@ -156,7 +155,7 @@ func (h *FlowPilotHandler) executeFlow(c echo.Context, flow flowpilot.Flow) erro

flow.Set("deps", deps)

flowResult, err = flow.Execute(models.NewFlowDB(tx),
flowResult, err = flow.Execute(persistence.NewFlowPersister(tx),
flowpilot.WithQueryParamKey(queryParamKey),
flowpilot.WithQueryParamValue(c.QueryParam(queryParamKey)),
flowpilot.WithInputData(inputData),
Expand Down
5 changes: 5 additions & 0 deletions backend/json_schema/hanko.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@
"storage": {
"$ref": "#/$defs/AuditLogStorage",
"description": "`storage` controls audit log retention."
},
"retention": {
"type": "string",
"description": "`retention` specifies the time duration after which log audit entries may be deleted.",
"default": "720h"
}
},
"additionalProperties": false,
Expand Down
32 changes: 22 additions & 10 deletions backend/persistence/audit_log_persister.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ type AuditLogPersister interface {
Create(auditLog models.AuditLog) error
Get(id uuid.UUID) (*models.AuditLog, error)
List(page int, perPage int, startTime *time.Time, endTime *time.Time, types []string, userId string, email string, ip string, searchString string) ([]models.AuditLog, error)
Delete(auditLog models.AuditLog) error
Count(startTime *time.Time, endTime *time.Time, types []string, userId string, email string, ip string, searchString string) (int, error)
Cleanup[models.AuditLog]
}

type auditLogPersister struct {
Expand Down Expand Up @@ -69,15 +69,6 @@ func (p *auditLogPersister) List(page int, perPage int, startTime *time.Time, en
return auditLogs, nil
}

func (p *auditLogPersister) Delete(auditLog models.AuditLog) error {
err := p.db.Eager().Destroy(&auditLog)
if err != nil {
return fmt.Errorf("failed to delete auditlog: %w", err)
}

return nil
}

func (p *auditLogPersister) Count(startTime *time.Time, endTime *time.Time, types []string, userId string, email string, ip string, searchString string) (int, error) {
query := p.db.Q()
query = p.addQueryParamsToSqlQuery(query, startTime, endTime, types, userId, email, ip, searchString)
Expand Down Expand Up @@ -132,3 +123,24 @@ func (p *auditLogPersister) addQueryParamsToSqlQuery(query *pop.Query, startTime

return query
}

func (p *auditLogPersister) FindExpired(cutoffTime time.Time, page, perPage int) ([]models.AuditLog, error) {
var items []models.AuditLog

query := p.db.
Where("created_at < ?", cutoffTime).
Select("id").
Paginate(page, perPage)
err := query.All(&items)

return items, err
}

func (p *auditLogPersister) Delete(auditLog models.AuditLog) error {
err := p.db.Eager().Destroy(&auditLog)
if err != nil {
return fmt.Errorf("failed to delete auditlog: %w", err)
}

return nil
}
Loading

0 comments on commit ba1f9db

Please sign in to comment.