Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AzureSQL short term retention policies #1355

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions api/v1beta1/azuresqldatabase_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,13 @@ type SqlDatabaseSku struct {
Capacity *int32 `json:"capacity,omitempty"`
}

// SQLDatabaseShortTermRetentionPolicy describes the short term backup retention
// policy of a SQL database.
type SQLDatabaseShortTermRetentionPolicy struct {
// RetentionDays is the backup retention period in days. This is how many days
// Point-in-Time Restore will be supported.
// +kubebuilder:validation:Required
RetentionDays int32 `json:"retentionDays"`
}

// AzureSqlDatabaseSpec defines the desired state of AzureSqlDatabase
type AzureSqlDatabaseSpec struct {
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
Expand All @@ -59,14 +66,15 @@ type AzureSqlDatabaseSpec struct {
Server string `json:"server"`

// +kubebuilder:validation:Optional
Edition DBEdition `json:"edition"` // TODO: Remove this in v1beta2
Sku *SqlDatabaseSku `json:"sku,omitempty"` // TODO: make this required in v1beta2
MaxSize *resource.Quantity `json:"maxSize,omitempty"`
DbName string `json:"dbName,omitempty"`
WeeklyRetention string `json:"weeklyRetention,omitempty"`
MonthlyRetention string `json:"monthlyRetention,omitempty"`
YearlyRetention string `json:"yearlyRetention,omitempty"`
WeekOfYear int32 `json:"weekOfYear,omitempty"`
Edition DBEdition `json:"edition"` // TODO: Remove this in v1beta2
Sku *SqlDatabaseSku `json:"sku,omitempty"` // TODO: make this required in v1beta2
MaxSize *resource.Quantity `json:"maxSize,omitempty"`
DbName string `json:"dbName,omitempty"`
WeeklyRetention string `json:"weeklyRetention,omitempty"`
MonthlyRetention string `json:"monthlyRetention,omitempty"`
YearlyRetention string `json:"yearlyRetention,omitempty"`
WeekOfYear int32 `json:"weekOfYear,omitempty"`
ShortTermRetentionPolicy *SQLDatabaseShortTermRetentionPolicy `json:"shortTermRetentionPolicy,omitempty"`
}

// AzureSqlDatabase is the Schema for the azuresqldatabases API
Expand Down
7 changes: 6 additions & 1 deletion config/samples/azure_v1beta1_azuresqldatabase.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,9 @@ spec:

# The week of year to take the yearly backup, valid values [1, 52]
# weekOfYear: 16


# The short term retention policy to use
# shortTermRetentionPolicy:
# RetentionDays is the backup retention period in days. This is how many days
# Point-in-Time Restore will be supported.
# retentionDays: 21
57 changes: 51 additions & 6 deletions controllers/azuresql_combined_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,18 @@ import (
"strings"
"testing"

azurev1alpha1 "github.com/Azure/azure-service-operator/api/v1alpha1"
"github.com/Azure/azure-service-operator/api/v1beta1"
"github.com/stretchr/testify/assert"

helpers "github.com/Azure/azure-service-operator/pkg/helpers"
"github.com/Azure/azure-service-operator/pkg/resourcemanager/config"
kvsecrets "github.com/Azure/azure-service-operator/pkg/secrets/keyvault"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"

azurev1alpha1 "github.com/Azure/azure-service-operator/api/v1alpha1"
"github.com/Azure/azure-service-operator/api/v1beta1"
"github.com/Azure/azure-service-operator/pkg/errhelp"
helpers "github.com/Azure/azure-service-operator/pkg/helpers"
"github.com/Azure/azure-service-operator/pkg/resourcemanager/config"
kvsecrets "github.com/Azure/azure-service-operator/pkg/secrets/keyvault"
)

func TestAzureSqlServerCombinedHappyPath(t *testing.T) {
Expand Down Expand Up @@ -65,8 +66,10 @@ func TestAzureSqlServerCombinedHappyPath(t *testing.T) {

sqlDatabaseName1 := GenerateTestResourceNameWithRandom("sqldatabase", 10)
sqlDatabaseName2 := GenerateTestResourceNameWithRandom("sqldatabase", 10)
sqlDatabaseName3 := GenerateTestResourceNameWithRandom("sqldatabase", 10)
var sqlDatabaseInstance1 *v1beta1.AzureSqlDatabase
var sqlDatabaseInstance2 *v1beta1.AzureSqlDatabase
var sqlDatabaseInstance3 *v1beta1.AzureSqlDatabase

sqlFirewallRuleNamespacedNameLocal := types.NamespacedName{
Name: GenerateTestResourceNameWithRandom("sqlfwr-local", 10),
Expand Down Expand Up @@ -190,6 +193,47 @@ func TestAzureSqlServerCombinedHappyPath(t *testing.T) {
assert.Equal(true, db.Status.Provisioned)
})

// Create a database in the new server
t.Run("set up database with short and long term retention", func(t *testing.T) {
t.Parallel()

// Create the SqlDatabase object and expect the Reconcile to be created
sqlDatabaseInstance3 = &v1beta1.AzureSqlDatabase{
ObjectMeta: metav1.ObjectMeta{
Name: sqlDatabaseName3,
Namespace: "default",
},
Spec: v1beta1.AzureSqlDatabaseSpec{
Location: rgLocation,
ResourceGroup: rgName,
Server: sqlServerName,
Sku: &v1beta1.SqlDatabaseSku{
Name: "S0",
Tier: "Standard",
},
WeeklyRetention: "P3W",
ShortTermRetentionPolicy: &v1beta1.SQLDatabaseShortTermRetentionPolicy{
RetentionDays: 3,
},
},
}

EnsureInstance(ctx, t, tc, sqlDatabaseInstance3)

// Now update with an invalid retention policy
sqlDatabaseInstance3.Spec.ShortTermRetentionPolicy.RetentionDays = -1
err = tc.k8sClient.Update(ctx, sqlDatabaseInstance3)
assert.Equal(nil, err, "updating sql database in k8s")

namespacedName := types.NamespacedName{Name: sqlDatabaseName3, Namespace: "default"}
assert.Eventually(func() bool {
db := &v1beta1.AzureSqlDatabase{}
err = tc.k8sClient.Get(ctx, namespacedName, db)
assert.Equal(nil, err, "err getting DB from k8s")
return db.Status.Provisioned == false && strings.Contains(db.Status.Message, errhelp.BackupRetentionPolicyInvalid)
}, tc.timeout, tc.retry, "wait for sql database to be updated in k8s")
})

// Create FirewallRules ---------------------------------------

t.Run("set up wide range firewall rule in primary server", func(t *testing.T) {
Expand Down Expand Up @@ -494,6 +538,7 @@ func TestAzureSqlServerCombinedHappyPath(t *testing.T) {
t.Parallel()
EnsureDelete(ctx, t, tc, sqlDatabaseInstance1)
EnsureDelete(ctx, t, tc, sqlDatabaseInstance2)
EnsureDelete(ctx, t, tc, sqlDatabaseInstance3)
})

})
Expand Down
4 changes: 2 additions & 2 deletions controllers/keyvault_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -407,8 +407,8 @@ func TestKeyvaultControllerBadAccessPolicy(t *testing.T) {
Namespace: "default",
},
Spec: azurev1alpha1.KeyVaultSpec{
Location: keyVaultLocation,
ResourceGroup: tc.resourceGroupName,
Location: keyVaultLocation,
ResourceGroup: tc.resourceGroupName,
AccessPolicies: &accessPolicies,
},
}
Expand Down
1 change: 1 addition & 0 deletions pkg/errhelp/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ const (
FeatureNotSupportedForEdition = "FeatureNotSupportedForEdition"
VirtualNetworkRuleBadRequest = "VirtualNetworkRuleBadRequest"
LongTermRetentionPolicyInvalid = "LongTermRetentionPolicyInvalid"
BackupRetentionPolicyInvalid = "InvalidBackupRetentionPeriod"
OperationIdNotFound = "OperationIdNotFound"
)

Expand Down
100 changes: 72 additions & 28 deletions pkg/resourcemanager/azuresql/azuresqldb/azuresqldb.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ package azuresqldb
import (
"context"
"fmt"
"net/http"

"github.com/Azure/azure-sdk-for-go/services/preview/sql/mgmt/v3.0/sql"
sql3 "github.com/Azure/azure-sdk-for-go/services/preview/sql/mgmt/v3.0/sql"
"github.com/pkg/errors"

"github.com/Azure/azure-service-operator/api/v1beta1"
"github.com/Azure/azure-service-operator/pkg/errhelp"
"github.com/Azure/azure-service-operator/pkg/helpers"
azuresqlshared "github.com/Azure/azure-service-operator/pkg/resourcemanager/azuresql/azuresqlshared"
Expand Down Expand Up @@ -139,42 +140,40 @@ func (m *AzureSqlDbManager) CreateOrUpdateDB(
}

// AddLongTermRetention enables / disables long term retention
func (m *AzureSqlDbManager) AddLongTermRetention(ctx context.Context, resourceGroupName string, serverName string, databaseName string, weeklyRetention string, monthlyRetention string, yearlyRetention string, weekOfYear int32) (*http.Response, error) {
func (m *AzureSqlDbManager) AddLongTermRetention(
ctx context.Context,
resourceGroupName string,
serverName string,
databaseName string,
policy azuresqlshared.SQLDatabaseBackupLongTermRetentionPolicy) (*sql.BackupLongTermRetentionPoliciesCreateOrUpdateFuture, error) {

longTermClient, err := azuresqlshared.GetBackupLongTermRetentionPoliciesClient(m.creds)
// TODO: Probably shouldn't return a response at all in the err case here (all through this function)
if err != nil {
return &http.Response{
StatusCode: 0,
}, err
return nil, err
}

// validate the input and exit if nothing needs to happen - this is ok!
if weeklyRetention == "" && monthlyRetention == "" && yearlyRetention == "" {
return &http.Response{
StatusCode: 200,
}, nil
if policy.WeeklyRetention == "" && policy.MonthlyRetention == "" && policy.YearlyRetention == "" {
return nil, nil
}

// validate the pairing of yearly retention and week of year
if yearlyRetention != "" && (weekOfYear <= 0 || weekOfYear > 52) {
return &http.Response{
StatusCode: 500,
}, fmt.Errorf("weekOfYear must be greater than 0 and less or equal to 52 when yearlyRetention is used")
if policy.YearlyRetention != "" && (policy.WeekOfYear <= 0 || policy.WeekOfYear > 52) {
return nil, fmt.Errorf("weekOfYear must be greater than 0 and less or equal to 52 when yearlyRetention is used")
}

// create pointers so that we can pass nils if needed
pWeeklyRetention := &weeklyRetention
if weeklyRetention == "" {
pWeeklyRetention := &policy.WeeklyRetention
if policy.WeeklyRetention == "" {
pWeeklyRetention = nil
}
pMonthlyRetention := &monthlyRetention
if monthlyRetention == "" {
pMonthlyRetention := &policy.MonthlyRetention
if policy.MonthlyRetention == "" {
pMonthlyRetention = nil
}
pYearlyRetention := &yearlyRetention
pWeekOfYear := &weekOfYear
if yearlyRetention == "" {
pYearlyRetention := &policy.YearlyRetention
pWeekOfYear := &policy.WeekOfYear
if policy.YearlyRetention == "" {
pYearlyRetention = nil
pWeekOfYear = nil
}
Expand All @@ -184,8 +183,8 @@ func (m *AzureSqlDbManager) AddLongTermRetention(ctx context.Context, resourceGr
resourceGroupName,
serverName,
databaseName,
sql3.BackupLongTermRetentionPolicy{
LongTermRetentionPolicyProperties: &sql3.LongTermRetentionPolicyProperties{
sql.BackupLongTermRetentionPolicy{
LongTermRetentionPolicyProperties: &sql.LongTermRetentionPolicyProperties{
WeeklyRetention: pWeeklyRetention,
MonthlyRetention: pMonthlyRetention,
YearlyRetention: pYearlyRetention,
Expand All @@ -195,12 +194,57 @@ func (m *AzureSqlDbManager) AddLongTermRetention(ctx context.Context, resourceGr
)

if err != nil {
return &http.Response{
StatusCode: 500,
}, nil
return nil, err
}

return &future, err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was trying to find the corresponding change in the call to AddLongTermRetention but couldn't see it in the diff - I'm guessing because the calling code just ignores the response/future. Is there any problem with not calling .Response on the future? I guess it's not waiting until the operation has finished without that. Shouldn't we be calling Result on them so we can see any error from the operation?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Calling Response doesn't wait until the operation has finished either, as the implementation for Response just does:

// Response returns the last HTTP response.
func (f Future) Response() *http.Response {
	if f.pt == nil {
		return nil
	}
	return f.pt.latestResponse()
}

and f.pt.latestResponse() just says: // returns the cached HTTP response after a call to pollForStatus(), can be nil

So if, for example, the operation took more than a single polling interval, or we called Response() too soon after starting it, the result would be nil. I'm therefore pretty sure we were doing the wrong thing before as well, and what I have here is effectively the same as what we had before - doubly so because the result of Response() was always being ignored anyway.

I see a few paths towards fixing this...

  1. Do the wait inline. This would work and it'd probably be fast as I don't think that these operations take a long time, but it has the disadvantage of breaking a "rule" of Kubernetes controllers which seems to be that you don't loop inside of the Reconcile function, you set a variable and let reconcile call you again (respecting the backoff, etc configured for the operator as a whole).
  2. Set up a state machine infrastructure so that we can go through the required workflow for the DB. The workflow is something like: Create DB -> poll create DB LRO -> Set LongTermRetention -> wait for LongTermRetention LRO -> Set ShortTermRetention -> wait for ShortTermRetention LRO -> Set "complete".
  3. Similar to 2 above but rather than thinking of it as states (which I think Kubernetes doesn't really love), just do a delta comparison to each entity in Azure and set them one at a time. I think the workflow would be something like this:
    a. Poll LRO if we have one - if not done just keep waiting, if done check result. Will need error handling for each type of LRO.
    b. Does DB exist? If not, create and store LRO. If yes, compare with Spec. If different post and store LRO. If same continue.
    c. Does LongTermRetention match spec? If no, post and store LRO. If yes continue.
    d. Does ShortTermRetention match spec? If no, post and store LRO. If yes continue.
    e. Set provisioned = true

I think the right thing to do is technically option 3, which also does away with the spec JSON hash checking in favor of an actual diff with Azure (which has the added benefit of allowing us to correct differences in Azure that Kubernetes didn't know about). The issue is that both 2 and 3 (that fix this issue the "right" way) are big undertakings that would effectively require full rewrites of the SQL DB reconciler. That introduces more risk and also is more duplicate effort given we're tracking towards a generic implementation that does exactly the above in the code generated path.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Effectively, I think this is a situation where, yes, things are not ideal, but this is far from the only place in the operator where that's currently true. It's not clear to me that building bespoke infrastructure to solve this problem in ASO is the right thing to do when we have a generic one coming, so it might be best to just live with it for now?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see - yeah, if this is an existing issue then I think you're right that we can land this as is and fix it in the generic case. I think option 3 would be the right plan as well if we were doing that.

}

// AddShortTermRetention sets the short term backup retention policy (the number of
// days Point-in-Time Restore is supported) on the given database.
//
// When policy is nil the retention period is reset to 7 days instead of being left
// untouched; see the comment in the body for why.
//
// On success it returns the future produced by CreateOrUpdate without waiting for the
// operation to finish. An error from CreateOrUpdate is returned as-is (not wrapped);
// Ensure hands it to errhelp.NewAzureError to classify it.
func (m *AzureSqlDbManager) AddShortTermRetention(
	ctx context.Context,
	resourceGroupName string,
	serverName string,
	databaseName string,
	policy *v1beta1.SQLDatabaseShortTermRetentionPolicy) (*sql.BackupShortTermRetentionPoliciesCreateOrUpdateFuture, error) {

	client, err := azuresqlshared.GetBackupShortTermRetentionPoliciesClient(m.creds)
	if err != nil {
		return nil, errors.Wrapf(err, "couldn't create BackupShortTermRetentionPoliciesClient")
	}

	// If policy is nil we're in a bit of an awkward situation since we cannot know if the customer has mutated
	// the retention policy in a previous reconciliation loop and then subsequently removed it. If they have,
	// "doing nothing" here is wrong because that leaves them in the previous modified state (but with no reflection
	// of that fact in the Spec).
	// Unfortunately you cannot update the retention policy to nil, nor can you delete it, so we must awkwardly
	// set it back to its default configuration.
	// Note: There are risks here, such as if the default on the server and the default in our code drift apart
	// at some point in the future.
	retentionDays := int32(7) // 7 is the magical default as of Jan 2021
	if policy != nil {
		retentionDays = policy.RetentionDays
	}

	future, err := client.CreateOrUpdate(
		ctx,
		resourceGroupName,
		serverName,
		databaseName,
		sql.BackupShortTermRetentionPolicy{
			BackupShortTermRetentionPolicyProperties: &sql.BackupShortTermRetentionPolicyProperties{
				RetentionDays: to.Int32Ptr(retentionDays),
			},
		})
	if err != nil {
		return nil, err
	}

	return &future, nil
}

var goneCodes = []string{
Expand Down
8 changes: 2 additions & 6 deletions pkg/resourcemanager/azuresql/azuresqldb/azuresqldb_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@ package azuresqldb

import (
"context"
"net/http"

"github.com/Azure/azure-sdk-for-go/services/preview/sql/mgmt/v3.0/sql"
azuresqlshared "github.com/Azure/azure-service-operator/pkg/resourcemanager/azuresql/azuresqlshared"

"github.com/Azure/azure-service-operator/pkg/resourcemanager"
"github.com/Azure/azure-service-operator/pkg/resourcemanager/azuresql/azuresqlshared"
)

// SqlDbManager is the client for the resource manager for SQL databases
Expand Down Expand Up @@ -39,10 +38,7 @@ type SqlDbManager interface {
resourceGroupName string,
serverName string,
databaseName string,
weeklyRetention string,
monthlyRetention string,
yearlyRetention string,
weekOfYear int32) (*http.Response, error)
policy azuresqlshared.SQLDatabaseBackupLongTermRetentionPolicy) (*sql.BackupLongTermRetentionPoliciesCreateOrUpdateFuture, error)

resourcemanager.ARMClient
}
31 changes: 27 additions & 4 deletions pkg/resourcemanager/azuresql/azuresqldb/azuresqldb_reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,12 @@ func (db *AzureSqlDbManager) Ensure(ctx context.Context, obj runtime.Object, opt
groupName,
server,
dbName,
instance.Spec.WeeklyRetention,
instance.Spec.MonthlyRetention,
instance.Spec.YearlyRetention,
instance.Spec.WeekOfYear)
azuresqlshared.SQLDatabaseBackupLongTermRetentionPolicy{
WeeklyRetention: instance.Spec.WeeklyRetention,
MonthlyRetention: instance.Spec.MonthlyRetention,
YearlyRetention: instance.Spec.YearlyRetention,
WeekOfYear: instance.Spec.WeekOfYear,
})
if err != nil {
failureErrors := []string{
errhelp.LongTermRetentionPolicyInvalid,
Expand All @@ -133,6 +135,27 @@ func (db *AzureSqlDbManager) Ensure(ctx context.Context, obj runtime.Object, opt
}
}

_, err = db.AddShortTermRetention(
ctx,
groupName,
server,
dbName,
instance.Spec.ShortTermRetentionPolicy)
if err != nil {
failureErrors := []string{
errhelp.BackupRetentionPolicyInvalid,
}
instance.Status.Message = fmt.Sprintf("Azure DB short-term retention policy error: %s", errhelp.StripErrorIDs(err))
azerr := errhelp.NewAzureError(err)
if helpers.ContainsString(failureErrors, azerr.Type) {
// Leave message the same as above
instance.Status.SetFailedProvisioning(instance.Status.Message)
return true, nil
} else {
return false, err
}
}

// db exists, we have successfully provisioned everything
instance.Status.SetProvisioned(resourcemanager.SuccessMsg)
instance.Status.State = string(dbGet.Status)
Expand Down
Loading