br: disk space check follow up #54596
```diff
@@ -102,7 +102,6 @@ const (
 	defaultFlagDdlBatchSize   = 128
 	resetSpeedLimitRetryTimes = 3
 	maxRestoreBatchSizeLimit  = 10240
-	pb                        = 1024 * 1024 * 1024 * 1024 * 1024
 )

 const (
```
```diff
@@ -1240,37 +1239,41 @@ func getStores(ctx context.Context, mgr *conn.Mgr) (stores *http.StoresInfo, err
 	return stores, nil
 }

-func EstimateTikvUsage(files []*backuppb.File, maxReplica uint64, storeCnt int) uint64 {
+func EstimateTikvUsage(files []*backuppb.File, replicaCnt uint64, storeCnt uint64) uint64 {
 	if storeCnt == 0 {
 		return 0
 	}
-	var totalSize uint64 = 0
+	if replicaCnt > storeCnt {
+		replicaCnt = storeCnt
+	}
+	totalSize := uint64(0)
 	for _, file := range files {
 		totalSize += file.GetSize_()
 	}
-	return totalSize * maxReplica / uint64(storeCnt)
+	log.Info("estimate tikv usage", zap.Uint64("total size", totalSize), zap.Uint64("replicaCnt", replicaCnt), zap.Uint64("store count", storeCnt))
+	return totalSize * replicaCnt / storeCnt
 }
```

Review thread on the `replicaCnt > storeCnt` clamp:

> @3pointer just to double check, do we have a topology constraint that makes sure no more than one replica of the same region is placed on the same store?

> **@3pointer:** Yes, our Raft implementation enforces this constraint.
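To make the arithmetic concrete, here is a minimal, self-contained sketch of what the new `EstimateTikvUsage` computes: total backup size times the effective replica count (clamped to the store count, per the review thread above), divided evenly across stores. The `estimate` helper and the sample sizes are hypothetical, for illustration only:

```go
package main

import "fmt"

// estimate mirrors the shape of EstimateTikvUsage (hypothetical helper,
// not the PR's code): sum the file sizes, cap the replica count at the
// store count, then divide the replicated total evenly across stores.
func estimate(fileSizes []uint64, replicaCnt, storeCnt uint64) uint64 {
	if storeCnt == 0 {
		return 0
	}
	// A store never holds two replicas of the same region (enforced by
	// the Raft placement rules), so clamp replicaCnt to storeCnt.
	if replicaCnt > storeCnt {
		replicaCnt = storeCnt
	}
	var totalSize uint64
	for _, s := range fileSizes {
		totalSize += s
	}
	return totalSize * replicaCnt / storeCnt
}

func main() {
	const gib = 1 << 30
	// 300 GiB of backup files, 3 replicas, 5 TiKV stores:
	// roughly 300 GiB * 3 / 5 = 180 GiB needed per store.
	fmt.Println(estimate([]uint64{100 * gib, 200 * gib}, 3, 5) / gib) // 180
}
```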
```diff
-func EstimateTiflashUsage(tables []*metautil.Table, storeCnt int) uint64 {
+func EstimateTiflashUsage(tables []*metautil.Table, storeCnt uint64) uint64 {
 	if storeCnt == 0 {
 		return 0
 	}
-	var tiflashTotal uint64 = 0
+	tiflashTotal := uint64(0)
 	for _, table := range tables {
-		if table.TiFlashReplicas <= 0 {
+		if table.Info.TiFlashReplica == nil || table.Info.TiFlashReplica.Count <= 0 {
 			continue
 		}
 		tableBytes := uint64(0)
 		for _, file := range table.Files {
 			tableBytes += file.GetSize_()
 		}
-		tiflashTotal += tableBytes * uint64(table.TiFlashReplicas)
+		tiflashTotal += tableBytes * table.Info.TiFlashReplica.Count
 	}
-	return tiflashTotal / uint64(storeCnt)
+	log.Info("estimate tiflash usage", zap.Uint64("total size", tiflashTotal), zap.Uint64("store count", storeCnt))
+	return tiflashTotal / storeCnt
 }
```
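The switch to `table.Info.TiFlashReplica` makes the nil guard necessary: the replica info hangs off the table metadata as a pointer and is unset for tables without TiFlash replicas. A small standalone sketch of the guard pattern, with simplified stand-in types (not TiDB's actual definitions):

```go
package main

import "fmt"

// Simplified stand-ins for the table metadata types; in TiDB the
// TiFlash replica info is a pointer field that is nil when no
// TiFlash replica is configured for the table.
type TiFlashReplicaInfo struct{ Count uint64 }
type TableInfo struct{ TiFlashReplica *TiFlashReplicaInfo }

// replicaCount guards the pointer before reading Count; dereferencing
// a nil TiFlashReplica would panic for tables without TiFlash replicas.
func replicaCount(info *TableInfo) uint64 {
	if info.TiFlashReplica == nil || info.TiFlashReplica.Count <= 0 {
		return 0
	}
	return info.TiFlashReplica.Count
}

func main() {
	fmt.Println(replicaCount(&TableInfo{}))                                             // 0
	fmt.Println(replicaCount(&TableInfo{TiFlashReplica: &TiFlashReplicaInfo{Count: 2}})) // 2
}
```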
```diff
 func CheckStoreSpace(necessary uint64, store *http.StoreInfo) error {
 	// Be careful editing the message, it is used in DiskCheckBackoffer
 	available, err := units.RAMInBytes(store.Status.Available)
 	if err != nil {
 		return errors.Annotatef(berrors.ErrPDInvalidResponse, "store %d has invalid available space %s", store.Store.ID, store.Status.Available)
```
```diff
@@ -1279,7 +1282,7 @@ func CheckStoreSpace(necessary uint64, store *http.StoreInfo) error {
 		return errors.Annotatef(berrors.ErrPDInvalidResponse, "store %d has invalid available space %s", store.Store.ID, store.Status.Available)
 	}
 	if uint64(available) < necessary {
-		return errors.Errorf("store %d has no space left on device, available %s, necessary %s",
+		return errors.Annotatef(berrors.ErrKVDiskFull, "store %d has no space left on device, available %s, necessary %s",
 			store.Store.ID, units.BytesSize(float64(available)), units.BytesSize(float64(necessary)))
 	}
 	return nil
```
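`CheckStoreSpace` leans on `docker/go-units` to parse the human-readable size strings PD reports and to format them back for the error message. A small round-trip sketch, assuming only the public go-units API (`RAMInBytes`, `BytesSize`, the `TiB` constant); the sample values are made up:

```go
package main

import (
	"fmt"

	"github.com/docker/go-units"
)

func main() {
	// PD reports available space as a human-readable string such as "1.5TiB".
	// RAMInBytes parses it into a byte count using binary units (KiB = 1024, ...).
	available, err := units.RAMInBytes("1.5TiB")
	if err != nil {
		panic(err)
	}
	necessary := uint64(2) * units.TiB

	// BytesSize renders a byte count back into a human-readable string,
	// which is how the error message formats both sides of the comparison.
	if uint64(available) < necessary {
		fmt.Printf("no space left: available %s, necessary %s\n",
			units.BytesSize(float64(available)), units.BytesSize(float64(necessary)))
	}
}
```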
```diff
@@ -1295,7 +1298,7 @@ func checkDiskSpace(ctx context.Context, mgr *conn.Mgr, files []*backuppb.File,
 		return errors.Trace(err)
 	}

-	tikvCnt, tiflashCnt := 0, 0
+	var tikvCnt, tiflashCnt uint64 = 0, 0
 	for i := range stores.Stores {
 		store := &stores.Stores[i]
 		if engine.IsTiFlashHTTPResp(&store.Store) {
```
@@ -1307,13 +1310,14 @@ func checkDiskSpace(ctx context.Context, mgr *conn.Mgr, files []*backuppb.File, | |
|
||
// We won't need to restore more than 1800 PB data at one time, right? | ||
preserve := func(base uint64, ratio float32) uint64 { | ||
if base > 1000*pb { | ||
if base > 1000*units.PB { | ||
return base | ||
} | ||
return base * uint64(ratio*10) / 10 | ||
} | ||
tikvUsage := preserve(EstimateTikvUsage(files, maxReplica, tikvCnt), 1.1) | ||
tiflashUsage := preserve(EstimateTiflashUsage(tables, tiflashCnt), 1.1) | ||
tiflashUsage := preserve(EstimateTiflashUsage(tables, tiflashCnt), 1.4) | ||
log.Info("preserved disk space", zap.Uint64("tikv", tikvUsage), zap.Uint64("tiflash", tiflashUsage)) | ||
|
||
err = utils.WithRetry(ctx, func() error { | ||
stores, err = getStores(ctx, mgr) | ||
|
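Two details are worth spelling out here. First, the guard exists because the fixed-point scaling multiplies `base` by `uint64(ratio*10)` before dividing, which would overflow `uint64` for inputs in the exabyte range (hence the 1800 PB remark in the comment). Second, the removed local `pb` was a binary petabyte (1024^5) while go-units' `units.PB` is decimal (1000^5), so the guard threshold shifts slightly. A standalone sketch of the same arithmetic; the `preserve` copy below is illustrative, not the PR's code path:

```go
package main

import "fmt"

const pb = 1000 * 1000 * 1000 * 1000 * 1000 // decimal petabyte, matching units.PB

// preserve mirrors the closure in checkDiskSpace: scale base by ratio using
// tenths-based integer arithmetic, skipping the scale-up for absurdly large
// inputs where base*uint64(ratio*10) could overflow uint64.
func preserve(base uint64, ratio float32) uint64 {
	if base > 1000*pb {
		return base
	}
	return base * uint64(ratio*10) / 10
}

func main() {
	fmt.Println(preserve(100, 1.1)) // 110
	fmt.Println(preserve(100, 1.4)) // 140
	// Guarded: above 1000 PB the value is returned unscaled, since
	// e.g. 2000 PB * 14 would exceed the uint64 range.
	fmt.Println(preserve(2000*pb, 1.4) == 2000*pb) // true
}
```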
Review comment on the renamed `replicaCnt` parameter:

> nit: a more common name could be `replicationFactor` or `rf`.