-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathflatfs.go
1272 lines (1110 loc) · 29.5 KB
/
flatfs.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Package flatfs is a Datastore implementation that stores all
// objects in a two-level directory structure in the local file
// system, regardless of the hierarchy of the keys.
package flatfs
import (
"context"
"encoding/json"
"errors"
"fmt"
"math"
"math/rand"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/ipfs/go-datastore"
"github.com/ipfs/go-datastore/query"
"github.com/jbenet/goprocess"
logging "github.com/ipfs/go-log/v2"
)
var log = logging.Logger("flatfs")
const (
extension = ".data"
diskUsageMessageTimeout = 5 * time.Second
diskUsageCheckpointPercent = 1.0
diskUsageCheckpointTimeout = 2 * time.Second
)
var (
// DiskUsageFile is the name of the file to cache the size of the
// datastore in disk
DiskUsageFile = "diskUsage.cache"
// DiskUsageFilesAverage is the maximum number of files per folder
// to stat in order to calculate the size of the datastore.
// The size of the rest of the files in a folder will be assumed
// to be the average of the values obtained. This includes
// regular files and directories.
DiskUsageFilesAverage = 2000
// DiskUsageCalcTimeout is the maximum time to spend
// calculating the DiskUsage upon a start when no
// DiskUsageFile is present.
// If this period did not suffice to read the size of the datastore,
// the remaining sizes will be estimated.
DiskUsageCalcTimeout = 5 * time.Minute
// RetryDelay is a timeout for a backoff on retrying operations
// that fail due to transient errors like too many file descriptors open.
RetryDelay = time.Millisecond * 200
// RetryAttempts is the maximum number of retries that will be attempted
// before giving up.
RetryAttempts = 6
)
const (
opPut = iota
opDelete
opRename
)
type initAccuracy string
const (
unknownA initAccuracy = "unknown"
exactA initAccuracy = "initial-exact"
approxA initAccuracy = "initial-approximate"
timedoutA initAccuracy = "initial-timed-out"
)
func combineAccuracy(a, b initAccuracy) initAccuracy {
if a == unknownA || b == unknownA {
return unknownA
}
if a == timedoutA || b == timedoutA {
return timedoutA
}
if a == approxA || b == approxA {
return approxA
}
if a == exactA && b == exactA {
return exactA
}
if a == "" {
return b
}
if b == "" {
return a
}
return unknownA
}
var _ datastore.Datastore = (*Datastore)(nil)
var _ datastore.PersistentDatastore = (*Datastore)(nil)
var _ datastore.Batching = (*Datastore)(nil)
var _ datastore.Batch = (*flatfsBatch)(nil)
var (
ErrDatastoreExists = errors.New("datastore already exists")
ErrDatastoreDoesNotExist = errors.New("datastore directory does not exist")
ErrShardingFileMissing = fmt.Errorf("%s file not found in datastore", SHARDING_FN)
ErrClosed = errors.New("datastore closed")
ErrInvalidKey = errors.New("key not supported by flatfs")
)
var (
r *rand.Rand
)
func init() {
r = rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
}
// Datastore implements the go-datastore Interface.
// Note this datastore cannot guarantee order of concurrent
// write operations to the same key. See the explanation in
// Put().
type Datastore struct {
// atomic operations should always be used with diskUsage.
// Must be first in struct to ensure correct alignment
// (see https://golang.org/pkg/sync/atomic/#pkg-note-BUG)
diskUsage int64
path string
tempPath string
shardStr string
getDir ShardFunc
// synchronize all writes and directory changes for added safety
sync bool
// these values should only be used during internalization or
// inside the checkpoint loop
dirty bool
storedValue diskUsageValue
// Used to trigger a checkpoint.
checkpointCh chan struct{}
done chan struct{}
shutdownLock sync.RWMutex
shutdown bool
// opMap handles concurrent write operations (put/delete)
// to the same key
opMap *opMap
}
type diskUsageValue struct {
DiskUsage int64 `json:"diskUsage"`
Accuracy initAccuracy `json:"accuracy"`
}
type ShardFunc func(string) string
type opT int
// op wraps useful arguments of write operations
type op struct {
typ opT // operation type
key datastore.Key // datastore key. Mandatory.
tmp string // temp file path
path string // file path
v []byte // value
}
// opMap is a synchronisation structure where a single op can be stored
// for each key.
type opMap struct {
ops sync.Map
}
type opResult struct {
mu sync.RWMutex
success bool
opMap *opMap
name string
}
// Begins starts the processing of an op:
// - if no other op for the same key exist, register it and return immediately
// - if another op exist for the same key, wait until it's done:
// - if that previous op succeeded, consider that ours shouldn't execute and return nil
// - if that previous op failed, start ours
func (m *opMap) Begin(name string) *opResult {
for {
myOp := &opResult{opMap: m, name: name}
myOp.mu.Lock()
opIface, loaded := m.ops.LoadOrStore(name, myOp)
if !loaded { // no one else doing ops with this key
return myOp
}
op := opIface.(*opResult)
// someone else doing ops with this key, wait for
// the result
op.mu.RLock()
if op.success {
return nil
}
// if we are here, we will retry the operation
}
}
func (o *opResult) Finish(ok bool) {
o.success = ok
o.opMap.ops.Delete(o.name)
o.mu.Unlock()
}
func Create(path string, fun *ShardIdV1) error {
err := os.Mkdir(path, 0755)
if err != nil && !os.IsExist(err) {
return err
}
dsFun, err := ReadShardFunc(path)
switch err {
case ErrShardingFileMissing:
isEmpty, err := DirIsEmpty(path)
if err != nil {
return err
}
if !isEmpty {
return fmt.Errorf("directory missing %s file: %s", SHARDING_FN, path)
}
err = WriteShardFunc(path, fun)
if err != nil {
return err
}
err = WriteReadme(path, fun)
return err
case nil:
if fun.String() != dsFun.String() {
return fmt.Errorf("specified shard func '%s' does not match repo shard func '%s'",
fun.String(), dsFun.String())
}
return ErrDatastoreExists
default:
return err
}
}
func Open(path string, syncFiles bool) (*Datastore, error) {
_, err := os.Stat(path)
if os.IsNotExist(err) {
return nil, ErrDatastoreDoesNotExist
} else if err != nil {
return nil, err
}
tempPath := filepath.Join(path, ".temp")
err = os.RemoveAll(tempPath)
if err != nil && !os.IsNotExist(err) {
return nil, fmt.Errorf("failed to remove temporary directory: %v", err)
}
err = os.Mkdir(tempPath, 0755)
if err != nil {
return nil, fmt.Errorf("failed to create temporary directory: %v", err)
}
shardId, err := ReadShardFunc(path)
if err != nil {
return nil, err
}
fs := &Datastore{
path: path,
tempPath: tempPath,
shardStr: shardId.String(),
getDir: shardId.Func(),
sync: syncFiles,
checkpointCh: make(chan struct{}, 1),
done: make(chan struct{}),
diskUsage: 0,
opMap: new(opMap),
}
// This sets diskUsage to the correct value
// It might be slow, but allowing it to happen
// while the datastore is usable might
// cause diskUsage to not be accurate.
err = fs.calculateDiskUsage()
if err != nil {
// Cannot stat() all
// elements in the datastore.
return nil, err
}
go fs.checkpointLoop()
return fs, nil
}
// convenience method
func CreateOrOpen(path string, fun *ShardIdV1, sync bool) (*Datastore, error) {
err := Create(path, fun)
if err != nil && err != ErrDatastoreExists {
return nil, err
}
return Open(path, sync)
}
func (fs *Datastore) ShardStr() string {
return fs.shardStr
}
// encode returns the directory and file names for a given key according to
// the sharding function.
func (fs *Datastore) encode(key datastore.Key) (dir, file string) {
noslash := key.String()[1:]
dir = filepath.Join(fs.path, fs.getDir(noslash))
file = filepath.Join(dir, noslash+extension)
return dir, file
}
// decode returns the datastore.Key corresponding to a file name, according
// to the sharding function.
func (fs *Datastore) decode(file string) (key datastore.Key, ok bool) {
if !strings.HasSuffix(file, extension) {
// We expect random files like "put-". Log when we encounter
// others.
if !strings.HasPrefix(file, "put-") {
log.Warnw("failed to decode flatfs filename", "file", file)
}
return datastore.Key{}, false
}
name := file[:len(file)-len(extension)]
return datastore.NewKey(name), true
}
// makeDir is identical to makeDirNoSync but also enforce the sync
// if required by the config.
func (fs *Datastore) makeDir(dir string) error {
created, err := fs.makeDirNoSync(dir)
if err != nil {
return err
}
// In theory, if we create a new prefix dir and add a file to
// it, the creation of the prefix dir itself might not be
// durable yet. Sync the root dir after a successful mkdir of
// a prefix dir, just to be paranoid.
if fs.sync && created {
if err := syncDir(fs.path); err != nil {
return err
}
}
return nil
}
// makeDirNoSync create a directory on disk and report if it was created or
// already existed.
func (fs *Datastore) makeDirNoSync(dir string) (created bool, err error) {
if err := os.Mkdir(dir, 0755); err != nil {
if os.IsExist(err) {
return false, nil
}
return false, err
}
// Track DiskUsage of this NEW folder
fs.updateDiskUsage(dir, true)
return true, nil
}
// This function always runs under an opLock. Therefore, only one thread is
// touching the affected files.
func (fs *Datastore) renameAndUpdateDiskUsage(tmpPath, path string) error {
fi, err := os.Stat(path)
// Destination exists, we need to discount it from diskUsage
if fi != nil && err == nil {
atomic.AddInt64(&fs.diskUsage, -fi.Size())
} else if !os.IsNotExist(err) {
return err
}
// Rename and add new file's diskUsage. If the rename fails,
// it will either a) Re-add the size of an existing file, which
// was subtracted before b) Add 0 if there is no existing file.
for i := 0; i < RetryAttempts; i++ {
err = rename(tmpPath, path)
// if there's no error, or the source file doesn't exist, abort.
if err == nil || os.IsNotExist(err) {
break
}
// Otherwise, this could be a transient error due to some other
// process holding open one of the files. Wait a bit and then
// retry.
time.Sleep(time.Duration(i+1) * RetryDelay)
}
fs.updateDiskUsage(path, true)
return err
}
// Put stores a key/value in the datastore.
//
// Note, that we do not guarantee order of write operations (Put or Delete)
// to the same key in this datastore.
//
// For example. i.e. in the case of two concurrent Put, we only guarantee
// that one of them will come through, but cannot assure which one even if
// one arrived slightly later than the other. In the case of a
// concurrent Put and a Delete operation, we cannot guarantee which one
// will win.
func (fs *Datastore) Put(ctx context.Context, key datastore.Key, value []byte) error {
if !keyIsValid(key) {
return fmt.Errorf("when putting '%q': %v", key, ErrInvalidKey)
}
fs.shutdownLock.RLock()
defer fs.shutdownLock.RUnlock()
if fs.shutdown {
return ErrClosed
}
_, err := fs.doWriteOp(&op{
typ: opPut,
key: key,
v: value,
})
return err
}
func (fs *Datastore) Sync(ctx context.Context, prefix datastore.Key) error {
fs.shutdownLock.RLock()
defer fs.shutdownLock.RUnlock()
if fs.shutdown {
return ErrClosed
}
return nil
}
func (fs *Datastore) doOp(oper *op) error {
switch oper.typ {
case opPut:
return fs.doPut(oper.key, oper.v)
case opDelete:
return fs.doDelete(oper.key)
case opRename:
return fs.renameAndUpdateDiskUsage(oper.tmp, oper.path)
default:
panic("bad operation, this is a bug")
}
}
func isTooManyFDError(err error) bool {
perr, ok := err.(*os.PathError)
if ok && perr.Err == syscall.EMFILE {
return true
}
return false
}
// doWrite optimizes out write operations (put/delete) to the same
// key by queueing them and succeeding all queued
// operations if one of them does. In such case,
// we assume that the first succeeding operation
// on that key was the last one to happen after
// all successful others.
//
// done is true if we actually performed the operation, false if we skipped or
// failed.
func (fs *Datastore) doWriteOp(oper *op) (done bool, err error) {
keyStr := oper.key.String()
opRes := fs.opMap.Begin(keyStr)
if opRes == nil { // nothing to do, a concurrent op succeeded
return false, nil
}
err = fs.doOp(oper)
// Finish it. If no error, it will signal other operations
// waiting on this result to succeed. Otherwise, they will
// retry.
opRes.Finish(err == nil)
return err == nil, err
}
func (fs *Datastore) doPut(key datastore.Key, val []byte) error {
dir, path := fs.encode(key)
if err := fs.makeDir(dir); err != nil {
return err
}
tmp, err := fs.tempFile()
if err != nil {
return err
}
closed := false
removed := false
defer func() {
if !closed {
// silence errcheck
_ = tmp.Close()
}
if !removed {
// silence errcheck
_ = os.Remove(tmp.Name())
}
}()
if _, err := tmp.Write(val); err != nil {
return err
}
if fs.sync {
if err := syncFile(tmp); err != nil {
return err
}
}
if err := tmp.Close(); err != nil {
return err
}
closed = true
err = fs.renameAndUpdateDiskUsage(tmp.Name(), path)
if err != nil {
return err
}
removed = true
if fs.sync {
if err := syncDir(dir); err != nil {
return err
}
}
return nil
}
func (fs *Datastore) putMany(data map[datastore.Key][]byte) error {
fs.shutdownLock.RLock()
defer fs.shutdownLock.RUnlock()
if fs.shutdown {
return ErrClosed
}
type putManyOp struct {
key datastore.Key
file *os.File
dstPath string
srcPath string
}
var (
dirsToSync = make(map[string]struct{}, len(data))
files = make([]putManyOp, 0, len(data))
closed int
removed int
)
defer func() {
for closed < len(files) {
files[closed].file.Close()
closed++
}
for removed < len(files) {
_ = os.Remove(files[removed].srcPath)
removed++
}
}()
closer := func() error {
for closed < len(files) {
fi := files[closed].file
if fs.sync {
if err := syncFile(fi); err != nil {
return err
}
}
if err := fi.Close(); err != nil {
return err
}
closed++
}
return nil
}
// Start by writing all the data in temp files so that we can be sure that
// all the data is on disk before renaming to the final places.
for key, value := range data {
dir, path := fs.encode(key)
if _, err := fs.makeDirNoSync(dir); err != nil {
return err
}
dirsToSync[dir] = struct{}{}
tmp, err := fs.tempFileOnce()
// If we have too many files open, try closing some, then try
// again repeatedly.
if isTooManyFDError(err) {
if err = closer(); err != nil {
return err
}
tmp, err = fs.tempFile()
}
if err != nil {
return err
}
// Do this _first_ so we close it if writing fails.
files = append(files, putManyOp{
key: key,
file: tmp,
dstPath: path,
srcPath: tmp.Name(),
})
if _, err := tmp.Write(value); err != nil {
return err
}
}
// Now we sync everything
// sync and close files
err := closer()
if err != nil {
return err
}
// move files to their proper places
for _, pop := range files {
done, err := fs.doWriteOp(&op{
typ: opRename,
key: pop.key,
tmp: pop.srcPath,
path: pop.dstPath,
})
if err != nil {
return err
} else if !done {
_ = os.Remove(pop.file.Name())
}
removed++
}
// now sync the dirs for those files
if fs.sync {
for dir := range dirsToSync {
if err := syncDir(dir); err != nil {
return err
}
}
// sync top flatfs dir
if err := syncDir(fs.path); err != nil {
return err
}
}
return nil
}
func (fs *Datastore) Get(ctx context.Context, key datastore.Key) (value []byte, err error) {
// Can't exist in datastore.
if !keyIsValid(key) {
return nil, datastore.ErrNotFound
}
_, path := fs.encode(key)
data, err := readFile(path)
if err != nil {
if os.IsNotExist(err) {
return nil, datastore.ErrNotFound
}
// no specific error to return, so just pass it through
return nil, err
}
return data, nil
}
func (fs *Datastore) Has(ctx context.Context, key datastore.Key) (exists bool, err error) {
// Can't exist in datastore.
if !keyIsValid(key) {
return false, nil
}
_, path := fs.encode(key)
switch _, err := os.Stat(path); {
case err == nil:
return true, nil
case os.IsNotExist(err):
return false, nil
default:
return false, err
}
}
func (fs *Datastore) GetSize(ctx context.Context, key datastore.Key) (size int, err error) {
// Can't exist in datastore.
if !keyIsValid(key) {
return -1, datastore.ErrNotFound
}
_, path := fs.encode(key)
switch s, err := os.Stat(path); {
case err == nil:
return int(s.Size()), nil
case os.IsNotExist(err):
return -1, datastore.ErrNotFound
default:
return -1, err
}
}
// Delete removes a key/value from the Datastore. Please read
// the Put() explanation about the handling of concurrent write
// operations to the same key.
func (fs *Datastore) Delete(ctx context.Context, key datastore.Key) error {
// Can't exist in datastore.
if !keyIsValid(key) {
return nil
}
fs.shutdownLock.RLock()
defer fs.shutdownLock.RUnlock()
if fs.shutdown {
return ErrClosed
}
_, err := fs.doWriteOp(&op{
typ: opDelete,
key: key,
v: nil,
})
return err
}
// This function always runs within an opLock for the given
// key, and not concurrently.
func (fs *Datastore) doDelete(key datastore.Key) error {
_, path := fs.encode(key)
fSize := fileSize(path)
var err error
for i := 0; i < RetryAttempts; i++ {
err = os.Remove(path)
if err == nil {
break
} else if os.IsNotExist(err) {
return nil
}
}
if err == nil {
atomic.AddInt64(&fs.diskUsage, -fSize)
fs.checkpointDiskUsage()
}
return err
}
func (fs *Datastore) Query(ctx context.Context, q query.Query) (query.Results, error) {
prefix := datastore.NewKey(q.Prefix).String()
if prefix != "/" {
// This datastore can't include keys with multiple components.
// Therefore, it's always correct to return an empty result when
// the user requests a filter by prefix.
log.Warnw(
"flatfs was queried with a key prefix but flatfs only supports keys at the root",
"prefix", q.Prefix,
"query", q,
)
return query.ResultsWithEntries(q, nil), nil
}
// Replicates the logic in ResultsWithChan but actually respects calls
// to `Close`.
b := query.NewResultBuilder(q)
b.Process.Go(func(p goprocess.Process) {
err := fs.walkTopLevel(ctx, fs.path, b)
if err == nil {
return
}
select {
case b.Output <- query.Result{Error: errors.New("walk failed: " + err.Error())}:
case <-p.Closing():
}
})
go b.Process.CloseAfterChildren() //nolint
// We don't apply _any_ of the query logic ourselves so we'll leave it
// all up to the naive query engine.
return query.NaiveQueryApply(q, b.Results()), nil
}
func (fs *Datastore) walkTopLevel(ctx context.Context, path string, result *query.ResultBuilder) error {
dir, err := os.Open(path)
if err != nil {
return err
}
defer dir.Close()
entries, err := dir.Readdir(-1)
if err != nil {
return err
}
for _, entry := range entries {
if !entry.IsDir() {
continue
}
dir := entry.Name()
if len(dir) == 0 || dir[0] == '.' {
continue
}
err = fs.walk(ctx, filepath.Join(path, dir), result)
if err != nil {
return err
}
// Are we closing?
select {
case <-ctx.Done():
return ctx.Err()
case <-result.Process.Closing():
return nil
default:
}
}
return nil
}
// folderSize estimates the diskUsage of a folder by reading
// up to DiskUsageFilesAverage entries in it and assuming any
// other files will have an average size.
func folderSize(path string, deadline time.Time) (int64, initAccuracy, error) {
var du int64
folder, err := os.Open(path)
if err != nil {
return 0, "", err
}
defer folder.Close()
stat, err := folder.Stat()
if err != nil {
return 0, "", err
}
files, err := folder.Readdirnames(-1)
if err != nil {
return 0, "", err
}
totalFiles := len(files)
i := 0
filesProcessed := 0
maxFiles := DiskUsageFilesAverage
if maxFiles <= 0 {
maxFiles = totalFiles
}
// randomize file order
// https://stackoverflow.com/a/42776696
for i := len(files) - 1; i > 0; i-- {
j := r.Intn(i + 1)
files[i], files[j] = files[j], files[i]
}
accuracy := exactA
for {
// Do not process any files after deadline is over
if time.Now().After(deadline) {
accuracy = timedoutA
break
}
if i >= totalFiles || filesProcessed >= maxFiles {
if filesProcessed >= maxFiles {
accuracy = approxA
}
break
}
// Stat the file
fname := files[i]
subpath := filepath.Join(path, fname)
st, err := os.Stat(subpath)
if err != nil {
return 0, "", err
}
// Find folder size recursively
if st.IsDir() {
du2, acc, err := folderSize(filepath.Join(subpath), deadline)
if err != nil {
return 0, "", err
}
accuracy = combineAccuracy(acc, accuracy)
du += du2
filesProcessed++
} else { // in any other case, add the file size
du += st.Size()
filesProcessed++
}
i++
}
nonProcessed := totalFiles - filesProcessed
// Avg is total size in this folder up to now / total files processed
// it includes folders ant not folders
avg := 0.0
if filesProcessed > 0 {
avg = float64(du) / float64(filesProcessed)
}
duEstimation := int64(avg * float64(nonProcessed))
du += duEstimation
du += stat.Size()
//fmt.Println(path, "total:", totalFiles, "totalStat:", i, "totalFile:", filesProcessed, "left:", nonProcessed, "avg:", int(avg), "est:", int(duEstimation), "du:", du)
return du, accuracy, nil
}
// calculateDiskUsage tries to read the DiskUsageFile for a cached
// diskUsage value, otherwise walks the datastore files.
// it is only safe to call in Open()
func (fs *Datastore) calculateDiskUsage() error {
// Try to obtain a previously stored value from disk
if persDu := fs.readDiskUsageFile(); persDu > 0 {
fs.diskUsage = persDu
return nil
}
msgDone := make(chan struct{}, 1) // prevent race condition
msgTimer := time.AfterFunc(diskUsageMessageTimeout, func() {
fmt.Printf("Calculating datastore size. This might take %s at most and will happen only once\n",
DiskUsageCalcTimeout.String())
msgDone <- struct{}{}
})
defer msgTimer.Stop()
deadline := time.Now().Add(DiskUsageCalcTimeout)
du, accuracy, err := folderSize(fs.path, deadline)
if err != nil {
return err
}
if !msgTimer.Stop() {
<-msgDone
}
if accuracy == timedoutA {
fmt.Println("WARN: It took to long to calculate the datastore size")
fmt.Printf("WARN: The total size (%d) is an estimation. You can fix errors by\n", du)
fmt.Printf("WARN: replacing the %s file with the right disk usage in bytes and\n",
filepath.Join(fs.path, DiskUsageFile))
fmt.Println("WARN: re-opening the datastore")
}
fs.storedValue.Accuracy = accuracy
fs.diskUsage = du
fs.writeDiskUsageFile(du, true)
return nil
}
func fileSize(path string) int64 {
fi, err := os.Stat(path)
if err != nil {
return 0
}
return fi.Size()
}
// updateDiskUsage reads the size of path and atomically
// increases or decreases the diskUsage variable.
// setting add to false will subtract from disk usage.
func (fs *Datastore) updateDiskUsage(path string, add bool) {
fsize := fileSize(path)
if !add {
fsize = -fsize
}
if fsize != 0 {
atomic.AddInt64(&fs.diskUsage, fsize)
fs.checkpointDiskUsage()
}
}
// checkpointDiskUsage triggers a disk usage checkpoint write.
func (fs *Datastore) checkpointDiskUsage() {
select {
case fs.checkpointCh <- struct{}{}:
// msg sent
default: