@@ -20,6 +20,7 @@ import (
20
20
"fmt"
21
21
"runtime"
22
22
"strings"
23
+ "sync"
23
24
24
25
"cloud.google.com/go/storage"
25
26
"golang.org/x/sync/errgroup"
@@ -47,9 +48,11 @@ type ListerInput struct {
47
48
// Default value is 10x number of available CPU. Optional.
48
49
Parallelism int
49
50
50
- // BatchSize is the number of objects to list. Default value returns
51
- // all objects at once. The number of objects returned will be
52
- // rounded up to a multiple of gcs page size. Optional.
51
+ // BatchSize is the minimum number of objects to list in each batch.
52
+ // The number of objects returned in a batch will be rounded up to
53
+ // include all the objects received in the last request to GCS.
54
+ // By default, the Lister returns all objects in one batch.
55
+ // Optional.
53
56
BatchSize int
54
57
55
58
// Query is the query to filter objects for listing. Default value is nil.
@@ -58,10 +61,40 @@ type ListerInput struct {
58
61
Query storage.Query
59
62
60
63
// SkipDirectoryObjects is to indicate whether to list directory objects.
61
- // Default value is false. Optional.
64
+ // Note: Even if directory objects are excluded, they contribute to the
65
+ // [ListerInput.BatchSize] count. Default value is false. Optional.
62
66
SkipDirectoryObjects bool
63
67
}
64
68
69
+ // NewLister creates a new [Lister] that can be used to list objects in the given bucket.
70
+ func NewLister (c * storage.Client , in * ListerInput ) * Lister {
71
+ bucket := c .Bucket (in .BucketName )
72
+
73
+ // If parallelism is not given, set default value to 10x the number of
74
+ // available CPU.
75
+ if in .Parallelism == 0 {
76
+ in .Parallelism = runtime .NumCPU () * 10
77
+ }
78
+ // Initialize range channel with entire namespace of object for given
79
+ // prefix, startoffset and endoffset. For the default range to list is
80
+ // entire namespace, start and end will be empty.
81
+ rangeChannel := make (chan * listRange , in .Parallelism * 2 )
82
+ start , end := prefixAdjustedOffsets (in .Query .StartOffset , in .Query .EndOffset , in .Query .Prefix )
83
+ rangeChannel <- & listRange {startRange : start , endRange : end }
84
+
85
+ lister := & Lister {
86
+ method : open ,
87
+ parallelism : in .Parallelism ,
88
+ pageToken : "" ,
89
+ bucket : bucket ,
90
+ batchSize : in .BatchSize ,
91
+ query : in .Query ,
92
+ skipDirectoryObjects : in .SkipDirectoryObjects ,
93
+ ranges : rangeChannel ,
94
+ }
95
+ return lister
96
+ }
97
+
65
98
// Lister is used for interacting with Dataflux fast-listing. The caller should
66
99
// initialize it with NewLister() instead of creating it directly.
67
100
type Lister struct {
@@ -92,116 +125,156 @@ type Lister struct {
92
125
skipDirectoryObjects bool
93
126
}
94
127
95
- // NewLister creates a new dataflux Lister to list objects in the give bucket.
96
- func NewLister (c * storage.Client , in * ListerInput ) * Lister {
97
- bucket := c .Bucket (in .BucketName )
98
-
99
- // If parallelism is not given, set default value to 10x the number of
100
- // available CPU.
101
- if in .Parallelism == 0 {
102
- in .Parallelism = runtime .NumCPU () * 10
103
- }
104
- // Initialize range channel with entire namespace of object for given
105
- // prefix, startoffset and endoffset. For the default range to list is
106
- // entire namespace, start and end will be empty.
107
- rangeChannel := make (chan * listRange , in .Parallelism * 2 )
108
- start , end := updateStartEndOffset (in .Query .StartOffset , in .Query .EndOffset , in .Query .Prefix )
109
- rangeChannel <- & listRange {startRange : start , endRange : end }
110
-
111
- lister := & Lister {
112
- method : open ,
113
- parallelism : in .Parallelism ,
114
- pageToken : "" ,
115
- bucket : bucket ,
116
- batchSize : in .BatchSize ,
117
- query : in .Query ,
118
- skipDirectoryObjects : in .SkipDirectoryObjects ,
119
- ranges : rangeChannel ,
120
- }
121
- return lister
122
- }
123
-
124
- // NextBatch runs worksteal algorithm and sequential listing in parallel to quickly
125
- // return a list of objects in the bucket. For smaller dataset,
126
- // sequential listing is expected to be faster. For larger dataset,
128
+ // NextBatch returns the next N objects in the bucket, where N is [ListerInput.BatchSize].
129
+ // In case of failure, all processes are stopped and an error is returned immediately. Create a new Lister to retry.
130
+ // For the first batch, both worksteal listing and sequential
131
+ // listing runs in parallel to quickly list N number of objects in the bucket. For subsequent
132
+ // batches, only the method which returned object faster in the first batch is used.
133
+ // For smaller dataset, sequential listing is expected to be faster. For larger dataset,
127
134
// worksteal listing is expected to be faster.
135
+ //
136
+ // Worksteal algorithm list objects in GCS bucket in parallel using multiple parallel
137
+ // workers and each worker in the list operation is able to steal work from its siblings
138
+ // once it has finished all currently slated listing work.
128
139
func (c * Lister ) NextBatch (ctx context.Context ) ([]* storage.ObjectAttrs , error ) {
129
- // countError tracks the number of failed listing methods.
130
- countError := 0
131
- var results []* storage.ObjectAttrs
132
- ctx , cancel := context .WithCancel (ctx )
133
- defer cancel ()
134
- // Errgroup takes care of running both methods in parallel. As soon as one of
135
- // the method is complete, the running method also stops.
136
- g , childCtx := errgroup .WithContext (ctx )
137
-
138
- // To start listing method is Open and runs both worksteal and sequential listing
139
- // in parallel. The method which completes first is used for all subsequent runs.
140
140
141
- // TODO: Run worksteal listing when method is Open or WorkSteal.
141
+ var results [] * storage. ObjectAttrs
142
142
143
- // Run sequential listing when method is Open or Sequential.
144
- if c .method != worksteal {
143
+ // For the first batch, listing method is open and runs both worksteal and sequential listing
144
+ // in parallel. The method which completes first is used for all subsequent NextBatch calls.
145
+ switch c .method {
146
+ case worksteal :
147
+ // Run worksteal algorithm for listing.
148
+ objects , err := c .workstealListing (ctx )
149
+ if err != nil {
150
+ return nil , fmt .Errorf ("worksteal listing: %w" , err )
151
+ }
152
+ results = objects
153
+ case sequential :
154
+ // Run GCS sequential listing.
155
+ objects , token , err := c .sequentialListing (ctx )
156
+ if err != nil {
157
+ return nil , fmt .Errorf ("sequential listing: %w" , err )
158
+ }
159
+ results = objects
160
+ c .pageToken = token
161
+ c .ranges = nil
162
+ case open :
163
+ // countError tracks the number of failed listing methods.
164
+ countErr := & countErr {counter : 0 }
165
+
166
+ ctx , cancel := context .WithCancel (ctx )
167
+ defer cancel ()
168
+ // Errgroup takes care of running both methods in parallel. As soon as one of
169
+ // the method is complete, the running method also stops.
170
+ g , ctx := errgroup .WithContext (ctx )
171
+ wsCompletedfirst := false
172
+ seqCompletedfirst := false
173
+ var wsObjects []* storage.ObjectAttrs
174
+ var seqObjects []* storage.ObjectAttrs
175
+ var nextToken string
176
+ g .Go (func () error {
177
+ objects , err := c .workstealListing (ctx )
178
+ if err != nil {
179
+ countErr .increment ()
180
+ return fmt .Errorf ("worksteal listing: %w" , err )
181
+ }
182
+ // Close context when worksteal listing is complete.
183
+ cancel ()
184
+ wsCompletedfirst = true
185
+ wsObjects = objects
145
186
187
+ return nil
188
+ })
146
189
g .Go (func () error {
147
- objects , nextToken , err := c .sequentialListing (childCtx )
190
+ objects , token , err := c .sequentialListing (ctx )
148
191
if err != nil {
149
- countError ++
150
- return fmt .Errorf ("error in running sequential listing: %w" , err )
192
+ countErr . increment ()
193
+ return fmt .Errorf ("sequential listing: %w" , err )
151
194
}
152
- // If sequential listing completes first, set method to sequential listing
153
- // and ranges to nil. The nextToken will be used to continue sequential listing.
154
- results = objects
155
- c .pageToken = nextToken
156
- c .method = sequential
157
195
// Close context when sequential listing is complete.
158
196
cancel ()
197
+ seqCompletedfirst = true
198
+ seqObjects = objects
199
+ nextToken = token
200
+
159
201
return nil
160
202
})
161
- }
162
-
163
- // Close all functions if either sequential listing or worksteal listing is complete.
164
- err := g .Wait ()
165
-
166
- // If the error is not context.Canceled, then return error instead of falling back
167
- // to the other method. This is so that the error can be fixed and user can take
168
- // advantage of fast-listing.
169
- // As one of the listing method completes, it is expected to cancel context for the
170
- // only then return error. other method. If both sequential and worksteal listing
171
- // fail due to context canceled, return error.
172
- if err != nil && (! errors .Is (err , context .Canceled ) || countError > 1 ) {
173
- return nil , fmt .Errorf ("failed waiting for sequntial and work steal lister : %w" , err )
203
+ // Close all functions if either sequential listing or worksteal listing is complete.
204
+ err := g .Wait ()
205
+
206
+ // If the error is not context.Canceled, then return error instead of falling back
207
+ // to the other method. This is so that the error can be fixed and user can take
208
+ // advantage of fast-listing.
209
+ // As one of the listing method completes, it is expected to cancel context and
210
+ // return context canceled error for the other method. Since context canceled is expected, it
211
+ // will not be considered an error. If both sequential and worksteal listing fail due
212
+ // to context canceled, then return error.
213
+ if err != nil && (! errors .Is (err , context .Canceled ) || countErr .counter > 1 ) {
214
+ return nil , fmt .Errorf ("dataflux: %w" , err )
215
+ }
216
+ if wsCompletedfirst {
217
+ // If worksteal listing completes first, set method to worksteal listing and nextToken to "".
218
+ // The c.ranges channel will be used to continue worksteal listing.
219
+ results = wsObjects
220
+ c .pageToken = ""
221
+ c .method = worksteal
222
+ } else if seqCompletedfirst {
223
+ // If sequential listing completes first, set method to sequential listing
224
+ // and ranges to nil. The nextToken will be used to continue sequential listing.
225
+ results = seqObjects
226
+ c .pageToken = nextToken
227
+ c .method = sequential
228
+ c .ranges = nil
229
+ }
174
230
}
175
231
176
232
// If ranges for worksteal and pageToken for sequential listing is empty, then
177
233
// listing is complete.
178
- if c .pageToken == "" {
234
+ if c .pageToken == "" && len ( c . ranges ) == 0 {
179
235
return results , iterator .Done
180
236
}
181
237
return results , nil
182
238
}
183
239
184
- // Close closes the range channel of the Lister.
240
+ // Close is used to close the Lister.
185
241
func (c * Lister ) Close () {
186
242
if c .ranges != nil {
187
243
close (c .ranges )
188
244
}
189
245
}
190
246
191
- // updateStartEndOffset updates start and end offset based on prefix.
192
- // If a prefix is given, adjust start and end value such that it lists
193
- // objects with the given prefix. updateStartEndOffset assumes prefix will
194
- // be added to the object name while listing objects in worksteal algorithm.
247
// countErr is a mutex-guarded counter. NextBatch uses it to count how many
// of the concurrently running listing methods returned an error.
type countErr struct {
	mu      sync.Mutex // guards counter during increment
	counter int        // number of increment calls so far
}

// increment adds one to the counter while holding the mutex.
func (cc *countErr) increment() {
	cc.mu.Lock()
	cc.counter++
	cc.mu.Unlock()
}
257
+
258
+ // prefixAdjustedOffsets returns a start and end offset adjusted from the given offsets based on the prefix, stripping the prefix.
259
+ // These offsets can be used by adding back the prefix, so that the original offsets do not need to be checked.
260
+
261
+ // This means that if the given offsets are out of range of the prefix
262
+ // (for example, offsets {start:"a", end: "b"}, with prefix "c" which is lexicographically
263
+ // outside of "a" to "b"), the returned offsets will ensure no strings fall in their range.
264
+
265
+ // Otherwise, if the offset is too permissive given the prefix, it returns an empty string
266
+ // to indicate there is no offset and all objects starting from or ending at the prefix should
267
+ // be listed.
195
268
//
196
269
// For example:
197
270
// start = "abc", end = "prefix_a", prefix = "prefix",
198
271
//
199
- // end will change to "_a", prefix will be added in worksteal algorithm.
200
- // "abc" is lexicographically smaller than "prefix". So start will be the first
201
- // object with the given prefix.
272
+ // "abc" is lexicographically smaller than "prefix". The start offset indicates that the first
202
273
//
203
- // Therefore start will change to ""(empty string) and end to "_a" .
204
- func updateStartEndOffset (start , end , prefix string ) (string , string ) {
274
+ // object with the given prefix should be listed, therefore the start offset will be empty.
275
+ // The end offset will change to "_a" as the prefix is stripped.
276
+ // Therefore new offset will change to {start = "", end = "_a" }.
277
+ func prefixAdjustedOffsets (start , end , prefix string ) (string , string ) {
205
278
if prefix == "" {
206
279
return start , end
207
280
}
0 commit comments