# -*- coding: utf-8 -*-
+import concurrent.futures
import json
+import time
import warnings
+from io import BytesIO
from pathlib import Path
-from typing import List, Iterable, Union
+from typing import List, Iterable, Union, Tuple

+from TM1py.Exceptions import TM1pyVersionException
from TM1py.Services import RestService
from TM1py.Services.ObjectService import ObjectService
from TM1py.Utils import format_url
from TM1py.Utils.Utils import verify_version, require_version
-from TM1py.Exceptions import TM1pyVersionException


class FileService(ObjectService):
@@ -37,7 +40,7 @@ def get_names(self, **kwargs) -> bytes:
            version_content_path=self.version_content_path)

        return self._rest.GET(url, **kwargs).content
-
+
    @require_version(version="11.4")
    def get_all_names(self, path: Union[str, Path] = "", **kwargs) -> List[str]:
        """ return list of blob file names
@@ -49,16 +52,16 @@ def get_all_names(self, path: Union[str, Path] = "", **kwargs) -> List[str]:

        response = self._rest.GET(url, **kwargs).content
        return [file['Name'] for file in json.loads(response)['value']]
-
+
    @require_version(version="11.4")
    def get(self, file_name: str, **kwargs) -> bytes:
        """ Get file

        :param file_name: file name in root or path to file
        """
        path = Path(file_name)
-        self._check_subfolder_support(path=path, function="FileService.get")
-
+        self._check_subfolder_support(path=path, function="FileService.get")
+
        url = self._construct_content_url(
            path=path,
            exclude_path_end=False,
@@ -108,23 +111,27 @@ def _construct_content_url(self, path: Path, exclude_path_end: bool = True, exte
            **parent_folders)

        return url.rstrip("/")
-
+
    def _check_subfolder_support(self, path: Path, function: str) -> None:
        REQUIRED_VERSION = "12"
        if len(path.parts) > 1 and not verify_version(required_version=REQUIRED_VERSION, version=self.version):
-            raise TM1pyVersionException(function=function, required_version=REQUIRED_VERSION, feature='Subfolder')
-
+            raise TM1pyVersionException(function=function, required_version=REQUIRED_VERSION, feature='Subfolder')
+
    @require_version(version="11.4")
-    def create(self, file_name: Union[str, Path], file_content: bytes, **kwargs):
+    def create(self, file_name: Union[str, Path], file_content: bytes, multi_part_upload: bool = False,
+               max_mb_per_part: float = 200, max_workers: int = 1, **kwargs):
        """ Create file

        Folders in file_name (e.g. folderA/folderB/file.csv) will be created implicitly

        :param file_name: file name in root or path to file
        :param file_content: file_content as bytes or BytesIO
+        :param multi_part_upload: use multipart upload (only available from TM1 12 onwards)
+        :param max_mb_per_part: maximum megabytes per part in a multipart upload (only available from TM1 12 onwards)
+        :param max_workers: max parallel workers for a multipart upload (only available from TM1 12 onwards)
        """
        path = Path(file_name)
-        self._check_subfolder_support(path=path, function="FileService.create")
+        self._check_subfolder_support(path=path, function="FileService.create")

        # Create folder structure iteratively
        if path.parents:
@@ -141,46 +148,139 @@ def create(self, file_name: Union[str, Path], file_content: bytes, **kwargs):
            }
            self._rest.POST(url, json.dumps(body), **kwargs)

+        return self._upload_file_content(path, file_content, multi_part_upload, max_mb_per_part, max_workers, **kwargs)
+
+    def _upload_file_content(
+            self,
+            path: Path,
+            file_content: bytes,
+            multi_part_upload: bool = False,
+            max_mb_per_part: float = 200,
+            max_workers: int = 1,
+            **kwargs):
+
        url = self._construct_content_url(path, exclude_path_end=False, extension="Content")
-        return self._rest.PUT(
-            url=url,
-            data=file_content,
-            headers=self.binary_http_header,
+
+        if not multi_part_upload:
+            return self._rest.PUT(
+                url=url,
+                data=file_content,
+                headers=self.binary_http_header,
+                **kwargs)
+
+        return self.upload_file_content_with_mpu(url, file_content, max_mb_per_part, max_workers, **kwargs)
+
+    def upload_file_content_with_mpu(self, content_url: str, file_content: bytes, max_mb_per_part: float,
+                                     max_workers: int = 1, **kwargs):
+        # Initiate multipart upload
+        response = self._rest.POST(
+            url=content_url + "/mpu.CreateMultipartUpload",
+            data="{}",
+            async_requests_mode=False,
            **kwargs)
-
+        upload_id = response.json()['UploadID']
+
+        # Split the file content into parts
+        parts_to_upload = self._split_into_parts(
+            data=file_content,
+            max_chunk_size=int(max_mb_per_part * 1024 * 1024)
+        )
+
+        part_numbers_and_etags = []
+
+        # helper function for uploading each part
+        def upload_part_with_retry(part_index: int, bytes_part: bytes, retries: int = 3) -> Tuple[int, int, str]:
+            for attempt in range(retries):
+                try:
+                    response = self._rest.POST(
+                        url=content_url + f"/!uploads('{upload_id}')/Parts",
+                        data=bytes_part,
+                        headers={**self.binary_http_header, 'Accept': 'application/json,text/plain'},
+                        async_requests_mode=False,
+                        **kwargs)
+                    return part_index, response.json()["PartNumber"], response.json()["@odata.etag"]
+                except Exception as e:
+                    if attempt < retries - 1:
+                        time.sleep(2 ** attempt)  # Exponential backoff
+                    else:
+                        raise e from None  # Raise the exception if all retries fail
+        if max_workers > 1:
+            # Upload parts concurrently
+            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+
+                futures = {
+                    executor.submit(upload_part_with_retry, i, part, 3): i
+                    for i, part
+                    in enumerate(parts_to_upload)}
+
+                for future in concurrent.futures.as_completed(futures):
+                    part_index, part_number, etag = future.result()
+                    part_numbers_and_etags.append((part_index, part_number, etag))
+
+        else:
+            # Sequential upload
+            for i, bytes_part in enumerate(parts_to_upload):
+                part_index, part_number, etag = upload_part_with_retry(i, bytes_part)
+                part_numbers_and_etags.append((part_index, part_number, etag))
+
+        # Complete the multipart upload
+        self._rest.POST(
+            url=content_url + f"/!uploads('{upload_id}')/mpu.Complete",
+            data=json.dumps(
+                {"Parts": [
+                    {"PartNumber": part_number, "ETag": etag}
+                    for _, part_number, etag in sorted(part_numbers_and_etags)
+                ]}
+            )
+        )
+
+    def _split_into_parts(self, data: Union[bytes, BytesIO], max_chunk_size: int = 200 * 1024 * 1024):
+        # Convert data to bytes if it's a BytesIO object
+        if isinstance(data, BytesIO):
+            data = data.getvalue()
+
+        # List to store chunks
+        parts = []
+
+        # Split data into chunks
+        for i in range(0, len(data), max_chunk_size):
+            part = data[i:i + max_chunk_size]
+            parts.append(part)
+
+        return parts
+
    @require_version(version="11.4")
-    def update(self, file_name: Union[str, Path], file_content: bytes, **kwargs):
+    def update(self, file_name: Union[str, Path], file_content: bytes, multi_part_upload: bool = False,
+               max_mb_per_part: float = 200, max_workers: int = 1, **kwargs):
        """ Update existing file

        :param file_name: file name in root or path to file
        :param file_content: file_content as bytes or BytesIO
+        :param multi_part_upload: use multipart upload (only available from TM1 12 onwards)
+        :param max_mb_per_part: maximum megabytes per part in a multipart upload (only available from TM1 12 onwards)
+        :param max_workers: max parallel workers for a multipart upload (only available from TM1 12 onwards)
        """
        path = Path(file_name)
-        self._check_subfolder_support(path=path, function="FileService.update")
+        self._check_subfolder_support(path=path, function="FileService.update")

-        url = self._construct_content_url(
-            path=path,
-            exclude_path_end=False,
-            extension="Content")
+        return self._upload_file_content(path, file_content, multi_part_upload, max_mb_per_part, max_workers, **kwargs)

-        return self._rest.PUT(
-            url=url,
-            data=file_content,
-            headers=self.binary_http_header,
-            **kwargs)
-
    @require_version(version="11.4")
-    def update_or_create(self, file_name: Union[str, Path], file_content: bytes, **kwargs):
+    def update_or_create(self, file_name: Union[str, Path], file_content: bytes, multi_part_upload: bool = False,
+                         max_mb_per_part: float = 200, max_workers: int = 1, **kwargs):
        """ Create file or update file if it already exists

        :param file_name: file name in root or path to file
        :param file_content: file_content as bytes or BytesIO
+        :param multi_part_upload: use multipart upload (only available from TM1 12 onwards)
+        :param max_mb_per_part: maximum megabytes per part in a multipart upload (only available from TM1 12 onwards)
+        :param max_workers: max parallel workers for a multipart upload (only available from TM1 12 onwards)
        """
        if self.exists(file_name, **kwargs):
-            return self.update(file_name, file_content, **kwargs)
+            return self.update(file_name, file_content, multi_part_upload, max_mb_per_part, max_workers, **kwargs)
+
+        return self.create(file_name, file_content, multi_part_upload, max_mb_per_part, max_workers, **kwargs)

-        return self.create(file_name, file_content, **kwargs)
-
    @require_version(version="11.4")
    def exists(self, file_name: Union[str, Path], **kwargs):
        """ Check if file exists
@@ -193,23 +293,23 @@ def exists(self, file_name: Union[str, Path], **kwargs):
            extension="")

        return self._exists(url, **kwargs)
-
+
    @require_version(version="11.4")
    def delete(self, file_name: Union[str, Path], **kwargs):
        """ Delete file

        :param file_name: file name in root or path to file
        """
        path = Path(file_name)
-        self._check_subfolder_support(path=path, function="FileService.delete")
+        self._check_subfolder_support(path=path, function="FileService.delete")

        url = self._construct_content_url(
            path=path,
            exclude_path_end=False,
            extension="")

        return self._rest.DELETE(url, **kwargs)
-
+
    @require_version(version="11.4")
    def search_string_in_name(self, name_startswith: str = None, name_contains: Iterable = None,
                              name_contains_operator: str = 'and', path: Union[Path, str] = "",
@@ -242,9 +342,9 @@ def search_string_in_name(self, name_startswith: str = None, name_contains: Iter

        else:
            raise ValueError("'name_contains' must be str or iterable")
-
+
        path = Path(path)
-        self._check_subfolder_support(path=path, function="FileService.search_string_in_name")
+        self._check_subfolder_support(path=path, function="FileService.search_string_in_name")

        url = self._construct_content_url(
            path=Path(path),
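With this change, create, update, and update_or_create accept the new multipart parameters and delegate the transfer to _upload_file_content, which either issues a single PUT or drives the mpu.CreateMultipartUpload / Parts / mpu.Complete sequence. A minimal usage sketch follows; the connection details, file names, and part sizing are placeholders, and the multipart path assumes a TM1 12 target:

    from pathlib import Path
    from TM1py import TM1Service

    # Hypothetical connection parameters for a TM1 12 instance
    with TM1Service(base_url="https://tm1.example.com/api/v1",
                    user="admin", password="secret", ssl=True) as tm1:
        # Small file: default single PUT upload (available from 11.4 onwards)
        tm1.files.update_or_create("small_file.csv", b"col1,col2\n1,2\n")

        # Large file: content is split into ~100 MB parts and uploaded by up to
        # 4 threads, each part retried up to 3 times with exponential backoff
        payload = Path("exports/large_export.csv").read_bytes()
        tm1.files.update_or_create(
            "exports/large_export.csv",
            payload,
            multi_part_upload=True,
            max_mb_per_part=100,
            max_workers=4)

Because each part keeps its original index in part_numbers_and_etags, the completion request is assembled in order even when concurrent uploads finish out of sequence.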