# -*- coding: utf-8 -*-
+import concurrent.futures
import json
+import time
import warnings
+from io import BytesIO
from pathlib import Path
-from typing import List, Iterable, Union
+from typing import List, Iterable, Union, Tuple

+from TM1py.Exceptions import TM1pyVersionException
from TM1py.Services import RestService
from TM1py.Services.ObjectService import ObjectService
from TM1py.Utils import format_url
from TM1py.Utils.Utils import verify_version, require_version
-from TM1py.Exceptions import TM1pyVersionException


class FileService(ObjectService):
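+    # minimum TM1 version required for subfolder paths and for multipart upload, enforced by the checks below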
+    SUBFOLDER_REQUIRED_VERSION = "12"
+    MPU_REQUIRED_VERSION = "12"

    def __init__(self, tm1_rest: RestService):
        """
@@ -37,7 +42,7 @@ def get_names(self, **kwargs) -> bytes:
            version_content_path=self.version_content_path)

        return self._rest.GET(url, **kwargs).content
-
+
    @require_version(version="11.4")
    def get_all_names(self, path: Union[str, Path] = "", **kwargs) -> List[str]:
        """ return list of blob file names
@@ -49,16 +54,16 @@ def get_all_names(self, path: Union[str, Path] = "", **kwargs) -> List[str]:

        response = self._rest.GET(url, **kwargs).content
        return [file['Name'] for file in json.loads(response)['value']]
-
+
    @require_version(version="11.4")
    def get(self, file_name: str, **kwargs) -> bytes:
        """ Get file

        :param file_name: file name in root or path to file
        """
        path = Path(file_name)
-        self._check_subfolder_support(path=path, function="FileService.get")
-
+        self._check_subfolder_support(path=path, function="FileService.get")
+
        url = self._construct_content_url(
            path=path,
            exclude_path_end=False,
@@ -108,23 +113,126 @@ def _construct_content_url(self, path: Path, exclude_path_end: bool = True, exte
            **parent_folders)

        return url.rstrip("/")
-
+
    def _check_subfolder_support(self, path: Path, function: str) -> None:
-        REQUIRED_VERSION = "12"
-        if len(path.parts) > 1 and not verify_version(required_version=REQUIRED_VERSION, version=self.version):
-            raise TM1pyVersionException(function=function, required_version=REQUIRED_VERSION, feature='Subfolder')
-
+        if not len(path.parts) > 1:
+            return
+
+        if not verify_version(required_version=self.SUBFOLDER_REQUIRED_VERSION, version=self.version):
+            raise TM1pyVersionException(
+                function=function,
+                required_version=self.SUBFOLDER_REQUIRED_VERSION,
+                feature='Subfolder')
+
+    def _check_mpu_support(self, function: str) -> None:
+        if not verify_version(required_version=self.MPU_REQUIRED_VERSION, version=self.version):
+            raise TM1pyVersionException(
+                function=function,
+                required_version=self.MPU_REQUIRED_VERSION,
+                feature='MultiProcessUpload')
+
+    def _upload_file_content(
+            self,
+            path: Path,
+            file_content: bytes,
+            multi_part_upload: bool = False,
+            max_mb_per_part: float = 200,
+            max_workers: int = 1,
+            **kwargs):
+
+        url = self._construct_content_url(path, exclude_path_end=False, extension="Content")
+
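+        # route to the multipart upload flow when requested; otherwise upload the content with a single PUT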
+        if multi_part_upload:
+            return self.upload_file_content_with_mpu(url, file_content, max_mb_per_part, max_workers, **kwargs)
+
+        return self._rest.PUT(
+            url=url,
+            data=file_content,
+            headers=self.binary_http_header,
+            **kwargs)
+
+    def upload_file_content_with_mpu(self, content_url: str, file_content: bytes, max_mb_per_part: float,
+                                     max_workers: int = 1, **kwargs):
+        # Initiate multipart upload
+        response = self._rest.POST(
+            url=content_url + "/mpu.CreateMultipartUpload",
+            data="{}",
+            async_requests_mode=False,
+            **kwargs)
+        upload_id = response.json()['UploadID']
+
+        # Split the file content into parts
+        parts_to_upload = self._split_into_parts(
+            data=file_content,
+            max_chunk_size=int(max_mb_per_part * 1024 * 1024)
+        )
+
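+        # track (part index, PartNumber, ETag) for every uploaded part; the index preserves the original
+        # order so the completion call can list parts in sequence even if they finish out of order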
+        part_numbers_and_etags = []
+
+        # helper function for uploading each part
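+        # each part is retried up to 'retries' times with exponential backoff before the final error is re-raised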
+        def upload_part_with_retry(index: int, data: bytes, retries: int = 3) -> Tuple[int, int, str]:
+            for attempt in range(retries):
+                try:
+                    part_response = self._rest.POST(
+                        url=content_url + f"/!uploads('{upload_id}')/Parts",
+                        data=data,
+                        headers={**self.binary_http_header, 'Accept': 'application/json,text/plain'},
+                        async_requests_mode=False,
+                        **kwargs)
+                    return index, part_response.json()["PartNumber"], part_response.json()["@odata.etag"]
+                except Exception as e:
+                    if attempt < retries - 1:
+                        time.sleep(2 ** attempt)  # Exponential backoff
+                    else:
+                        raise e from None
+
+        if max_workers > 1:
+            # upload parts concurrently
+            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+
+                futures = {
+                    executor.submit(upload_part_with_retry, i, part, 3): i
+                    for i, part
+                    in enumerate(parts_to_upload)}
+
+                for future in concurrent.futures.as_completed(futures):
+                    part_index, part_number, odata_etag = future.result()
+                    part_numbers_and_etags.append((part_index, part_number, odata_etag))
+
+        else:
+            # Sequential upload
+            for i, bytes_part in enumerate(parts_to_upload):
+                part_index, part_number, odata_etag = upload_part_with_retry(i, bytes_part)
+                part_numbers_and_etags.append((part_index, part_number, odata_etag))
+
+        # Complete the multipart upload
+        self._rest.POST(
+            url=content_url + f"/!uploads('{upload_id}')/mpu.Complete",
+            data=json.dumps(
+                {"Parts": [
+                    {"PartNumber": part_number, "ETag": etag}
+                    for _, part_number, etag in sorted(part_numbers_and_etags)
+                ]}
+            )
+        )
+
    @require_version(version="11.4")
-    def create(self, file_name: Union[str, Path], file_content: bytes, **kwargs):
+    def create(self, file_name: Union[str, Path], file_content: bytes, multi_part_upload: bool = False,
+               max_mb_per_part: float = 200, max_workers: int = 1, **kwargs):
        """ Create file

        Folders in file_name (e.g. folderA/folderB/file.csv) will be created implicitly

        :param file_name: file name in root or path to file
        :param file_content: file_content as bytes or BytesIO
+        :param multi_part_upload: use multipart upload (only available from TM1 12 onwards)
+        :param max_mb_per_part: maximum size in MB per part for multipart upload (only available from TM1 12 onwards)
+        :param max_workers: maximum number of parallel workers for multipart upload (only available from TM1 12 onwards)
        """
        path = Path(file_name)
-        self._check_subfolder_support(path=path, function="FileService.create")
+        self._check_subfolder_support(path=path, function="FileService.create")
+        if multi_part_upload:
+            self._check_mpu_support(function="FileService.create")

        # Create folder structure iteratively
        if path.parents:
@@ -141,46 +249,42 @@ def create(self, file_name: Union[str, Path], file_content: bytes, **kwargs):
            }
            self._rest.POST(url, json.dumps(body), **kwargs)

-        url = self._construct_content_url(path, exclude_path_end=False, extension="Content")
-        return self._rest.PUT(
-            url=url,
-            data=file_content,
-            headers=self.binary_http_header,
-            **kwargs)
-
+        return self._upload_file_content(path, file_content, multi_part_upload, max_mb_per_part, max_workers, **kwargs)
+
    @require_version(version="11.4")
-    def update(self, file_name: Union[str, Path], file_content: bytes, **kwargs):
+    def update(self, file_name: Union[str, Path], file_content: bytes, multi_part_upload: bool = False,
+               max_mb_per_part: float = 200, max_workers: int = 1, **kwargs):
        """ Update existing file

        :param file_name: file name in root or path to file
        :param file_content: file_content as bytes or BytesIO
+        :param multi_part_upload: use multipart upload (only available from TM1 12 onwards)
+        :param max_mb_per_part: maximum size in MB per part for multipart upload (only available from TM1 12 onwards)
+        :param max_workers: maximum number of parallel workers for multipart upload (only available from TM1 12 onwards)
        """
        path = Path(file_name)
-        self._check_subfolder_support(path=path, function="FileService.update")
+        self._check_subfolder_support(path=path, function="FileService.update")
+        if multi_part_upload:
+            self._check_mpu_support(function="FileService.update")

-        url = self._construct_content_url(
-            path=path,
-            exclude_path_end=False,
-            extension="Content")
+        return self._upload_file_content(path, file_content, multi_part_upload, max_mb_per_part, max_workers, **kwargs)

-        return self._rest.PUT(
-            url=url,
-            data=file_content,
-            headers=self.binary_http_header,
-            **kwargs)
-
    @require_version(version="11.4")
-    def update_or_create(self, file_name: Union[str, Path], file_content: bytes, **kwargs):
+    def update_or_create(self, file_name: Union[str, Path], file_content: bytes, multi_part_upload: bool = False,
+                         max_mb_per_part: float = 200, max_workers: int = 1, **kwargs):
        """ Create file or update file if it already exists

        :param file_name: file name in root or path to file
        :param file_content: file_content as bytes or BytesIO
+        :param multi_part_upload: use multipart upload (only available from TM1 12 onwards)
+        :param max_mb_per_part: maximum size in MB per part for multipart upload (only available from TM1 12 onwards)
+        :param max_workers: maximum number of parallel workers for multipart upload (only available from TM1 12 onwards)
        """
        if self.exists(file_name, **kwargs):
-            return self.update(file_name, file_content, **kwargs)
+            return self.update(file_name, file_content, multi_part_upload, max_mb_per_part, max_workers, **kwargs)
+
+        return self.create(file_name, file_content, multi_part_upload, max_mb_per_part, max_workers, **kwargs)

-        return self.create(file_name, file_content, **kwargs)
-
    @require_version(version="11.4")
    def exists(self, file_name: Union[str, Path], **kwargs):
        """ Check if file exists
@@ -193,23 +297,23 @@ def exists(self, file_name: Union[str, Path], **kwargs):
            extension="")

        return self._exists(url, **kwargs)
-
+
    @require_version(version="11.4")
    def delete(self, file_name: Union[str, Path], **kwargs):
        """ Delete file

        :param file_name: file name in root or path to file
        """
        path = Path(file_name)
-        self._check_subfolder_support(path=path, function="FileService.delete")
+        self._check_subfolder_support(path=path, function="FileService.delete")

        url = self._construct_content_url(
            path=path,
            exclude_path_end=False,
            extension="")

        return self._rest.DELETE(url, **kwargs)
-
+
    @require_version(version="11.4")
    def search_string_in_name(self, name_startswith: str = None, name_contains: Iterable = None,
                              name_contains_operator: str = 'and', path: Union[Path, str] = "",
@@ -242,9 +346,9 @@ def search_string_in_name(self, name_startswith: str = None, name_contains: Iter

        else:
            raise ValueError("'name_contains' must be str or iterable")
-
+
        path = Path(path)
-        self._check_subfolder_support(path=path, function="FileService.search_string_in_name")
+        self._check_subfolder_support(path=path, function="FileService.search_string_in_name")

        url = self._construct_content_url(
            path=Path(path),
@@ -253,3 +357,19 @@ def search_string_in_name(self, name_startswith: str = None, name_contains: Iter
        response = self._rest.GET(url, **kwargs).content

        return list(file['Name'] for file in json.loads(response)['value'])
+
+    @staticmethod
+    def _split_into_parts(data: Union[bytes, BytesIO], max_chunk_size: int = 200 * 1024 * 1024):
+        # Convert data to bytes if it's a BytesIO object
+        if isinstance(data, BytesIO):
+            data = data.getvalue()
+
+        # List to store chunks
+        parts = []
+
+        # Split data into chunks
+        for i in range(0, len(data), max_chunk_size):
+            part = data[i:i + max_chunk_size]
+            parts.append(part)
+
+        return parts