1
1
# -*- coding: utf-8 -*-
2
+ import concurrent .futures
2
3
import json
3
4
import warnings
5
+ from io import BytesIO
4
6
from pathlib import Path
5
7
from typing import List , Iterable , Union
6
8
9
+ from TM1py .Exceptions import TM1pyVersionException
7
10
from TM1py .Services import RestService
8
11
from TM1py .Services .ObjectService import ObjectService
9
12
from TM1py .Utils import format_url
10
13
from TM1py .Utils .Utils import verify_version , require_version
11
- from TM1py .Exceptions import TM1pyVersionException
12
14
13
15
14
16
class FileService (ObjectService ):
@@ -37,7 +39,7 @@ def get_names(self, **kwargs) -> bytes:
37
39
version_content_path = self .version_content_path )
38
40
39
41
return self ._rest .GET (url , ** kwargs ).content
40
-
42
+
41
43
@require_version (version = "11.4" )
42
44
def get_all_names (self , path : Union [str , Path ] = "" , ** kwargs ) -> List [str ]:
43
45
""" return list of blob file names
@@ -49,16 +51,16 @@ def get_all_names(self, path: Union[str, Path] = "", **kwargs) -> List[str]:
49
51
50
52
response = self ._rest .GET (url , ** kwargs ).content
51
53
return [file ['Name' ] for file in json .loads (response )['value' ]]
52
-
54
+
53
55
@require_version (version = "11.4" )
54
56
def get (self , file_name : str , ** kwargs ) -> bytes :
55
57
""" Get file
56
58
57
59
:param file_name: file name in root or path to file
58
60
"""
59
61
path = Path (file_name )
60
- self ._check_subfolder_support (path = path , function = "FileService.get" )
61
-
62
+ self ._check_subfolder_support (path = path , function = "FileService.get" )
63
+
62
64
url = self ._construct_content_url (
63
65
path = path ,
64
66
exclude_path_end = False ,
@@ -108,23 +110,27 @@ def _construct_content_url(self, path: Path, exclude_path_end: bool = True, exte
108
110
** parent_folders )
109
111
110
112
return url .rstrip ("/" )
111
-
113
+
112
114
def _check_subfolder_support (self , path : Path , function : str ) -> None :
113
115
REQUIRED_VERSION = "12"
114
116
if len (path .parts ) > 1 and not verify_version (required_version = REQUIRED_VERSION , version = self .version ):
115
- raise TM1pyVersionException (function = function , required_version = REQUIRED_VERSION , feature = 'Subfolder' )
116
-
117
+ raise TM1pyVersionException (function = function , required_version = REQUIRED_VERSION , feature = 'Subfolder' )
118
+
117
119
@require_version (version = "11.4" )
118
- def create (self , file_name : Union [str , Path ], file_content : bytes , ** kwargs ):
120
+ def create (self , file_name : Union [str , Path ], file_content : bytes , multi_part_upload : bool = False ,
121
+ max_mb_per_part : float = 200 , max_workers : int = 1 , ** kwargs ):
119
122
""" Create file
120
123
121
124
Folders in file_name (e.g. folderA/folderB/file.csv) will be created implicitly
122
125
123
126
:param file_name: file name in root or path to file
124
127
:param file_content: file_content as bytes or BytesIO
128
+ :param multi_part_upload: boolean use multipart upload or not (only available from TM1 12 onwards)
129
+ :param max_mb_per_part: max megabyte per part in multipart upload (only available from TM1 12 onwards)
130
+ :param max_workers: max parallel workers for multipart upload (only available from TM1 12 onwards)
125
131
"""
126
132
path = Path (file_name )
127
- self ._check_subfolder_support (path = path , function = "FileService.create" )
133
+ self ._check_subfolder_support (path = path , function = "FileService.create" )
128
134
129
135
# Create folder structure iteratively
130
136
if path .parents :
@@ -141,46 +147,133 @@ def create(self, file_name: Union[str, Path], file_content: bytes, **kwargs):
141
147
}
142
148
self ._rest .POST (url , json .dumps (body ), ** kwargs )
143
149
150
+ return self ._upload_file_content (path , file_content , multi_part_upload , max_mb_per_part , max_workers , ** kwargs )
151
+
152
def _upload_file_content(
        self,
        path: Path,
        file_content: bytes,
        multi_part_upload: bool = False,
        max_mb_per_part: float = 200,
        max_workers: int = 1,
        **kwargs):
    """ Write file_content to the Content of the file at path

    :param path: path to the file
    :param file_content: file_content as bytes or BytesIO
    :param multi_part_upload: boolean use multipart upload or not
    :param max_mb_per_part: max megabyte per part in multipart upload
    :param max_workers: max parallel workers for multipart upload
    :return: whatever the chosen upload call returns
    """
    url = self._construct_content_url(path, exclude_path_end=False, extension="Content")

    if multi_part_upload:
        # hand the outcome back to the caller, exactly like the single request branch below does
        return self.upload_file_content_with_mpu(url, file_content, max_mb_per_part, max_workers, **kwargs)

    return self._rest.PUT(
        url=url,
        data=file_content,
        headers=self.binary_http_header,
        **kwargs)
171
+
172
def upload_file_content_with_mpu(self, content_url: str, file_content: bytes, max_mb_per_part: float,
                                 max_workers: int = 1, **kwargs):
    """ Upload file content in several parts through the multipart upload requests

    The multipart upload is created first, then every part is posted and finally the upload is completed
    with the part numbers and etags that were returned for the individual parts.

    :param content_url: url of the Content of the file
    :param file_content: file_content as bytes or BytesIO
    :param max_mb_per_part: max megabyte per part, must result in a part size of at least one byte
    :param max_workers: number of worker threads; with a value below 2 the parts are posted one after another
    :return: result of the request that completes the multipart upload
    :raises ValueError: if max_mb_per_part results in a part size below one byte
    """
    max_part_size = int(max_mb_per_part * 1024 * 1024)
    if max_part_size < 1:
        # verified before the first request, so that no upload is created that can never be completed
        raise ValueError("'max_mb_per_part' must result in a part size of at least 1 byte")

    # Initiate multipart upload
    create_response = self._rest.POST(
        url=content_url + "/mpu.CreateMultipartUpload",
        data="{}",
        async_requests_mode=False,
        **kwargs)
    upload_id = create_response.json()['UploadID']

    # Split the file content into parts
    parts_to_upload = self._split_into_parts(
        data=file_content,
        max_chunk_size=max_part_size)

    # helper function for uploading each part
    def upload_part(part_index, bytes_part):
        part_response = self._rest.POST(
            url=content_url + f"/!uploads('{upload_id}')/Parts",
            data=bytes_part,
            headers={**self.binary_http_header, 'Accept': 'application/json,text/plain'},
            async_requests_mode=False,
            **kwargs)
        # parse the response body once instead of once per accessed key
        part_info = part_response.json()
        return part_index, part_info["PartNumber"], part_info["@odata.etag"]

    if max_workers > 1:
        # upload parts concurrently; results arrive in completion order and are sorted further below
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(upload_part, i, part)
                for i, part
                in enumerate(parts_to_upload)]
            part_numbers_and_etags = [
                future.result()
                for future
                in concurrent.futures.as_completed(futures)]
    else:
        # Sequential upload
        part_numbers_and_etags = [
            upload_part(i, part)
            for i, part
            in enumerate(parts_to_upload)]

    # Complete the multipart upload, with the same request options as the requests above
    return self._rest.POST(
        url=content_url + f"/!uploads('{upload_id}')/mpu.Complete",
        data=json.dumps(
            {"Parts": [
                {"PartNumber": part_number, "ETag": etag}
                for _, part_number, etag in sorted(part_numbers_and_etags)
            ]}
        ),
        async_requests_mode=False,
        **kwargs)
229
+
230
+ def _split_into_parts (self , data : Union [bytes , BytesIO ], max_chunk_size : int = 200 * 1024 * 1024 ):
231
+ # Convert data to bytes if it's a BytesIO object
232
+ if isinstance (data , BytesIO ):
233
+ data = data .getvalue ()
234
+
235
+ # List to store chunks
236
+ parts = []
237
+
238
+ # Split data into chunks
239
+ for i in range (0 , len (data ), max_chunk_size ):
240
+ part = data [i :i + max_chunk_size ]
241
+ parts .append (part )
242
+
243
+ return parts
244
+
151
245
@require_version (version = "11.4" )
152
def update(self, file_name: Union[str, Path], file_content: bytes, multi_part_upload: bool = False,
           max_mb_per_part: float = 200, max_workers: int = 1, **kwargs):
    """ Overwrite the content of an existing file

    :param file_name: file name in root or path to file
    :param file_content: file_content as bytes or BytesIO
    :param multi_part_upload: boolean use multipart upload or not (only available from TM1 12 onwards)
    :param max_mb_per_part: max megabyte per part in multipart upload (only available from TM1 12 onwards)
    :param max_workers: max parallel workers for multipart upload (only available from TM1 12 onwards)
    """
    target = Path(file_name)
    self._check_subfolder_support(function="FileService.update", path=target)

    return self._upload_file_content(
        target,
        file_content,
        multi_part_upload,
        max_mb_per_part,
        max_workers,
        **kwargs)
171
-
172
261
@require_version (version = "11.4" )
173
def update_or_create(self, file_name: Union[str, Path], file_content: bytes, multi_part_upload: bool = False,
                     max_mb_per_part: float = 200, max_workers: int = 1, **kwargs):
    """ Update the file if it exists, otherwise create it

    :param file_name: file name in root or path to file
    :param file_content: file_content as bytes or BytesIO
    :param multi_part_upload: boolean use multipart upload or not (only available from TM1 12 onwards)
    :param max_mb_per_part: max megabyte per part in multipart upload (only available from TM1 12 onwards)
    :param max_workers: max parallel workers for multipart upload (only available from TM1 12 onwards)
    """
    # the existence check decides which of the two write functions receives the unchanged arguments
    write = self.update if self.exists(file_name, **kwargs) else self.create

    return write(file_name, file_content, multi_part_upload, max_mb_per_part, max_workers, **kwargs)
183
-
184
277
@require_version (version = "11.4" )
185
278
def exists (self , file_name : Union [str , Path ], ** kwargs ):
186
279
""" Check if file exists
@@ -193,23 +286,23 @@ def exists(self, file_name: Union[str, Path], **kwargs):
193
286
extension = "" )
194
287
195
288
return self ._exists (url , ** kwargs )
196
-
289
+
197
290
@require_version (version = "11.4" )
198
291
def delete(self, file_name: Union[str, Path], **kwargs):
    """ Remove a file

    :param file_name: file name in root or path to file
    :return: result of the DELETE request
    """
    target = Path(file_name)
    self._check_subfolder_support(function="FileService.delete", path=target)

    file_url = self._construct_content_url(path=target, exclude_path_end=False, extension="")
    return self._rest.DELETE(file_url, **kwargs)
212
-
305
+
213
306
@require_version (version = "11.4" )
214
307
def search_string_in_name (self , name_startswith : str = None , name_contains : Iterable = None ,
215
308
name_contains_operator : str = 'and' , path : Union [Path , str ] = "" ,
@@ -242,9 +335,9 @@ def search_string_in_name(self, name_startswith: str = None, name_contains: Iter
242
335
243
336
else :
244
337
raise ValueError ("'name_contains' must be str or iterable" )
245
-
338
+
246
339
path = Path (path )
247
- self ._check_subfolder_support (path = path , function = "FileService.search_string_in_name" )
340
+ self ._check_subfolder_support (path = path , function = "FileService.search_string_in_name" )
248
341
249
342
url = self ._construct_content_url (
250
343
path = Path (path ),
0 commit comments