From d71b3217f09446178e099cd8ccd41719924fea59 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 30 May 2024 12:30:21 +0200 Subject: [PATCH 01/21] zip: Remove dead code and move some functions --- lib/stdlib/src/zip.erl | 503 ++++++++++++++-------------------- lib/stdlib/test/zip_SUITE.erl | 55 +--- 2 files changed, 212 insertions(+), 346 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index edae7180a4df..1cab335a1ee7 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -66,24 +66,12 @@ convention, add `.zip` to the filename. list_dir/1, list_dir/2, table/1, table/2, t/1, tt/1]). -%% unzipping piecemeal --export([openzip_open/1, openzip_open/2, - openzip_get/1, openzip_get/2, - openzip_t/1, openzip_tt/1, - openzip_list_dir/1, openzip_list_dir/2, - openzip_close/1]). -%% openzip_add/2]). - %% zip server -export([zip_open/1, zip_open/2, zip_get/1, zip_get/2, zip_get_crc32/2, - zip_t/1, zip_tt/1, - zip_list_dir/1, zip_list_dir/2, + zip_list_dir/1, zip_close/1]). -%% just for debugging zip server, not documented, not tested, not to be used --export([zip_get_state/1]). - %% includes -include("file.hrl"). % #file_info -include("zip.hrl"). % #zip_file, #zip_comment @@ -96,7 +84,6 @@ convention, add `.zip` to the filename. %% Debug. -define(SHOW_GP_BIT_11(B, F), ok). -%%-define(SHOW_GP_BIT_11(B, F), io:format("F = ~.16#, B = ~lp\n", [F, B])). %% option sets -record(unzip_opts, { @@ -152,19 +139,6 @@ convention, add `.zip` to the filename. %% max bytes read from files and archives (and fed to zlib) -define(READ_BLOCK_SIZE, 16*1024). -%% -record(primzip_file, { -%% name, -%% offset, -%% chunk_size -%% }). - -%% -record(primzip, { -%% zlib, % handle to the zlib port from zlib:open -%% input, % fun/2 for file/memory input -%% in, % input (file handle or binary) -%% files % [#primzip_file] -%% }). - %% ZIP-file format records and defines %% compression methods @@ -279,139 +253,6 @@ The record `zip_file` contains the following fields: -export_type([create_option/0, filename/0, handle/0]). -%% Open a zip archive with options -%% - --doc false. -openzip_open(F) -> - openzip_open(F, []). - --doc false. -openzip_open(F, Options) -> - case ?CATCH(do_openzip_open(F, Options)) of - {ok, OpenZip} -> - {ok, OpenZip}; - Error -> - {error, Error} - end. - -do_openzip_open(F, Options) -> - Opts = get_openzip_options(Options), - #openzip_opts{output = Output, open_opts = OpO, cwd = CWD} = Opts, - Input = get_input(F), - In0 = Input({open, F, OpO -- [write]}, []), - {[#zip_comment{comment = C} | Files], In1} = - get_central_dir(In0, fun raw_file_info_etc/5, Input), - Z = zlib:open(), - {ok, #openzip{zip_comment = C, - files = Files, - in = In1, - input = Input, - output = Output, - zlib = Z, - cwd = CWD}}. - -%% retrieve all files from an open archive --doc false. -openzip_get(OpenZip) -> - case ?CATCH(do_openzip_get(OpenZip)) of - {ok, Result} -> {ok, Result}; - Error -> {error, Error} - end. - -do_openzip_get(#openzip{files = Files, in = In0, input = Input, - output = Output, zlib = Z, cwd = CWD}) -> - ZipOpts = #unzip_opts{output = Output, input = Input, - file_filter = fun all/1, open_opts = [], - feedback = fun silent/1, cwd = CWD}, - R = get_z_files(Files, Z, In0, ZipOpts, []), - {ok, R}; -do_openzip_get(_) -> - throw(einval). - -%% retrieve the crc32 checksum from an open archive -openzip_get_crc32(FileName, #openzip{files = Files}) -> - case file_name_search(FileName, Files) of - {_,#zip_file_extra{crc32=CRC}} -> {ok, CRC}; - _ -> throw(file_not_found) - end. - -%% retrieve a file from an open archive --doc false. -openzip_get(FileName, OpenZip) -> - case ?CATCH(do_openzip_get(FileName, OpenZip)) of - {ok, Result} -> {ok, Result}; - Error -> {error, Error} - end. - -do_openzip_get(F, #openzip{files = Files, in = In0, input = Input, - output = Output, zlib = Z, cwd = CWD}) -> - %%case lists:keysearch(F, #zip_file.name, Files) of - case file_name_search(F, Files) of - {#zip_file{offset = Offset},_}=ZFile -> - In1 = Input({seek, bof, Offset}, In0), - case get_z_file(In1, Z, Input, Output, [], fun silent/1, - CWD, ZFile, fun all/1) of - {file, R, _In2} -> {ok, R}; - _ -> throw(file_not_found) - end; - _ -> throw(file_not_found) - end; -do_openzip_get(_, _) -> - throw(einval). - -file_name_search(Name,Files) -> - Fun = fun({ZipFile,_}) -> - not string:equal(ZipFile#zip_file.name, Name, - _IgnoreCase = false, _Norm = nfc) - end, - case lists:dropwhile(Fun, Files) of - [ZFile|_] -> ZFile; - [] -> false - end. - -%% %% add a file to an open archive -%% openzip_add(File, OpenZip) -> -%% case ?CATCH do_openzip_add(File, OpenZip) of -%% {ok, Result} -> {ok, Result}; -%% Error -> {error, Error} -%% end. - -%% do_openzip_add(File, #open_zip{files = Files, in = In0, -%% opts = Opts} = OpenZip0) -> -%% throw(nyi), -%% Z = zlib:open(), -%% R = get_z_files(Files, In0, Z, Opts, []), -%% zlib:close(Z), -%% {ok, R}; -%% do_openzip_add(_, _) -> -%% throw(einval). - -%% get file list from open archive --doc false. -openzip_list_dir(#openzip{zip_comment = Comment, - files = Files}) -> - {ZipFiles,_Extras} = lists:unzip(Files), - {ok, [#zip_comment{comment = Comment} | ZipFiles]}; -openzip_list_dir(_) -> - {error, einval}. - --doc false. -openzip_list_dir(#openzip{files = Files}, [names_only]) -> - {ZipFiles,_Extras} = lists:unzip(Files), - Names = [Name || {#zip_file{name=Name},_} <- ZipFiles], - {ok, Names}; -openzip_list_dir(_, _) -> - {error, einval}. - -%% close an open archive --doc false. -openzip_close(#openzip{in = In0, input = Input, zlib = Z}) -> - Input(close, In0), - zlib:close(Z); -openzip_close(_) -> - {error, einval}. - %% Extract from a zip archive with options %% %% Accepted options: @@ -761,6 +602,107 @@ do_list_dir(F, Options) -> Input(close, In1), {ok, Info}. +-doc(#{equiv => zip_open/2}). +-spec(zip_open(Archive) -> {ok, ZipHandle} | {error, Reason} when + Archive :: file:name() | binary(), + ZipHandle :: handle(), + Reason :: term()). + +zip_open(Archive) -> zip_open(Archive, []). + +-doc """ +Opens a zip archive, and reads and saves its directory. This means that later +reading files from the archive is faster than unzipping files one at a time with +[`unzip/1,2`](`unzip/1`). + +The archive must be closed with `zip_close/1`. + +The `ZipHandle` is closed if the process that originally opened the archive +dies. +""". +-spec(zip_open(Archive, Options) -> {ok, ZipHandle} | {error, Reason} when + Archive :: file:name() | binary(), + ZipHandle :: handle(), + Options :: [Option], + Option :: cooked | memory | {cwd, CWD :: file:filename()}, + Reason :: term()). + +zip_open(Archive, Options) -> + Self = self(), + Pid = spawn_link(fun() -> server_init(Self) end), + request(Self, Pid, {open, Archive, Options}). + +-doc(#{equiv => zip_get/2}). +-spec(zip_get(ZipHandle) -> {ok, [Result]} | {error, Reason} when + ZipHandle :: handle(), + Result :: file:name() | {file:name(), binary()}, + Reason :: term()). + +zip_get(Pid) when is_pid(Pid) -> + request(self(), Pid, get). + +-doc """ +Closes a zip archive, previously opened with [`zip_open/1,2`](`zip_open/1`). All +resources are closed, and the handle is not to be used after closing. +""". +-spec(zip_close(ZipHandle) -> ok | {error, einval} when + ZipHandle :: handle()). + +zip_close(Pid) when is_pid(Pid) -> + request(self(), Pid, close). + +-doc """ +Extracts one or all files from an open archive. + +The files are unzipped to memory or to file, depending on the options specified +to function [`zip_open/1,2`](`zip_open/1`) when opening the archive. +""". +-spec(zip_get(FileName, ZipHandle) -> {ok, Result} | {error, Reason} when + FileName :: file:name(), + ZipHandle :: handle(), + Result :: file:name() | {file:name(), binary()}, + Reason :: term()). + +zip_get(FileName, Pid) when is_pid(Pid) -> + request(self(), Pid, {get, FileName}). + +-doc "Extracts one crc32 checksum from an open archive.". +-doc(#{since => <<"OTP 26.0">>}). +-spec(zip_get_crc32(FileName, ZipHandle) -> {ok, CRC} | {error, Reason} when + FileName :: file:name(), + ZipHandle :: handle(), + CRC :: non_neg_integer(), + Reason :: term()). + +zip_get_crc32(FileName, Pid) when is_pid(Pid) -> + request(self(), Pid, {get_crc32, FileName}). + +-doc """ +Returns the file list of an open zip archive. The first returned element is the +zip archive comment. +""". +-spec(zip_list_dir(ZipHandle) -> {ok, Result} | {error, Reason} when + Result :: [zip_comment() | zip_file()], + ZipHandle :: handle(), + Reason :: term()). + +zip_list_dir(Pid) when is_pid(Pid) -> + request(self(), Pid, list_dir). + +request(Self, Pid, Req) -> + Pid ! {Self, Req}, + receive + {Pid, R} -> R + end. + +zip_t(Pid) when is_pid(Pid) -> + Openzip = request(self(), Pid, get_state), + openzip_t(Openzip). + +zip_tt(Pid) when is_pid(Pid) -> + Openzip = request(self(), Pid, get_state), + openzip_tt(Openzip). + %% Print zip directory in short form -doc """ @@ -772,7 +714,6 @@ to `tar t`.) ZipHandle :: handle()). t(F) when is_pid(F) -> zip_t(F); -t(F) when is_record(F, openzip) -> openzip_t(F); t(F) -> t(F, fun raw_short_print_info_etc/5). t(F, RawPrint) -> @@ -800,7 +741,6 @@ the Erlang shell. (Similar to `tar tv`.) ZipHandle :: handle()). tt(F) when is_pid(F) -> zip_tt(F); -tt(F) when is_record(F, openzip) -> openzip_tt(F); tt(F) -> t(F, fun raw_long_print_info_etc/5). @@ -1412,12 +1352,112 @@ local_file_header_from_info_method_name(#file_info{mtime = MTime, type = Type}, extra_field_length = 0, type = Type}. +%% +%% Functions used by zip server +%% + +openzip_open(F, Options) -> + case ?CATCH(do_openzip_open(F, Options)) of + {ok, OpenZip} -> + {ok, OpenZip}; + Error -> + {error, Error} + end. + +do_openzip_open(F, Options) -> + Opts = get_openzip_options(Options), + #openzip_opts{output = Output, open_opts = OpO, cwd = CWD} = Opts, + Input = get_input(F), + In0 = Input({open, F, OpO -- [write]}, []), + {[#zip_comment{comment = C} | Files], In1} = + get_central_dir(In0, fun raw_file_info_etc/5, Input), + Z = zlib:open(), + {ok, #openzip{zip_comment = C, + files = Files, + in = In1, + input = Input, + output = Output, + zlib = Z, + cwd = CWD}}. + +%% retrieve all files from an open archive +openzip_get(OpenZip) -> + case ?CATCH(do_openzip_get(OpenZip)) of + {ok, Result} -> {ok, Result}; + Error -> {error, Error} + end. + +do_openzip_get(#openzip{files = Files, in = In0, input = Input, + output = Output, zlib = Z, cwd = CWD}) -> + ZipOpts = #unzip_opts{output = Output, input = Input, + file_filter = fun all/1, open_opts = [], + feedback = fun silent/1, cwd = CWD}, + R = get_z_files(Files, Z, In0, ZipOpts, []), + {ok, R}; +do_openzip_get(_) -> + throw(einval). + +%% retrieve the crc32 checksum from an open archive +openzip_get_crc32(FileName, #openzip{files = Files}) -> + case file_name_search(FileName, Files) of + {_,#zip_file_extra{crc32=CRC}} -> {ok, CRC}; + _ -> throw(file_not_found) + end. + +%% retrieve a file from an open archive +openzip_get(FileName, OpenZip) -> + case ?CATCH(do_openzip_get(FileName, OpenZip)) of + {ok, Result} -> {ok, Result}; + Error -> {error, Error} + end. + +do_openzip_get(F, #openzip{files = Files, in = In0, input = Input, + output = Output, zlib = Z, cwd = CWD}) -> + %%case lists:keysearch(F, #zip_file.name, Files) of + case file_name_search(F, Files) of + {#zip_file{offset = Offset},_}=ZFile -> + In1 = Input({seek, bof, Offset}, In0), + case get_z_file(In1, Z, Input, Output, [], fun silent/1, + CWD, ZFile, fun all/1) of + {file, R, _In2} -> {ok, R}; + _ -> throw(file_not_found) + end; + _ -> throw(file_not_found) + end; +do_openzip_get(_, _) -> + throw(einval). + +file_name_search(Name,Files) -> + Fun = fun({ZipFile,_}) -> + not string:equal(ZipFile#zip_file.name, Name, + _IgnoreCase = false, _Norm = nfc) + end, + case lists:dropwhile(Fun, Files) of + [ZFile|_] -> ZFile; + [] -> false + end. + +%% get file list from open archive +openzip_list_dir(#openzip{zip_comment = Comment, + files = Files}) -> + {ZipFiles,_Extras} = lists:unzip(Files), + {ok, [#zip_comment{comment = Comment} | ZipFiles]}; +openzip_list_dir(_) -> + {error, einval}. + +%% close an open archive +openzip_close(#openzip{in = In0, input = Input, zlib = Z}) -> + Input(close, In0), + zlib:close(Z); +openzip_close(_) -> + {error, einval}. + +%% small, simple, stupid zip-archive server server_init(Parent) -> %% we want to know if our parent dies process_flag(trap_exit, true), server_loop(Parent, not_open). -%% small, simple, stupid zip-archive server server_loop(Parent, OpenZip) -> receive {From, {open, Archive, Options}} -> @@ -1442,9 +1482,6 @@ server_loop(Parent, OpenZip) -> {From, list_dir} -> From ! {self(), openzip_list_dir(OpenZip)}, server_loop(Parent, OpenZip); - {From, {list_dir, Opts}} -> - From ! {self(), openzip_list_dir(OpenZip, Opts)}, - server_loop(Parent, OpenZip); {From, get_state} -> From ! {self(), OpenZip}, server_loop(Parent, OpenZip); @@ -1455,118 +1492,6 @@ server_loop(Parent, OpenZip) -> {error, bad_msg} end. --doc(#{equiv => zip_open/2}). --spec(zip_open(Archive) -> {ok, ZipHandle} | {error, Reason} when - Archive :: file:name() | binary(), - ZipHandle :: handle(), - Reason :: term()). - -zip_open(Archive) -> zip_open(Archive, []). - --doc """ -Opens a zip archive, and reads and saves its directory. This means that later -reading files from the archive is faster than unzipping files one at a time with -[`unzip/1,2`](`unzip/1`). - -The archive must be closed with `zip_close/1`. - -The `ZipHandle` is closed if the process that originally opened the archive -dies. -""". --spec(zip_open(Archive, Options) -> {ok, ZipHandle} | {error, Reason} when - Archive :: file:name() | binary(), - ZipHandle :: handle(), - Options :: [Option], - Option :: cooked | memory | {cwd, CWD :: file:filename()}, - Reason :: term()). - -zip_open(Archive, Options) -> - Self = self(), - Pid = spawn_link(fun() -> server_init(Self) end), - request(Self, Pid, {open, Archive, Options}). - --doc(#{equiv => zip_get/2}). --spec(zip_get(ZipHandle) -> {ok, [Result]} | {error, Reason} when - ZipHandle :: handle(), - Result :: file:name() | {file:name(), binary()}, - Reason :: term()). - -zip_get(Pid) when is_pid(Pid) -> - request(self(), Pid, get). - --doc """ -Closes a zip archive, previously opened with [`zip_open/1,2`](`zip_open/1`). All -resources are closed, and the handle is not to be used after closing. -""". --spec(zip_close(ZipHandle) -> ok | {error, einval} when - ZipHandle :: handle()). - -zip_close(Pid) when is_pid(Pid) -> - request(self(), Pid, close). - --doc """ -Extracts one or all files from an open archive. - -The files are unzipped to memory or to file, depending on the options specified -to function [`zip_open/1,2`](`zip_open/1`) when opening the archive. -""". --spec(zip_get(FileName, ZipHandle) -> {ok, Result} | {error, Reason} when - FileName :: file:name(), - ZipHandle :: handle(), - Result :: file:name() | {file:name(), binary()}, - Reason :: term()). - -zip_get(FileName, Pid) when is_pid(Pid) -> - request(self(), Pid, {get, FileName}). - --doc "Extracts one crc32 checksum from an open archive.". --doc(#{since => <<"OTP 26.0">>}). --spec(zip_get_crc32(FileName, ZipHandle) -> {ok, CRC} | {error, Reason} when - FileName :: file:name(), - ZipHandle :: handle(), - CRC :: non_neg_integer(), - Reason :: term()). - -zip_get_crc32(FileName, Pid) when is_pid(Pid) -> - request(self(), Pid, {get_crc32, FileName}). - --doc """ -Returns the file list of an open zip archive. The first returned element is the -zip archive comment. -""". --spec(zip_list_dir(ZipHandle) -> {ok, Result} | {error, Reason} when - Result :: [zip_comment() | zip_file()], - ZipHandle :: handle(), - Reason :: term()). - -zip_list_dir(Pid) when is_pid(Pid) -> - request(self(), Pid, list_dir). - --doc false. -zip_list_dir(Pid, Opts) when is_pid(Pid) -> - request(self(), Pid, {list_dir, Opts}). - --doc false. -zip_get_state(Pid) when is_pid(Pid) -> - request(self(), Pid, get_state). - -request(Self, Pid, Req) -> - Pid ! {Self, Req}, - receive - {Pid, R} -> R - end. - --doc false. -zip_t(Pid) when is_pid(Pid) -> - Openzip = request(self(), Pid, get_state), - openzip_t(Openzip). - --doc false. -zip_tt(Pid) when is_pid(Pid) -> - Openzip = request(self(), Pid, get_state), - openzip_tt(Openzip). - --doc false. openzip_tt(#openzip{zip_comment = ZipComment, files = Files}) -> print_comment(ZipComment), lists_foreach(fun({#zip_file{comp_size = CompSize, @@ -1579,7 +1504,6 @@ openzip_tt(#openzip{zip_comment = ZipComment, files = Files}) -> end, Files), ok. --doc false. openzip_t(#openzip{zip_comment = ZipComment, files = Files}) -> print_comment(ZipComment), lists_foreach(fun({#zip_file{name = FileName},_}) -> @@ -2070,27 +1994,6 @@ local_file_header_from_bin(< throw(bad_local_file_header). -%% make a file_info from a local directory header -%% local_file_header_to_file_info( -%% #local_file_header{last_mod_time = ModTime, -%% last_mod_date = ModDate, -%% uncomp_size = UncompSize}) -> -%% T = dos_date_time_to_datetime(ModDate, ModTime), -%% FI = #file_info{size = UncompSize, -%% type = regular, -%% access = read_write, -%% atime = T, -%% mtime = T, -%% ctime = T, -%% mode = 8#066, -%% links = 1, -%% major_device = 0, -%% minor_device = 0, -%% inode = 0, -%% uid = 0, -%% gid = 0}, -%% FI. - %% io functions binary_io({file_info, {_Filename, _B, #file_info{} = FI}}, _A) -> FI; diff --git a/lib/stdlib/test/zip_SUITE.erl b/lib/stdlib/test/zip_SUITE.erl index 3810c0362c04..4e90184445c4 100644 --- a/lib/stdlib/test/zip_SUITE.erl +++ b/lib/stdlib/test/zip_SUITE.erl @@ -24,7 +24,7 @@ bad_zip/1, unzip_from_binary/1, unzip_to_binary/1, zip_to_binary/1, unzip_options/1, zip_options/1, list_dir_options/1, aliases/1, - openzip_api/1, zip_api/1, open_leak/1, unzip_jar/1, + zip_api/1, open_leak/1, unzip_jar/1, unzip_traversal_exploit/1, compress_control/1, foldl/1,fd_leak/1,unicode/1,test_zip_dir/1, @@ -39,9 +39,9 @@ suite() -> [{ct_hooks,[ts_install_cth]}]. all() -> [borderline, atomic, bad_zip, unzip_from_binary, unzip_to_binary, zip_to_binary, unzip_options, - zip_options, list_dir_options, aliases, openzip_api, + zip_options, list_dir_options, aliases, zip_api, open_leak, unzip_jar, compress_control, foldl, - unzip_traversal_exploit,fd_leak,unicode,test_zip_dir, + unzip_traversal_exploit, fd_leak, unicode, test_zip_dir, explicit_file_info]. groups() -> @@ -223,43 +223,6 @@ atomic(Config) when is_list(Config) -> ok. -%% Test the openzip_open/2, openzip_get/1, openzip_get/2, openzip_close/1 -%% and openzip_list_dir/1 functions. -openzip_api(Config) when is_list(Config) -> - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), - DataFiles = data_files(), - Names = [Name || {Name, _, _} <- DataFiles], - io:format("Names: ~p", [Names]), - - %% Create a zip archive - - Zip = "zip.zip", - {ok, Zip} = zip:zip(Zip, Names, []), - - %% Open archive - {ok, OpenZip} = zip:openzip_open(Zip, [memory]), - - %% List dir - Names = names_from_list_dir(zip:openzip_list_dir(OpenZip)), - - %% Get a file - Name1 = hd(Names), - {ok, Data1} = file:read_file(Name1), - {ok, {Name1, Data1}} = zip:openzip_get(Name1, OpenZip), - - %% Get all files - FilesDatas = lists:map(fun(Name) -> {ok, B} = file:read_file(Name), - {Name, B} end, Names), - {ok, FilesDatas} = zip:openzip_get(OpenZip), - - %% Close - ok = zip:openzip_close(OpenZip), - - %% Clean up. - delete_files([Names]), - - ok. - %% Test the zip_open/2, zip_get/1, zip_get/2, zip_close/1, %% and zip_list_dir/1 functions. zip_api(Config) when is_list(Config) -> @@ -533,8 +496,8 @@ bad_zip(Config) when is_list(Config) -> try_bad("bad_eocd", bad_eocd, Config), try_bad("enoent", enoent, Config), GetNotFound = fun(A) -> - {ok, O} = zip:openzip_open(A, []), - zip:openzip_get("not_here", O) + {ok, O} = zip:zip_open(A, []), + zip:zip_get("not_here", O) end, try_bad("abc", file_not_found, GetNotFound, Config), ok. @@ -765,11 +728,11 @@ test_compress_control(Dir, Files, ZipOptions, Expected) -> create_files(Files), {ok, Zip} = zip:create(Zip, [Dir], ZipOptions), - {ok, OpenZip} = zip:openzip_open(Zip, [memory]), - {ok,[#zip_comment{comment = ""} | ZipList]} = zip:openzip_list_dir(OpenZip), + {ok, OpenZip} = zip:zip_open(Zip, [memory]), + {ok,[#zip_comment{comment = ""} | ZipList]} = zip:zip_list_dir(OpenZip), io:format("compress_control: -> ~p -> ~p\n -> ~pn", [Expected, ZipOptions, ZipList]), verify_compression(Files, ZipList, OpenZip, ZipOptions, Expected), - ok = zip:openzip_close(OpenZip), + ok = zip:zip_close(OpenZip), %% Cleanup delete_files([Zip]), @@ -783,7 +746,7 @@ verify_compression([{Name, Kind, _Filler} | Files], ZipList, OpenZip, ZipOptions dir -> {Name ++ "/", 0}; _ -> - {ok, {Name, Bin}} = zip:openzip_get(Name, OpenZip), + {ok, {Name, Bin}} = zip:zip_get(Name, OpenZip), {Name, size(Bin)} end, {Name2, {value, ZipFile}} = {Name2, lists:keysearch(Name2, #zip_file.name, ZipList)}, From 84eed7ee7a17baa11e81b02c1ea75d03b5ed4645 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 30 May 2024 12:32:43 +0200 Subject: [PATCH 02/21] zip: Fix dos date time convertion of seconds --- erts/preloaded/ebin/prim_zip.beam | Bin 9488 -> 9532 bytes erts/preloaded/src/prim_zip.erl | 2 +- lib/stdlib/src/zip.erl | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/erts/preloaded/ebin/prim_zip.beam b/erts/preloaded/ebin/prim_zip.beam index 2ab80d4b494d4401f834387c1fc9025caf83826b..44ad467c92954fb9480d17b8ae13a625983cf58f 100644 GIT binary patch delta 1117 zcmY*YU2IfE6rS0+yFD!2WbACIK-s-@FQ5>@g@UQRIBiq;5lgg%MN|s32yIhnED6O} zTg9KMD8)fxga;FOA@rePAsVeu@<0M4Xyac53RYD9p7hNZz9|>MNxsZ?zVn@PW+wLz zt{7PnKDp}Ig^bNwTw7oB^vbqvn;GLP*}XLcdU#*>FMrTvxt2}4x}0MXQ~^{JyZBW8 zP!xdQzj7>KtY*jRYR0^OkmxAxPaF~nKDW3a)yS!Cdn>w3Zz}F7NtJ0!ul6SUX-1)u zrdKLC+J*|tD8;F0>6K-I(+NwvmLWcAsW(xYE|^ml$LFq9`Kr3c zmhRo=^WwLo<9|< zrPtwpA>cH!#Qz3e-e?G@!mP?pOYTNuz3I%Zsd%$Q(zH4LsjwiZOovK)LTAE(Ah7h7 z8OuJx-!_wS^Y}YvNiIqbpsd-n=N8F#!8V}t*?IoCf}$B_D7T>Ry*s_m7eYM6|JzAR z?|XrNT54JEB~-moqSW2*yrn(8NjZ#?GFPH>MkL{jIDI$YKa;ogL2ptIyAcm4T_sY; zar)lUhd97SxJIW$(NKBdWpsXaXlXBojMNI2(ZbUq?mXzuRO?A~RHy-oTkSLbLHVN} zp?nj{f0)7|OU8@*QIdmr;~y>UN3KP3`bj4g=KaFyXGiu8qQ|5s5X zg!5eDbkWiQm%*c4s`kfJjk=o|!#yushU8`pxS~r%gQS_9e!HWg)wmYQ$pK|K{chD>KcXg12GPHNA9$a~$Jz-O?eb?I^qPU)fDdhaq;r zdh0R&rg$8Pq;{W5lvZh_lxW=Xa+CY;mv?qy=RKgg;$M|d@n_b6CV&#)#ryb6;5^nH zPt^CgQnohM@MBEm$FV!^))ipA>B%qc>i~0IZvhi4)%kEUNIT$u$Qyv`&SQN!xbyFg wcNXhU#PL?_uI{eh7VFordydC`=R-W3{RxtEL7dnE?iySLQozJu(;5!l2H0rcR{#J2 delta 1059 zcmZ8gTWnNC7@pZVyZuu*w5wm4oG`eI@q*wBZDMKrO#s1GJyf>zXe0kIb3_JWVR_)T{K!z6#^oA3Mn%S_H0 zZ5n6_7C*J}amMCSV>d*3p=jGWB+JA4g)KPw zLP}0~L%Ws=PG=46T#EBYhF*`vOV{WtRc9W}$`rh9TleY@^Iwcj{5#Eq<6~9YDh%~B zdyLa3hTedEt^T}v2VbWb)KpaWP6?kH+701ppVR1k_cJ8yY4)j7zgjap`X+?+y1!;k z)!q`1W%2ao=TP~Z zTKC(mqze6Aa$fJjLap=JfGQp+=NUTW`0iJp@-j|DW`q=RdzJHs4);X85u}va6sZy- z3SY?S`^Vg`Q-+T8MAe9GF;l8~5rf9*2SZ0Ozy`R+ribBhx$h)Ysy8rn3{6HG1xqO5 zr~r5Fw=*?5p3Di^=iycZcgIKwk#U5G`xxU#L&s6@c@(@borQ+pc0zZIya7D$PlhzK z7d%cstBAw| z=Ty#OtYzDlRrspevX{HD^Opc?^7PPho_Pi^f+#`Y8)y7~IFfnI+F5InYcJ{(*e$l} zGGe_RN-fEB0oh8o5QTMa`+R^-g#FEKAZ&Hc3j8u)>+dh_% <> = <>, <> = <>, {{YearFrom1980+1980, Month, Day}, - {Hour, Min, Sec}}. + {Hour, Min, Sec * 2}}. cd_file_header_from_bin(< <> = <>, <> = <>, {{YearFrom1980+1980, Month, Day}, - {Hour, Min, Sec}}. + {Hour, Min, Sec * 2}}. dos_date_time_from_datetime(Seconds) when is_integer(Seconds) -> DateTime = calendar:now_to_datetime({0, Seconds, 0}), dos_date_time_from_datetime(DateTime); dos_date_time_from_datetime({{Year, Month, Day}, {Hour, Min, Sec}}) -> YearFrom1980 = Year-1980, - <> = <>, + <> = <>, <> = <>, {DosDate, DosTime}. From 97a1eae3febe654babf82edb86a6b3ce348b67ef Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 30 May 2024 10:21:55 +0200 Subject: [PATCH 03/21] zip: Fix zip:foldl to not use prim_zip prim_zip is planned to be removed so we remove usage of it in zip. --- lib/stdlib/src/zip.erl | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index c534a9e4dc36..ad716c3ce087 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -406,15 +406,31 @@ _Example:_ Archive :: file:name() | {file:name(), binary()}, Reason :: term()). +foldl(Fun, Acc0, {_Filename, Binary}) -> + foldl(Fun, Acc0, Binary); foldl(Fun, Acc0, Archive) when is_function(Fun, 4) -> - ZipFun = - fun({Name, GetInfo, GetBin}, A) -> - A2 = Fun(Name, GetInfo, GetBin, A), - {true, false, A2} - end, - case prim_zip:open(ZipFun, Acc0, Archive) of - {ok, PrimZip, Acc1} -> - ok = prim_zip:close(PrimZip), + case zip_open(Archive,[memory]) of + {ok, Handle} -> + {ok, Files} = zip_list_dir(Handle), + Acc1 = + lists:foldl( + fun(#zip_comment{}, Acc) -> + Acc; + (#zip_file{ name = Name, info = Info }, Acc) -> + GetInfo = fun() -> Info end, + GetBin = case lists:last(Name) of + $/ -> fun() -> <<>> end; + _ -> + fun() -> + case zip_get(Name, Handle) of + {ok, {Name, Data}} -> Data; + {error, Error} -> throw({Name, Error}) + end + end + end, + Fun(Name, GetInfo, GetBin, Acc) + end, Acc0, Files), + ok = zip_close(Handle), {ok, Acc1}; {error, bad_eocd} -> {error, "Not an archive file"}; From 30988f64babdac240efd7bd365c3d7adc6242331 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 30 May 2024 15:27:24 +0200 Subject: [PATCH 04/21] zip: The mod time should be in local_time --- lib/stdlib/src/zip.erl | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index ad716c3ce087..d08d0b344f94 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -237,7 +237,10 @@ The record `zip_file` contains the following fields: - **`name`** - The filename -- **`info`** - File information as in `file:read_file_info/1` in Kernel +- **`info`** - File information as in `file:read_file_info/1` in Kernel. + `mtime`, `atime` and `ctime` are expected to be + in [`local time`](`erlang:localtime/0`) if represented using `t:calendar:datetime/0`, + or in [OS system time](`e:erts:time_correction.md#os-system-time`) if represented by an integer. - **`comment`** - The comment for the file in the zip archive @@ -345,7 +348,7 @@ do_unzip(F, Options) -> %% Iterate over all files in a zip archive -doc """ -Calls `Fun(FileInArchive, GetInfo , GetBin, AccIn)` on successive files in the +Calls `Fun(FileInArchive, GetInfo, GetBin, AccIn)` on successive files in the `Archive`, starting with `AccIn == Acc0`. `FileInArchive` is the name that the file has in the archive. @@ -1355,7 +1358,9 @@ eocd_to_bin(#eocd{disk_num = DiskNum, local_file_header_from_info_method_name(#file_info{mtime = MTime, type = Type}, UncompSize, CompMethod, Name, GPFlag) -> - {ModDate, ModTime} = dos_date_time_from_datetime(MTime), + {ModDate, ModTime} = dos_date_time_from_datetime( + calendar:system_time_to_local_time( + datetime_to_system_time(MTime), second)), #local_file_header{version_needed = 20, gp_flag = GPFlag, comp_method = CompMethod, @@ -1867,6 +1872,16 @@ dos_date_time_from_datetime({{Year, Month, Day}, {Hour, Min, Sec}}) -> <> = <>, {DosDate, DosTime}. +%% Convert a local datetime or universal time seconds to +%% system time (aka POSIX time, aka Unix time) +datetime_to_system_time(undefined) -> + undefined; +datetime_to_system_time(PosixTime) when is_integer(PosixTime) -> + PosixTime; +datetime_to_system_time(DateTime) -> + erlang:universaltime_to_posixtime( + erlang:localtime_to_universaltime(DateTime)). + %% A pwrite-like function for iolists (used by memory-option) pwrite_binary(B, Pos, Bin) when byte_size(B) =:= Pos -> From 8d537f51a4262d24f3395c4148323eaca9facbbd Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 30 May 2024 15:36:20 +0200 Subject: [PATCH 05/21] zip: List dir now returns directories --- lib/stdlib/src/zip.erl | 46 ++++++++++++++++++++--------------- lib/stdlib/test/zip_SUITE.erl | 27 +++++++++++++++----- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index d08d0b344f94..d71e85342692 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -1700,7 +1700,8 @@ get_z_files([{#zip_file{offset = Offset},_} = ZFile | Rest], Z, In0, {In2, Acc1} = case get_z_file(In1, Z, Input, Output, OpO, FB, CWD, ZFile, Filter) of - {file, GZD, Inx} -> {Inx, [GZD | Acc0]}; + {Type, GZD, Inx} when Type =:= file; Type =:= dir -> + {Inx, [GZD | Acc0]}; {_, Inx} -> {Inx, Acc0} end, get_z_files(Rest, Z, In2, Opts, Acc1); @@ -1742,9 +1743,8 @@ get_z_file(In0, Z, Input, Output, OpO, FB, true -> case lists:last(FileName) of $/ -> - %% perhaps this should always be done? - Output({ensure_dir,FileName1},[]), - {dir, In3}; + Out1 = Output({ensure_path,FileName1},[]), + {dir, Out1, In3}; _ -> %% FileInfo = local_file_header_to_file_info(LH) %%{Out, In4, CRC, UncompSize} = @@ -1768,18 +1768,24 @@ get_z_file(In0, Z, Input, Output, OpO, FB, %% make sure FileName doesn't have relative path that points over CWD check_valid_location(CWD, FileName) -> + TrailingSlash = case lists:last(FileName) of + $/ -> "/"; + _ -> "" + end, %% check for directory traversal exploit - case check_dir_level(filename:split(FileName), 0) of - {FileOrDir,Level} when Level < 0 -> - CWD1 = if CWD == "" -> "./"; - true -> CWD - end, - error_logger:format("Illegal path: ~ts, extracting in ~ts~n", - [add_cwd(CWD,FileName),CWD1]), - {false,add_cwd(CWD, FileOrDir)}; - _ -> - {true,add_cwd(CWD, FileName)} - end. + {IsValid, Cwd, Name} = + case check_dir_level(filename:split(FileName), 0) of + {FileOrDir,Level} when Level < 0 -> + CWD1 = if CWD == "" -> "./"; + true -> CWD + end, + error_logger:format("Illegal path: ~ts, extracting in ~ts~n", + [add_cwd(CWD,FileName),CWD1]), + {false, CWD, FileOrDir}; + _ -> + {true, CWD, FileName} + end, + {IsValid, string:trim(add_cwd(Cwd, Name), trailing, "/") ++ TrailingSlash}. check_dir_level([FileOrDir], Level) -> {FileOrDir,Level}; @@ -2088,8 +2094,8 @@ binary_io({list_dir, _F}, _B) -> []; binary_io({set_file_info, _F, _FI}, B) -> B; -binary_io({ensure_dir, _Dir}, B) -> - B. +binary_io({ensure_path, Dir}, _B) -> + {Dir, <<>>}. file_io({file_info, F}, _) -> case file:read_file_info(F) of @@ -2149,6 +2155,6 @@ file_io({set_file_info, F, FI}, H) -> ok -> H; {error, Error} -> throw(Error) end; -file_io({ensure_dir, Dir}, H) -> - ok = filelib:ensure_dir(Dir), - H. +file_io({ensure_path, Dir}, _H) -> + ok = filelib:ensure_path(Dir), + Dir. diff --git a/lib/stdlib/test/zip_SUITE.erl b/lib/stdlib/test/zip_SUITE.erl index 4e90184445c4..9d822d0d9fbc 100644 --- a/lib/stdlib/test/zip_SUITE.erl +++ b/lib/stdlib/test/zip_SUITE.erl @@ -380,18 +380,22 @@ unzip_jar(Config) when is_list(Config) -> %% create a temp directory Subdir = filename:join(PrivDir, "jartest"), ok = file:make_dir(Subdir), - ok = file:set_cwd(Subdir), FList = ["META-INF/MANIFEST.MF","test.txt"], - {ok, RetList} = zip:unzip(JarFile), + {ok, RetList} = zip:unzip(JarFile, [{cwd, Subdir}]), %% Verify. lists:foreach(fun(F)-> {ok,B} = file:read_file(filename:join(DataDir, F)), {ok,B} = file:read_file(filename:join(Subdir, F)) end, FList), - lists:foreach(fun(F)-> ok = file:delete(F) end, - RetList), + lists:foreach(fun(F)-> + case lists:last(F) =:= $/ of + true -> ok = file:del_dir(F); + false -> ok = file:delete(F) + end + end, + lists:reverse(RetList)), %% Clean up and verify no more files. 0 = delete_files([Subdir]), @@ -528,6 +532,7 @@ unzip_to_binary(Config) when is_list(Config) -> PrivDir = proplists:get_value(priv_dir, Config), WorkDir = filename:join(PrivDir, "unzip_to_binary"), _ = file:make_dir(WorkDir), + _ = file:make_dir(filename:join(DataDir, "empty")), ok = file:set_cwd(WorkDir), Long = filename:join(DataDir, "abc.zip"), @@ -536,7 +541,16 @@ unzip_to_binary(Config) when is_list(Config) -> {ok, FBList} = zip:unzip(Long, [memory]), %% Verify. - lists:foreach(fun({F,B}) -> {ok,B}=file:read_file(filename:join(DataDir, F)) + lists:foreach(fun({F,B}) -> + Filename = filename:join(DataDir, F), + case lists:last(F) =:= $/ of + true -> + <<>> = B, + {ok, #file_info{ type = directory}} = + file:read_file_info(Filename); + false -> + {ok,B}=file:read_file(filename:join(DataDir, F)) + end end, FBList), %% Make sure no files created in cwd @@ -609,11 +623,12 @@ unzip_from_binary(Config) when is_list(Config) -> Quote = "quotes/rain.txt", Wikipedia = "wikipedia.txt", EmptyFile = "emptyFile", + EmptyDir = "empty/", file:set_cwd(ExtractDir), %% Read a zip file into a binary and extract from the binary. {ok, Bin} = file:read_file(Archive), - {ok, [FileName,Quote,Wikipedia,EmptyFile]} = zip:unzip(Bin), + {ok, [FileName,Quote,EmptyDir,Wikipedia,EmptyFile]} = zip:unzip(Bin), %% Verify. DestFilename = filename:join(ExtractDir, "abc.txt"), From 8043394b9f469eb4f8893ffd3dbf3c19b9748787 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 30 May 2024 15:40:28 +0200 Subject: [PATCH 06/21] zip: Correctly encode Unix external attributes in zip files --- lib/stdlib/src/zip.erl | 49 +++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index d71e85342692..b3449d0308bf 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -175,7 +175,9 @@ convention, add `.zip` to the filename. uncomp_size, file_name_length, extra_field_length, - type}). + %% extra data needed to create cd_file_header with correct + %% mode and timestamps + info :: undefined | file:file_info()}). -define(CENTRAL_FILE_HEADER_SZ,(4+2+2+2+2+2+2+4+4+4+2+2+2+2+2+4+4)). @@ -183,11 +185,13 @@ convention, add `.zip` to the filename. -define(CENTRAL_DIR_SZ, (4+2+2+2+2+4+4+2)). -define(CENTRAL_DIR_DIGITAL_SIG_MAGIC, 16#05054b50). -define(CENTRAL_DIR_DIGITAL_SIG_SZ, (4+2)). --define(CENTRAL_REGULAR_FILE_EXT_ATTRIBUTES, 8#644 bsl 16). --define(CENTRAL_DIRECTORY_FILE_EXT_ATTRIBUTES, 8#744 bsl 16). -define(CENTRAL_FILE_MAGIC, 16#02014b50). +-define(DEFAULT_REGULAR_FILE_MODE, 8#644). +-define(DEFAULT_DIRECTORY_FILE_MODE, 8#744). + -record(cd_file_header, {version_made_by, + os_made_by, version_needed, gp_flag, comp_method, @@ -1261,7 +1265,7 @@ cd_file_header_from_lh_and_pos(LH, Pos) -> uncomp_size = UncompSize, file_name_length = FileNameLength, extra_field_length = ExtraFieldLength, - type = Type} = LH, + info = #file_info{ type = Type, mode = Mode }} = LH, #cd_file_header{version_made_by = ?VERSION_MADE_BY, version_needed = VersionNeeded, gp_flag = GPFlag, @@ -1277,10 +1281,13 @@ cd_file_header_from_lh_and_pos(LH, Pos) -> disk_num_start = 0, % DiskNumStart, internal_attr = 0, % InternalAttr, external_attr = % ExternalAttr - case Type of - regular -> ?CENTRAL_REGULAR_FILE_EXT_ATTRIBUTES; - directory -> ?CENTRAL_DIRECTORY_FILE_EXT_ATTRIBUTES - end, + if Mode =:= undefined -> + case Type of + regular -> ?DEFAULT_REGULAR_FILE_MODE; + directory -> ?DEFAULT_DIRECTORY_FILE_MODE + end; + true -> Mode band 8#777 + end bsl 16, local_header_offset = Pos}. cd_file_header_to_bin( @@ -1355,7 +1362,7 @@ eocd_to_bin(#eocd{disk_num = DiskNum, ZipCommentLength:16/little>>. %% put together a local file header -local_file_header_from_info_method_name(#file_info{mtime = MTime, type = Type}, +local_file_header_from_info_method_name(Info = #file_info{mtime = MTime}, UncompSize, CompMethod, Name, GPFlag) -> {ModDate, ModTime} = dos_date_time_from_datetime( @@ -1371,7 +1378,7 @@ local_file_header_from_info_method_name(#file_info{mtime = MTime, type = Type}, uncomp_size = UncompSize, file_name_length = length(Name), extra_field_length = 0, - type = Type}. + info = Info}. %% %% Functions used by zip server @@ -1655,7 +1662,7 @@ raw_file_info_public(CD, FileName, FileComment, BExtraField, Acc0) -> cd_file_header_to_file_info(FileName, #cd_file_header{uncomp_size = UncompSize, last_mod_time = ModTime, - last_mod_date = ModDate}, + last_mod_date = ModDate} = CDFH, ExtraField) -> T = dos_date_time_to_datetime(ModDate, ModTime), Type = @@ -1663,13 +1670,23 @@ cd_file_header_to_file_info(FileName, $/ -> directory; _ -> regular end, + Mode = + if CDFH#cd_file_header.os_made_by =:= ~"UNIX" -> + (CDFH#cd_file_header.external_attr bsr 16) band 8#777; + true -> + if Type =:= directory -> + ?DEFAULT_DIRECTORY_FILE_MODE; + true -> + ?DEFAULT_REGULAR_FILE_MODE + end + end, FI = #file_info{size = UncompSize, type = Type, access = read_write, atime = T, mtime = T, ctime = T, - mode = 8#066, + mode = Mode, links = 1, major_device = 0, minor_device = 0, @@ -2038,6 +2055,8 @@ binary_io({file_info, {_Filename, #file_info{} = FI, _B}}, _A) -> FI; binary_io({file_info, {_Filename, B}}, A) -> binary_io({file_info, B}, A); +binary_io({file_info, Filename}, A) when is_list(Filename) -> + binary_io({file_info, {Filename, <<>>}}, A); binary_io({file_info, B}, _) -> {Type, Size} = if @@ -2047,7 +2066,11 @@ binary_io({file_info, B}, _) -> Now = calendar:local_time(), #file_info{size = Size, type = Type, access = read_write, atime = Now, - mtime = Now, ctime = Now, mode = 0, + mtime = Now, ctime = Now, mode = + if + Type =:= directory -> ?DEFAULT_DIRECTORY_FILE_MODE; + true -> ?DEFAULT_REGULAR_FILE_MODE + end, links = 1, major_device = 0, minor_device = 0, inode = 0, uid = 0, gid = 0}; From d6c4e1f07233eabc2121c121c35cffa896730085 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 30 May 2024 15:40:58 +0200 Subject: [PATCH 07/21] zip: Write external attributes to disk when creating files --- lib/stdlib/src/zip.erl | 57 +++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index b3449d0308bf..1f8287765d65 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -1699,8 +1699,6 @@ cd_file_header_to_file_info(FileName, add_extra_info(FI, _) -> FI. - - %% get all files using file list %% (the offset list is already filtered on which file to get... isn't it?) get_z_files([], _Z, _In, _Opts, Acc) -> @@ -1758,24 +1756,29 @@ get_z_file(In0, Z, Input, Output, OpO, FB, end, case ReadAndWrite of true -> - case lists:last(FileName) of - $/ -> - Out1 = Output({ensure_path,FileName1},[]), - {dir, Out1, In3}; - _ -> - %% FileInfo = local_file_header_to_file_info(LH) - %%{Out, In4, CRC, UncompSize} = - {Out, In4, CRC, _UncompSize} = - get_z_data(CompMethod, In3, FileName1, - CompSize, Input, Output, OpO, Z), - In5 = skip_z_data_descriptor(GPFlag, Input, In4), - %% TODO This should be fixed some day: - %% In5 = Input({set_file_info, FileName, - %% FileInfo#file_info{size=UncompSize}}, In4), - FB(FileName), - CRC =:= CRC32 orelse throw({bad_crc, FileName}), - {file, Out, In5} - end; + {Type, Out, In} = + case lists:last(FileName) of + $/ -> + %% perhaps this should always be done? + Out1 = Output({ensure_path,FileName1},[]), + {dir, Out1, In3}; + _ -> + {Out1, In4, CRC, _UncompSize} = + get_z_data(CompMethod, In3, FileName1, + CompSize, Input, Output, OpO, Z), + In5 = skip_z_data_descriptor(GPFlag, Input, In4), + + FB(FileName), + CRC =:= CRC32 orelse throw({bad_crc, FileName}), + {file, Out1, In5} + end, + + FileInfo = local_file_header_to_file_info( + Output({file_info, FileName1}, Out), + LH, ZipFile), + + Out2 = Output({set_file_info, FileName1, FileInfo, [{time, local}]}, Out), + {Type, Out2, In}; false -> {ignore, In3} end; @@ -1783,6 +1786,13 @@ get_z_file(In0, Z, Input, Output, OpO, FB, throw(bad_local_file_header) end. +local_file_header_to_file_info(FI, LFH, ZipFile) -> + Mtime = dos_date_time_to_datetime( + LFH#local_file_header.last_mod_date, + LFH#local_file_header.last_mod_time), + FI#file_info{ mode = ZipFile#zip_file.info#file_info.mode, + mtime = Mtime, atime = Mtime, ctime = Mtime }. + %% make sure FileName doesn't have relative path that points over CWD check_valid_location(CWD, FileName) -> TrailingSlash = case lists:last(FileName) of @@ -2117,6 +2127,8 @@ binary_io({list_dir, _F}, _B) -> []; binary_io({set_file_info, _F, _FI}, B) -> B; +binary_io({set_file_info, _F, _FI, _O}, B) -> + B; binary_io({ensure_path, Dir}, _B) -> {Dir, <<>>}. @@ -2178,6 +2190,11 @@ file_io({set_file_info, F, FI}, H) -> ok -> H; {error, Error} -> throw(Error) end; +file_io({set_file_info, F, FI, O}, H) -> + case file:write_file_info(F, FI, O) of + ok -> H; + {error, Error} -> throw(Error) + end; file_io({ensure_path, Dir}, _H) -> ok = filelib:ensure_path(Dir), Dir. From cbfeda2852e0778a15cce9bb63febc20059b88be Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 30 May 2024 15:44:44 +0200 Subject: [PATCH 08/21] zip: Polish test suite --- lib/stdlib/test/zip_SUITE.erl | 55 ++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/lib/stdlib/test/zip_SUITE.erl b/lib/stdlib/test/zip_SUITE.erl index 9d822d0d9fbc..a08442ae5c71 100644 --- a/lib/stdlib/test/zip_SUITE.erl +++ b/lib/stdlib/test/zip_SUITE.erl @@ -30,7 +30,8 @@ foldl/1,fd_leak/1,unicode/1,test_zip_dir/1, explicit_file_info/1]). --include_lib("common_test/include/ct.hrl"). +-import(proplists,[get_value/2, get_value/3]). + -include_lib("kernel/include/file.hrl"). -include_lib("stdlib/include/zip.hrl"). @@ -64,7 +65,7 @@ end_per_group(_GroupName, Config) -> %% multiple times with different file sizes. Also check that the %% modification date of the extracted file has survived. borderline(Config) when is_list(Config) -> - RootDir = proplists:get_value(priv_dir, Config), + RootDir = get_value(priv_dir, Config), TempDir = filename:join(RootDir, "borderline"), ok = file:make_dir(TempDir), @@ -200,7 +201,7 @@ next_random(X) -> %% Test the 'atomic' operations: zip/unzip/list_dir, on archives. %% Also test the 'cooked' option. atomic(Config) when is_list(Config) -> - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), + ok = file:set_cwd(get_value(priv_dir, Config)), DataFiles = data_files(), Names = [Name || {Name,_,_} <- DataFiles], io:format("Names: ~p", [Names]), @@ -226,7 +227,7 @@ atomic(Config) when is_list(Config) -> %% Test the zip_open/2, zip_get/1, zip_get/2, zip_close/1, %% and zip_list_dir/1 functions. zip_api(Config) when is_list(Config) -> - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), + ok = file:set_cwd(get_value(priv_dir, Config)), DataFiles = data_files(), Names = [Name || {Name, _, _} <- DataFiles], io:format("Names: ~p", [Names]), @@ -299,8 +300,8 @@ spawned_zip_dead(ZipSrv) -> %% Test options for unzip, only cwd and file_list currently. unzip_options(Config) when is_list(Config) -> - DataDir = proplists:get_value(data_dir, Config), - PrivDir = proplists:get_value(priv_dir, Config), + DataDir = get_value(data_dir, Config), + PrivDir = get_value(priv_dir, Config), Long = filename:join(DataDir, "abc.zip"), %% create a temp directory @@ -327,8 +328,8 @@ unzip_options(Config) when is_list(Config) -> %% Test that unzip handles directory traversal exploit (OTP-13633) unzip_traversal_exploit(Config) -> - DataDir = proplists:get_value(data_dir, Config), - PrivDir = proplists:get_value(priv_dir, Config), + DataDir = get_value(data_dir, Config), + PrivDir = get_value(priv_dir, Config), ZipName = filename:join(DataDir, "exploit.zip"), %% $ zipinfo -1 test/zip_SUITE_data/exploit.zip @@ -373,8 +374,8 @@ unzip_traversal_exploit(Config) -> %% Test unzip a jar file (OTP-7382). unzip_jar(Config) when is_list(Config) -> - DataDir = proplists:get_value(data_dir, Config), - PrivDir = proplists:get_value(priv_dir, Config), + DataDir = get_value(data_dir, Config), + PrivDir = get_value(priv_dir, Config), JarFile = filename:join(DataDir, "test.jar"), %% create a temp directory @@ -403,13 +404,13 @@ unzip_jar(Config) when is_list(Config) -> %% Test the options for unzip, only cwd currently. zip_options(Config) when is_list(Config) -> - PrivDir = proplists:get_value(priv_dir, Config), + PrivDir = get_value(priv_dir, Config), ok = file:set_cwd(PrivDir), DataFiles = data_files(), Names = [Name || {Name, _, _} <- DataFiles], %% Make sure cwd is not where we get the files - ok = file:set_cwd(proplists:get_value(data_dir, Config)), + ok = file:set_cwd(get_value(data_dir, Config)), %% Create a zip archive {ok, {_,Zip}} = @@ -493,7 +494,7 @@ create_files([]) -> %% Try zip:unzip/1 on some corrupted zip files. bad_zip(Config) when is_list(Config) -> - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), + ok = file:set_cwd(get_value(priv_dir, Config)), try_bad("bad_crc", {bad_crc, "abc.txt"}, Config), try_bad("bad_central_directory", bad_central_directory, Config), try_bad("bad_file_header", bad_file_header, Config), @@ -513,7 +514,7 @@ try_bad(N, R, Config) -> try_bad(Name0, Reason, What, Config) -> %% Intentionally no macros here. - DataDir = proplists:get_value(data_dir, Config), + DataDir = get_value(data_dir, Config), Name = Name0 ++ ".zip", io:format("~nTrying ~s", [Name]), Full = filename:join(DataDir, Name), @@ -528,8 +529,8 @@ try_bad(Name0, Reason, What, Config) -> %% Test extracting to binary with memory option. unzip_to_binary(Config) when is_list(Config) -> - DataDir = proplists:get_value(data_dir, Config), - PrivDir = proplists:get_value(priv_dir, Config), + DataDir = get_value(data_dir, Config), + PrivDir = get_value(priv_dir, Config), WorkDir = filename:join(PrivDir, "unzip_to_binary"), _ = file:make_dir(WorkDir), _ = file:make_dir(filename:join(DataDir, "empty")), @@ -560,8 +561,8 @@ unzip_to_binary(Config) when is_list(Config) -> %% Test compressing to binary with memory option. zip_to_binary(Config) when is_list(Config) -> - DataDir = proplists:get_value(data_dir, Config), - PrivDir = proplists:get_value(priv_dir, Config), + DataDir = get_value(data_dir, Config), + PrivDir = get_value(priv_dir, Config), WorkDir = filename:join(PrivDir, "zip_to_binary"), _ = file:make_dir(WorkDir), @@ -613,8 +614,8 @@ aliases(Config) when is_list(Config) -> %% Test extracting a zip archive from a binary. unzip_from_binary(Config) when is_list(Config) -> - DataDir = proplists:get_value(data_dir, Config), - PrivDir = proplists:get_value(priv_dir, Config), + DataDir = get_value(data_dir, Config), + PrivDir = get_value(priv_dir, Config), ExtractDir = filename:join(PrivDir, "extract_from_binary"), ok = file:make_dir(ExtractDir), Archive = filename:join(ExtractDir, "abc.zip"), @@ -683,7 +684,7 @@ do_delete_files([Item|Rest], Cnt) -> %% Test control of which files that should be compressed. compress_control(Config) when is_list(Config) -> - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), + ok = file:set_cwd(get_value(priv_dir, Config)), Dir = "compress_control", Files = [ {Dir, dir, $d}, @@ -814,7 +815,7 @@ extensions([], Old) -> Old. foldl(Config) -> - PrivDir = proplists:get_value(priv_dir, Config), + PrivDir = get_value(priv_dir, Config), File = filename:join([PrivDir, "foldl.zip"]), FooBin = <<"FOO">>, @@ -845,8 +846,8 @@ foldl(Config) -> ok. fd_leak(Config) -> - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), - DataDir = proplists:get_value(data_dir, Config), + ok = file:set_cwd(get_value(priv_dir, Config)), + DataDir = get_value(data_dir, Config), Name = filename:join(DataDir, "bad_file_header.zip"), BadExtract = fun() -> {error,bad_file_header} = zip:extract(Name), @@ -880,8 +881,8 @@ unicode(Config) -> latin1 -> {comment, "Native name encoding is Latin-1; skipping all tests"}; utf8 -> - DataDir = proplists:get_value(data_dir, Config), - ok = file:set_cwd(proplists:get_value(priv_dir, Config)), + DataDir = get_value(data_dir, Config), + ok = file:set_cwd(get_value(priv_dir, Config)), test_file_comment(DataDir), test_archive_comment(DataDir), test_bad_comment(DataDir), @@ -1012,7 +1013,7 @@ test_latin1_archive(DataDir) -> test_zip_dir(Config) when is_list(Config) -> case {os:find_executable("unzip"), os:type()} of {UnzipPath, {unix,_}} when is_list(UnzipPath)-> - DataDir = proplists:get_value(data_dir, Config), + DataDir = get_value(data_dir, Config), Dir = filename:join([DataDir, "test-zip", "dir-1"]), TestZipOutputDir = filename:join(DataDir, "test-zip-output"), TestZipOutput = filename:join(TestZipOutputDir, "test.zip"), From 438b2ddb43b18a5332d43813e05a53ed56f3d0f9 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 30 May 2024 15:45:04 +0200 Subject: [PATCH 09/21] zip: Add basic timestamp test --- lib/stdlib/test/zip_SUITE.erl | 212 ++++++++++++++++++++++++++++++++-- 1 file changed, 202 insertions(+), 10 deletions(-) diff --git a/lib/stdlib/test/zip_SUITE.erl b/lib/stdlib/test/zip_SUITE.erl index a08442ae5c71..724d7e1eb923 100644 --- a/lib/stdlib/test/zip_SUITE.erl +++ b/lib/stdlib/test/zip_SUITE.erl @@ -20,7 +20,10 @@ -module(zip_SUITE). -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, - init_per_group/2,end_per_group/2, borderline/1, atomic/1, + init_per_group/2,end_per_group/2, + init_per_testcase/2, end_per_testcase/2]). + +-export([borderline/1, atomic/1, bad_zip/1, unzip_from_binary/1, unzip_to_binary/1, zip_to_binary/1, unzip_options/1, zip_options/1, list_dir_options/1, aliases/1, @@ -28,12 +31,14 @@ unzip_traversal_exploit/1, compress_control/1, foldl/1,fd_leak/1,unicode/1,test_zip_dir/1, - explicit_file_info/1]). + explicit_file_info/1, + basic_timestamp/1]). -import(proplists,[get_value/2, get_value/3]). -include_lib("kernel/include/file.hrl"). -include_lib("stdlib/include/zip.hrl"). +-include_lib("stdlib/include/assert.hrl"). suite() -> [{ct_hooks,[ts_install_cth]}]. @@ -43,10 +48,26 @@ all() -> zip_options, list_dir_options, aliases, zip_api, open_leak, unzip_jar, compress_control, foldl, unzip_traversal_exploit, fd_leak, unicode, test_zip_dir, - explicit_file_info]. + explicit_file_info, {group, zip_group}]. groups() -> - []. + zip_groups(). + +%% zip - Use zip unix tools +%% ezip - Use erlang zip on disk +%% emzip - Use erlang zip in memory +-define(ZIP_MODES,[zip, ezip, emzip]). +-define(UNZIP_MODES,[unzip, unezip, unemzip]). + +zip_groups() -> + + [{zip_group,[],[{group,ZipMode} || ZipMode <- ?ZIP_MODES]}] ++ + [{ZipMode, [], [{group,UnZipMode} || UnZipMode <- ?UNZIP_MODES]} + || ZipMode <- ?ZIP_MODES] ++ + [{G, [parallel], zip_testcases()} || G <- ?UNZIP_MODES]. + +zip_testcases() -> + [basic_timestamp]. init_per_suite(Config) -> Config. @@ -54,12 +75,34 @@ init_per_suite(Config) -> end_per_suite(_Config) -> ok. -init_per_group(_GroupName, Config) -> - Config. +init_per_group(Group, Config) -> + case lists:member(Group, ?ZIP_MODES ++ ?UNZIP_MODES) of + true -> + case get_value(zip, Config) of + undefined -> + Pdir = filename:join(get_value(priv_dir, Config),Group), + ok = filelib:ensure_path(Pdir), + [{pdir, Pdir},{zip, Group} | Config]; + _Zip -> + Pdir = filename:join(get_value(pdir, Config),Group), + ok = filelib:ensure_path(Pdir), + [{pdir, Pdir},{unzip, Group} | Config] + end; + false -> + Config + end. end_per_group(_GroupName, Config) -> Config. +init_per_testcase(TC, Config) -> + PrivDir = filename:join(get_value(pdir, Config,get_value(priv_dir, Config)), TC), + ok = filelib:ensure_path(PrivDir), + [{pdir, PrivDir} | Config]. + +end_per_testcase(_TC, Config) -> + file:del_dir_r(get_value(pdir,Config)), + Config. %% Test creating, listing and extracting one file from an archive %% multiple times with different file sizes. Also check that the @@ -67,7 +110,6 @@ end_per_group(_GroupName, Config) -> borderline(Config) when is_list(Config) -> RootDir = get_value(priv_dir, Config), TempDir = filename:join(RootDir, "borderline"), - ok = file:make_dir(TempDir), Record = 512, Block = 20 * Record, @@ -444,9 +486,6 @@ zip_options(Config) when is_list(Config) -> list_dir_options(Config) when is_list(Config) -> ok. - - - %% convert zip_info as returned from list_dir to a list of names names_from_list_dir({ok, Info}) -> names_from_list_dir(Info); @@ -1042,3 +1081,156 @@ explicit_file_info(_Config) -> {"seconds", <<>>, FileInfo#file_info{mtime=315532800}}], {ok, _} = zip:zip("", Files, [memory]), ok. + +%% Test basic timestamps, the atime and mtime should be the original +%% mtime of the file +basic_timestamp(Config) -> + PrivDir = get_value(pdir, Config), + Archive = filename:join(PrivDir, "archive.zip"), + ExtractDir = filename:join(PrivDir, "extract"), + Testfile = filename:join(PrivDir, "testfile.txt"), + + ok = file:write_file(Testfile, "abc"), + {ok, OndiskFI = #file_info{ mtime = Mtime }} = + file:read_file_info(Testfile), + + %% Sleep a bit to let the timestamp progress + timer:sleep(1000), + + %% Create an archive without extended timestamps + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "-X", ["testfile.txt"], [{cwd, PrivDir}])), + + {ok, [#zip_comment{}, + #zip_file{ info = ZipFI = #file_info{ mtime = ZMtime }} ]} = + zip:list_dir(Archive), + + ct:log("on disk: ~p",[OndiskFI]), + ct:log("in zip : ~p",[ZipFI]), + ct:log("zipinfo:~n~ts",[os:cmd("zipinfo -v "++Archive)]), + + %% Timestamp in archive is when entry was added to archive + %% Need to add 2 to ZMtime as the dos time in zip archives + %% are in precise. + ?assert(calendar:datetime_to_gregorian_seconds(Mtime) =< + calendar:datetime_to_gregorian_seconds(ZMtime) + 1), + + %% Sleep a bit to let the timestamp progress + timer:sleep(1000), + + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, ["testfile.txt"]}, + unzip(Config, Archive, [{cwd,ExtractDir}])), + + {ok, UnzipFI = #file_info{ atime = UnZAtime, + mtime = UnZMtime, + ctime = UnZCtime + }} = + file:read_file_info(filename:join(ExtractDir, "testfile.txt")), + + + ct:log("extract: ~p",[UnzipFI]), + + case get_value(unzip, Config) =/= unemzip of + true -> + ?assertEqual(ZMtime, UnZMtime), + ?assertEqual(UnZAtime, UnZMtime), + + ?assert(UnZMtime < UnZCtime); + false -> + %% emzip does not support timestamps + ok + end, + + ok. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% Generic zip interface +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +zip(Config, Archive, ZipOpts, Filelist, Opts) when is_list(Config) -> + zip(get_value(zip, Config), + Archive, ZipOpts, Filelist, Opts); +zip(zip, Archive, ZipOpts, Filelist, Opts) -> + cmd("cd "++get_value(cwd, Opts)++" && " + "zip "++ZipOpts++" "++Archive++" "++lists:join($ ,Filelist)), + {ok, Archive}; +zip(ezip, Archive, _ZipOpts, Filelist, Opts) -> + ct:log("Creating zip:zip(~p,~n~p,~n~p)",[Archive, Filelist, Opts]), + zip:zip(Archive, Filelist, Opts); +zip(emzip, Archive, _ZipOpts, Filelist, Opts) -> + ct:log("Creating emzip ~ts",[Archive]), + Cwd = get_value(cwd, Opts), + {Files,_Cache} = + lists:mapfoldl( + fun F(Fn, Cache) -> + AbsFn = filename:join(Cwd, Fn), + {ok, Fi} = file:read_file_info(AbsFn), + {SubDirFiles, NewCache} = + if Fi#file_info.type == directory -> + {ok, Files} = file:list_dir(AbsFn), + lists:mapfoldl(F, Cache#{ Fn => <<>> }, + [filename:join(Fn, DirFn) || DirFn <- Files]); + Fi#file_info.type == regular -> + %% For this not to use a huge amount of memory we re-use + %% the binary for files that are links to the same file. + %% This cuts memory usage from ~16GB to ~4GB. + {[], + case file:read_link_all(AbsFn) of + {ok, LinkFn} -> + case maps:get(LinkFn, Cache, undefined) of + undefined -> + {ok, Data} = file:read_file(AbsFn), + Cache#{ LinkFn => Data, Fn => Data }; + Data -> + Cache#{ Fn => Data } + end; + {error, _} -> + {ok, Data} = file:read_file(AbsFn), + Cache#{ Fn => Data } + end} + end, + {[{Fn, maps:get(Fn, NewCache), Fi}|SubDirFiles], NewCache} + end, #{}, Filelist), + zip:zip(Archive, lists:flatten(Files), proplists:delete(cwd,Opts)). + + +unzip(Config, Archive, Opts) when is_list(Config) -> + unzip(get_value(unzip, Config), Archive, Opts); +unzip(unzip, Archive, Opts) -> + UidGid = [" -X " || lists:member(uid_gid, get_value(extra, Opts, []))], + Files = lists:join($ , get_value(file_list, Opts, [])), + Res = cmd("cd "++get_value(cwd, Opts)++" && " + "unzip "++UidGid++" "++Archive++" "++Files), + {ok, lists:sort( + lists:flatmap( + fun(Ln) -> + case re:run(Ln, ~B'\s+[a-z]+: ([^\s]+)', [{capture,all_but_first,list},unicode]) of + nomatch -> []; + {match,Match} -> Match + end + end,string:split(Res,"\n",all)))}; +unzip(unezip, Archive, Opts) -> + Cwd = get_value(cwd, Opts) ++ "/", + {ok, Files} = zip:unzip(Archive, Opts), + {ok, lists:sort([F -- Cwd || F <- Files])}; +unzip(unemzip, Archive, Opts) -> + Cwd = get_value(cwd, Opts) ++ "/", + {ok, Files} = zip:unzip(Archive, [memory | Opts]), + {ok, lists:sort( + [begin + case lists:last(F) of + $/ -> + filelib:ensure_path(F); + _ -> + filelib:ensure_dir(F), + file:write_file(F, B) + end, + F -- Cwd + end || {F, B} <- Files])}. + +cmd(Cmd) -> + Res = os:cmd(Cmd), + ct:log("Cmd: ~ts~nRes: ~ts~n",[Cmd, Res]), + Res. From f37af5d85eb7005a990a832b09c7d8ec3224946e Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 30 May 2024 16:25:08 +0200 Subject: [PATCH 10/21] zip: Add test for external attributes mode handling --- lib/stdlib/src/zip.erl | 2 +- lib/stdlib/test/zip_SUITE.erl | 51 +++++++++++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index 1f8287765d65..b15bb3d59318 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -608,7 +608,7 @@ One option is available: RetValue :: {ok, CommentAndFiles} | {error, Reason :: term()}, CommentAndFiles :: [zip_comment() | zip_file()], Options :: [Option], - Option :: cooked). + Option :: cooked | {extra, extra()}). list_dir(F, Options) -> case ?CATCH(do_list_dir(F, Options)) of diff --git a/lib/stdlib/test/zip_SUITE.erl b/lib/stdlib/test/zip_SUITE.erl index 724d7e1eb923..644dc53035c9 100644 --- a/lib/stdlib/test/zip_SUITE.erl +++ b/lib/stdlib/test/zip_SUITE.erl @@ -31,7 +31,7 @@ unzip_traversal_exploit/1, compress_control/1, foldl/1,fd_leak/1,unicode/1,test_zip_dir/1, - explicit_file_info/1, + explicit_file_info/1, mode/1, basic_timestamp/1]). -import(proplists,[get_value/2, get_value/3]). @@ -67,7 +67,7 @@ zip_groups() -> [{G, [parallel], zip_testcases()} || G <- ?UNZIP_MODES]. zip_testcases() -> - [basic_timestamp]. + [mode, basic_timestamp]. init_per_suite(Config) -> Config. @@ -1082,6 +1082,53 @@ explicit_file_info(_Config) -> {ok, _} = zip:zip("", Files, [memory]), ok. +mode(Config) -> + + PrivDir = get_value(pdir, Config), + ExtractDir = filename:join(PrivDir, "extract"), + Archive = filename:join(PrivDir, "archive.zip"), + + Executable = filename:join(PrivDir,"exec"), + file:write_file(Executable, "aaa"), + {ok, ExecFI } = file:read_file_info(Executable), + ok = file:write_file_info(Executable, ExecFI#file_info{ mode = 8#111 bor 8#400 }), + + Directory = filename:join(PrivDir,"dir"), + ok = file:make_dir(Directory), + {ok, DirFI } = file:read_file_info(Executable), + ok = file:write_file_info(Directory, DirFI#file_info{ mode = 8#111 bor 8#400 }), + + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "-r", ["dir","exec"], [{cwd, PrivDir},{extra,[extended_timestamp]}])), + + ?assertMatch( + {ok, [#zip_comment{}, + #zip_file{ name = "dir/", info = #file_info{ mode = 8#111 bor 8#400}}, + #zip_file{ name = "exec", info = #file_info{ mode = 8#111 bor 8#400}} ]}, + zip:list_dir(Archive)), + + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, ["dir/","exec"]}, unzip(Config, Archive, [{cwd,ExtractDir}])), + + case get_value(unzip, Config) =/= unemzip of + true -> + {ok,#file_info{ mode = ExecMode }} = + file:read_file_info(filename:join(ExtractDir,"exec")), + ?assertEqual(8#111 bor 8#400, ExecMode band 8#777), + + {ok,#file_info{ mode = DirMode }} = + file:read_file_info(filename:join(ExtractDir,"dir")), + ?assertEqual(8#111 bor 8#400, DirMode band 8#777); + false -> + %% emzip does not support mode + ok + end, + + ok. + + %% Test basic timestamps, the atime and mtime should be the original %% mtime of the file basic_timestamp(Config) -> From 92ec6f12f64631160cbd3b2c93bda9ecfd32a8ed Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 30 May 2024 17:07:14 +0200 Subject: [PATCH 11/21] zip: Implement extended timestamps --- lib/stdlib/src/zip.erl | 571 ++++++++++++++++++++++++---------- lib/stdlib/test/zip_SUITE.erl | 77 ++++- 2 files changed, 481 insertions(+), 167 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index b15bb3d59318..8e33143be0de 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -44,6 +44,9 @@ convention, add `.zip` to the filename. file by file, without having to reopen the archive. This can be done by functions [`zip_open/1,2`](`zip_open/1`), [`zip_get/1,2`](`zip_get/1`), `zip_list_dir/1`, and `zip_close/1`. +- The ZIP extensions 0x5355 "extended timestamps" is supported. Both extensions + are by default enabled when creating and extracting from an archive. Use the `extra` + option to change how these extensions are used. ## Limitations @@ -92,7 +95,8 @@ convention, add `.zip` to the filename. file_filter, % file filter (boolean fun) open_opts, % options passed to file:open feedback, % feeback (fun) - cwd % directory to relate paths to + cwd, % directory to relate paths to + extra % The extra fields to include }). -record(zip_opts, { @@ -103,19 +107,22 @@ convention, add `.zip` to the filename. feedback, % feeback (fun) cwd, % directory to relate paths to compress, % compress files with these suffixes - uncompress % uncompress files with these suffixes + uncompress, % uncompress files with these suffixes + extra % The extra fields to include }). -record(list_dir_opts, { input, % input object (fun) - raw_iterator, % applied to each dir entry - open_opts % options passed to file:open + raw_iterator,% applied to each dir entry + open_opts, % options passed to file:open + extra % The extra fields to include }). -record(openzip_opts, { output, % output object (fun) open_opts, % file:open options - cwd % directory to relate paths to + cwd, % directory to relate paths to + extra % The extra fields to include }). % openzip record, state for an open zip-file @@ -126,7 +133,8 @@ convention, add `.zip` to the filename. input, % archive io object (fun) output, % output io object (fun) zlib, % handle to open zlib - cwd % directory to relate paths to + cwd, % directory to relate paths to + extra % The extra fields to include }). % Things that I would like to add to the public record #zip_file, @@ -157,27 +165,40 @@ convention, add `.zip` to the filename. -define(PKWARE_RESERVED, 11). -define(BZIP2_COMPRESSED, 12). -%% Version 2.0, attribute compatibility type 3 (Unix) --define(VERSION_MADE_BY, 20 bor (3 bsl 8)). +-define(OS_MADE_BY_UNIX, 3). +-define(VERSION_MADE_BY, 20 bor (?OS_MADE_BY_UNIX bsl 8)). +-define(VERSION_NEEDED_STORE, 10). +-define(VERSION_NEEDED_DEFLATE, 20). -define(GP_BIT_11, 16#800). % Filename and file comment UTF-8 encoded. %% zip-file records -define(LOCAL_FILE_MAGIC,16#04034b50). -define(LOCAL_FILE_HEADER_SZ,(4+2+2+2+2+2+4+4+4+2+2)). -define(LOCAL_FILE_HEADER_CRC32_OFFSET, 4+2+2+2+2+2). --record(local_file_header, {version_needed, - gp_flag, - comp_method, - last_mod_time, - last_mod_date, - crc32, - comp_size, - uncomp_size, - file_name_length, - extra_field_length, - %% extra data needed to create cd_file_header with correct - %% mode and timestamps - info :: undefined | file:file_info()}). +-define(LOCAL_FILE_HEADER_EXTRA_LENGTH_OFFSET, 4+2+2+2+2+2+4+4+4+2). +-record(local_file_header, + { + %% Common with cd_file_header + version_needed, + gp_flag, + comp_method, + last_mod_time, + last_mod_date, + crc32, + comp_size, + uncomp_size, + %% X5455_EXTENDED_TIMESTAMP extension + mtime, + atime, + ctime, + %% local_file_header specific + file_name_length, + extra_field_length, + %% extra data needed to create cd_file_header + info :: undefined | file:file_info() + }). +-define(EXTRA_OPTIONS, [extended_timestamp]). +-define(X5455_EXTENDED_TIMESTAMP, 16#5455). -define(CENTRAL_FILE_HEADER_SZ,(4+2+2+2+2+2+2+4+4+4+2+2+2+2+2+4+4)). @@ -190,23 +211,32 @@ convention, add `.zip` to the filename. -define(DEFAULT_REGULAR_FILE_MODE, 8#644). -define(DEFAULT_DIRECTORY_FILE_MODE, 8#744). --record(cd_file_header, {version_made_by, - os_made_by, - version_needed, - gp_flag, - comp_method, - last_mod_time, - last_mod_date, - crc32, - comp_size, - uncomp_size, - file_name_length, - extra_field_length, - file_comment_length, - disk_num_start, - internal_attr, - external_attr, - local_header_offset}). +-record(cd_file_header, + { + %% Common with local_file_header + version_needed, + gp_flag, + comp_method, + last_mod_time, + last_mod_date, + crc32, + comp_size, + uncomp_size, + %% X5455_EXTENDED_TIMESTAMP extension + mtime, + atime, + ctime, + %% cd_file_header specific + version_made_by, + os_made_by, + file_name_length, + extra_field_length, + file_comment_length, + disk_num_start, + internal_attr, + external_attr, + local_header_offset + }). -define(END_OF_CENTRAL_DIR_MAGIC, 16#06054b50). -define(END_OF_CENTRAL_DIR_SZ, (4+2+2+2+2+4+4+2)). @@ -219,13 +249,22 @@ convention, add `.zip` to the filename. offset, zip_comment_length}). +-doc """ +The possible extra extension that can be used. + +- **`extended_timestamp`** - enables the 0x5455 "extended timestamps" zip extension + that embeds POSIX timestamps for access and modification times for each file in the + archive. +""". +-type extra() :: [extended_timestamp]. -doc "These options are described in [`create/3`](`m:zip#zip_options`).". -type create_option() :: memory | cooked | verbose | {comment, Comment ::string()} | {cwd, CWD :: file:filename()} | {compress, What :: extension_spec()} - | {uncompress, What :: extension_spec()}. + | {uncompress, What :: extension_spec()} + | {extra, extra()}. -type extension() :: string(). -type extension_spec() :: all | [Extension :: extension()] @@ -317,7 +356,8 @@ Options: Options :: [Option], Option :: {file_list, FileList} | cooked | keep_old_files | verbose | memory | - {file_filter, FileFilter} | {cwd, CWD}, + {file_filter, FileFilter} | {cwd, CWD} | + {extra, extra()}, FileList :: [file:name()], FileBinList :: [{file:name(),binary()}], FileFilter :: fun((ZipFile) -> boolean()), @@ -336,10 +376,11 @@ unzip(F, Options) -> do_unzip(F, Options) -> Opts = get_unzip_options(F, Options), - #unzip_opts{input = Input, open_opts = OpO} = Opts, + #unzip_opts{input = Input, open_opts = OpO, + extra = ExtraOpts} = Opts, In0 = Input({open, F, OpO -- [write]}, []), RawIterator = fun raw_file_info_etc/5, - {Info, In1} = get_central_dir(In0, RawIterator, Input), + {Info, In1} = get_central_dir(In0, RawIterator, Input, ExtraOpts), %% get rid of zip-comment Z = zlib:open(), Files = try @@ -619,9 +660,10 @@ list_dir(F, Options) -> do_list_dir(F, Options) -> Opts = get_list_dir_options(F, Options), #list_dir_opts{input = Input, open_opts = OpO, - raw_iterator = RawIterator} = Opts, + raw_iterator = RawIterator, + extra = ExtraOpts} = Opts, In0 = Input({open, F, OpO}, []), - {Info, In1} = get_central_dir(In0, RawIterator, Input), + {Info, In1} = get_central_dir(In0, RawIterator, Input, ExtraOpts), Input(close, In1), {ok, Info}. @@ -647,7 +689,7 @@ dies. Archive :: file:name() | binary(), ZipHandle :: handle(), Options :: [Option], - Option :: cooked | memory | {cwd, CWD :: file:filename()}, + Option :: cooked | memory | {cwd, CWD :: file:filename()} | {extra, extra()}, Reason :: term()). zip_open(Archive, Options) -> @@ -749,7 +791,7 @@ do_t(F, RawPrint) -> Input = get_input(F), OpO = [raw], In0 = Input({open, F, OpO}, []), - {_Info, In1} = get_central_dir(In0, RawPrint, Input), + {_Info, In1} = get_central_dir(In0, RawPrint, Input, [extended_timestamp]), Input(close, In1), ok. @@ -790,6 +832,13 @@ get_unzip_opt([keep_old_files | Rest], Opts) -> Keep = fun keep_old_file/1, Filter = fun_and_1(Keep, Opts#unzip_opts.file_filter), get_unzip_opt(Rest, Opts#unzip_opts{file_filter = Filter}); +get_unzip_opt([{extra, What} = O| Rest], Opts) when is_list(What) -> + case lists:all(fun(E) -> lists:member(E, ?EXTRA_OPTIONS) end, What) of + true -> + get_zip_opt(Rest, Opts#unzip_opts{extra = What}); + false -> + throw({bad_option, O}) + end; get_unzip_opt([Unknown | _Rest], _Opts) -> throw({bad_option, Unknown}). @@ -800,6 +849,13 @@ get_list_dir_opt([cooked | Rest], #list_dir_opts{open_opts = OpO} = Opts) -> get_list_dir_opt([names_only | Rest], Opts) -> get_list_dir_opt(Rest, Opts#list_dir_opts{ raw_iterator = fun(A, B, C, D, E) -> raw_name_only(A, B, C, D, E) end}); +get_list_dir_opt([{extra, What} = O| Rest], Opts) when is_list(What) -> + case lists:all(fun(E) -> lists:member(E, ?EXTRA_OPTIONS) end, What) of + true -> + get_zip_opt(Rest, Opts#list_dir_opts{extra = What}); + false -> + throw({bad_option, O}) + end; %% get_list_dir_opt([{file_output, F} | Rest], Opts) -> %% get_list_dir_opt(Rest, Opts#list_dir_opts{file_output = F}); %% get_list_dir_opt([{file_filter, F} | Rest], Opts) -> @@ -849,6 +905,13 @@ get_zip_opt([{uncompress, Which} = O| Rest], Opts) -> throw({bad_option, O}) end, get_zip_opt(Rest, Opts#zip_opts{uncompress = Which2}); +get_zip_opt([{extra, What} = O| Rest], Opts) when is_list(What) -> + case lists:all(fun(E) -> lists:member(E, ?EXTRA_OPTIONS) end, What) of + true -> + get_zip_opt(Rest, Opts#zip_opts{extra = What}); + false -> + throw({bad_option, O}) + end; get_zip_opt([Unknown | _Rest], _Opts) -> throw({bad_option, Unknown}). @@ -889,7 +952,8 @@ get_zip_options(Files, Options) -> feedback = fun silent/1, cwd = "", compress = all, - uncompress = Suffixes + uncompress = Suffixes, + extra = [extended_timestamp] }, Opts1 = #zip_opts{comment = Comment} = get_zip_opt(Options, Opts), %% UTF-8 encode characters in the interval from 127 to 255. @@ -902,14 +966,16 @@ get_unzip_options(F, Options) -> input = get_input(F), open_opts = [raw], feedback = fun silent/1, - cwd = "" + cwd = "", + extra = [extended_timestamp] }, get_unzip_opt(Options, Opts). get_openzip_options(Options) -> Opts = #openzip_opts{open_opts = [raw, read], output = fun file_io/2, - cwd = ""}, + cwd = "", + extra = [extended_timestamp]}, get_openzip_opt(Options, Opts). get_input(F) when is_binary(F) -> @@ -937,7 +1003,8 @@ get_zip_input(_) -> get_list_dir_options(F, Options) -> Opts = #list_dir_opts{raw_iterator = fun raw_file_info_public/5, input = get_input(F), - open_opts = [raw]}, + open_opts = [raw], + extra = [extended_timestamp]}, get_list_dir_opt(Options, Opts). %% aliases for erl_tar compatibility @@ -1025,25 +1092,29 @@ extract(F, O) -> unzip(F, O). %% put the central directory, at the end of the zip archive put_central_dir(LHS, Pos, Out0, - #zip_opts{output = Output, comment = Comment}) -> - {Out1, Sz} = put_cd_files_loop(LHS, Output, Out0, 0), + #zip_opts{output = Output, comment = Comment, extra = ExtraOpts}) -> + {Out1, Sz} = put_cd_files_loop(LHS, Output, ExtraOpts, Out0, 0), put_eocd(length(LHS), Pos, Sz, Comment, Output, Out1). -put_cd_files_loop([], _Output, Out, Sz) -> +put_cd_files_loop([], _Output, _ExtraOpts, Out, Sz) -> {Out, Sz}; -put_cd_files_loop([{LH, Name, Pos} | LHRest], Output, Out0, Sz0) -> - CDFH = cd_file_header_from_lh_and_pos(LH, Pos), +put_cd_files_loop([{LH, Name, Pos} | LHRest], Output, ExtraOpts, Out0, Sz0) -> + Extra = cd_file_header_extra_from_lh_and_pos(LH, ExtraOpts), + CDFH = cd_file_header_from_lh_pos_and_extra(LH, Pos, Extra), BCDFH = cd_file_header_to_bin(CDFH), - B = [<>, BCDFH, Name], + B = [<>, BCDFH, Name, Extra], Out1 = Output({write, B}, Out0), Sz1 = Sz0 + ?CENTRAL_FILE_HEADER_SZ + - LH#local_file_header.file_name_length, - put_cd_files_loop(LHRest, Output, Out1, Sz1). + CDFH#cd_file_header.file_name_length + CDFH#cd_file_header.extra_field_length, + put_cd_files_loop(LHRest, Output, ExtraOpts, Out1, Sz1). + +cd_file_header_extra_from_lh_and_pos( + #local_file_header{ info = FI }, ExtraOpts) -> + encode_extra(FI#file_info{ atime = undefined }, ExtraOpts). %% put end marker of central directory, the last record in the archive put_eocd(N, Pos, Sz, Comment, Output, Out0) -> - %% BComment = list_to_binary(Comment), - CommentSz = length(Comment), % size(BComment), + CommentSz = length(Comment), EOCD = #eocd{disk_num = 0, start_disk_num = 0, entries_on_disk = N, @@ -1052,7 +1123,7 @@ put_eocd(N, Pos, Sz, Comment, Output, Out0) -> offset = Pos, zip_comment_length = CommentSz}, BEOCD = eocd_to_bin(EOCD), - B = [<>, BEOCD, Comment], % BComment], + B = [<>, BEOCD, Comment], Output({write, B}, Out0). get_filename({Name, _}, Type) -> @@ -1088,13 +1159,16 @@ get_comp_method(F, _, #zip_opts{compress = Compress, uncompress = Uncompress}, _ end. put_z_files([], _Z, Out, Pos, _Opts, Acc) -> - {Out, lists:reverse(Acc, []), Pos}; + {Out, lists:reverse(Acc), Pos}; put_z_files([F | Rest], Z, Out0, Pos0, #zip_opts{input = Input, output = Output, open_opts = OpO, - feedback = FB, cwd = CWD} = Opts, Acc) -> + feedback = FB, cwd = CWD, extra = ExtraOpts} = Opts, Acc) -> + + {Pos0, _} = Output({position, cur, 0}, Out0), %% Assert correct Pos0 + In0 = [], F1 = add_cwd(CWD, F), - FileInfo = Input({file_info, F1}, In0), + FileInfo = Input({file_info, F1, [{time, posix}]}, In0), Type = FileInfo#file_info.type, UncompSize = case Type of @@ -1105,38 +1179,82 @@ put_z_files([F | Rest], Z, Out0, Pos0, %% UTF-8 encode characters in the interval from 127 to 255. {FileName, GPFlag} = encode_string(FileName0), CompMethod = get_comp_method(FileName, UncompSize, Opts, Type), - LH = local_file_header_from_info_method_name(FileInfo, UncompSize, CompMethod, FileName, GPFlag), + + %% Add any extra data needed and patch + Extra = encode_extra(FileInfo, ExtraOpts), + + LH = local_file_header_from_info_method_name(FileInfo, UncompSize, CompMethod, + FileName, GPFlag, Extra), BLH = local_file_header_to_bin(LH), B = [<>, BLH], Out1 = Output({write, B}, Out0), Out2 = Output({write, FileName}, Out1), - {Out3, CompSize, CRC} = put_z_file(CompMethod, UncompSize, Out2, F1, + + %% Start of extra data + Pos1 = Pos0 + ?LOCAL_FILE_HEADER_SZ + LH#local_file_header.file_name_length, + + Out3 = Output({write, Extra}, Out2), + + {Out4, CompSize, CRC} = put_z_file(CompMethod, UncompSize, Out3, F1, 0, Input, Output, OpO, Z, Type), + + Pos2 = Pos1 + LH#local_file_header.extra_field_length + CompSize, FB(FileName0), - Patch = <>, - Out4 = Output({pwrite, Pos0 + ?LOCAL_FILE_HEADER_CRC32_OFFSET, Patch}, Out3), - Out5 = Output({seek, eof, 0}, Out4), - Pos1 = Pos0 + ?LOCAL_FILE_HEADER_SZ + LH#local_file_header.file_name_length, - Pos2 = Pos1 + CompSize, + + %% Patch the CRC + Patch = <>, + Out5 = Output({pwrite, Pos0 + ?LOCAL_FILE_HEADER_CRC32_OFFSET, Patch}, Out4), + + %% Patch comp size if not zip64 + Out6 = Output({pwrite, Pos0 + ?LOCAL_FILE_HEADER_CRC32_OFFSET + 4, <>}, Out5), + + Out7 = Output({seek, eof, 0}, Out6), + + {Pos2, _} = Output({position, cur, 0}, Out7), %% Assert correct Pos2 + LH2 = LH#local_file_header{comp_size = CompSize, crc32 = CRC}, ThisAcc = [{LH2, FileName, Pos0}], - {Out6, SubAcc, Pos3} = + {Out8, SubAcc, Pos3} = case Type of regular -> - {Out5, ThisAcc, Pos2}; + {Out7, ThisAcc, Pos2}; directory -> Files = Input({list_dir, F1}, []), RevFiles = reverse_join_files(F, Files, []), - put_z_files(RevFiles, Z, Out5, Pos2, Opts, ThisAcc) + put_z_files(RevFiles, Z, Out7, Pos2, Opts, ThisAcc) end, Acc2 = lists:reverse(SubAcc) ++ Acc, - put_z_files(Rest, Z, Out6, Pos3, Opts, Acc2). + put_z_files(Rest, Z, Out8, Pos3, Opts, Acc2). reverse_join_files(Dir, [File | Files], Acc) -> reverse_join_files(Dir, Files, [filename:join([Dir, File]) | Acc]); reverse_join_files(_Dir, [], Acc) -> Acc. +encode_extra(FileInfo, ExtraOpts) -> + %% zip64 needs to be first so that we can patch the CompSize + [encode_extra_extended_timestamp(FileInfo) || lists:member(extended_timestamp, ExtraOpts)]. + +encode_extra_header(Header, Value) -> + [<>, Value]. + +encode_extra_extended_timestamp(FI) -> + {Mbit, MSystemTime} = + case datetime_to_system_time(FI#file_info.mtime) of + undefined -> {0, <<>>}; + Mtime -> + {1, <<(datetime_to_system_time(Mtime)):32/little>>} + end, + + {Abit, ASystemTime} = + case datetime_to_system_time(FI#file_info.atime) of + undefined -> {0, <<>>}; + Atime -> + {2, <<(datetime_to_system_time(Atime)):32/little>>} + end, + + encode_extra_header(?X5455_EXTENDED_TIMESTAMP, [Abit bor Mbit, MSystemTime, ASystemTime]). + %% flag for zlib -define(MAX_WBITS, 15). @@ -1208,11 +1326,9 @@ print_file_name(FileName) -> %% for printing directory (tt/1) raw_long_print_info_etc(#cd_file_header{comp_size = CompSize, - uncomp_size = UncompSize, - last_mod_date = LMDate, - last_mod_time = LMTime}, + uncomp_size = UncompSize} = CDFH, FileName, FileComment, _BExtraField, Acc) -> - MTime = dos_date_time_to_datetime(LMDate, LMTime), + MTime = file_header_mtime_to_datetime(CDFH), print_header(CompSize, MTime, UncompSize, FileName, FileComment), Acc; raw_long_print_info_etc(EOCD, _, Comment, _, Acc) when is_record(EOCD, eocd) -> @@ -1254,7 +1370,7 @@ month(11) -> "Nov"; month(12) -> "Dec". %% zip header functions -cd_file_header_from_lh_and_pos(LH, Pos) -> +cd_file_header_from_lh_pos_and_extra(LH, Pos, Extra) -> #local_file_header{version_needed = VersionNeeded, gp_flag = GPFlag, comp_method = CompMethod, @@ -1264,8 +1380,9 @@ cd_file_header_from_lh_and_pos(LH, Pos) -> comp_size = CompSize, uncomp_size = UncompSize, file_name_length = FileNameLength, - extra_field_length = ExtraFieldLength, + extra_field_length = _ExtraFieldLength, info = #file_info{ type = Type, mode = Mode }} = LH, + #cd_file_header{version_made_by = ?VERSION_MADE_BY, version_needed = VersionNeeded, gp_flag = GPFlag, @@ -1276,7 +1393,7 @@ cd_file_header_from_lh_and_pos(LH, Pos) -> comp_size = CompSize, uncomp_size = UncompSize, file_name_length = FileNameLength, - extra_field_length = ExtraFieldLength, + extra_field_length = iolist_size(Extra), file_comment_length = 0, % FileCommentLength, disk_num_start = 0, % DiskNumStart, internal_attr = 0, % InternalAttr, @@ -1354,30 +1471,35 @@ eocd_to_bin(#eocd{disk_num = DiskNum, offset = Offset, zip_comment_length = ZipCommentLength}) -> <>. + StartDiskNum:16/little, + EntriesOnDisk:16/little, + Entries:16/little, + Size:32/little, + Offset:32/little, + ZipCommentLength:16/little>>. %% put together a local file header -local_file_header_from_info_method_name(Info = #file_info{mtime = MTime}, - UncompSize, - CompMethod, Name, GPFlag) -> +local_file_header_from_info_method_name(#file_info{mtime = MTime, + atime = ATime} = Info, + UncompSize, CompMethod, + Name, GPFlag, Extra ) -> + CreationTime = os:system_time(second), {ModDate, ModTime} = dos_date_time_from_datetime( calendar:system_time_to_local_time( datetime_to_system_time(MTime), second)), - #local_file_header{version_needed = 20, + #local_file_header{version_needed = ?VERSION_NEEDED_DEFLATE, gp_flag = GPFlag, comp_method = CompMethod, last_mod_time = ModTime, last_mod_date = ModDate, + mtime = datetime_to_system_time(MTime), + atime = datetime_to_system_time(ATime), + ctime = datetime_to_system_time(CreationTime), crc32 = -1, comp_size = -1, uncomp_size = UncompSize, file_name_length = length(Name), - extra_field_length = 0, + extra_field_length = iolist_size(Extra), info = Info}. %% @@ -1394,11 +1516,12 @@ openzip_open(F, Options) -> do_openzip_open(F, Options) -> Opts = get_openzip_options(Options), - #openzip_opts{output = Output, open_opts = OpO, cwd = CWD} = Opts, + #openzip_opts{output = Output, open_opts = OpO, cwd = CWD, + extra = ExtraOpts} = Opts, Input = get_input(F), In0 = Input({open, F, OpO -- [write]}, []), {[#zip_comment{comment = C} | Files], In1} = - get_central_dir(In0, fun raw_file_info_etc/5, Input), + get_central_dir(In0, fun raw_file_info_etc/5, Input, ExtraOpts), Z = zlib:open(), {ok, #openzip{zip_comment = C, files = Files, @@ -1406,7 +1529,8 @@ do_openzip_open(F, Options) -> input = Input, output = Output, zlib = Z, - cwd = CWD}}. + cwd = CWD, + extra = ExtraOpts}}. %% retrieve all files from an open archive openzip_get(OpenZip) -> @@ -1416,10 +1540,10 @@ openzip_get(OpenZip) -> end. do_openzip_get(#openzip{files = Files, in = In0, input = Input, - output = Output, zlib = Z, cwd = CWD}) -> + output = Output, zlib = Z, cwd = CWD, extra = ExtraOpts}) -> ZipOpts = #unzip_opts{output = Output, input = Input, file_filter = fun all/1, open_opts = [], - feedback = fun silent/1, cwd = CWD}, + feedback = fun silent/1, cwd = CWD, extra = ExtraOpts}, R = get_z_files(Files, Z, In0, ZipOpts, []), {ok, R}; do_openzip_get(_) -> @@ -1440,13 +1564,13 @@ openzip_get(FileName, OpenZip) -> end. do_openzip_get(F, #openzip{files = Files, in = In0, input = Input, - output = Output, zlib = Z, cwd = CWD}) -> + output = Output, zlib = Z, cwd = CWD, extra = ExtraOpts}) -> %%case lists:keysearch(F, #zip_file.name, Files) of case file_name_search(F, Files) of {#zip_file{offset = Offset},_}=ZFile -> In1 = Input({seek, bof, Offset}, In0), case get_z_file(In1, Z, Input, Output, [], fun silent/1, - CWD, ZFile, fun all/1) of + CWD, ZFile, fun all/1, ExtraOpts) of {file, R, _In2} -> {ok, R}; _ -> throw(file_not_found) end; @@ -1554,11 +1678,18 @@ get_openzip_opt([memory | Rest], Opts) -> get_openzip_opt(Rest, Opts#openzip_opts{output = fun binary_io/2}); get_openzip_opt([{cwd, CWD} | Rest], Opts) -> get_openzip_opt(Rest, Opts#openzip_opts{cwd = CWD}); +get_openzip_opt([{extra, What} = O| Rest], Opts) when is_list(What) -> + case lists:all(fun(E) -> lists:member(E, ?EXTRA_OPTIONS) end, What) of + true -> + get_zip_opt(Rest, Opts#openzip_opts{extra = What}); + false -> + throw({bad_option, O}) + end; get_openzip_opt([Unknown | _Rest], _Opts) -> throw({bad_option, Unknown}). %% get the central directory from the archive -get_central_dir(In0, RawIterator, Input) -> +get_central_dir(In0, RawIterator, Input, ExtraOpts) -> {B, In1} = get_end_of_central_dir(In0, ?END_OF_CENTRAL_DIR_SZ, Input), {EOCD, BComment} = eocd_and_comment_from_bin(B), In2 = Input({seek, bof, EOCD#eocd.offset}, In1), @@ -1567,14 +1698,13 @@ get_central_dir(In0, RawIterator, Input) -> %% There is no encoding flag for the archive comment. Comment = heuristic_to_string(BComment), Out0 = RawIterator(EOCD, "", Comment, <<>>, Acc0), - get_cd_loop(N, In2, RawIterator, Input, Out0). + get_cd_loop(N, In2, RawIterator, Input, ExtraOpts, Out0). -get_cd_loop(0, In, _RawIterator, _Input, Acc) -> +get_cd_loop(0, In, _RawIterator, _Input, _ExtraOpts, Acc) -> {lists:reverse(Acc), In}; -get_cd_loop(N, In0, RawIterator, Input, Acc0) -> - {B, In1} = Input({read, ?CENTRAL_FILE_HEADER_SZ}, In0), - BCD = case B of - <> -> XBCD; +get_cd_loop(N, In0, RawIterator, Input, ExtraOpts, Acc0) -> + {BCD, In1} = case Input({read, ?CENTRAL_FILE_HEADER_SZ}, In0) of + {<>, In} -> {XBCD, In}; _ -> throw(bad_central_directory) end, CD = cd_file_header_from_bin(BCD), @@ -1584,17 +1714,58 @@ get_cd_loop(N, In0, RawIterator, Input, Acc0) -> ToRead = FileNameLen + ExtraLen + CommentLen, GPFlag = CD#cd_file_header.gp_flag, {B2, In2} = Input({read, ToRead}, In1), - {FileName, Comment, BExtra} = - get_name_extra_comment(B2, FileNameLen, ExtraLen, CommentLen, GPFlag), - Acc1 = RawIterator(CD, FileName, Comment, BExtra, Acc0), - get_cd_loop(N-1, In2, RawIterator, Input, Acc1). - -get_name_extra_comment(B, FileNameLen, ExtraLen, CommentLen, GPFlag) -> + {FileName, BExtra, Comment} = + get_filename_extra_comment(B2, FileNameLen, ExtraLen, CommentLen, GPFlag), + + ExtraCD = + update_extra_fields(CD, BExtra, ExtraOpts), + + Acc1 = RawIterator(ExtraCD, FileName, Comment, BExtra, Acc0), + get_cd_loop(N-1, In2, RawIterator, Input, ExtraOpts, Acc1). + +%% We parse and apply some extra fields defined by Info-ZIP. For details see: +%% proginfo/extrafld.txt in unzip. https://fossies.org/linux/unzip/proginfo/extrafld.txt +-spec update_extra_fields(#local_file_header{} | #cd_file_header{}, binary(), extra()) -> + #local_file_header{} | #cd_file_header{}. +update_extra_fields(FileHeader, BExtra, ExtraOpts) -> + %% We depend on some fields in the records to be at the same position + #local_file_header.mtime = #cd_file_header.mtime, + #local_file_header.atime = #cd_file_header.atime, + #local_file_header.ctime = #cd_file_header.ctime, + ExtendedTimestamp = lists:member(extended_timestamp, ExtraOpts), + lists:foldl( + fun({?X5455_EXTENDED_TIMESTAMP, Data}, Acc) when ExtendedTimestamp -> + update_extended_timestamp(Acc, Data); + (_, Acc) -> + Acc + end, FileHeader, parse_extra(BExtra)). + +update_extended_timestamp(FileHeader, <<_:5,HasCre:1,HasAcc:1,HasMod:1,Data/binary>> ) -> + {FHMod, DataMod} = update_extended_timestamp(FileHeader, HasMod, Data, #cd_file_header.mtime), + {FHAcc, DataAcc} = update_extended_timestamp(FHMod, HasAcc, DataMod, #cd_file_header.atime), + {FHCre, <<>>} = update_extended_timestamp(FHAcc, HasCre, DataAcc, #cd_file_header.ctime), + FHCre. + +update_extended_timestamp(FH, 1, <>, Field) -> + {setelement(Field, FH, Value), Rest}; +%% It seems like sometimes bits are set, but the data does not include any payload +update_extended_timestamp(FH, 1, <<>>, _Field) -> + {FH, <<>>}; +update_extended_timestamp(FH, 0, Data, _Field) -> + {FH, Data}. + +parse_extra(<>) -> + [{Tag, Data} | parse_extra(Rest)]; +parse_extra(<<>>) -> + []. + +get_filename_extra_comment(B, FileNameLen, ExtraLen, CommentLen, GPFlag) -> try <> = B, {binary_to_chars(BFileName, GPFlag), + BExtra, %% Appendix D says: "If general purpose bit 11 is unset, the %% file name and comment should conform to the original ZIP %% character encoding." However, it seems that at least Linux @@ -1603,8 +1774,7 @@ get_name_extra_comment(B, FileNameLen, ExtraLen, CommentLen, GPFlag) -> %% binary_to_chars/1 could (should?) be called (it can fail), %% but the choice is to employ heuristics in this case too %% (it does not fail). - heuristic_to_string(BComment), - BExtra} + heuristic_to_string(BComment)} catch _:_ -> throw(bad_central_directory) @@ -1634,6 +1804,29 @@ find_eocd_header(<<_:8, Rest/binary>>) find_eocd_header(_) -> none. +%% Taken from APPNOTE.TXT version 6.3.10 section 4.4.2.2 +os_id_to_atom(0) -> ~"MS-DOS and OS/2"; +os_id_to_atom(1) -> ~"Amiga"; +os_id_to_atom(2) -> ~"OpenVMS"; +os_id_to_atom(3) -> ~"UNIX"; +os_id_to_atom(4) -> ~"VM/CMS"; +os_id_to_atom(5) -> ~"Atari ST"; +os_id_to_atom(6) -> ~"OS/2 H.P.F.S"; +os_id_to_atom(7) -> ~"Macintosh"; +os_id_to_atom(8) -> ~"Z-System"; +os_id_to_atom(9) -> ~"CP/M"; +os_id_to_atom(10) -> ~"Windows NTFS"; +os_id_to_atom(11) -> ~"MVS"; +os_id_to_atom(12) -> ~"VSE"; +os_id_to_atom(13) -> ~"Acorn Risc"; +os_id_to_atom(14) -> ~"VFAT"; +os_id_to_atom(15) -> ~"alternate MVS"; +os_id_to_atom(16) -> ~"BeOS"; +os_id_to_atom(17) -> ~"Tandem"; +os_id_to_atom(18) -> ~"OS/400"; +os_id_to_atom(19) -> ~"OS X (Darwin)"; +os_id_to_atom(No) -> No. + %% from a central directory record, filter and accumulate what we need %% with zip_file_extra @@ -1660,11 +1853,11 @@ raw_file_info_public(CD, FileName, FileComment, BExtraField, Acc0) -> %% make a file_info from a central directory header cd_file_header_to_file_info(FileName, - #cd_file_header{uncomp_size = UncompSize, - last_mod_time = ModTime, - last_mod_date = ModDate} = CDFH, - ExtraField) -> - T = dos_date_time_to_datetime(ModDate, ModTime), + #cd_file_header{uncomp_size = UncompSize} = CDFH, + _ExtraField) -> + M = file_header_mtime_to_datetime(CDFH), + A = file_header_atime_to_datetime(CDFH), + C = file_header_ctime_to_datetime(CDFH), Type = case lists:last(FileName) of $/ -> directory; @@ -1680,24 +1873,19 @@ cd_file_header_to_file_info(FileName, ?DEFAULT_REGULAR_FILE_MODE end end, - FI = #file_info{size = UncompSize, - type = Type, - access = read_write, - atime = T, - mtime = T, - ctime = T, - mode = Mode, - links = 1, - major_device = 0, - minor_device = 0, - inode = 0, - uid = 0, - gid = 0}, - add_extra_info(FI, ExtraField). - -%% Currently, we ignore all the extra fields. -add_extra_info(FI, _) -> - FI. + #file_info{size = UncompSize, + type = Type, + access = read_write, + atime = A, + mtime = M, + ctime = C, + mode = Mode, + links = 1, + major_device = 0, + minor_device = 0, + inode = 0, + uid = 0, + gid = 0}. %% get all files using file list %% (the offset list is already filtered on which file to get... isn't it?) @@ -1708,13 +1896,13 @@ get_z_files([#zip_comment{comment = _} | Rest], Z, In, Opts, Acc) -> get_z_files([{#zip_file{offset = Offset},_} = ZFile | Rest], Z, In0, #unzip_opts{input = Input, output = Output, open_opts = OpO, file_filter = Filter, feedback = FB, - cwd = CWD} = Opts, Acc0) -> + cwd = CWD, extra = ExtraOpts} = Opts, Acc0) -> case Filter(ZFile) of true -> In1 = Input({seek, bof, Offset}, In0), {In2, Acc1} = case get_z_file(In1, Z, Input, Output, OpO, FB, - CWD, ZFile, Filter) of + CWD, ZFile, Filter, ExtraOpts) of {Type, GZD, Inx} when Type =:= file; Type =:= dir -> {Inx, [GZD | Acc0]}; {_, Inx} -> {Inx, Acc0} @@ -1726,7 +1914,7 @@ get_z_files([{#zip_file{offset = Offset},_} = ZFile | Rest], Z, In0, %% get a file from the archive, reading chunks get_z_file(In0, Z, Input, Output, OpO, FB, - CWD, {ZipFile,Extra}, Filter) -> + CWD, {ZipFile,ZipExtra}, Filter, ExtraOpts) -> case Input({read, ?LOCAL_FILE_HEADER_SZ}, In0) of {eof, In1} -> {eof, In1}; @@ -1738,25 +1926,29 @@ get_z_file(In0, Z, Input, Output, OpO, FB, file_name_length = FileNameLen, extra_field_length = ExtraLen} = LH, + {BFileN, In3} = Input({read, FileNameLen + ExtraLen}, In1), + {FileName, BLHExtra} = + get_filename_extra(FileNameLen, ExtraLen, BFileN, GPFlag), + LHExtra = + update_extra_fields(LH, BLHExtra, ExtraOpts), + {CompSize,CRC32} = case GPFlag band 8 =:= 8 of true -> {ZipFile#zip_file.comp_size, - Extra#zip_file_extra.crc32}; - false -> {LH#local_file_header.comp_size, - LH#local_file_header.crc32} + ZipExtra#zip_file_extra.crc32}; + false -> {LHExtra#local_file_header.comp_size, + LHExtra#local_file_header.crc32} end, - {BFileN, In3} = Input({read, FileNameLen + ExtraLen}, In1), - {FileName, _} = - get_file_name_extra(FileNameLen, ExtraLen, BFileN, GPFlag), + ReadAndWrite = case check_valid_location(CWD, FileName) of {true,FileName1} -> true; {false,FileName1} -> - Filter({ZipFile#zip_file{name = FileName1},Extra}) + Filter({ZipFile#zip_file{name = FileName1},ZipExtra}) end, case ReadAndWrite of true -> - {Type, Out, In} = + {Type, Out, In} = case lists:last(FileName) of $/ -> %% perhaps this should always be done? @@ -1775,23 +1967,25 @@ get_z_file(In0, Z, Input, Output, OpO, FB, FileInfo = local_file_header_to_file_info( Output({file_info, FileName1}, Out), - LH, ZipFile), + LHExtra, ZipFile), Out2 = Output({set_file_info, FileName1, FileInfo, [{time, local}]}, Out), {Type, Out2, In}; false -> {ignore, In3} end; - _ -> - throw(bad_local_file_header) + Else -> + throw({bad_local_file_header, Else}) end. local_file_header_to_file_info(FI, LFH, ZipFile) -> - Mtime = dos_date_time_to_datetime( - LFH#local_file_header.last_mod_date, - LFH#local_file_header.last_mod_time), + %% Validate that local_file_header mtime is the same as cd_file_header FI#file_info{ mode = ZipFile#zip_file.info#file_info.mode, - mtime = Mtime, atime = Mtime, ctime = Mtime }. + mtime = file_header_mtime_to_datetime(LFH), + atime = file_header_atime_to_datetime(LFH), + ctime = file_header_ctime_to_datetime(LFH) + }. + %% make sure FileName doesn't have relative path that points over CWD check_valid_location(CWD, FileName) -> @@ -1823,7 +2017,7 @@ check_dir_level([".." | Parts], Level) -> check_dir_level([_Dir | Parts], Level) -> check_dir_level(Parts, Level+1). -get_file_name_extra(FileNameLen, ExtraLen, B, GPFlag) -> +get_filename_extra(FileNameLen, ExtraLen, B, GPFlag) -> try <> = B, {binary_to_chars(BFileName, GPFlag), BExtra} @@ -1885,6 +2079,42 @@ skip_z_data_descriptor(GPFlag, Input, In0) when GPFlag band 8 =:= 8 -> skip_z_data_descriptor(_GPFlag, _Input, In0) -> In0. +%% If we have mtime we use that, otherwise use dos time +file_header_mtime_to_datetime(FH) -> + #cd_file_header.mtime = #local_file_header.mtime, + case element(#cd_file_header.mtime, FH) of + undefined -> + dos_date_time_to_datetime( + element(#cd_file_header.last_mod_date, FH), + element(#cd_file_header.last_mod_time, FH)); + MTime -> + calendar:system_time_to_local_time(MTime, second) + end. + +%% If we have atime we use that, otherwise use dos time +file_header_atime_to_datetime(FH) -> + #cd_file_header.atime = #local_file_header.atime, + case element(#cd_file_header.atime, FH) of + undefined -> + dos_date_time_to_datetime( + element(#cd_file_header.last_mod_date, FH), + element(#cd_file_header.last_mod_time, FH)); + Atime -> + calendar:system_time_to_local_time(Atime, second) + end. + +%% If we have ctime we use that, otherwise use dos time +file_header_ctime_to_datetime(FH) -> + #cd_file_header.ctime = #local_file_header.ctime, + case element(#cd_file_header.ctime, FH) of + undefined -> + dos_date_time_to_datetime( + element(#cd_file_header.last_mod_date, FH), + element(#cd_file_header.last_mod_time, FH)); + Ctime -> + calendar:system_time_to_local_time(Ctime, second) + end. + %% convert between erlang datetime and the MSDOS date and time %% that's stored in the zip archive %% MSDOS Time MSDOS Date @@ -1896,9 +2126,6 @@ dos_date_time_to_datetime(DosDate, DosTime) -> {{YearFrom1980+1980, Month, Day}, {Hour, Min, Sec * 2}}. -dos_date_time_from_datetime(Seconds) when is_integer(Seconds) -> - DateTime = calendar:now_to_datetime({0, Seconds, 0}), - dos_date_time_from_datetime(DateTime); dos_date_time_from_datetime({{Year, Month, Day}, {Hour, Min, Sec}}) -> YearFrom1980 = Year-1980, <> = <>, @@ -2000,7 +2227,7 @@ eocd_and_comment_from_bin(< throw(bad_eocd). -cd_file_header_from_bin(<>) -> #cd_file_header{version_made_by = VersionMadeBy, + os_made_by = os_id_to_atom(OsMadeBy), version_needed = VersionNeeded, gp_flag = GPFlag, comp_method = CompMethod, @@ -2059,6 +2287,14 @@ local_file_header_from_bin(_) -> throw(bad_local_file_header). %% io functions +binary_io({file_info, FN, Opts}, A) -> + FI = binary_io({file_info, FN}, A), + case proplists:get_value(time, Opts, local) of + local -> FI; + posix -> FI#file_info{ atime = datetime_to_system_time(FI#file_info.atime), + mtime = datetime_to_system_time(FI#file_info.mtime), + ctime = datetime_to_system_time(FI#file_info.ctime) } + end; binary_io({file_info, {_Filename, _B, #file_info{} = FI}}, _A) -> FI; binary_io({file_info, {_Filename, #file_info{} = FI, _B}}, _A) -> @@ -2115,6 +2351,9 @@ binary_io({seek, cur, Pos}, {OldPos, B}) -> {OldPos + Pos, B}; binary_io({seek, eof, Pos}, {_OldPos, B}) -> {byte_size(B) + Pos, B}; +binary_io({position, Loc, Adj}, File) -> + {Pos, _} = NewFile = binary_io({seek, Loc, Adj}, File), + {Pos, NewFile}; binary_io({pwrite, Pos, Data}, {OldPos, B}) -> {OldPos, pwrite_binary(B, Pos, Data)}; binary_io({write, Data}, {Pos, B}) -> @@ -2137,6 +2376,11 @@ file_io({file_info, F}, _) -> {ok, Info} -> Info; {error, E} -> throw(E) end; +file_io({file_info, F, Opts}, _) -> + case file:read_file_info(F, Opts) of + {ok, Info} -> Info; + {error, E} -> throw(E) + end; file_io({open, FN, Opts}, _) -> case lists:member(write, Opts) of true -> ok = filelib:ensure_dir(FN); @@ -2163,6 +2407,11 @@ file_io({seek, S, Pos}, H) -> {ok, _NewPos} -> H; {error, Error} -> throw(Error) end; +file_io({position, S, Pos}, H) -> + case file:position(H, {S, Pos}) of + {ok, NewPos} -> {NewPos, H}; + {error, Error} -> throw(Error) + end; file_io({write, Data}, H) -> case file:write(H, Data) of ok -> H; diff --git a/lib/stdlib/test/zip_SUITE.erl b/lib/stdlib/test/zip_SUITE.erl index 644dc53035c9..aac5cc33ab3a 100644 --- a/lib/stdlib/test/zip_SUITE.erl +++ b/lib/stdlib/test/zip_SUITE.erl @@ -32,7 +32,7 @@ compress_control/1, foldl/1,fd_leak/1,unicode/1,test_zip_dir/1, explicit_file_info/1, mode/1, - basic_timestamp/1]). + basic_timestamp/1, extended_timestamp/1]). -import(proplists,[get_value/2, get_value/3]). @@ -66,8 +66,9 @@ zip_groups() -> || ZipMode <- ?ZIP_MODES] ++ [{G, [parallel], zip_testcases()} || G <- ?UNZIP_MODES]. + zip_testcases() -> - [mode, basic_timestamp]. + [mode, basic_timestamp, extended_timestamp]. init_per_suite(Config) -> Config. @@ -860,11 +861,11 @@ foldl(Config) -> FooBin = <<"FOO">>, BarBin = <<"BAR">>, Files = [{"foo", FooBin}, {"bar", BarBin}], - {ok, {File, Bin}} = zip:create(File, Files, [memory]), + {ok, {File, Bin}} = zip:create(File, Files, [memory,{extra,[]}]), ZipFun = fun(N, I, B, Acc) -> [{N, B(), I()} | Acc] end, {ok, FileSpec} = zip:foldl(ZipFun, [], {File, Bin}), [{"bar", BarBin, #file_info{}}, {"foo", FooBin, #file_info{}}] = FileSpec, - {ok, {File, Bin}} = zip:create(File, lists:reverse(FileSpec), [memory]), + {ok, {File, Bin}} = zip:create(File, lists:reverse(FileSpec), [memory,{extra,[]}]), {foo_bin, FooBin} = try zip:foldl(fun("foo", _, B, _) -> throw(B()); (_, _, _, Acc) -> Acc end, [], {File, Bin}) @@ -1128,7 +1129,6 @@ mode(Config) -> ok. - %% Test basic timestamps, the atime and mtime should be the original %% mtime of the file basic_timestamp(Config) -> @@ -1147,7 +1147,7 @@ basic_timestamp(Config) -> %% Create an archive without extended timestamps ?assertMatch( {ok, Archive}, - zip(Config, Archive, "-X", ["testfile.txt"], [{cwd, PrivDir}])), + zip(Config, Archive, "-X", ["testfile.txt"], [{cwd, PrivDir}, {extra, []}])), {ok, [#zip_comment{}, #zip_file{ info = ZipFI = #file_info{ mtime = ZMtime }} ]} = @@ -1193,6 +1193,71 @@ basic_timestamp(Config) -> ok. +%% Test extended timestamps, the atime and ctime in the archive are +%% the atime and ctime when the file is added to the archive. +extended_timestamp(Config) -> + + case os:cmd("zip -v | grep USE_EF_UT_TIME") of + "" -> {skip, "zip does not support extended timestamps"}; + _ -> + PrivDir = get_value(pdir, Config), + Archive = filename:join(PrivDir, "archive.zip"), + ExtractDir = filename:join(PrivDir, "extract"), + Testfile = filename:join(PrivDir, "testfile.txt"), + + ok = file:write_file(Testfile, "abc"), + {ok, OndiskFI = #file_info{ mtime = Mtime }} = + file:read_file_info(Testfile), + + %% Sleep a bit to let the timestamp progress + timer:sleep(1000), + + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "", ["testfile.txt"], [{cwd, PrivDir}])), + + %% list_dir only reads the central directory header and thus only + %% the mtime will be correct here + {ok, [#zip_comment{}, + #zip_file{ info = ZipFI = #file_info{ mtime = ZMtime}} ]} = + zip:list_dir(Archive), + + ct:log("on disk: ~p",[OndiskFI]), + ct:log("in zip : ~p",[ZipFI]), + ct:log("zipinfo:~n~ts",[os:cmd("zipinfo -v "++Archive)]), + + ?assertEqual(Mtime, ZMtime), + + %% Sleep a bit to let the timestamp progress + timer:sleep(1000), + + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, ["testfile.txt"]}, + unzip(Config, Archive, [{cwd,ExtractDir}])), + + {ok, UnzipFI = #file_info{ atime = UnZAtime, + mtime = UnZMtime, + ctime = UnZCtime + }} = + file:read_file_info(filename:join(ExtractDir, "testfile.txt")), + + ct:log("extract: ~p",[UnzipFI]), + + case get_value(unzip, Config) =/= unemzip of + true -> + ?assertEqual(ZMtime, UnZMtime), + ?assertEqual(UnZAtime, UnZMtime), + + ?assert(UnZMtime < UnZCtime); + false -> + %% emzip does not support timestamps + ok + end, + + ok + end. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Generic zip interface %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From 665b467179db25d3cbdffaa9e7a7e8e52f73b9c9 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Fri, 31 May 2024 10:01:38 +0200 Subject: [PATCH 12/21] zip: Add support for UNIX3 (aka uid and gid) extra attribute --- lib/stdlib/src/zip.erl | 56 ++++++++++++++++++++++++++++------- lib/stdlib/test/zip_SUITE.erl | 54 +++++++++++++++++++++++++++++++-- 2 files changed, 97 insertions(+), 13 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index 8e33143be0de..5daaee94ee1b 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -44,8 +44,9 @@ convention, add `.zip` to the filename. file by file, without having to reopen the archive. This can be done by functions [`zip_open/1,2`](`zip_open/1`), [`zip_get/1,2`](`zip_get/1`), `zip_list_dir/1`, and `zip_close/1`. -- The ZIP extensions 0x5355 "extended timestamps" is supported. Both extensions - are by default enabled when creating and extracting from an archive. Use the `extra` +- The ZIP extensions 0x5355 "extended timestamps" and 0x7875 "UID+GID handling" + are supported. Both extensions are by default enabled when creating an archive, + but only "extended timestamps" are enabled when extracting. Use the `extra` option to change how these extensions are used. ## Limitations @@ -191,14 +192,18 @@ convention, add `.zip` to the filename. mtime, atime, ctime, + %% X7875_UNIX3 extension + uid = 0, + gid = 0, %% local_file_header specific file_name_length, extra_field_length, %% extra data needed to create cd_file_header info :: undefined | file:file_info() }). --define(EXTRA_OPTIONS, [extended_timestamp]). +-define(EXTRA_OPTIONS, [extended_timestamp, uid_gid]). -define(X5455_EXTENDED_TIMESTAMP, 16#5455). +-define(X7875_UNIX3, 16#7875). -define(CENTRAL_FILE_HEADER_SZ,(4+2+2+2+2+2+2+4+4+4+2+2+2+2+2+4+4)). @@ -226,6 +231,9 @@ convention, add `.zip` to the filename. mtime, atime, ctime, + %% X7875_UNIX3 extension + uid = 0, + gid = 0, %% cd_file_header specific version_made_by, os_made_by, @@ -255,8 +263,10 @@ The possible extra extension that can be used. - **`extended_timestamp`** - enables the 0x5455 "extended timestamps" zip extension that embeds POSIX timestamps for access and modification times for each file in the archive. +- **`uid_gid`** - enables 0x7875 "UNIX 3rd generation" zip extension that embeds the + UID and GID for each file into the archive. """. --type extra() :: [extended_timestamp]. +-type extra() :: [extended_timestamp | uid_gid]. -doc "These options are described in [`create/3`](`m:zip#zip_options`).". -type create_option() :: memory | cooked | verbose @@ -791,7 +801,7 @@ do_t(F, RawPrint) -> Input = get_input(F), OpO = [raw], In0 = Input({open, F, OpO}, []), - {_Info, In1} = get_central_dir(In0, RawPrint, Input, [extended_timestamp]), + {_Info, In1} = get_central_dir(In0, RawPrint, Input, ?EXTRA_OPTIONS), Input(close, In1), ok. @@ -953,7 +963,7 @@ get_zip_options(Files, Options) -> cwd = "", compress = all, uncompress = Suffixes, - extra = [extended_timestamp] + extra = ?EXTRA_OPTIONS }, Opts1 = #zip_opts{comment = Comment} = get_zip_opt(Options, Opts), %% UTF-8 encode characters in the interval from 127 to 255. @@ -975,7 +985,7 @@ get_openzip_options(Options) -> Opts = #openzip_opts{open_opts = [raw, read], output = fun file_io/2, cwd = "", - extra = [extended_timestamp]}, + extra = ?EXTRA_OPTIONS}, get_openzip_opt(Options, Opts). get_input(F) when is_binary(F) -> @@ -1233,7 +1243,8 @@ reverse_join_files(_Dir, [], Acc) -> encode_extra(FileInfo, ExtraOpts) -> %% zip64 needs to be first so that we can patch the CompSize - [encode_extra_extended_timestamp(FileInfo) || lists:member(extended_timestamp, ExtraOpts)]. + [[encode_extra_extended_timestamp(FileInfo) || lists:member(extended_timestamp, ExtraOpts)], + [encode_extra_uid_gid(FileInfo) || lists:member(uid_gid, ExtraOpts)]]. encode_extra_header(Header, Value) -> [<>, Value]. @@ -1255,6 +1266,13 @@ encode_extra_extended_timestamp(FI) -> encode_extra_header(?X5455_EXTENDED_TIMESTAMP, [Abit bor Mbit, MSystemTime, ASystemTime]). +encode_extra_uid_gid(#file_info{ uid = Uid, gid = Gid }) + when Uid =/= undefined, Gid =/= undefined -> + encode_extra_header(?X7875_UNIX3,<<1, 4, Uid:32/little, + 4, Gid:32/little>>); +encode_extra_uid_gid(_) -> + <<>>. + %% flag for zlib -define(MAX_WBITS, 15). @@ -1480,7 +1498,9 @@ eocd_to_bin(#eocd{disk_num = DiskNum, %% put together a local file header local_file_header_from_info_method_name(#file_info{mtime = MTime, - atime = ATime} = Info, + atime = ATime, + uid = Uid, + gid = Gid} = Info, UncompSize, CompMethod, Name, GPFlag, Extra ) -> CreationTime = os:system_time(second), @@ -1495,6 +1515,8 @@ local_file_header_from_info_method_name(#file_info{mtime = MTime, mtime = datetime_to_system_time(MTime), atime = datetime_to_system_time(ATime), ctime = datetime_to_system_time(CreationTime), + uid = Uid, + gid = Gid, crc32 = -1, comp_size = -1, uncomp_size = UncompSize, @@ -1732,10 +1754,17 @@ update_extra_fields(FileHeader, BExtra, ExtraOpts) -> #local_file_header.mtime = #cd_file_header.mtime, #local_file_header.atime = #cd_file_header.atime, #local_file_header.ctime = #cd_file_header.ctime, + #local_file_header.uid = #cd_file_header.uid, + #local_file_header.gid = #cd_file_header.gid, + ExtendedTimestamp = lists:member(extended_timestamp, ExtraOpts), + UidGid = lists:member(uid_gid, ExtraOpts), + lists:foldl( fun({?X5455_EXTENDED_TIMESTAMP, Data}, Acc) when ExtendedTimestamp -> update_extended_timestamp(Acc, Data); + ({?X7875_UNIX3, Data}, Acc) when UidGid -> + update_unix3(Acc, Data); (_, Acc) -> Acc end, FileHeader, parse_extra(BExtra)). @@ -1754,6 +1783,11 @@ update_extended_timestamp(FH, 1, <<>>, _Field) -> update_extended_timestamp(FH, 0, Data, _Field) -> {FH, Data}. +update_unix3(FH, <<1, UidSize, Uid:(UidSize*8)/little, GidSize, Gid:(GidSize*8)/little>>) -> + setelement(#cd_file_header.gid, setelement(#cd_file_header.uid, FH, Uid), Gid); +update_unix3(FH, <>) when Vsn =/= 1 -> + FH. + parse_extra(<>) -> [{Tag, Data} | parse_extra(Rest)]; parse_extra(<<>>) -> @@ -1884,8 +1918,8 @@ cd_file_header_to_file_info(FileName, major_device = 0, minor_device = 0, inode = 0, - uid = 0, - gid = 0}. + uid = CDFH#cd_file_header.uid, + gid = CDFH#cd_file_header.gid}. %% get all files using file list %% (the offset list is already filtered on which file to get... isn't it?) diff --git a/lib/stdlib/test/zip_SUITE.erl b/lib/stdlib/test/zip_SUITE.erl index aac5cc33ab3a..517cb37aa3e5 100644 --- a/lib/stdlib/test/zip_SUITE.erl +++ b/lib/stdlib/test/zip_SUITE.erl @@ -32,7 +32,8 @@ compress_control/1, foldl/1,fd_leak/1,unicode/1,test_zip_dir/1, explicit_file_info/1, mode/1, - basic_timestamp/1, extended_timestamp/1]). + basic_timestamp/1, extended_timestamp/1, + uid_gid/1]). -import(proplists,[get_value/2, get_value/3]). @@ -68,7 +69,7 @@ zip_groups() -> zip_testcases() -> - [mode, basic_timestamp, extended_timestamp]. + [mode, basic_timestamp, extended_timestamp, uid_gid]. init_per_suite(Config) -> Config. @@ -1258,6 +1259,55 @@ extended_timestamp(Config) -> ok end. +uid_gid(Config) -> + + case os:cmd("zip -v | grep STORE_UNIX_UIDs_GIDs") of + "" -> {skip, "zip does not support uid/gids"}; + _ -> + + PrivDir = get_value(pdir, Config), + ExtractDir = filename:join(PrivDir, "extract"), + Archive = filename:join(PrivDir, "archive.zip"), + Testfile = filename:join(PrivDir, "testfile.txt"), + + ok = file:write_file(Testfile, "abc"), + {ok, OndiskFI = #file_info{ gid = GID, uid = UID }} = + file:read_file_info(Testfile), + + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "", ["testfile.txt"], [{cwd, PrivDir}])), + + {ok, [#zip_comment{}, + #zip_file{ info = ZipFI = #file_info{ gid = ZGID, uid = ZUID }} ]} = + zip:list_dir(Archive,[{extra, [uid_gid]}]), + + ct:log("on disk: ~p",[OndiskFI]), + ct:log("in zip : ~p",[ZipFI]), + + ?assertEqual(UID, ZUID), + ?assertEqual(GID, ZGID), + + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, ["testfile.txt"]}, + unzip(Config, Archive, [{cwd, ExtractDir},{extra,[uid_gid]}])), + + {ok,#file_info{ gid = ExZGID, uid = ExZUID }} = + file:read_file_info(filename:join(ExtractDir,"testfile.txt")), + + case get_value(unzip, Config) =/= unemzip of + true -> + ?assertEqual(UID, ExZUID), + ?assertEqual(GID, ExZGID); + _ -> + %% emzip does not support uid_gid + ok + end, + + ok + end. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Generic zip interface %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From dac9126dae5706169071a6e1864d603705201c35 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Fri, 31 May 2024 10:40:26 +0200 Subject: [PATCH 13/21] zip: Implement zip64 support --- lib/stdlib/src/zip.erl | 361 +++++++++++++++++++++++++++------- lib/stdlib/test/zip_SUITE.erl | 180 ++++++++++++++++- 2 files changed, 459 insertions(+), 82 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index 5daaee94ee1b..45c6437a93f6 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -26,12 +26,12 @@ format is specified by the "ZIP Appnote.txt" file, available on the PKWARE web site [www.pkware.com](http://www.pkware.com). The zip module supports zip archive versions up to 6.1. However, -password-protection and Zip64 are not supported. +password-protection is not supported. By convention, the name of a zip file is to end with `.zip`. To abide to the convention, add `.zip` to the filename. -- To create zip archives, use function `zip/2` or [`zip/3`](`zip/2`). They are +- To create zip archives, use function `zip/2` or `zip/3`. They are also available as [`create/2,3`](`create/3`), to resemble the `m:erl_tar` module. - To extract files from a zip archive, use function `unzip/1` or `unzip/2`. They are also available as [`extract/1,2`](`extract/1`), to resemble the `m:erl_tar` module. @@ -51,11 +51,9 @@ convention, add `.zip` to the filename. ## Limitations -- Zip64 archives are not supported. - Password-protected and encrypted archives are not supported. - Only the DEFLATE (zlib-compression) and the STORE (uncompressed data) zip methods are supported. -- The archive size is limited to 2 GB (32 bits). - Comments for individual files are not supported when creating zip archives. The zip archive comment for the whole zip archive is supported. - Changing a zip archive is not supported. To add or remove a file from an @@ -73,8 +71,7 @@ convention, add `.zip` to the filename. %% zip server -export([zip_open/1, zip_open/2, zip_get/1, zip_get/2, zip_get_crc32/2, - zip_list_dir/1, - zip_close/1]). + zip_list_dir/1, zip_close/1]). %% includes -include("file.hrl"). % #file_info @@ -86,9 +83,6 @@ convention, add `.zip` to the filename. %% for debugging, to turn off catch -define(CATCH(Expr), (catch (Expr))). -%% Debug. --define(SHOW_GP_BIT_11(B, F), ok). - %% option sets -record(unzip_opts, { output, % output object (fun) @@ -166,17 +160,18 @@ convention, add `.zip` to the filename. -define(PKWARE_RESERVED, 11). -define(BZIP2_COMPRESSED, 12). +%% Version 2.6, attribute compatibility type 3 (Unix) -define(OS_MADE_BY_UNIX, 3). --define(VERSION_MADE_BY, 20 bor (?OS_MADE_BY_UNIX bsl 8)). -define(VERSION_NEEDED_STORE, 10). -define(VERSION_NEEDED_DEFLATE, 20). +-define(VERSION_NEEDED_ZIP64, 45). +-define(VERSION_MADE_BY, 61). -define(GP_BIT_11, 16#800). % Filename and file comment UTF-8 encoded. %% zip-file records -define(LOCAL_FILE_MAGIC,16#04034b50). -define(LOCAL_FILE_HEADER_SZ,(4+2+2+2+2+2+4+4+4+2+2)). -define(LOCAL_FILE_HEADER_CRC32_OFFSET, 4+2+2+2+2+2). --define(LOCAL_FILE_HEADER_EXTRA_LENGTH_OFFSET, 4+2+2+2+2+2+4+4+4+2). -record(local_file_header, { %% Common with cd_file_header @@ -202,6 +197,7 @@ convention, add `.zip` to the filename. info :: undefined | file:file_info() }). -define(EXTRA_OPTIONS, [extended_timestamp, uid_gid]). +-define(X0001_ZIP64, 16#0001). -define(X5455_EXTENDED_TIMESTAMP, 16#5455). -define(X7875_UNIX3, 16#7875). @@ -246,16 +242,33 @@ convention, add `.zip` to the filename. local_header_offset }). +-define(END_OF_CENTRAL_DIR_64_LOCATOR_MAGIC, 16#07064b50). +-define(END_OF_CENTRAL_DIR_64_LOCATOR_SZ, (4+8+4)). +-define(END_OF_CENTRAL_DIR_64_MAGIC, 16#06064b50). +-define(END_OF_CENTRAL_DIR_64_SZ, (2+2+4+4+8+8+8+8)). -define(END_OF_CENTRAL_DIR_MAGIC, 16#06054b50). -define(END_OF_CENTRAL_DIR_SZ, (4+2+2+2+2+4+4+2)). - --record(eocd, {disk_num, +-define(MAX_INT32, 16#FFFF_FFFF). +-define(MAX_INT16, 16#FFFF). + +%% 1.0 default version +%% 2.0 Deflate version +%% 4.5 File used ZIP64 format extension +%% 6.1 Version made by +-type zip_versions() :: 10 | 20 | 45 | 61. + +-record(eocd, {eocd :: undefined | #eocd{}, + version_made_by = 10 :: zip_versions(), + os_made_by = ~"UNIX" :: unicode:chardata() | 0..255, + extract_version = 10 :: zip_versions(), + disk_num, start_disk_num, entries_on_disk, entries, size, offset, - zip_comment_length}). + zip_comment_length, + extra}). -doc """ The possible extra extension that can be used. @@ -1109,7 +1122,7 @@ put_central_dir(LHS, Pos, Out0, put_cd_files_loop([], _Output, _ExtraOpts, Out, Sz) -> {Out, Sz}; put_cd_files_loop([{LH, Name, Pos} | LHRest], Output, ExtraOpts, Out0, Sz0) -> - Extra = cd_file_header_extra_from_lh_and_pos(LH, ExtraOpts), + Extra = cd_file_header_extra_from_lh_and_pos(LH, Pos, ExtraOpts), CDFH = cd_file_header_from_lh_pos_and_extra(LH, Pos, Extra), BCDFH = cd_file_header_to_bin(CDFH), B = [<>, BCDFH, Name, Extra], @@ -1119,11 +1132,15 @@ put_cd_files_loop([{LH, Name, Pos} | LHRest], Output, ExtraOpts, Out0, Sz0) -> put_cd_files_loop(LHRest, Output, ExtraOpts, Out1, Sz1). cd_file_header_extra_from_lh_and_pos( - #local_file_header{ info = FI }, ExtraOpts) -> - encode_extra(FI#file_info{ atime = undefined }, ExtraOpts). + #local_file_header{ comp_size = CompSize, + uncomp_size = UnCompSize, + info = FI }, Pos, ExtraOpts) -> + encode_extra(UnCompSize, CompSize, Pos, + FI#file_info{ atime = undefined }, ExtraOpts). %% put end marker of central directory, the last record in the archive -put_eocd(N, Pos, Sz, Comment, Output, Out0) -> +put_eocd(N, Pos, Sz, Comment, Output, Out0) when + Pos < ?MAX_INT32, N < ?MAX_INT16, Sz < ?MAX_INT32 -> CommentSz = length(Comment), EOCD = #eocd{disk_num = 0, start_disk_num = 0, @@ -1134,7 +1151,36 @@ put_eocd(N, Pos, Sz, Comment, Output, Out0) -> zip_comment_length = CommentSz}, BEOCD = eocd_to_bin(EOCD), B = [<>, BEOCD, Comment], - Output({write, B}, Out0). + Output({write, B}, Out0); +put_eocd(N, Pos, Sz, Comment, Output, Out0) -> + %% Zip64 eocd + EOCD64 = #eocd{os_made_by = ?OS_MADE_BY_UNIX, + version_made_by = ?VERSION_MADE_BY, + extract_version = ?VERSION_NEEDED_ZIP64, + disk_num = 0, + start_disk_num = 0, + entries_on_disk = N, + entries = N, + size = Sz, + offset = Pos, + extra = <<>> }, + BEOCD64 = eocd64_to_bin(EOCD64), + B = [<>, BEOCD64], + Out1 = Output({write, B}, Out0), + Out2 = Output({write, <> %% Total disks + }, Out1), + CommentSz = length(Comment), + EOCD = #eocd{disk_num = 0, + start_disk_num = 0, + entries_on_disk = min(N,?MAX_INT16), + entries = min(N,?MAX_INT16), + size = min(Sz,?MAX_INT32), + offset = min(Pos, ?MAX_INT32), + zip_comment_length = CommentSz}, + Output({write, [<>, eocd_to_bin(EOCD), Comment]}, Out2). get_filename({Name, _}, Type) -> get_filename(Name, Type); @@ -1174,7 +1220,7 @@ put_z_files([F | Rest], Z, Out0, Pos0, #zip_opts{input = Input, output = Output, open_opts = OpO, feedback = FB, cwd = CWD, extra = ExtraOpts} = Opts, Acc) -> - {Pos0, _} = Output({position, cur, 0}, Out0), %% Assert correct Pos0 + %% {Pos0, _} = Output({position, cur, 0}, Out0), %% Assert correct Pos0 In0 = [], F1 = add_cwd(CWD, F), @@ -1191,7 +1237,7 @@ put_z_files([F | Rest], Z, Out0, Pos0, CompMethod = get_comp_method(FileName, UncompSize, Opts, Type), %% Add any extra data needed and patch - Extra = encode_extra(FileInfo, ExtraOpts), + Extra = encode_extra(UncompSize, FileInfo, ExtraOpts), LH = local_file_header_from_info_method_name(FileInfo, UncompSize, CompMethod, FileName, GPFlag, Extra), @@ -1215,14 +1261,21 @@ put_z_files([F | Rest], Z, Out0, Pos0, Patch = <>, Out5 = Output({pwrite, Pos0 + ?LOCAL_FILE_HEADER_CRC32_OFFSET, Patch}, Out4), - %% Patch comp size if not zip64 - Out6 = Output({pwrite, Pos0 + ?LOCAL_FILE_HEADER_CRC32_OFFSET + 4, <>}, Out5), + Out6 = + %% If UncompSize > 4GB we always put the CompSize in the extra field + if UncompSize >= ?MAX_INT32 -> + %% 4 bytes for extra header + size and 8 bytes for UnComp:64 + Output({pwrite, Pos1 + 2 + 2 + 8, <>}, Out5); + true -> + %% Patch comp size if not zip64 + Output({pwrite, Pos0 + ?LOCAL_FILE_HEADER_CRC32_OFFSET + 4, <>}, Out5) + end, Out7 = Output({seek, eof, 0}, Out6), - {Pos2, _} = Output({position, cur, 0}, Out7), %% Assert correct Pos2 + %% {Pos2, _} = Output({position, cur, 0}, Out7), %% Assert correct Pos2 - LH2 = LH#local_file_header{comp_size = CompSize, crc32 = CRC}, + LH2 = LH#local_file_header{uncomp_size = UncompSize, comp_size = CompSize, crc32 = CRC}, ThisAcc = [{LH2, FileName, Pos0}], {Out8, SubAcc, Pos3} = case Type of @@ -1241,14 +1294,25 @@ reverse_join_files(Dir, [File | Files], Acc) -> reverse_join_files(_Dir, [], Acc) -> Acc. -encode_extra(FileInfo, ExtraOpts) -> +encode_extra(UnCompSize, FileInfo, ExtraOpts) -> + encode_extra(UnCompSize, 0, 0, FileInfo, ExtraOpts). +encode_extra(UnCompSize, CompSize, Pos, FileInfo, ExtraOpts) -> %% zip64 needs to be first so that we can patch the CompSize - [[encode_extra_extended_timestamp(FileInfo) || lists:member(extended_timestamp, ExtraOpts)], + [encode_extra_zip64(UnCompSize, CompSize, Pos), + [encode_extra_extended_timestamp(FileInfo) || lists:member(extended_timestamp, ExtraOpts)], [encode_extra_uid_gid(FileInfo) || lists:member(uid_gid, ExtraOpts)]]. encode_extra_header(Header, Value) -> [<>, Value]. +encode_extra_zip64(UncompSize, CompSize, Pos) when UncompSize >= ?MAX_INT32 -> + encode_extra_header(?X0001_ZIP64, [<>, + [<> || Pos >= ?MAX_INT32]]); +encode_extra_zip64(_UncompSize, _CompSize, Pos) when Pos >= ?MAX_INT32 -> + encode_extra_header(?X0001_ZIP64, <>); +encode_extra_zip64(_, _, _) -> + <<>>. + encode_extra_extended_timestamp(FI) -> {Mbit, MSystemTime} = case datetime_to_system_time(FI#file_info.mtime) of @@ -1389,7 +1453,7 @@ month(12) -> "Dec". %% zip header functions cd_file_header_from_lh_pos_and_extra(LH, Pos, Extra) -> - #local_file_header{version_needed = VersionNeeded, + #local_file_header{version_needed = LHVersionNeeded, gp_flag = GPFlag, comp_method = CompMethod, last_mod_time = LastModTime, @@ -1401,15 +1465,28 @@ cd_file_header_from_lh_pos_and_extra(LH, Pos, Extra) -> extra_field_length = _ExtraFieldLength, info = #file_info{ type = Type, mode = Mode }} = LH, - #cd_file_header{version_made_by = ?VERSION_MADE_BY, + VersionNeeded = + if Pos >= ?MAX_INT32 -> + ?VERSION_NEEDED_ZIP64; + true -> + LHVersionNeeded + end, + + #cd_file_header{os_made_by = ?OS_MADE_BY_UNIX, + version_made_by = ?VERSION_MADE_BY, version_needed = VersionNeeded, gp_flag = GPFlag, comp_method = CompMethod, last_mod_time = LastModTime, last_mod_date = LastModDate, crc32 = CRC32, - comp_size = CompSize, - uncomp_size = UncompSize, + comp_size = + if UncompSize >= ?MAX_INT32 -> + ?MAX_INT32; + true -> + CompSize + end, + uncomp_size = min(UncompSize, ?MAX_INT32), file_name_length = FileNameLength, extra_field_length = iolist_size(Extra), file_comment_length = 0, % FileCommentLength, @@ -1423,10 +1500,11 @@ cd_file_header_from_lh_pos_and_extra(LH, Pos, Extra) -> end; true -> Mode band 8#777 end bsl 16, - local_header_offset = Pos}. + local_header_offset = min(Pos, ?MAX_INT32)}. cd_file_header_to_bin( - #cd_file_header{version_made_by = VersionMadeBy, + #cd_file_header{os_made_by = OsMadeBy, + version_made_by = VersionMadeBy, version_needed = VersionNeeded, gp_flag = GPFlag, comp_method = CompMethod, @@ -1442,7 +1520,7 @@ cd_file_header_to_bin( internal_attr = InternalAttr, external_attr = ExternalAttr, local_header_offset = LocalHeaderOffset}) -> - <>. +eocd64_to_bin( + #eocd{os_made_by = OsMadeBy, + version_made_by = VersionMadeBy, + extract_version = ExtractVersion, + disk_num = DiskNum, + start_disk_num = StartDiskNum, + entries_on_disk = EntriesOnDisk, + entries = Entries, + size = Size, + offset = Offset, + extra = Extra}) -> + <>. + %% put together a local file header local_file_header_from_info_method_name(#file_info{mtime = MTime, atime = ATime, @@ -1507,7 +1606,15 @@ local_file_header_from_info_method_name(#file_info{mtime = MTime, {ModDate, ModTime} = dos_date_time_from_datetime( calendar:system_time_to_local_time( datetime_to_system_time(MTime), second)), - #local_file_header{version_needed = ?VERSION_NEEDED_DEFLATE, + VersionNeeded = if UncompSize >= ?MAX_INT32 -> + ?VERSION_NEEDED_ZIP64; + true -> + case CompMethod of + ?STORED -> ?VERSION_NEEDED_STORE; + ?DEFLATED -> ?VERSION_NEEDED_DEFLATE + end + end, + #local_file_header{version_needed = VersionNeeded, gp_flag = GPFlag, comp_method = CompMethod, last_mod_time = ModTime, @@ -1518,16 +1625,15 @@ local_file_header_from_info_method_name(#file_info{mtime = MTime, uid = Uid, gid = Gid, crc32 = -1, - comp_size = -1, - uncomp_size = UncompSize, + comp_size = ?MAX_INT32, + uncomp_size = min(UncompSize, ?MAX_INT32), file_name_length = length(Name), extra_field_length = iolist_size(Extra), info = Info}. %% -%% Functions used by zip server +%% Functions used by zip server to work with archives. %% - openzip_open(F, Options) -> case ?CATCH(do_openzip_open(F, Options)) of {ok, OpenZip} -> @@ -1712,15 +1818,20 @@ get_openzip_opt([Unknown | _Rest], _Opts) -> %% get the central directory from the archive get_central_dir(In0, RawIterator, Input, ExtraOpts) -> - {B, In1} = get_end_of_central_dir(In0, ?END_OF_CENTRAL_DIR_SZ, Input), - {EOCD, BComment} = eocd_and_comment_from_bin(B), - In2 = Input({seek, bof, EOCD#eocd.offset}, In1), + {Size, In1} = Input({position, eof, 0}, In0), + {{EOCD, BComment}, In2} = + get_end_of_central_dir( + In1, ?END_OF_CENTRAL_DIR_SZ, + min(16#ffff + ?END_OF_CENTRAL_DIR_SZ + ?END_OF_CENTRAL_DIR_64_LOCATOR_SZ, Size), + Input), + EOCD#eocd.disk_num == 0 orelse throw(multiple_disks_not_supported), + In3 = Input({seek, bof, EOCD#eocd.offset}, In2), N = EOCD#eocd.entries, Acc0 = [], %% There is no encoding flag for the archive comment. Comment = heuristic_to_string(BComment), Out0 = RawIterator(EOCD, "", Comment, <<>>, Acc0), - get_cd_loop(N, In2, RawIterator, Input, ExtraOpts, Out0). + get_cd_loop(N, In3, RawIterator, Input, ExtraOpts, Out0). get_cd_loop(0, In, _RawIterator, _Input, _ExtraOpts, Acc) -> {lists:reverse(Acc), In}; @@ -1751,6 +1862,8 @@ get_cd_loop(N, In0, RawIterator, Input, ExtraOpts, Acc0) -> #local_file_header{} | #cd_file_header{}. update_extra_fields(FileHeader, BExtra, ExtraOpts) -> %% We depend on some fields in the records to be at the same position + #local_file_header.comp_size = #cd_file_header.comp_size, + #local_file_header.uncomp_size = #cd_file_header.uncomp_size, #local_file_header.mtime = #cd_file_header.mtime, #local_file_header.atime = #cd_file_header.atime, #local_file_header.ctime = #cd_file_header.ctime, @@ -1761,7 +1874,9 @@ update_extra_fields(FileHeader, BExtra, ExtraOpts) -> UidGid = lists:member(uid_gid, ExtraOpts), lists:foldl( - fun({?X5455_EXTENDED_TIMESTAMP, Data}, Acc) when ExtendedTimestamp -> + fun({?X0001_ZIP64, Data}, Acc) -> + update_zip64(Acc, Data); + ({?X5455_EXTENDED_TIMESTAMP, Data}, Acc) when ExtendedTimestamp -> update_extended_timestamp(Acc, Data); ({?X7875_UNIX3, Data}, Acc) when UidGid -> update_unix3(Acc, Data); @@ -1769,6 +1884,17 @@ update_extra_fields(FileHeader, BExtra, ExtraOpts) -> Acc end, FileHeader, parse_extra(BExtra)). +update_zip64(FH, <>) when element(#cd_file_header.uncomp_size, FH) == ?MAX_INT32 -> + update_zip64(setelement(#cd_file_header.uncomp_size, FH, UnComp), Rest); +update_zip64(FH, <>) when element(#cd_file_header.comp_size, FH) == ?MAX_INT32 -> + update_zip64(setelement(#cd_file_header.comp_size, FH, Comp), Rest); +update_zip64(FH, <>) when element(#cd_file_header.local_header_offset, FH) == ?MAX_INT32 -> + update_zip64(setelement(#cd_file_header.local_header_offset, FH, LocalHeaderOffset), Rest); +update_zip64(FH, <>) when element(#cd_file_header.disk_num_start, FH) == ?MAX_INT32 -> + update_zip64(setelement(#cd_file_header.disk_num_start, FH, DiskNumStart), Rest); +update_zip64(FH, <<>>) -> + FH. + update_extended_timestamp(FileHeader, <<_:5,HasCre:1,HasAcc:1,HasMod:1,Data/binary>> ) -> {FHMod, DataMod} = update_extended_timestamp(FileHeader, HasMod, Data, #cd_file_header.mtime), {FHAcc, DataAcc} = update_extended_timestamp(FHMod, HasAcc, DataMod, #cd_file_header.atime), @@ -1817,27 +1943,131 @@ get_filename_extra_comment(B, FileNameLen, ExtraLen, CommentLen, GPFlag) -> %% get end record, containing the offset to the central directory %% the end record is always at the end of the file BUT alas it is %% of variable size (yes that's dumb!) -get_end_of_central_dir(_In, Sz, _Input) when Sz > 16#ffff -> - throw(bad_eocd); -get_end_of_central_dir(In0, Sz, Input) -> +get_end_of_central_dir(In0, Sz, MaxCentralDirSize, Input) -> In1 = Input({seek, eof, -Sz}, In0), {B, In2} = Input({read, Sz}, In1), - case find_eocd_header(B) of + case find_eocd(B) of + none when Sz =:= MaxCentralDirSize -> + throw(bad_eocd); none -> - get_end_of_central_dir(In2, Sz+Sz, Input); + get_end_of_central_dir(In2, min(Sz+Sz, MaxCentralDirSize), MaxCentralDirSize, Input); + {EOCD64Location, EOCD, Comment} -> + case find_eocd64(In2, EOCD64Location, EOCD, Comment, Input) of + none -> + throw(bad_eocd64); + {EOCD64, In3} -> + {EOCD64, In3} + end; Header -> {Header, In2} end. %% find the end record by matching for it -find_eocd_header(<>) -> - Rest; -find_eocd_header(<<_:8, Rest/binary>>) - when byte_size(Rest) > ?END_OF_CENTRAL_DIR_SZ-4 -> - find_eocd_header(Rest); -find_eocd_header(_) -> +%% The ?END_OF_CENTRAL_DIR_MAGIC could be in the comment, +%% so we need to match for the entire structure and make sure +%% the comment size consumes all of the binary. +find_eocd(<>) -> + if DiskNum =:= ?MAX_INT16; + StartDiskNum =:= ?MAX_INT16; + EntriesOnDisk =:= ?MAX_INT16, + Entries =:= ?MAX_INT16; + Size =:= ?MAX_INT32; + Offset =:= ?MAX_INT32 -> + {{EOCD64StartDiskNum, EOCD64Offset, EOCD64TotalDisk}, + #eocd{disk_num = DiskNum, + start_disk_num = StartDiskNum, + entries_on_disk = EntriesOnDisk, + entries = Entries, + size = Size, + offset = Offset, + zip_comment_length = ZipCommentLength}, + Comment}; + true -> + none + end; +find_eocd(<>) -> + if DiskNum =:= ?MAX_INT16; + StartDiskNum =:= ?MAX_INT16; + EntriesOnDisk =:= ?MAX_INT16; + Entries =:= ?MAX_INT16; + Size =:= ?MAX_INT32; + Offset =:= ?MAX_INT32 -> + %% There should be a eocd64 locator before this entry + none; + true -> + {#eocd{disk_num = DiskNum, + start_disk_num = StartDiskNum, + entries_on_disk = EntriesOnDisk, + entries = Entries, + size = Size, + offset = Offset, + zip_comment_length = ZipCommentLength}, + Comment} + end; +find_eocd(<<_:8, Rest/binary>>) when byte_size(Rest) > ?END_OF_CENTRAL_DIR_SZ-4 -> + find_eocd(Rest); +find_eocd(_) -> none. +find_eocd64(In0,{_EOCD64StartDiskNum, EOCD64Offset, _EOCD64TotalDisk}, EOCD, Comment, Input) -> + maybe + In1 = Input({seek, bof, EOCD64Offset}, In0), + + {<>, In2} + ?= Input({read, 4 + 8}, In1), + + {<>, In3} + ?= Input({read, EOCDSize}, In2), + + {{EOCD#eocd{ + eocd = EOCD, + version_made_by = VersionMadeBy, + os_made_by = os_id_to_atom(OsMadeBy), + extract_version = ExtractVersion, + disk_num = DiskNum, + start_disk_num = StartDiskNum, + entries_on_disk = EntriesOnDisk, + entries = Entries, + size = Size, + offset = Offset, + extra = parse_extra(Extra)}, Comment}, In3} + else + {eof, InEOF} -> + {eof, InEOF}; + _ -> + none + end. + + %% Taken from APPNOTE.TXT version 6.3.10 section 4.4.2.2 os_id_to_atom(0) -> ~"MS-DOS and OS/2"; os_id_to_atom(1) -> ~"Amiga"; @@ -2209,7 +2439,6 @@ skip_bin(B, Pos) when is_binary(B) -> end. binary_to_chars(B, GPFlag) -> - ?SHOW_GP_BIT_11(B, GPFlag band ?GP_BIT_11), case GPFlag band ?GP_BIT_11 of 0 -> binary_to_list(B); @@ -2241,26 +2470,6 @@ encode_string(String) -> {String, 0} end. -%% ZIP header manipulations -eocd_and_comment_from_bin(<>) -> - {#eocd{disk_num = DiskNum, - start_disk_num = StartDiskNum, - entries_on_disk = EntriesOnDisk, - entries = Entries, - size = Size, - offset = Offset, - zip_comment_length = ZipCommentLength}, - Comment}; -eocd_and_comment_from_bin(_) -> - throw(bad_eocd). - cd_file_header_from_bin(< zip_options, list_dir_options, aliases, zip_api, open_leak, unzip_jar, compress_control, foldl, unzip_traversal_exploit, fd_leak, unicode, test_zip_dir, - explicit_file_info, {group, zip_group}]. + explicit_file_info, {group, zip_group}, {group, zip64_group}]. groups() -> zip_groups(). @@ -62,33 +64,74 @@ groups() -> zip_groups() -> - [{zip_group,[],[{group,ZipMode} || ZipMode <- ?ZIP_MODES]}] ++ + ZipGroup= + [{zip_group,[],[{group,ZipMode} || ZipMode <- ?ZIP_MODES]}] ++ [{ZipMode, [], [{group,UnZipMode} || UnZipMode <- ?UNZIP_MODES]} || ZipMode <- ?ZIP_MODES] ++ - [{G, [parallel], zip_testcases()} || G <- ?UNZIP_MODES]. + [{G, [parallel], zip_testcases()} || G <- ?UNZIP_MODES], + Zip64Group = [{zip64_group,[],[{group,z64(ZipMode)} || ZipMode <- ?ZIP_MODES]}] ++ + [{z64(ZipMode), [sequence], [zip64_central_headers]++ + [{group,z64(UnZipMode)} || UnZipMode <- ?UNZIP_MODES]} + || ZipMode <- ?ZIP_MODES] ++ + [{z64(G), [], zip64_testcases()} || G <- ?UNZIP_MODES], + + ZipGroup ++ Zip64Group. + +z64(Mode) when is_atom(Mode) -> + list_to_atom(lists:concat([z64_,Mode])); +z64(Modes) when is_list(Modes) -> + [z64(M) || M <- Modes]. + +noz64(Z64Mode) -> + case string:split(atom_to_list(Z64Mode), "_") of + ["z64",Mode] -> + list_to_atom(Mode); + [_Mode] -> + Z64Mode + end. zip_testcases() -> [mode, basic_timestamp, extended_timestamp, uid_gid]. +zip64_testcases() -> + [unzip64_central_headers, + zip64_central_directory]. + init_per_suite(Config) -> Config. end_per_suite(_Config) -> ok. +init_per_group(zip64_group, Config) -> + PrivDir = get_value(priv_dir, Config), + + OneMB = <<0:(8 bsl 20)>>, + Large4GB = filename:join(PrivDir, "large.txt"), + ok = file:write_file(Large4GB, lists:duplicate(4 bsl 10, OneMB)), + Medium4MB = filename:join(PrivDir, "medium.txt"), + ok = file:write_file(Medium4MB, lists:duplicate(4, OneMB)), + + [{large, Large4GB},{medium,Medium4MB}|Config]; init_per_group(Group, Config) -> - case lists:member(Group, ?ZIP_MODES ++ ?UNZIP_MODES) of + case lists:member(Group, ?ZIP_MODES ++ ?UNZIP_MODES ++ z64(?ZIP_MODES ++ ?UNZIP_MODES)) of true -> case get_value(zip, Config) of undefined -> + [throw({skip, "zip does not support zip64"}) || + Group =:= zip andalso + get_value(large,Config) =/= undefined andalso + os:cmd("zip -v | grep ZIP64_SUPPORT") == ""], + ct:print("Zip: ~p", [Group]), Pdir = filename:join(get_value(priv_dir, Config),Group), ok = filelib:ensure_path(Pdir), - [{pdir, Pdir},{zip, Group} | Config]; + [{pdir, Pdir},{zip, noz64(Group)} | Config]; _Zip -> + ct:print("UnZip: ~p", [Group]), Pdir = filename:join(get_value(pdir, Config),Group), ok = filelib:ensure_path(Pdir), - [{pdir, Pdir},{unzip, Group} | Config] + [{pdir, Pdir},{unzip, noz64(Group)} | Config] end; false -> Config @@ -1130,6 +1173,128 @@ mode(Config) -> ok. +%% Test that zip64 local and central headers are respected when unzipping. +%% The fields in the header that can be 64-bit are: +%% * compressed size +%% * uncompressed size +%% * relative offset +%% * starting disk +%% +%% As we do not support using multiple disks, we do not test starting disks +zip64_central_headers(Config) -> + + PrivDir = get_value(pdir, Config), + Archive = filename:join(PrivDir, "../archive.zip"), + + %% Check that ../../large.txt exists and is of correct size + {ok, #file_info{ size = 1 bsl 32 } } = + file:read_file_info(filename:join(PrivDir, "../../large.txt")), + + %% We very carefully create an archive that should contain all + %% different header combinations. + %% - uncomp.txt: uncomp size > 4GB + %% - uncomp.comp.zip: uncomp and comp size > 4GB + %% - offset.txt: offset > 4GB + %% - uncomp.offset.txt: uncomp size and offset > 4GB + %% - uncomp.comp.offset.zip: uncomp and comp size and offset > 4GB + %% + %% The archive will be roughly 8 GBs large + + ok = file:make_symlink("../../large.txt", filename:join(PrivDir, "uncomp.txt")), + ok = file:make_symlink("../../large.txt", filename:join(PrivDir, "uncomp.comp.zip")), + ok = file:make_symlink("../../medium.txt", filename:join(PrivDir, "offset.txt")), + ok = file:make_symlink("../../large.txt", filename:join(PrivDir, "uncomp.offset.txt")), + ok = file:make_symlink("../../large.txt", filename:join(PrivDir, "uncomp.comp.offset.zip")), + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "-1", + ["uncomp.txt","uncomp.comp.zip","offset.txt", + "uncomp.offset.txt","uncomp.comp.offset.zip"], + [{cwd, PrivDir}])), + + %% Check that list archive works + {ok, [#zip_comment{}, + #zip_file{ name = "uncomp.txt", + info = #file_info{ size = 1 bsl 32 } }, + #zip_file{ name = "uncomp.comp.zip", + comp_size = 1 bsl 32, + info = #file_info{ size = 1 bsl 32 } }, + #zip_file{ name = "offset.txt", + info = #file_info{ size = 4 bsl 20 } }, + #zip_file{ name = "uncomp.offset.txt", + info = #file_info{ size = 1 bsl 32 } }, + #zip_file{ name = "uncomp.comp.offset.zip", + comp_size = 1 bsl 32, + info = #file_info{ size = 1 bsl 32 } } + ]} = + zip:list_dir(Archive), + ok. + +unzip64_central_headers(Config) -> + + PrivDir = get_value(pdir, Config), + ExtractDir = filename:join(PrivDir, "extract"), + Archive = filename:join(PrivDir, "../../archive.zip"), + Large4GB = filename:join(get_value(priv_dir, Config),"large.txt"), + Medium4MB = filename:join(get_value(priv_dir, Config), "medium.txt"), + + %% Test that extraction of each file works + lists:map( + fun F({Name, Compare}) -> + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, [Name]}, + unzip(Config, Archive, [{cwd, ExtractDir},{file_list,[Name]}])), + cmp(Compare, filename:join(ExtractDir,Name)), + file:del_dir_r(ExtractDir); + F(Name) -> + F({Name, Large4GB}) + end, ["uncomp.txt","uncomp.comp.zip",{"offset.txt",Medium4MB}, + "uncomp.offset.txt","uncomp.comp.offset.zip"]), + + ok. + +%% Test that zip64 end of central directory are respected when unzipping. +%% The fields in the header that can be 64-bit are: +%% * total number of files > 2 bytes +%% * size of central directory > 4 bytes (cannot test as it requires an archive with 8 million files) +%% * offset of central directory > 4 bytes (implicitly tested when testing large relative location of header) +%% +%% Fields that we don't test as we don't support multiple disks +%% * number of disk where end of central directory is > 2 bytes +%% * number of disk to find central directory > 2 bytes +%% * number central directory entries on this disk > 2 bytes +zip64_central_directory(Config) -> + + PrivDir = get_value(pdir, Config), + Dir = filename:join(PrivDir, "files"), + ExtractDir = filename:join(PrivDir, "extract"), + + Archive = filename:join(PrivDir, "archive.zip"), + + %% To test when total number of files > 65535, we create an archive with 66000 entries + ok = file:make_dir(Dir), + lists:foreach( + fun(I) -> + ok = file:write_file(filename:join(Dir, integer_to_list(I)++".txt"),<<0:8>>) + end, lists:seq(0, 65600)), + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "-1 -r", ["files"], [{cwd, PrivDir}])), + + {ok, Files} = zip:list_dir(Archive), + ?assertEqual(65603, length(Files)), + + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, ["files/1.txt","files/65599.txt"]}, + unzip(Config, Archive, [{cwd, ExtractDir},{file_list,["files/1.txt", + "files/65599.txt"]}])), + cmp(filename:join(ExtractDir,"files/1.txt"), + filename:join(ExtractDir,"files/65599.txt")), + + ok. + %% Test basic timestamps, the atime and mtime should be the original %% mtime of the file basic_timestamp(Config) -> @@ -1392,6 +1557,9 @@ unzip(unemzip, Archive, Opts) -> F -- Cwd end || {F, B} <- Files])}. +cmp(Source, Target) -> + "" = cmd("cmp --silent "++Source++" "++Target++~s' || echo "files are different"'). + cmd(Cmd) -> Res = os:cmd(Cmd), ct:log("Cmd: ~ts~nRes: ~ts~n",[Cmd, Res]), From bc9581de680eee928f742585ed39d6e5fb034bcd Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 20 Jun 2024 14:15:59 +0200 Subject: [PATCH 14/21] zip: Fix zip STORE to work with > 4GB files on Windows --- lib/stdlib/src/zip.erl | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index 45c6437a93f6..6bbe115b372a 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -1348,43 +1348,44 @@ put_z_file(_Method, 0, Out, _F, Pos, _Input, _Output, _OpO, _Z, regular) -> put_z_file(?STORED, UncompSize, Out0, F, Pos0, Input, Output, OpO, _Z, regular) -> In0 = [], In1 = Input({open, F, OpO -- [write]}, In0), - {Data, In2} = Input({read, UncompSize}, In1), - Out1 = Output({write, Data}, Out0), - CRC = erlang:crc32(Data), + CRC0 = 0, + {Out1, Pos1, In2, CRC} = + put_z_data_loop(UncompSize, In1, Out0, Pos0, Input, Output, CRC0, fun(Data, _Sync) -> Data end), Input(close, In2), - {Out1, Pos0+erlang:iolist_size(Data), CRC}; + {Out1, Pos1, CRC}; put_z_file(?DEFLATED, UncompSize, Out0, F, Pos0, Input, Output, OpO, Z, regular) -> In0 = [], In1 = Input({open, F, OpO -- [write]}, In0), ok = zlib:deflateInit(Z, default, deflated, -?MAX_WBITS, 8, default), CRC0 = 0, - {Out1, Pos1, CRC} = - put_z_data_loop(UncompSize, In1, Out0, Pos0, Input, Output, CRC0, Z), + {Out1, Pos1, In2, CRC} = + put_z_data_loop(UncompSize, In1, Out0, Pos0, Input, Output, CRC0, + fun(Data, Sync) -> zlib:deflate(Z, Data, Sync) end), ok = zlib:deflateEnd(Z), - Input(close, In1), + Input(close, In2), {Out1, Pos1, CRC}. -%% zlib is finished with the last chunk compressed -get_sync(N, N) -> finish; -get_sync(_, _) -> full. - %% compress data -put_z_data_loop(0, _In, Out, Pos, _Input, _Output, CRC0, _Z) -> - {Out, Pos, CRC0}; -put_z_data_loop(UncompSize, In0, Out0, Pos0, Input, Output, CRC0, Z) -> +put_z_data_loop(0, In, Out, Pos, _Input, _Output, CRC0, _DeflateFun) -> + {Out, Pos, In, CRC0}; +put_z_data_loop(UncompSize, In0, Out0, Pos0, Input, Output, CRC0, DeflateFun) -> N = erlang:min(?WRITE_BLOCK_SIZE, UncompSize), case Input({read, N}, In0) of {eof, _In1} -> {Out0, Pos0}; {Uncompressed, In1} -> CRC1 = erlang:crc32(CRC0, Uncompressed), - Compressed = zlib:deflate(Z, Uncompressed, get_sync(N, UncompSize)), + Compressed = DeflateFun(Uncompressed, get_sync(N, UncompSize)), Sz = erlang:iolist_size(Compressed), Out1 = Output({write, Compressed}, Out0), put_z_data_loop(UncompSize - N, In1, Out1, Pos0 + Sz, - Input, Output, CRC1, Z) + Input, Output, CRC1, DeflateFun) end. +%% zlib is finished with the last chunk compressed +get_sync(N, N) -> finish; +get_sync(_, _) -> full. + %% raw iterators over central dir %% name only From 7e9f3d136e96b5b125eee560df8603701f9f5d2a Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 30 May 2024 10:22:13 +0200 Subject: [PATCH 15/21] zip: Increase size printout for zip:tt to suite zip64 --- lib/stdlib/src/zip.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index 6bbe115b372a..e9340ca44063 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -1419,7 +1419,7 @@ raw_long_print_info_etc(EOCD, _, Comment, _, Acc) when is_record(EOCD, eocd) -> Acc. print_header(CompSize, MTime, UncompSize, FileName, FileComment) -> - io:format("~8w ~s ~8w ~2w% ~ts ~ts\n", + io:format("~10w ~s ~10w ~3w% ~ts ~ts\n", [CompSize, time_to_string(MTime), UncompSize, get_percent(CompSize, UncompSize), FileName, FileComment]). From d7542b9d7a6410cf00256d150850ef22d0266984 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Fri, 31 May 2024 13:04:34 +0200 Subject: [PATCH 16/21] zip: zlib can already have been closed --- lib/stdlib/src/zip.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index e9340ca44063..f23db978626b 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -635,7 +635,7 @@ do_zip(F, Files, Options) -> {ok, Out3} catch C:R:Stk -> - zlib:close(Z), + ?CATCH(zlib:close(Z)), Output({close, F}, Out0), erlang:raise(C, R, Stk) end. From fc1dc591f9db08437d12b8876f1f645fea3ae138 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Fri, 31 May 2024 14:52:31 +0200 Subject: [PATCH 17/21] zip: Polish documentation --- lib/stdlib/src/zip.erl | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index f23db978626b..b08ef60b775c 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -46,7 +46,7 @@ convention, add `.zip` to the filename. `zip_list_dir/1`, and `zip_close/1`. - The ZIP extensions 0x5355 "extended timestamps" and 0x7875 "UID+GID handling" are supported. Both extensions are by default enabled when creating an archive, - but only "extended timestamps" are enabled when extracting. Use the `extra` + but only "extended timestamps" are enabled when extracting. Use the `t:extra/0` option to change how these extensions are used. ## Limitations @@ -275,7 +275,8 @@ The possible extra extension that can be used. - **`extended_timestamp`** - enables the 0x5455 "extended timestamps" zip extension that embeds POSIX timestamps for access and modification times for each file in the - archive. + archive. This makes the timestamps to be in UTC instead of local time and also increases + the time resolution from 2 seconds to 1 second. - **`uid_gid`** - enables 0x7875 "UNIX 3rd generation" zip extension that embeds the UID and GID for each file into the archive. """. @@ -288,6 +289,7 @@ The possible extra extension that can be used. | {compress, What :: extension_spec()} | {uncompress, What :: extension_spec()} | {extra, extra()}. +-doc ~'A filename extension, for example ".txt".'. -type extension() :: string(). -type extension_spec() :: all | [Extension :: extension()] @@ -364,6 +366,11 @@ Options: with option `memory` specified, which means that no files are overwritten, existing files are excluded from the result. +- **`{extra, Extras}`** - The zip "extra" features to respect. The supported + "extra" features are "extended timestamps" and "UID and GID" handling. + By default only "extended timestamps" is enabled when unzipping. + See `t:extra/0` for more details. + - **`verbose`** - Prints an informational message for each extracted file. - **`memory`** - Instead of extracting to the current directory, the result is @@ -577,6 +584,11 @@ Options: zip archive (acting like `file:set_cwd/1` in Kernel, but without changing the global `cwd` property.). +- **`{extra, Extras}`** - The zip "extra" features to respect. The supported + "extra" features are "extended timestamps" and "UID and GID" handling. + By default both these "extra" features are enabled. + See `t:extra/0` for more details. + - **`{compress, What}`** - Controls what types of files to be compressed. Defaults to `all`. The following values of `What` are allowed: @@ -666,6 +678,11 @@ One option is available: which is faster but does not allow a remote (Erlang) file server to be used. Adding `cooked` to the mode list overrides the default and opens the zip file without option `raw`. + +- **`{extra, Extras}`** - The zip "extra" features to respect. The supported + "extra" features are "extended timestamps" and "UID and GID" handling. + By default only "extended timestamps" is enabled when listing files. + See `t:extra/0` for more details. """. -spec(list_dir(Archive, Options) -> RetValue when Archive :: file:name() | binary(), @@ -703,6 +720,8 @@ Opens a zip archive, and reads and saves its directory. This means that later reading files from the archive is faster than unzipping files one at a time with [`unzip/1,2`](`unzip/1`). +The options are equivalent to those in `unzip/2`. + The archive must be closed with `zip_close/1`. The `ZipHandle` is closed if the process that originally opened the archive From 17d19f5c860a1a9bd238f8a78e5aad0e978e25ef Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Fri, 31 May 2024 15:19:28 +0200 Subject: [PATCH 18/21] zip: Add skip_directories option --- lib/stdlib/src/zip.erl | 59 ++++++++++++++++++++++++++++------- lib/stdlib/test/zip_SUITE.erl | 55 +++++++++++++++++++++++++++++++- 2 files changed, 102 insertions(+), 12 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index b08ef60b775c..3e92c51031d7 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -91,6 +91,7 @@ convention, add `.zip` to the filename. open_opts, % options passed to file:open feedback, % feeback (fun) cwd, % directory to relate paths to + skip_dirs, % skip creating empty directories extra % The extra fields to include }). @@ -110,6 +111,7 @@ convention, add `.zip` to the filename. input, % input object (fun) raw_iterator,% applied to each dir entry open_opts, % options passed to file:open + skip_dirs, % skip creating empty directories extra % The extra fields to include }). @@ -117,6 +119,7 @@ convention, add `.zip` to the filename. output, % output object (fun) open_opts, % file:open options cwd, % directory to relate paths to + skip_dirs, % skip creating empty directories extra % The extra fields to include }). @@ -129,6 +132,7 @@ convention, add `.zip` to the filename. output, % output io object (fun) zlib, % handle to open zlib cwd, % directory to relate paths to + skip_dirs, % skip creating empty directories extra % The extra fields to include }). @@ -366,6 +370,10 @@ Options: with option `memory` specified, which means that no files are overwritten, existing files are excluded from the result. +- **`skip_directories`** - By default empty directories within zip archives are + extracted. With option `skip_directories` set, empty directories are no longer + created. + - **`{extra, Extras}`** - The zip "extra" features to respect. The supported "extra" features are "extended timestamps" and "UID and GID" handling. By default only "extended timestamps" is enabled when unzipping. @@ -407,7 +415,7 @@ unzip(F, Options) -> do_unzip(F, Options) -> Opts = get_unzip_options(F, Options), #unzip_opts{input = Input, open_opts = OpO, - extra = ExtraOpts} = Opts, + extra = ExtraOpts} = Opts, In0 = Input({open, F, OpO -- [write]}, []), RawIterator = fun raw_file_info_etc/5, {Info, In1} = get_central_dir(In0, RawIterator, Input, ExtraOpts), @@ -679,6 +687,10 @@ One option is available: Adding `cooked` to the mode list overrides the default and opens the zip file without option `raw`. +- **`skip_directories`** - By default empty directories within zip archives are + listed. With option `skip_directories` set, empty directories are no longer + listed. + - **`{extra, Extras}`** - The zip "extra" features to respect. The supported "extra" features are "extended timestamps" and "UID and GID" handling. By default only "extended timestamps" is enabled when listing files. @@ -701,11 +713,22 @@ do_list_dir(F, Options) -> Opts = get_list_dir_options(F, Options), #list_dir_opts{input = Input, open_opts = OpO, raw_iterator = RawIterator, + skip_dirs = SkipDirs, extra = ExtraOpts} = Opts, In0 = Input({open, F, OpO}, []), {Info, In1} = get_central_dir(In0, RawIterator, Input, ExtraOpts), Input(close, In1), - {ok, Info}. + if SkipDirs -> + {ok, + lists:filter( + fun(#zip_file{ name = Name }) -> + lists:last(Name) =/= $/; + (#zip_comment{}) -> + true + end, Info)}; + true -> + {ok, Info} + end. -doc(#{equiv => zip_open/2}). -spec(zip_open(Archive) -> {ok, ZipHandle} | {error, Reason} when @@ -874,6 +897,8 @@ get_unzip_opt([keep_old_files | Rest], Opts) -> Keep = fun keep_old_file/1, Filter = fun_and_1(Keep, Opts#unzip_opts.file_filter), get_unzip_opt(Rest, Opts#unzip_opts{file_filter = Filter}); +get_unzip_opt([skip_directories | Rest], Opts) -> + get_unzip_opt(Rest, Opts#unzip_opts{skip_dirs = true}); get_unzip_opt([{extra, What} = O| Rest], Opts) when is_list(What) -> case lists:all(fun(E) -> lists:member(E, ?EXTRA_OPTIONS) end, What) of true -> @@ -891,6 +916,8 @@ get_list_dir_opt([cooked | Rest], #list_dir_opts{open_opts = OpO} = Opts) -> get_list_dir_opt([names_only | Rest], Opts) -> get_list_dir_opt(Rest, Opts#list_dir_opts{ raw_iterator = fun(A, B, C, D, E) -> raw_name_only(A, B, C, D, E) end}); +get_list_dir_opt([skip_directories | Rest], Opts) -> + get_list_dir_opt(Rest, Opts#list_dir_opts{skip_dirs = true}); get_list_dir_opt([{extra, What} = O| Rest], Opts) when is_list(What) -> case lists:all(fun(E) -> lists:member(E, ?EXTRA_OPTIONS) end, What) of true -> @@ -1008,6 +1035,7 @@ get_unzip_options(F, Options) -> input = get_input(F), open_opts = [raw], feedback = fun silent/1, + skip_dirs = false, cwd = "", extra = [extended_timestamp] }, @@ -1017,6 +1045,7 @@ get_openzip_options(Options) -> Opts = #openzip_opts{open_opts = [raw, read], output = fun file_io/2, cwd = "", + skip_dirs = false, extra = ?EXTRA_OPTIONS}, get_openzip_opt(Options, Opts). @@ -1046,6 +1075,7 @@ get_list_dir_options(F, Options) -> Opts = #list_dir_opts{raw_iterator = fun raw_file_info_public/5, input = get_input(F), open_opts = [raw], + skip_dirs = false, extra = [extended_timestamp]}, get_list_dir_opt(Options, Opts). @@ -1665,7 +1695,7 @@ openzip_open(F, Options) -> do_openzip_open(F, Options) -> Opts = get_openzip_options(Options), #openzip_opts{output = Output, open_opts = OpO, cwd = CWD, - extra = ExtraOpts} = Opts, + skip_dirs = SkipDirs, extra = ExtraOpts} = Opts, Input = get_input(F), In0 = Input({open, F, OpO -- [write]}, []), {[#zip_comment{comment = C} | Files], In1} = @@ -1678,6 +1708,7 @@ do_openzip_open(F, Options) -> output = Output, zlib = Z, cwd = CWD, + skip_dirs = SkipDirs, extra = ExtraOpts}}. %% retrieve all files from an open archive @@ -1688,10 +1719,12 @@ openzip_get(OpenZip) -> end. do_openzip_get(#openzip{files = Files, in = In0, input = Input, - output = Output, zlib = Z, cwd = CWD, extra = ExtraOpts}) -> + output = Output, zlib = Z, cwd = CWD, skip_dirs = SkipDirs, + extra = ExtraOpts}) -> ZipOpts = #unzip_opts{output = Output, input = Input, file_filter = fun all/1, open_opts = [], - feedback = fun silent/1, cwd = CWD, extra = ExtraOpts}, + feedback = fun silent/1, cwd = CWD, skip_dirs = SkipDirs, + extra = ExtraOpts}, R = get_z_files(Files, Z, In0, ZipOpts, []), {ok, R}; do_openzip_get(_) -> @@ -1718,7 +1751,7 @@ do_openzip_get(F, #openzip{files = Files, in = In0, input = Input, {#zip_file{offset = Offset},_}=ZFile -> In1 = Input({seek, bof, Offset}, In0), case get_z_file(In1, Z, Input, Output, [], fun silent/1, - CWD, ZFile, fun all/1, ExtraOpts) of + CWD, ZFile, fun all/1, false, ExtraOpts) of {file, R, _In2} -> {ok, R}; _ -> throw(file_not_found) end; @@ -1826,6 +1859,8 @@ get_openzip_opt([memory | Rest], Opts) -> get_openzip_opt(Rest, Opts#openzip_opts{output = fun binary_io/2}); get_openzip_opt([{cwd, CWD} | Rest], Opts) -> get_openzip_opt(Rest, Opts#openzip_opts{cwd = CWD}); +get_openzip_opt([skip_directories | Rest], Opts) -> + get_openzip_opt(Rest, Opts#openzip_opts{skip_dirs = true}); get_openzip_opt([{extra, What} = O| Rest], Opts) when is_list(What) -> case lists:all(fun(E) -> lists:member(E, ?EXTRA_OPTIONS) end, What) of true -> @@ -2180,13 +2215,13 @@ get_z_files([#zip_comment{comment = _} | Rest], Z, In, Opts, Acc) -> get_z_files([{#zip_file{offset = Offset},_} = ZFile | Rest], Z, In0, #unzip_opts{input = Input, output = Output, open_opts = OpO, file_filter = Filter, feedback = FB, - cwd = CWD, extra = ExtraOpts} = Opts, Acc0) -> + cwd = CWD, skip_dirs = SkipDirs, extra = ExtraOpts} = Opts, Acc0) -> case Filter(ZFile) of true -> In1 = Input({seek, bof, Offset}, In0), {In2, Acc1} = case get_z_file(In1, Z, Input, Output, OpO, FB, - CWD, ZFile, Filter, ExtraOpts) of + CWD, ZFile, Filter, SkipDirs, ExtraOpts) of {Type, GZD, Inx} when Type =:= file; Type =:= dir -> {Inx, [GZD | Acc0]}; {_, Inx} -> {Inx, Acc0} @@ -2198,7 +2233,7 @@ get_z_files([{#zip_file{offset = Offset},_} = ZFile | Rest], Z, In0, %% get a file from the archive, reading chunks get_z_file(In0, Z, Input, Output, OpO, FB, - CWD, {ZipFile,ZipExtra}, Filter, ExtraOpts) -> + CWD, {ZipFile,ZipExtra}, Filter, SkipDirs, ExtraOpts) -> case Input({read, ?LOCAL_FILE_HEADER_SZ}, In0) of {eof, In1} -> {eof, In1}; @@ -2230,12 +2265,14 @@ get_z_file(In0, Z, Input, Output, OpO, FB, {false,FileName1} -> Filter({ZipFile#zip_file{name = FileName1},ZipExtra}) end, - case ReadAndWrite of + + IsDir = lists:last(FileName) =:= $/, + + case ReadAndWrite andalso not (IsDir andalso SkipDirs) of true -> {Type, Out, In} = case lists:last(FileName) of $/ -> - %% perhaps this should always be done? Out1 = Output({ensure_path,FileName1},[]), {dir, Out1, In3}; _ -> diff --git a/lib/stdlib/test/zip_SUITE.erl b/lib/stdlib/test/zip_SUITE.erl index ff8b3973ce83..d42964cd25f0 100644 --- a/lib/stdlib/test/zip_SUITE.erl +++ b/lib/stdlib/test/zip_SUITE.erl @@ -409,6 +409,22 @@ unzip_options(Config) when is_list(Config) -> lists:foreach(fun(F)-> ok = file:delete(F) end, RetList), + %% Clean up and verify no more files. + 0 = delete_files([Subdir]), + + FList2 = ["abc.txt","quotes/rain.txt","wikipedia.txt","emptyFile"], + + %% Unzip a zip file in Subdir + {ok, RetList2} = zip:unzip(Long, [{cwd, Subdir},skip_directories]), + + %% Verify. + true = (length(RetList2) =:= 4), + lists:foreach(fun(F)-> {ok,B} = file:read_file(filename:join(DataDir, F)), + {ok,B} = file:read_file(filename:join(Subdir, F)) end, + FList2), + lists:foreach(fun(F)-> ok = file:delete(F) end, + RetList2), + %% Clean up and verify no more files. 0 = delete_files([Subdir]), ok. @@ -529,6 +545,22 @@ zip_options(Config) when is_list(Config) -> %% Test the options for list_dir... one day. list_dir_options(Config) when is_list(Config) -> + + DataDir = get_value(data_dir, Config), + Archive = filename:join(DataDir, "abc.zip"), + + {ok, + ["abc.txt", "quotes/rain.txt", "empty/", "wikipedia.txt", "emptyFile" ]} = + zip:list_dir(Archive,[names_only]), + + {ok, + [#zip_comment{}, + #zip_file{ name = "abc.txt" }, + #zip_file{ name = "quotes/rain.txt" }, + #zip_file{ name = "wikipedia.txt" }, + #zip_file{ name = "emptyFile" } + ]} = zip:list_dir(Archive,[skip_directories]), + ok. %% convert zip_info as returned from list_dir to a list of names @@ -701,8 +733,9 @@ unzip_from_binary(Config) when is_list(Config) -> DataDir = get_value(data_dir, Config), PrivDir = get_value(priv_dir, Config), ExtractDir = filename:join(PrivDir, "extract_from_binary"), - ok = file:make_dir(ExtractDir), Archive = filename:join(ExtractDir, "abc.zip"), + + ok = file:make_dir(ExtractDir), {ok, _Size} = file:copy(filename:join(DataDir, "abc.zip"), Archive), FileName = "abc.txt", Quote = "quotes/rain.txt", @@ -726,6 +759,26 @@ unzip_from_binary(Config) when is_list(Config) -> %% Clean up. delete_files([DestFilename, DestQuote, Archive, ExtractDir]), + + ok = file:make_dir(ExtractDir), + file:set_cwd(ExtractDir), + + %% Read a zip file into a binary and extract from the binary with skip_directories + {ok, [FileName,Quote,Wikipedia,EmptyFile]} + = zip:unzip(Bin, [skip_directories]), + + %% Verify. + DestFilename = filename:join(ExtractDir, "abc.txt"), + {ok, Data} = file:read_file(filename:join(DataDir, FileName)), + {ok, Data} = file:read_file(DestFilename), + + DestQuote = filename:join([ExtractDir, "quotes", "rain.txt"]), + {ok, QuoteData} = file:read_file(filename:join(DataDir, Quote)), + {ok, QuoteData} = file:read_file(DestQuote), + + %% Clean up. + delete_files([DestFilename, DestQuote, ExtractDir]), + ok. %% oac_files() -> From 134a44253fe2214efde79c51ce9e48b5a1309133 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Wed, 26 Jun 2024 08:58:48 +0200 Subject: [PATCH 19/21] zip: Do not fallback to dostime for ctime On Windows if you write ctime it will change it, while on unix it does not. So we don't update ctime unless it is part of the archive which is never is. --- lib/stdlib/src/zip.erl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/stdlib/src/zip.erl b/lib/stdlib/src/zip.erl index 3e92c51031d7..7a169205a77f 100644 --- a/lib/stdlib/src/zip.erl +++ b/lib/stdlib/src/zip.erl @@ -2424,14 +2424,13 @@ file_header_atime_to_datetime(FH) -> calendar:system_time_to_local_time(Atime, second) end. -%% If we have ctime we use that, otherwise use dos time +%% Normally ctime will not be set, but if it is we use that. If it is not set +%% we return undefined so that when we later do write_file_info ctime will remain +%% the time that the file was created when extracted from the archive. file_header_ctime_to_datetime(FH) -> #cd_file_header.ctime = #local_file_header.ctime, case element(#cd_file_header.ctime, FH) of - undefined -> - dos_date_time_to_datetime( - element(#cd_file_header.last_mod_date, FH), - element(#cd_file_header.last_mod_time, FH)); + undefined -> undefined; Ctime -> calendar:system_time_to_local_time(Ctime, second) end. From 2e2996f062af633d9208581be739d09a8f1780ca Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Mon, 3 Jun 2024 08:23:49 +0200 Subject: [PATCH 20/21] zip: Cleanup testcases to be simpler and more stable --- lib/stdlib/test/zip_SUITE.erl | 487 ++++++++++++++++++++++------------ 1 file changed, 324 insertions(+), 163 deletions(-) diff --git a/lib/stdlib/test/zip_SUITE.erl b/lib/stdlib/test/zip_SUITE.erl index d42964cd25f0..9cb7055ecdda 100644 --- a/lib/stdlib/test/zip_SUITE.erl +++ b/lib/stdlib/test/zip_SUITE.erl @@ -32,13 +32,17 @@ compress_control/1, foldl/1,fd_leak/1,unicode/1,test_zip_dir/1, explicit_file_info/1, mode/1, + zip64_central_headers/0, unzip64_central_headers/0, zip64_central_headers/1, unzip64_central_headers/1, zip64_central_directory/1, basic_timestamp/1, extended_timestamp/1, uid_gid/1]). +-export([zip/5, unzip/3]). + -import(proplists,[get_value/2, get_value/3]). +-include_lib("common_test/include/ct.hrl"). -include_lib("kernel/include/file.hrl"). -include_lib("stdlib/include/zip.hrl"). -include_lib("stdlib/include/assert.hrl"). @@ -60,7 +64,10 @@ groups() -> %% ezip - Use erlang zip on disk %% emzip - Use erlang zip in memory -define(ZIP_MODES,[zip, ezip, emzip]). +%% -define(ZIP_MODES,[emzip]). -define(UNZIP_MODES,[unzip, unezip, unemzip]). +%% How much memory the zip/unzip 64 testcases that zip/unzip from/to are expected to use +-define(EMZIP64_MEM_USAGE, (8 * (1 bsl 30))). zip_groups() -> @@ -83,12 +90,10 @@ z64(Mode) when is_atom(Mode) -> z64(Modes) when is_list(Modes) -> [z64(M) || M <- Modes]. -noz64(Z64Mode) -> - case string:split(atom_to_list(Z64Mode), "_") of - ["z64",Mode] -> - list_to_atom(Mode); - [_Mode] -> - Z64Mode +un_z64(Mode) -> + case atom_to_list(Mode) of + "z64_" ++ ModeString -> list_to_atom(ModeString); + _ -> Mode end. zip_testcases() -> @@ -99,54 +104,94 @@ zip64_testcases() -> zip64_central_directory]. init_per_suite(Config) -> - Config. + {ok, Started} = application:ensure_all_started(os_mon), + cleanup_priv_dir(Config), + [{started, Started} | Config]. -end_per_suite(_Config) -> +end_per_suite(Config) -> + [application:stop(App) || App <- lists:reverse(get_value(started, Config))], + cleanup_priv_dir(Config), ok. +cleanup_priv_dir(Config) -> + %% Cleanup potential files in priv_dir + Pdir = get_value(pdir, Config, get_value(priv_dir,Config)), + ct:log("Cleaning up ~s",[Pdir]), + [ case file:delete(File) of + {error, eperm} -> file:del_dir_r(File); + _ -> ok + end || File <- filelib:wildcard(filename:join(Pdir, "*"))]. + init_per_group(zip64_group, Config) -> PrivDir = get_value(priv_dir, Config), - OneMB = <<0:(8 bsl 20)>>, - Large4GB = filename:join(PrivDir, "large.txt"), - ok = file:write_file(Large4GB, lists:duplicate(4 bsl 10, OneMB)), - Medium4MB = filename:join(PrivDir, "medium.txt"), - ok = file:write_file(Medium4MB, lists:duplicate(4, OneMB)), - - [{large, Large4GB},{medium,Medium4MB}|Config]; + case {erlang:system_info(wordsize), disc_free(PrivDir), memsize()} of + {4, _, _} -> + {skip, "Zip64 tests only work on 64-bit systems"}; + {8, error, _} -> + {skip, "Failed to query disk space for priv_dir. " + "Is it on a remote file system?~n"}; + {8, N,M} when N >= 16 * (1 bsl 20), M >= ?EMZIP64_MEM_USAGE -> + ct:log("Free disk: ~w KByte~n", [N]), + ct:log("Free memory: ~w MByte~n", [M div (1 bsl 20)]), + OneMB = <<0:(8 bsl 20)>>, + Large4GB = filename:join(PrivDir, "large.txt"), + ok = file:write_file(Large4GB, lists:duplicate(4 bsl 10, OneMB)), + Medium4MB = filename:join(PrivDir, "medium.txt"), + ok = file:write_file(Medium4MB, lists:duplicate(4, OneMB)), + + [{large, Large4GB},{medium,Medium4MB}|Config]; + {8,N,M} -> + ct:log("Free disk: ~w KByte~n", [N]), + ct:log("Free memory: ~w MByte~n", [M div (1 bsl 20)]), + {skip,"Less than 16 GByte free disk or less then 8 GB free mem"} + end; init_per_group(Group, Config) -> case lists:member(Group, ?ZIP_MODES ++ ?UNZIP_MODES ++ z64(?ZIP_MODES ++ ?UNZIP_MODES)) of true -> case get_value(zip, Config) of undefined -> - [throw({skip, "zip does not support zip64"}) || - Group =:= zip andalso - get_value(large,Config) =/= undefined andalso - os:cmd("zip -v | grep ZIP64_SUPPORT") == ""], - ct:print("Zip: ~p", [Group]), - Pdir = filename:join(get_value(priv_dir, Config),Group), - ok = filelib:ensure_path(Pdir), - [{pdir, Pdir},{zip, noz64(Group)} | Config]; + case un_z64(Group) =/= zip orelse has_zip() of + true -> + Pdir = filename:join(get_value(priv_dir, Config),Group), + ok = filelib:ensure_path(Pdir), + [{pdir, Pdir},{zip, Group} | Config]; + false -> + {skip, "No zip program found"} + end; _Zip -> - ct:print("UnZip: ~p", [Group]), - Pdir = filename:join(get_value(pdir, Config),Group), - ok = filelib:ensure_path(Pdir), - [{pdir, Pdir},{unzip, noz64(Group)} | Config] + case un_z64(Group) =/= unzip orelse has_zip() of + true -> + Pdir = filename:join(get_value(pdir, Config),Group), + ok = filelib:ensure_path(Pdir), + [{pdir, Pdir},{unzip, Group} | Config]; + false -> + {skip, "No zip program found"} + end end; false -> Config end. end_per_group(_GroupName, Config) -> + cleanup_priv_dir(Config), Config. init_per_testcase(TC, Config) -> - PrivDir = filename:join(get_value(pdir, Config,get_value(priv_dir, Config)), TC), - ok = filelib:ensure_path(PrivDir), - [{pdir, PrivDir} | Config]. + UsesZip = un_z64(get_value(zip, Config)) =:= zip orelse un_z64(get_value(unzip, Config)) =:= unzip, + HasZip = has_zip(), + ct:log("Free memory: ~w MByte~n", [memsize() div (1 bsl 20)]), + if UsesZip andalso not HasZip -> + {skip, "No zip command found"}; + true -> + PrivDir = filename:join(get_value(pdir, Config,get_value(priv_dir, Config)), TC), + ok = filelib:ensure_path(PrivDir), + [{pdir, PrivDir} | Config] + end. + end_per_testcase(_TC, Config) -> - file:del_dir_r(get_value(pdir,Config)), + cleanup_priv_dir(Config), Config. %% Test creating, listing and extracting one file from an archive @@ -422,7 +467,7 @@ unzip_options(Config) when is_list(Config) -> lists:foreach(fun(F)-> {ok,B} = file:read_file(filename:join(DataDir, F)), {ok,B} = file:read_file(filename:join(Subdir, F)) end, FList2), - lists:foreach(fun(F)-> ok = file:delete(F) end, + lists:foreach(fun(F)-> 1 = delete_files([F]) end, RetList2), %% Clean up and verify no more files. @@ -757,6 +802,9 @@ unzip_from_binary(Config) when is_list(Config) -> {ok, QuoteData} = file:read_file(filename:join(DataDir, Quote)), {ok, QuoteData} = file:read_file(DestQuote), + %% Don't be in ExtractDir when we delete it + ok = file:set_cwd(PrivDir), + %% Clean up. delete_files([DestFilename, DestQuote, Archive, ExtractDir]), @@ -1112,7 +1160,7 @@ test_zip1() -> has_zip() andalso zip_is_unicode_aware(). has_zip() -> - os:find_executable("zip") =/= false. + os:find_executable("zip") =/= false andalso element(1, os:type()) =:= unix. zip_is_unicode_aware() -> S = os:cmd("zip -v | grep 'UNICODE_SUPPORT'"), @@ -1190,35 +1238,40 @@ mode(Config) -> file:write_file(Executable, "aaa"), {ok, ExecFI } = file:read_file_info(Executable), ok = file:write_file_info(Executable, ExecFI#file_info{ mode = 8#111 bor 8#400 }), + {ok, #file_info{ mode = OrigExecMode }} = file:read_file_info(Executable), Directory = filename:join(PrivDir,"dir"), ok = file:make_dir(Directory), {ok, DirFI } = file:read_file_info(Executable), ok = file:write_file_info(Directory, DirFI#file_info{ mode = 8#111 bor 8#400 }), + {ok, #file_info{ mode = OrigDirMode }} = file:read_file_info(Directory), ?assertMatch( {ok, Archive}, zip(Config, Archive, "-r", ["dir","exec"], [{cwd, PrivDir},{extra,[extended_timestamp]}])), + OrigExecMode777 = OrigExecMode band 8#777, + OrigDirMode777 = OrigDirMode band 8#777, + ?assertMatch( {ok, [#zip_comment{}, - #zip_file{ name = "dir/", info = #file_info{ mode = 8#111 bor 8#400}}, - #zip_file{ name = "exec", info = #file_info{ mode = 8#111 bor 8#400}} ]}, + #zip_file{ name = "dir/", info = #file_info{ mode = OrigDirMode777 }}, + #zip_file{ name = "exec", info = #file_info{ mode = OrigExecMode777 }} ]}, zip:list_dir(Archive)), ok = file:make_dir(ExtractDir), ?assertMatch( {ok, ["dir/","exec"]}, unzip(Config, Archive, [{cwd,ExtractDir}])), - case get_value(unzip, Config) =/= unemzip of + case un_z64(get_value(unzip, Config)) =/= unemzip of true -> {ok,#file_info{ mode = ExecMode }} = file:read_file_info(filename:join(ExtractDir,"exec")), - ?assertEqual(8#111 bor 8#400, ExecMode band 8#777), + ?assertEqual(ExecMode band 8#777, OrigExecMode777), {ok,#file_info{ mode = DirMode }} = file:read_file_info(filename:join(ExtractDir,"dir")), - ?assertEqual(8#111 bor 8#400, DirMode band 8#777); + ?assertEqual(DirMode band 8#777, OrigDirMode777); false -> %% emzip does not support mode ok @@ -1234,6 +1287,7 @@ mode(Config) -> %% * starting disk %% %% As we do not support using multiple disks, we do not test starting disks +zip64_central_headers() -> [{timetrap, {minutes, 60}}]. zip64_central_headers(Config) -> PrivDir = get_value(pdir, Config), @@ -1253,11 +1307,16 @@ zip64_central_headers(Config) -> %% %% The archive will be roughly 8 GBs large - ok = file:make_symlink("../../large.txt", filename:join(PrivDir, "uncomp.txt")), - ok = file:make_symlink("../../large.txt", filename:join(PrivDir, "uncomp.comp.zip")), - ok = file:make_symlink("../../medium.txt", filename:join(PrivDir, "offset.txt")), - ok = file:make_symlink("../../large.txt", filename:join(PrivDir, "uncomp.offset.txt")), - ok = file:make_symlink("../../large.txt", filename:join(PrivDir, "uncomp.comp.offset.zip")), + ok = file:make_link(filename:join(PrivDir, "../../large.txt"), + filename:join(PrivDir, "uncomp.txt")), + ok = file:make_link(filename:join(PrivDir, "../../large.txt"), + filename:join(PrivDir, "uncomp.comp.zip")), + ok = file:make_link(filename:join(PrivDir, "../../medium.txt"), + filename:join(PrivDir, "offset.txt")), + ok = file:make_link(filename:join(PrivDir, "../../large.txt"), + filename:join(PrivDir, "uncomp.offset.txt")), + ok = file:make_link(filename:join(PrivDir, "../../large.txt"), + filename:join(PrivDir, "uncomp.comp.offset.zip")), ?assertMatch( {ok, Archive}, zip(Config, Archive, "-1", @@ -1283,6 +1342,7 @@ zip64_central_headers(Config) -> zip:list_dir(Archive), ok. +unzip64_central_headers() -> [{timetrap, {minutes, 60}}]. unzip64_central_headers(Config) -> PrivDir = get_value(pdir, Config), @@ -1394,18 +1454,24 @@ basic_timestamp(Config) -> mtime = UnZMtime, ctime = UnZCtime }} = - file:read_file_info(filename:join(ExtractDir, "testfile.txt")), + file:read_file_info(filename:join(ExtractDir, "testfile.txt"),[raw]), ct:log("extract: ~p",[UnzipFI]), - case get_value(unzip, Config) =/= unemzip of - true -> + UnzipMode = un_z64(get_value(unzip, Config)), + + if UnzipMode =/= unemzip -> ?assertEqual(ZMtime, UnZMtime), - ?assertEqual(UnZAtime, UnZMtime), - ?assert(UnZMtime < UnZCtime); - false -> + %% When using unzip, the atime is sometimes set to ctime for unknown reasons... so we cannot test it + %% ?assertEqual(UnZAtime, UnZMtime), + ?assert(UnZAtime =:= UnZMtime orelse UnZAtime =:= UnZCtime), + + %% On windows the ctime and mtime are the same so + %% we cannot compare them. + [?assert(UnZMtime < UnZCtime) || os:type() =/= {win32,nt}]; + UnzipMode =:= unemzip -> %% emzip does not support timestamps ok end, @@ -1416,115 +1482,112 @@ basic_timestamp(Config) -> %% the atime and ctime when the file is added to the archive. extended_timestamp(Config) -> - case os:cmd("zip -v | grep USE_EF_UT_TIME") of - "" -> {skip, "zip does not support extended timestamps"}; - _ -> - PrivDir = get_value(pdir, Config), - Archive = filename:join(PrivDir, "archive.zip"), - ExtractDir = filename:join(PrivDir, "extract"), - Testfile = filename:join(PrivDir, "testfile.txt"), + PrivDir = get_value(pdir, Config), + Archive = filename:join(PrivDir, "archive.zip"), + ExtractDir = filename:join(PrivDir, "extract"), + Testfile = filename:join(PrivDir, "testfile.txt"), - ok = file:write_file(Testfile, "abc"), - {ok, OndiskFI = #file_info{ mtime = Mtime }} = - file:read_file_info(Testfile), + ok = file:write_file(Testfile, "abc"), + {ok, OndiskFI = #file_info{ mtime = Mtime }} = + file:read_file_info(Testfile), - %% Sleep a bit to let the timestamp progress - timer:sleep(1000), + %% Sleep a bit to let the timestamp progress + timer:sleep(1000), - ?assertMatch( - {ok, Archive}, - zip(Config, Archive, "", ["testfile.txt"], [{cwd, PrivDir}])), + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "", ["testfile.txt"], [{cwd, PrivDir}])), - %% list_dir only reads the central directory header and thus only - %% the mtime will be correct here - {ok, [#zip_comment{}, - #zip_file{ info = ZipFI = #file_info{ mtime = ZMtime}} ]} = - zip:list_dir(Archive), + %% list_dir only reads the central directory header and thus only + %% the mtime will be correct here + {ok, [#zip_comment{}, + #zip_file{ info = ZipFI = #file_info{ mtime = ZMtime}} ]} = + zip:list_dir(Archive), - ct:log("on disk: ~p",[OndiskFI]), - ct:log("in zip : ~p",[ZipFI]), - ct:log("zipinfo:~n~ts",[os:cmd("zipinfo -v "++Archive)]), + ct:log("on disk: ~p",[OndiskFI]), + ct:log("in zip : ~p",[ZipFI]), + ct:log("zipinfo:~n~ts",[os:cmd("zipinfo -v "++Archive)]), - ?assertEqual(Mtime, ZMtime), + ?assertEqual(Mtime, ZMtime), - %% Sleep a bit to let the timestamp progress - timer:sleep(1000), + %% Sleep a bit to let the timestamp progress + timer:sleep(1000), - ok = file:make_dir(ExtractDir), - ?assertMatch( - {ok, ["testfile.txt"]}, - unzip(Config, Archive, [{cwd,ExtractDir}])), + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, ["testfile.txt"]}, + unzip(Config, Archive, [{cwd,ExtractDir}])), - {ok, UnzipFI = #file_info{ atime = UnZAtime, - mtime = UnZMtime, - ctime = UnZCtime - }} = - file:read_file_info(filename:join(ExtractDir, "testfile.txt")), + {ok, UnzipFI = #file_info{ atime = UnZAtime, + mtime = UnZMtime, + ctime = UnZCtime + }} = + file:read_file_info(filename:join(ExtractDir, "testfile.txt"),[raw]), - ct:log("extract: ~p",[UnzipFI]), + ct:log("extract: ~p",[UnzipFI]), - case get_value(unzip, Config) =/= unemzip of - true -> - ?assertEqual(ZMtime, UnZMtime), - ?assertEqual(UnZAtime, UnZMtime), + UnzipMode = un_z64(get_value(unzip, Config)), - ?assert(UnZMtime < UnZCtime); - false -> - %% emzip does not support timestamps - ok - end, + if UnzipMode =/= unemzip -> + ?assertEqual(ZMtime, UnZMtime), - ok - end. + %% When using unzip, the atime is sometimes set to ctime for unknown reasons... so we cannot test it + %% ?assertEqual(UnZAtime, UnZMtime), + ?assert(UnZAtime =:= UnZMtime orelse UnZAtime =:= UnZCtime), -uid_gid(Config) -> + %% On windows the ctime and mtime are the same so + %% we cannot compare them. + [?assert(UnZMtime < UnZCtime) || os:type() =/= {win32,nt}]; + UnzipMode =:= unemzip -> + %% emzip does not support timestamps + ok + end, - case os:cmd("zip -v | grep STORE_UNIX_UIDs_GIDs") of - "" -> {skip, "zip does not support uid/gids"}; - _ -> + ok. - PrivDir = get_value(pdir, Config), - ExtractDir = filename:join(PrivDir, "extract"), - Archive = filename:join(PrivDir, "archive.zip"), - Testfile = filename:join(PrivDir, "testfile.txt"), +uid_gid(Config) -> - ok = file:write_file(Testfile, "abc"), - {ok, OndiskFI = #file_info{ gid = GID, uid = UID }} = - file:read_file_info(Testfile), + PrivDir = get_value(pdir, Config), + ExtractDir = filename:join(PrivDir, "extract"), + Archive = filename:join(PrivDir, "archive.zip"), + Testfile = filename:join(PrivDir, "testfile.txt"), - ?assertMatch( - {ok, Archive}, - zip(Config, Archive, "", ["testfile.txt"], [{cwd, PrivDir}])), + ok = file:write_file(Testfile, "abc"), + {ok, OndiskFI = #file_info{ gid = GID, uid = UID }} = + file:read_file_info(Testfile), - {ok, [#zip_comment{}, - #zip_file{ info = ZipFI = #file_info{ gid = ZGID, uid = ZUID }} ]} = - zip:list_dir(Archive,[{extra, [uid_gid]}]), + ?assertMatch( + {ok, Archive}, + zip(Config, Archive, "", ["testfile.txt"], [{cwd, PrivDir}])), - ct:log("on disk: ~p",[OndiskFI]), - ct:log("in zip : ~p",[ZipFI]), + {ok, [#zip_comment{}, + #zip_file{ info = ZipFI = #file_info{ gid = ZGID, uid = ZUID }} ]} = + zip:list_dir(Archive,[{extra, [uid_gid]}]), - ?assertEqual(UID, ZUID), - ?assertEqual(GID, ZGID), + ct:log("on disk: ~p",[OndiskFI]), + ct:log("in zip : ~p",[ZipFI]), - ok = file:make_dir(ExtractDir), - ?assertMatch( - {ok, ["testfile.txt"]}, - unzip(Config, Archive, [{cwd, ExtractDir},{extra,[uid_gid]}])), + ?assertEqual(UID, ZUID), + ?assertEqual(GID, ZGID), - {ok,#file_info{ gid = ExZGID, uid = ExZUID }} = - file:read_file_info(filename:join(ExtractDir,"testfile.txt")), + ok = file:make_dir(ExtractDir), + ?assertMatch( + {ok, ["testfile.txt"]}, + unzip(Config, Archive, [{cwd, ExtractDir},{extra,[uid_gid]}])), - case get_value(unzip, Config) =/= unemzip of - true -> - ?assertEqual(UID, ExZUID), - ?assertEqual(GID, ExZGID); - _ -> - %% emzip does not support uid_gid - ok - end, + {ok,#file_info{ gid = ExZGID, uid = ExZUID }} = + file:read_file_info(filename:join(ExtractDir,"testfile.txt")), + case un_z64(get_value(unzip, Config)) =/= unemzip of + true -> + ?assertEqual(UID, ExZUID), + ?assertEqual(GID, ExZGID); + _ -> + %% emzip does not support uid_gid ok - end. + end, + + ok. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Generic zip interface @@ -1532,52 +1595,84 @@ uid_gid(Config) -> zip(Config, Archive, ZipOpts, Filelist, Opts) when is_list(Config) -> zip(get_value(zip, Config), Archive, ZipOpts, Filelist, Opts); +zip(z64_zip, Archive, ZipOpts, Filelist, Opts) -> + zip(zip, Archive, ZipOpts, Filelist, Opts); zip(zip, Archive, ZipOpts, Filelist, Opts) -> cmd("cd "++get_value(cwd, Opts)++" && " "zip "++ZipOpts++" "++Archive++" "++lists:join($ ,Filelist)), {ok, Archive}; +zip(z64_ezip, Archive, _ZipOpts, Filelist, Opts) -> + zip(ezip, Archive, _ZipOpts, Filelist, Opts); zip(ezip, Archive, _ZipOpts, Filelist, Opts) -> ct:log("Creating zip:zip(~p,~n~p,~n~p)",[Archive, Filelist, Opts]), zip:zip(Archive, Filelist, Opts); +zip(z64_emzip, Archive, _ZipOpts, Filelist, Opts) -> + %% Run in peer node so that memory issues don't crash test node + {ok, Peer, Node} = ?CT_PEER(#{ args => emzip_peer_args() }), + try + erpc:call( + Node, + fun() -> + ?MODULE:zip(emzip, Archive, _ZipOpts, Filelist, Opts) + end) + after + catch peer:stop(Peer) + end; zip(emzip, Archive, _ZipOpts, Filelist, Opts) -> ct:log("Creating emzip ~ts",[Archive]), Cwd = get_value(cwd, Opts), + + + %% For this not to use a huge amount of memory we re-use + %% the binary for files that are the same size as those are the same file. + %% This cuts memory usage from ~16GB to ~4GB. + {Files,_Cache} = lists:mapfoldl( - fun F(Fn, Cache) -> - AbsFn = filename:join(Cwd, Fn), - {ok, Fi} = file:read_file_info(AbsFn), - {SubDirFiles, NewCache} = - if Fi#file_info.type == directory -> - {ok, Files} = file:list_dir(AbsFn), - lists:mapfoldl(F, Cache#{ Fn => <<>> }, - [filename:join(Fn, DirFn) || DirFn <- Files]); - Fi#file_info.type == regular -> - %% For this not to use a huge amount of memory we re-use - %% the binary for files that are links to the same file. - %% This cuts memory usage from ~16GB to ~4GB. - {[], - case file:read_link_all(AbsFn) of - {ok, LinkFn} -> - case maps:get(LinkFn, Cache, undefined) of - undefined -> - {ok, Data} = file:read_file(AbsFn), - Cache#{ LinkFn => Data, Fn => Data }; - Data -> - Cache#{ Fn => Data } - end; - {error, _} -> - {ok, Data} = file:read_file(AbsFn), - Cache#{ Fn => Data } - end} - end, - {[{Fn, maps:get(Fn, NewCache), Fi}|SubDirFiles], NewCache} - end, #{}, Filelist), + fun F(Fn, Cache) -> + AbsFn = filename:join(Cwd, Fn), + {ok, Fi} = file:read_file_info(AbsFn), + CacheKey = {Fi#file_info.type, Fi#file_info.size}, + {SubDirFiles, NewCache} = + if Fi#file_info.type == directory -> + {ok, Files} = file:list_dir(AbsFn), + lists:mapfoldl(F, Cache#{ CacheKey => <<>> }, + [filename:join(Fn, DirFn) || DirFn <- Files]); + Fi#file_info.type == regular -> + {[], + case maps:find(CacheKey, Cache) of + {ok, _} -> Cache; + error -> + {ok, Data} = read_file( + file:open(AbsFn, [read, raw, binary]), + Fi#file_info.size), + Cache#{ CacheKey => Data } + end} + end, + {[{Fn, maps:get(CacheKey, NewCache), Fi}|SubDirFiles], NewCache} + end, #{}, Filelist), zip:zip(Archive, lists:flatten(Files), proplists:delete(cwd,Opts)). +%% Special read_file that works on windows on > 4 GB files +read_file({ok, D}, Size) -> + Bin = iolist_to_binary(read_file(D, Size)), + erlang:garbage_collect(), %% Do a GC to get rid of all intermediate binaries + {ok, Bin}; +read_file({error, _} = E, _Size) -> + E; +read_file(eof = E, _Size) -> + E; +read_file(D, 0) -> + file:close(D), + []; +read_file(D, Size) -> + {ok, B} = file:read(D, min(1 bsl 30, Size)), + [B | read_file(D, Size - byte_size(B))]. unzip(Config, Archive, Opts) when is_list(Config) -> unzip(get_value(unzip, Config), Archive, Opts); +unzip(z64_unzip, Archive, Opts) -> + unzip(unzip, Archive, Opts); unzip(unzip, Archive, Opts) -> UidGid = [" -X " || lists:member(uid_gid, get_value(extra, Opts, []))], Files = lists:join($ , get_value(file_list, Opts, [])), @@ -1591,15 +1686,30 @@ unzip(unzip, Archive, Opts) -> {match,Match} -> Match end end,string:split(Res,"\n",all)))}; +unzip(z64_unezip, Archive, Opts) -> + unzip(unezip, Archive, Opts); unzip(unezip, Archive, Opts) -> Cwd = get_value(cwd, Opts) ++ "/", {ok, Files} = zip:unzip(Archive, Opts), {ok, lists:sort([F -- Cwd || F <- Files])}; +unzip(z64_unemzip, Archive, Opts) -> + %% Run in peer node so that memory issues don't crash test node + {ok, Peer, Node} = ?CT_PEER(#{ args => emzip_peer_args() }), + try + erpc:call( + Node, + fun() -> + unzip(unemzip, Archive, Opts) + end) + after + catch peer:stop(Peer) + end; unzip(unemzip, Archive, Opts) -> Cwd = get_value(cwd, Opts) ++ "/", + {ok, Files} = zip:unzip(Archive, [memory | Opts]), {ok, lists:sort( - [begin + [begin case lists:last(F) of $/ -> filelib:ensure_path(F); @@ -1610,10 +1720,61 @@ unzip(unemzip, Archive, Opts) -> F -- Cwd end || {F, B} <- Files])}. +emzip_peer_args() -> + 8 = erlang:system_info(wordsize),%% Supercarrier only supported on 64-bit + ["+MMscs",integer_to_list(?EMZIP64_MEM_USAGE div (1024 * 1024))]. + cmp(Source, Target) -> - "" = cmd("cmp --silent "++Source++" "++Target++~s' || echo "files are different"'). + {ok, SrcInfo} = file:read_file_info(Source), + {ok, TgtInfo} = file:read_file_info(Target), + ?assertEqual(SrcInfo#file_info.size, TgtInfo#file_info.size), + ?assertEqual(SrcInfo#file_info.mode, TgtInfo#file_info.mode), + + {ok, Src} = file:open(Source, [read, binary]), + {ok, Tgt} = file:open(Target, [read, binary]), + + cmp(Src, Tgt, 0), + + file:close(Src), + file:close(Tgt). + +%% Check if first 100 MB are the same +cmp(Src, Tgt, Pos) when Pos < 100 bsl 20 -> + erlang:garbage_collect(), + case {file:read(Src, 20 bsl 20), file:read(Tgt, 20 bsl 20)} of + {{ok, Data}, {ok, Data}} -> + cmp(Src, Tgt, Pos + 20 bsl 20); + {E, E} -> + ok + end; +cmp(_Src, _Tgt, _) -> + ok. cmd(Cmd) -> Res = os:cmd(Cmd), ct:log("Cmd: ~ts~nRes: ~ts~n",[Cmd, Res]), Res. + +disc_free(Path) -> + Data = disksup:get_disk_data(), + + %% What partitions could Data be mounted on? + Partitions = + [D || {P, _Tot, _Perc}=D <- Data, + lists:prefix(filename:nativename(P), filename:nativename(Path))], + + %% Sorting in descending order places the partition with the most specific + %% path first. + case lists:sort(fun erlang:'>='/2, Partitions) of + [{_,Tot, Perc} | _] -> round(Tot * (1-(Perc/100))); + [] -> error + end. + +memsize() -> + case proplists:get_value(available_memory, memsup:get_system_memory_data()) of + undefined -> + {Tot,_Used,_} = memsup:get_memory_data(), + Tot; + Available -> + Available + end. From efc2de2bc9efec4d757f2dc7cba0d8a925bbde92 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Tue, 20 Aug 2024 09:52:09 +0200 Subject: [PATCH 21/21] erts: Polish open_port env opts documentation --- erts/preloaded/src/erlang.erl | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl index 5095998225bc..d7052f999482 100644 --- a/erts/preloaded/src/erlang.erl +++ b/erts/preloaded/src/erlang.erl @@ -7128,20 +7128,17 @@ follows: `{spawn_executable, FileName}`. The external program starts using `Dir` as its working directory. `Dir` must be a string. -- **`{env, Env}`** - Types: -   `Name = ``t:os:env_var_name/0` -   `Val = ``t:os:env_var_value/0`` | false` -   `Env = [{Name, Val}]` - - Only valid for `{spawn, Command}`, and `{spawn_executable, FileName}`. The - environment of the started process is extended using the environment +- **`{env, Env}`** - Only valid for `{spawn, Command}`, and `{spawn_executable, FileName}`. + The environment of the started process is extended using the environment specifications in `Env`. - `Env` is to be a list of tuples `{Name, Val}`, where `Name` is the name of an - environment variable, and `Val` is the value it is to have in the spawned port - process. Both `Name` and `Val` must be strings. The one exception is `Val` - being the atom `false` (in analogy with `os:getenv/1`), which removes the - environment variable. + `Env` is to be a list of tuples `{Name, Val}`, where `Name` is a `t:os:env_var_name/0` + representing the name of an environment variable, and `Val` is a `t:os:env_var_name/0` + representing the value it is to have in the spawned port process. Both `Name` and `Val` must + be strings. + + If `Val` is set to the atom `false` or the empty string (that is `""` or `[]`), open_port + will consider those variables unset just as if `os:unsetenv/1` had been called. For information about encoding requirements, see documentation of the types for `Name` and `Val`. @@ -7327,7 +7324,7 @@ by passing command-line flag [`+Q`](erl_cmd.md#max_ports) to [erl](erl_cmd.md). | stream | {line, L :: non_neg_integer()} | {cd, Dir :: string() | binary()} - | {env, Env :: [{Name :: os:env_var_name(), Val :: os:env_var_value() | false}]} + | {env, Env :: [{Name :: os:env_var_name(), Val :: os:env_var_value() | [] | false}]} | {args, [string() | binary()]} | {arg0, string() | binary()} | exit_status