diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 03a0708b..87b91af6 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -2,6 +2,8 @@ name: CI on: push: + branches: + - master pull_request: release: types: @@ -180,7 +182,7 @@ jobs: run: pip install maturin - name: Build Wheels run: | - maturin build -i python --release --out dist --no-sdist --target ${{ matrix.platform.target }} --manylinux ${{ matrix.platform.manylinux }} + maturin build -i python --release --out dist --no-sdist --target ${{ matrix.platform.target }} --manylinux ${{ matrix.platform.manylinux }} --cargo-extra-args="--no-default-features" --cargo-extra-args="--features=abi3" - uses: uraimo/run-on-arch-action@v2.0.5 name: Install built wheel with: diff --git a/Cargo.toml b/Cargo.toml index 5c68b9bf..5324deb3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,8 +11,14 @@ description = "Thin Python bindings to de/compression algorithms in Rust" crate-type = ["cdylib"] [features] -default = ["abi3"] +default = ["abi3", "mimallocator"] abi3 = ["pyo3/abi3-py36"] +mimallocator = ["mimalloc"] + +[profile.release] +lto = "fat" +codegen-units = 1 +opt-level = 3 [dependencies] pyo3 = { version = "0.13.2", features = ["extension-module"] } @@ -22,3 +28,8 @@ lz-fear = "0.1.1" flate2 = "^1" zstd = "0.6.0+zstd.1.4.8" numpy = "0.13.0" + +[dependencies.mimalloc] +version = "0.1.24" +default-features = false +optional = true diff --git a/Makefile b/Makefile index 20f85341..f807d388 100644 --- a/Makefile +++ b/Makefile @@ -6,8 +6,11 @@ test: bench: python -m pytest -v --benchmark-only --benchmark-sort name benchmarks/ -bench-snappy: - $(BASE_BENCH_CMD) test_snappy +bench-snappy-framed: + $(BASE_BENCH_CMD) test_snappy_framed + +bench-snappy-raw: + $(BASE_BENCH_CMD) test_snappy_raw bench-snappy-compress-into: $(BASE_BENCH_CMD) snappy_de_compress_into @@ -28,7 +31,6 @@ dev-install: rm -rf ./dist maturin build --release --out dist --no-sdist --interpreter $(shell which python) pip uninstall cramjam -y - rm dist/*.tar.gz pip install cramjam --no-index --find-links dist/ pypy-build: diff --git a/benchmarks/README.md b/benchmarks/README.md index 5f5f440c..ffe5285f 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -48,93 +48,128 @@ of the performance benefit that can be expected from various files/sizes. ---------------------------------------------------------------------------------------------------------------------- benchmark: 48 tests ---------------------------------------------------------------------------------------------------------------------- Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_gzip[Mark.Twain-Tom.Sawyer.txt-used-output_len=False-cramjam] 519.7970 (1.00) 1,039.8630 (1.10) 554.8131 (1.0) 57.9148 (1.11) 539.2260 (1.0) 35.0980 (1.0) 116;117 1,802.4088 (1.0) 1548 1 -test_gzip[Mark.Twain-Tom.Sawyer.txt-used-output_len=False-gzip] 574.1360 (1.11) 949.8510 (1.00) 612.0804 (1.10) 53.3911 (1.02) 597.0000 (1.11) 40.9002 (1.17) 157;125 1,633.7722 (0.91) 1529 1 -test_gzip[Mark.Twain-Tom.Sawyer.txt-used-output_len=True-cramjam] 518.2490 (1.0) 1,060.6960 (1.12) 557.3019 (1.00) 58.1539 (1.11) 540.5990 (1.00) 38.5972 (1.10) 173;161 1,794.3595 (1.00) 1733 1 -test_gzip[Mark.Twain-Tom.Sawyer.txt-used-output_len=True-gzip] 573.6970 (1.11) 948.0410 (1.0) 610.1559 (1.10) 52.2425 (1.0) 595.9855 (1.11) 39.6890 (1.13) 137;112 1,638.9253 (0.91) 1402 1 -test_gzip[alice29.txt-used-output_len=False-cramjam] 11,126.7610 (21.47) 12,438.6120 (13.12) 11,511.1064 (20.75) 218.1752 (4.18) 11,467.2385 (21.27) 284.9400 (8.12) 19;2 86.8726 (0.05) 84 1 -test_gzip[alice29.txt-used-output_len=False-gzip] 11,987.2370 (23.13) 13,140.4910 (13.86) 12,407.8334 (22.36) 229.7621 (4.40) 12,373.9645 (22.95) 245.7390 (7.00) 25;3 80.5942 (0.04) 78 1 -test_gzip[alice29.txt-used-output_len=True-cramjam] 10,691.9930 (20.63) 12,840.6640 (13.54) 11,162.0962 (20.12) 263.0858 (5.04) 11,151.9600 (20.68) 216.9515 (6.18) 17;3 89.5889 (0.05) 85 1 -test_gzip[alice29.txt-used-output_len=True-gzip] 12,080.5280 (23.31) 14,159.6150 (14.94) 12,455.4363 (22.45) 258.2839 (4.94) 12,409.5640 (23.01) 197.6640 (5.63) 14;4 80.2862 (0.04) 80 1 -test_gzip[asyoulik.txt-used-output_len=False-cramjam] 8,554.8190 (16.51) 11,292.3600 (11.91) 8,928.3944 (16.09) 373.9090 (7.16) 8,848.9265 (16.41) 243.9890 (6.95) 4;4 112.0022 (0.06) 110 1 -test_gzip[asyoulik.txt-used-output_len=False-gzip] 9,105.9290 (17.57) 11,139.4500 (11.75) 9,525.5454 (17.17) 300.7739 (5.76) 9,455.9970 (17.54) 232.7680 (6.63) 14;5 104.9809 (0.06) 103 1 -test_gzip[asyoulik.txt-used-output_len=True-cramjam] 8,229.7350 (15.88) 10,414.6030 (10.99) 8,601.9106 (15.50) 305.7064 (5.85) 8,556.6525 (15.87) 263.3710 (7.50) 11;3 116.2532 (0.06) 116 1 -test_gzip[asyoulik.txt-used-output_len=True-gzip] 9,188.1850 (17.73) 11,324.9780 (11.95) 9,595.0928 (17.29) 296.8139 (5.68) 9,532.5355 (17.68) 308.6825 (8.79) 21;2 104.2199 (0.06) 104 1 -test_gzip[fireworks.jpeg-used-output_len=False-cramjam] 4,112.5220 (7.94) 4,831.0910 (5.10) 4,302.3705 (7.75) 135.0648 (2.59) 4,285.4420 (7.95) 177.0450 (5.04) 69;7 232.4300 (0.13) 233 1 -test_gzip[fireworks.jpeg-used-output_len=False-gzip] 3,199.6390 (6.17) 5,390.3660 (5.69) 3,390.9647 (6.11) 209.0182 (4.00) 3,358.5240 (6.23) 161.0110 (4.59) 19;14 294.9013 (0.16) 291 1 -test_gzip[fireworks.jpeg-used-output_len=True-cramjam] 949.3340 (1.83) 1,899.1360 (2.00) 1,005.6629 (1.81) 86.4962 (1.66) 986.2460 (1.83) 62.0280 (1.77) 72;50 994.3690 (0.55) 1031 1 -test_gzip[fireworks.jpeg-used-output_len=True-gzip] 3,200.2920 (6.18) 4,227.5530 (4.46) 3,387.2603 (6.11) 136.9476 (2.62) 3,354.4090 (6.22) 182.4480 (5.20) 77;4 295.2238 (0.16) 291 1 -test_gzip[geo.protodata-used-output_len=False-cramjam] 1,738.4310 (3.35) 2,604.3500 (2.75) 1,849.7277 (3.33) 106.3325 (2.04) 1,827.7090 (3.39) 90.1718 (2.57) 62;33 540.6201 (0.30) 555 1 -test_gzip[geo.protodata-used-output_len=False-gzip] 2,147.9430 (4.14) 2,577.6970 (2.72) 2,257.4456 (4.07) 84.2493 (1.61) 2,242.1480 (4.16) 99.5392 (2.84) 114;18 442.9786 (0.25) 453 1 -test_gzip[geo.protodata-used-output_len=True-cramjam] 1,725.6690 (3.33) 2,718.6700 (2.87) 1,836.1617 (3.31) 89.9981 (1.72) 1,815.8690 (3.37) 85.5890 (2.44) 101;31 544.6144 (0.30) 510 1 -test_gzip[geo.protodata-used-output_len=True-gzip] 2,127.9530 (4.11) 3,138.8670 (3.31) 2,272.0031 (4.10) 122.4131 (2.34) 2,247.9540 (4.17) 115.8465 (3.30) 47;18 440.1402 (0.24) 429 1 -test_gzip[html-used-output_len=False-cramjam] 2,104.5090 (4.06) 3,175.1630 (3.35) 2,225.5431 (4.01) 115.0906 (2.20) 2,201.1520 (4.08) 104.5572 (2.98) 53;20 449.3285 (0.25) 443 1 -test_gzip[html-used-output_len=False-gzip] 2,500.7590 (4.83) 3,616.7810 (3.82) 2,641.0550 (4.76) 123.6254 (2.37) 2,620.8410 (4.86) 111.6068 (3.18) 56;20 378.6366 (0.21) 359 1 -test_gzip[html-used-output_len=True-cramjam] 2,079.2420 (4.01) 3,400.4040 (3.59) 2,222.3146 (4.01) 138.7664 (2.66) 2,188.7960 (4.06) 103.3685 (2.95) 44;30 449.9813 (0.25) 453 1 -test_gzip[html-used-output_len=True-gzip] 2,478.4130 (4.78) 3,809.0550 (4.02) 2,638.6508 (4.76) 126.2851 (2.42) 2,616.6670 (4.85) 128.5782 (3.66) 50;9 378.9816 (0.21) 333 1 -test_gzip[html_x_4-used-output_len=False-cramjam] 10,130.5030 (19.55) 11,372.8290 (12.00) 10,590.8331 (19.09) 273.9986 (5.24) 10,554.1310 (19.57) 426.2085 (12.14) 26;0 94.4213 (0.05) 91 1 -test_gzip[html_x_4-used-output_len=False-gzip] 11,047.6520 (21.32) 14,428.7200 (15.22) 11,484.8105 (20.70) 479.1162 (9.17) 11,389.2690 (21.12) 283.3758 (8.07) 5;5 87.0715 (0.05) 89 1 -test_gzip[html_x_4-used-output_len=True-cramjam] 9,458.3850 (18.25) 11,396.0560 (12.02) 9,943.5637 (17.92) 322.5896 (6.17) 9,892.4310 (18.35) 408.9162 (11.65) 28;3 100.5676 (0.06) 101 1 -test_gzip[html_x_4-used-output_len=True-gzip] 10,967.9200 (21.16) 12,620.0610 (13.31) 11,653.9219 (21.01) 420.0944 (8.04) 11,559.7885 (21.44) 674.0520 (19.20) 24;0 85.8080 (0.05) 76 1 -test_gzip[kppkn.gtb-used-output_len=False-cramjam] 40,256.6710 (77.68) 43,371.8420 (45.75) 41,135.3838 (74.14) 763.5835 (14.62) 41,052.4730 (76.13) 553.2935 (15.76) 4;3 24.3100 (0.01) 25 1 -test_gzip[kppkn.gtb-used-output_len=False-gzip] 64,079.5300 (123.65) 66,277.4240 (69.91) 64,924.9089 (117.02) 565.3003 (10.82) 64,802.9350 (120.18) 815.0115 (23.22) 3;0 15.4024 (0.01) 16 1 -test_gzip[kppkn.gtb-used-output_len=True-cramjam] 40,419.5090 (77.99) 47,952.8860 (50.58) 42,200.0288 (76.06) 1,794.2808 (34.35) 41,631.6520 (77.21) 2,457.0025 (70.00) 3;1 23.6967 (0.01) 24 1 -test_gzip[kppkn.gtb-used-output_len=True-gzip] 64,210.5460 (123.90) 66,613.3220 (70.26) 65,246.4269 (117.60) 741.0967 (14.19) 65,081.6270 (120.69) 1,015.2845 (28.93) 6;0 15.3265 (0.01) 16 1 -test_gzip[lcet10.txt-used-output_len=False-cramjam] 27,573.1010 (53.20) 34,978.2250 (36.90) 29,197.9029 (52.63) 1,652.9253 (31.64) 28,802.4685 (53.41) 736.2490 (20.98) 3;3 34.2490 (0.02) 36 1 -test_gzip[lcet10.txt-used-output_len=False-gzip] 30,280.9470 (58.43) 32,562.8860 (34.35) 31,243.7204 (56.31) 571.5751 (10.94) 31,290.7620 (58.03) 880.7153 (25.09) 12;0 32.0064 (0.02) 33 1 -test_gzip[lcet10.txt-used-output_len=True-cramjam] 9,555.1300 (18.44) 10,782.8600 (11.37) 9,912.5000 (17.87) 228.0582 (4.37) 9,872.4590 (18.31) 251.0783 (7.15) 26;4 100.8827 (0.06) 101 1 -test_gzip[lcet10.txt-used-output_len=True-gzip] 29,901.7910 (57.70) 32,865.9390 (34.67) 30,719.4480 (55.37) 747.3969 (14.31) 30,474.8960 (56.52) 705.1667 (20.09) 4;3 32.5527 (0.02) 33 1 -test_gzip[paper-100k.pdf-used-output_len=False-cramjam] 2,967.7560 (5.73) 4,924.8240 (5.19) 3,123.3734 (5.63) 150.6200 (2.88) 3,098.6220 (5.75) 129.4330 (3.69) 28;14 320.1667 (0.18) 317 1 -test_gzip[paper-100k.pdf-used-output_len=False-gzip] 3,213.1740 (6.20) 4,502.4180 (4.75) 3,398.0615 (6.12) 153.2343 (2.93) 3,375.8890 (6.26) 153.5790 (4.38) 51;12 294.2854 (0.16) 286 1 -test_gzip[paper-100k.pdf-used-output_len=True-cramjam] 2,516.1480 (4.86) 3,818.5730 (4.03) 2,644.5456 (4.77) 114.8152 (2.20) 2,623.0940 (4.86) 130.8360 (3.73) 69;8 378.1368 (0.21) 346 1 -test_gzip[paper-100k.pdf-used-output_len=True-gzip] 3,219.8630 (6.21) 4,964.1200 (5.24) 3,397.6387 (6.12) 181.2687 (3.47) 3,368.3955 (6.25) 150.5255 (4.29) 25;11 294.3221 (0.16) 280 1 -test_gzip[plrabn12.txt-used-output_len=False-cramjam] 49,762.2040 (96.02) 52,859.5690 (55.76) 50,738.7668 (91.45) 738.6969 (14.14) 50,563.2525 (93.77) 699.4550 (19.93) 4;2 19.7088 (0.01) 20 1 -test_gzip[plrabn12.txt-used-output_len=False-gzip] 53,138.4560 (102.53) 57,021.3700 (60.15) 54,463.9891 (98.17) 935.8547 (17.91) 54,264.2380 (100.63) 1,299.5680 (37.03) 5;0 18.3608 (0.01) 19 1 -test_gzip[plrabn12.txt-used-output_len=True-cramjam] 11,388.8090 (21.98) 15,003.8590 (15.83) 11,920.0316 (21.48) 539.3715 (10.32) 11,808.2690 (21.90) 325.8090 (9.28) 7;7 83.8924 (0.05) 83 1 -test_gzip[plrabn12.txt-used-output_len=True-gzip] 53,538.2260 (103.31) 56,180.2010 (59.26) 54,724.8323 (98.64) 721.1164 (13.80) 54,695.3400 (101.43) 949.2280 (27.05) 7;0 18.2732 (0.01) 19 1 -test_gzip[urls.10K-used-output_len=False-cramjam] 38,097.4640 (73.51) 45,489.4830 (47.98) 39,572.1519 (71.33) 1,431.1925 (27.40) 39,344.5560 (72.96) 1,227.8805 (34.98) 2;1 25.2703 (0.01) 24 1 -test_gzip[urls.10K-used-output_len=False-gzip] 41,281.0130 (79.65) 43,838.5800 (46.24) 42,369.0851 (76.37) 765.5144 (14.65) 42,351.6570 (78.54) 1,138.2070 (32.43) 9;0 23.6021 (0.01) 23 1 -test_gzip[urls.10K-used-output_len=True-cramjam] 7,313.0000 (14.11) 9,913.0270 (10.46) 7,718.4330 (13.91) 313.5716 (6.00) 7,633.9700 (14.16) 345.0630 (9.83) 17;5 129.5600 (0.07) 130 1 -test_gzip[urls.10K-used-output_len=True-gzip] 41,650.3010 (80.37) 44,860.9970 (47.32) 42,876.6647 (77.28) 703.7033 (13.47) 42,893.8560 (79.55) 851.1110 (24.25) 6;1 23.3227 (0.01) 24 1 +test_gzip[Mark.Twain-Tom.Sawyer.txt-used-output_len=False-cramjam] 498.6320 (1.0) 904.3340 (1.02) 538.3901 (1.02) 48.5933 (1.17) 524.5310 (1.01) 37.6930 (1.09) 152;105 1,857.3891 (0.98) 1505 1 +test_gzip[Mark.Twain-Tom.Sawyer.txt-used-output_len=False-gzip] 574.8140 (1.15) 1,032.3880 (1.16) 616.4469 (1.17) 53.9548 (1.30) 602.6820 (1.17) 45.1167 (1.31) 122;85 1,622.1997 (0.86) 1475 1 +test_gzip[Mark.Twain-Tom.Sawyer.txt-used-output_len=True-cramjam] 499.6490 (1.00) 887.7370 (1.0) 528.1259 (1.0) 41.6166 (1.0) 517.1500 (1.0) 34.4905 (1.0) 149;93 1,893.4877 (1.0) 1676 1 +test_gzip[Mark.Twain-Tom.Sawyer.txt-used-output_len=True-gzip] 575.3320 (1.15) 961.8130 (1.08) 617.2423 (1.17) 48.7969 (1.17) 601.3870 (1.16) 41.9803 (1.22) 127;71 1,620.1094 (0.86) 1329 1 +test_gzip[alice29.txt-used-output_len=False-cramjam] 10,896.4080 (21.85) 12,870.5330 (14.50) 11,282.3297 (21.36) 330.3431 (7.94) 11,201.8925 (21.66) 364.0470 (10.55) 15;3 88.6342 (0.05) 86 1 +test_gzip[alice29.txt-used-output_len=False-gzip] 12,027.0880 (24.12) 14,208.4480 (16.01) 12,427.6071 (23.53) 395.2144 (9.50) 12,312.2350 (23.81) 363.9955 (10.55) 12;3 80.4660 (0.04) 80 1 +test_gzip[alice29.txt-used-output_len=True-cramjam] 10,912.5080 (21.88) 13,144.6680 (14.81) 11,313.8446 (21.42) 331.2081 (7.96) 11,236.6165 (21.73) 315.2310 (9.14) 11;3 88.3873 (0.05) 90 1 +test_gzip[alice29.txt-used-output_len=True-gzip] 11,991.7980 (24.05) 14,076.5000 (15.86) 12,457.9527 (23.59) 439.7621 (10.57) 12,263.0020 (23.71) 461.3358 (13.38) 11;5 80.2700 (0.04) 81 1 +test_gzip[asyoulik.txt-used-output_len=False-cramjam] 8,340.2480 (16.73) 9,570.2210 (10.78) 8,648.5055 (16.38) 233.7639 (5.62) 8,591.8990 (16.61) 269.5990 (7.82) 27;5 115.6269 (0.06) 118 1 +test_gzip[asyoulik.txt-used-output_len=False-gzip] 9,158.9250 (18.37) 9,988.6440 (11.25) 9,459.7682 (17.91) 186.1147 (4.47) 9,412.7690 (18.20) 276.7113 (8.02) 38;0 105.7108 (0.06) 107 1 +test_gzip[asyoulik.txt-used-output_len=True-cramjam] 8,319.9830 (16.69) 10,737.6260 (12.10) 8,666.3427 (16.41) 322.9455 (7.76) 8,599.2010 (16.63) 258.3250 (7.49) 12;3 115.3889 (0.06) 113 1 +test_gzip[asyoulik.txt-used-output_len=True-gzip] 9,188.2840 (18.43) 11,275.7290 (12.70) 9,525.4683 (18.04) 351.3017 (8.44) 9,419.5680 (18.21) 268.0395 (7.77) 8;6 104.9817 (0.06) 100 1 +test_gzip[fireworks.jpeg-used-output_len=False-cramjam] 3,934.7320 (7.89) 6,228.3300 (7.02) 4,123.2676 (7.81) 232.6841 (5.59) 4,057.8555 (7.85) 139.8660 (4.06) 21;20 242.5261 (0.13) 238 1 +test_gzip[fireworks.jpeg-used-output_len=False-gzip] 3,225.5690 (6.47) 5,195.1360 (5.85) 3,414.4240 (6.47) 234.7346 (5.64) 3,347.1610 (6.47) 129.6175 (3.76) 25;26 292.8752 (0.15) 285 1 +test_gzip[fireworks.jpeg-used-output_len=True-cramjam] 3,937.0940 (7.90) 6,554.8870 (7.38) 4,114.5315 (7.79) 247.2504 (5.94) 4,073.0180 (7.88) 147.3470 (4.27) 7;7 243.0410 (0.13) 131 1 +test_gzip[fireworks.jpeg-used-output_len=True-gzip] 3,198.9300 (6.42) 5,349.8960 (6.03) 3,404.4112 (6.45) 244.9796 (5.89) 3,344.0150 (6.47) 147.7962 (4.29) 19;18 293.7366 (0.16) 279 1 +test_gzip[geo.protodata-used-output_len=False-cramjam] 1,585.4920 (3.18) 2,736.4010 (3.08) 1,661.7925 (3.15) 102.5084 (2.46) 1,640.5520 (3.17) 73.7418 (2.14) 37;31 601.7598 (0.32) 597 1 +test_gzip[geo.protodata-used-output_len=False-gzip] 2,139.4700 (4.29) 3,134.1810 (3.53) 2,243.2067 (4.25) 124.7361 (3.00) 2,215.2610 (4.28) 91.4940 (2.65) 33;21 445.7904 (0.24) 442 1 +test_gzip[geo.protodata-used-output_len=True-cramjam] 1,581.0920 (3.17) 2,745.9970 (3.09) 1,656.5480 (3.14) 113.7994 (2.73) 1,627.7865 (3.15) 66.2845 (1.92) 41;41 603.6650 (0.32) 612 1 +test_gzip[geo.protodata-used-output_len=True-gzip] 2,150.2560 (4.31) 3,237.2160 (3.65) 2,254.2443 (4.27) 149.2253 (3.59) 2,217.4510 (4.29) 92.2995 (2.68) 30;29 443.6076 (0.23) 437 1 +test_gzip[html-used-output_len=False-cramjam] 1,960.5420 (3.93) 3,141.3660 (3.54) 2,048.0674 (3.88) 109.5254 (2.63) 2,023.5200 (3.91) 78.3633 (2.27) 35;27 488.2652 (0.26) 495 1 +test_gzip[html-used-output_len=False-gzip] 2,473.2450 (4.96) 3,558.9080 (4.01) 2,594.7358 (4.91) 128.5785 (3.09) 2,570.0210 (4.97) 103.8637 (3.01) 33;20 385.3957 (0.20) 379 1 +test_gzip[html-used-output_len=True-cramjam] 1,958.1090 (3.93) 3,045.2460 (3.43) 2,048.7684 (3.88) 118.1116 (2.84) 2,024.7970 (3.92) 73.6460 (2.14) 25;25 488.0981 (0.26) 460 1 +test_gzip[html-used-output_len=True-gzip] 2,459.9110 (4.93) 3,534.8000 (3.98) 2,597.0396 (4.92) 153.6156 (3.69) 2,559.4020 (4.95) 97.3550 (2.82) 31;30 385.0538 (0.20) 387 1 +test_gzip[html_x_4-used-output_len=False-cramjam] 9,386.4630 (18.82) 10,598.7610 (11.94) 9,729.7528 (18.42) 273.3211 (6.57) 9,641.9175 (18.64) 232.4350 (6.74) 18;9 102.7775 (0.05) 100 1 +test_gzip[html_x_4-used-output_len=False-gzip] 10,954.5380 (21.97) 12,229.1090 (13.78) 11,388.3416 (21.56) 314.4430 (7.56) 11,291.9835 (21.84) 441.7480 (12.81) 28;0 87.8091 (0.05) 88 1 +test_gzip[html_x_4-used-output_len=True-cramjam] 9,362.4440 (18.78) 10,832.3570 (12.20) 9,753.7416 (18.47) 237.0947 (5.70) 9,740.3650 (18.83) 287.9575 (8.35) 20;4 102.5248 (0.05) 99 1 +test_gzip[html_x_4-used-output_len=True-gzip] 10,925.2580 (21.91) 12,430.4210 (14.00) 11,443.7624 (21.67) 264.1632 (6.35) 11,395.8990 (22.04) 319.3560 (9.26) 22;2 87.3838 (0.05) 87 1 +test_gzip[kppkn.gtb-used-output_len=False-cramjam] 39,132.5940 (78.48) 40,894.0630 (46.07) 39,748.0645 (75.26) 372.1767 (8.94) 39,747.7660 (76.86) 304.5605 (8.83) 6;1 25.1585 (0.01) 25 1 +test_gzip[kppkn.gtb-used-output_len=False-gzip] 63,357.7700 (127.06) 64,867.0860 (73.07) 63,931.2074 (121.05) 431.4790 (10.37) 63,917.2790 (123.60) 455.9250 (13.22) 5;2 15.6418 (0.01) 16 1 +test_gzip[kppkn.gtb-used-output_len=True-cramjam] 39,015.7680 (78.25) 42,225.1180 (47.56) 39,948.5314 (75.64) 763.3876 (18.34) 39,720.7660 (76.81) 784.1145 (22.73) 6;2 25.0322 (0.01) 25 1 +test_gzip[kppkn.gtb-used-output_len=True-gzip] 63,601.0280 (127.55) 65,441.2490 (73.72) 64,262.9742 (121.68) 533.9574 (12.83) 64,180.3945 (124.10) 821.0655 (23.81) 5;0 15.5611 (0.01) 16 1 +test_gzip[lcet10.txt-used-output_len=False-cramjam] 26,893.0920 (53.93) 28,413.5420 (32.01) 27,353.3749 (51.79) 383.5836 (9.22) 27,217.9150 (52.63) 500.9510 (14.52) 9;2 36.5586 (0.02) 37 1 +test_gzip[lcet10.txt-used-output_len=False-gzip] 29,453.3110 (59.07) 32,535.7740 (36.65) 30,217.1804 (57.22) 580.9736 (13.96) 30,086.3775 (58.18) 727.5420 (21.09) 6;1 33.0938 (0.02) 34 1 +test_gzip[lcet10.txt-used-output_len=True-cramjam] 26,531.6900 (53.21) 27,972.4670 (31.51) 27,264.7757 (51.63) 359.6821 (8.64) 27,271.8585 (52.73) 541.0055 (15.69) 12;0 36.6774 (0.02) 36 1 +test_gzip[lcet10.txt-used-output_len=True-gzip] 29,633.5980 (59.43) 31,146.0220 (35.08) 30,229.0893 (57.24) 363.9110 (8.74) 30,206.0570 (58.41) 489.7215 (14.20) 10;0 33.0807 (0.02) 33 1 +test_gzip[paper-100k.pdf-used-output_len=False-cramjam] 2,830.4720 (5.68) 3,538.2900 (3.99) 2,964.3271 (5.61) 107.5574 (2.58) 2,941.1885 (5.69) 120.8570 (3.50) 64;15 337.3447 (0.18) 318 1 +test_gzip[paper-100k.pdf-used-output_len=False-gzip] 3,236.7040 (6.49) 4,065.8930 (4.58) 3,425.2506 (6.49) 142.2416 (3.42) 3,401.9400 (6.58) 147.6630 (4.28) 69;16 291.9494 (0.15) 290 1 +test_gzip[paper-100k.pdf-used-output_len=True-cramjam] 2,829.1540 (5.67) 3,389.4550 (3.82) 2,957.7600 (5.60) 108.1616 (2.60) 2,929.9220 (5.67) 113.1508 (3.28) 58;24 338.0937 (0.18) 331 1 +test_gzip[paper-100k.pdf-used-output_len=True-gzip] 3,244.5330 (6.51) 5,180.7690 (5.84) 3,436.5281 (6.51) 201.4648 (4.84) 3,392.0260 (6.56) 125.4383 (3.64) 26;26 290.9914 (0.15) 261 1 +test_gzip[plrabn12.txt-used-output_len=False-cramjam] 49,320.9690 (98.91) 74,427.5540 (83.84) 53,266.0228 (100.86) 5,882.3811 (141.35) 51,283.4770 (99.17) 2,127.7145 (61.69) 2;3 18.7737 (0.01) 20 1 +test_gzip[plrabn12.txt-used-output_len=False-gzip] 58,243.8110 (116.81) 62,047.1040 (69.89) 60,300.9332 (114.18) 1,419.1243 (34.10) 60,473.5410 (116.94) 2,739.9335 (79.44) 7;0 16.5835 (0.01) 16 1 +test_gzip[plrabn12.txt-used-output_len=True-cramjam] 49,100.9320 (98.47) 63,868.7720 (71.95) 52,104.9511 (98.66) 4,869.3169 (117.00) 49,982.7855 (96.65) 1,824.6450 (52.90) 3;3 19.1920 (0.01) 16 1 +test_gzip[plrabn12.txt-used-output_len=True-gzip] 58,425.0160 (117.17) 63,285.9410 (71.29) 59,995.6096 (113.60) 1,132.8758 (27.22) 60,199.5660 (116.41) 1,314.8300 (38.12) 4;1 16.6679 (0.01) 17 1 +test_gzip[urls.10K-used-output_len=False-cramjam] 36,073.3510 (72.34) 41,615.1860 (46.88) 38,465.0136 (72.83) 1,701.3922 (40.88) 38,056.7860 (73.59) 3,375.4912 (97.87) 10;0 25.9977 (0.01) 23 1 +test_gzip[urls.10K-used-output_len=False-gzip] 40,484.9950 (81.19) 42,920.4170 (48.35) 41,712.5562 (78.98) 718.7713 (17.27) 41,697.8900 (80.63) 1,178.5225 (34.17) 7;0 23.9736 (0.01) 25 1 +test_gzip[urls.10K-used-output_len=True-cramjam] 36,067.1570 (72.33) 39,557.0310 (44.56) 37,054.0519 (70.16) 729.0753 (17.52) 36,985.7040 (71.52) 730.4390 (21.18) 7;1 26.9876 (0.01) 27 1 +test_gzip[urls.10K-used-output_len=True-gzip] 40,734.0010 (81.69) 43,980.7760 (49.54) 42,111.1947 (79.74) 836.1285 (20.09) 42,234.7625 (81.67) 1,114.0880 (32.30) 9;0 23.7467 (0.01) 24 1 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - ``` #### Snappy -`make bench-snappy` +There are two types here, `framed` and `raw`; the recommended one being snappy's `framed` format. + +`make bench-snappy-framed` + +```bash +------------------------------------------------------------------------------------------------------------ benchmark: 24 tests ------------------------------------------------------------------------------------------------------------ +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_snappy_framed[Mark.Twain-Tom.Sawyer.txt-cramjam] 66.9240 (1.0) 154.2760 (1.0) 72.3902 (1.0) 9.7916 (1.0) 69.3480 (1.0) 3.1895 (1.0) 529;887 13,814.0175 (1.0) 5297 1 +test_snappy_framed[Mark.Twain-Tom.Sawyer.txt-snappy] 127.9920 (1.91) 269.7400 (1.75) 136.3288 (1.88) 15.0409 (1.54) 129.3530 (1.87) 7.1885 (2.25) 496;644 7,335.2064 (0.53) 4257 1 +test_snappy_framed[alice29.txt-cramjam] 668.5400 (9.99) 1,039.1390 (6.74) 704.8739 (9.74) 41.9223 (4.28) 692.8285 (9.99) 43.8280 (13.74) 153;70 1,418.6935 (0.10) 1362 1 +test_snappy_framed[alice29.txt-snappy] 1,369.6490 (20.47) 1,804.7350 (11.70) 1,451.3804 (20.05) 73.8900 (7.55) 1,436.7360 (20.72) 64.1737 (20.12) 164;51 688.9993 (0.05) 657 1 +test_snappy_framed[asyoulik.txt-cramjam] 594.3560 (8.88) 909.6040 (5.90) 654.6089 (9.04) 58.4995 (5.97) 635.0080 (9.16) 35.4340 (11.11) 229;170 1,527.6298 (0.11) 1505 1 +test_snappy_framed[asyoulik.txt-snappy] 1,159.2780 (17.32) 1,648.2730 (10.68) 1,226.9825 (16.95) 69.1631 (7.06) 1,208.1210 (17.42) 62.4248 (19.57) 100;62 815.0076 (0.06) 801 1 +test_snappy_framed[fireworks.jpeg-cramjam] 82.0640 (1.23) 188.1610 (1.22) 93.4016 (1.29) 11.2305 (1.15) 89.0435 (1.28) 7.2770 (2.28) 1139;1063 10,706.4547 (0.78) 9054 1 +test_snappy_framed[fireworks.jpeg-snappy] 642.6270 (9.60) 1,080.5030 (7.00) 691.3958 (9.55) 46.4952 (4.75) 683.1805 (9.85) 41.4210 (12.99) 406;100 1,446.3496 (0.10) 1374 1 +test_snappy_framed[geo.protodata-cramjam] 207.2620 (3.10) 404.8150 (2.62) 221.4278 (3.06) 21.8556 (2.23) 213.4810 (3.08) 13.1203 (4.11) 504;529 4,516.1454 (0.33) 4301 1 +test_snappy_framed[geo.protodata-snappy] 727.0740 (10.86) 1,081.0720 (7.01) 774.2563 (10.70) 51.2997 (5.24) 760.7860 (10.97) 44.8170 (14.05) 134;97 1,291.5619 (0.09) 1094 1 +test_snappy_framed[html-cramjam] 209.9630 (3.14) 379.6720 (2.46) 223.6982 (3.09) 18.5359 (1.89) 217.6590 (3.14) 13.9565 (4.38) 501;360 4,470.3094 (0.32) 3931 1 +test_snappy_framed[html-snappy] 662.3100 (9.90) 1,010.6180 (6.55) 708.9370 (9.79) 51.5825 (5.27) 696.5880 (10.04) 47.8210 (14.99) 191;106 1,410.5626 (0.10) 1404 1 +test_snappy_framed[html_x_4-cramjam] 813.9490 (12.16) 1,234.0220 (8.00) 875.8345 (12.10) 64.5237 (6.59) 859.8725 (12.40) 51.4810 (16.14) 138;83 1,141.7682 (0.08) 1122 1 +test_snappy_framed[html_x_4-snappy] 2,665.8330 (39.83) 3,384.9470 (21.94) 2,865.7697 (39.59) 118.5848 (12.11) 2,843.3000 (41.00) 136.0955 (42.67) 97;13 348.9464 (0.03) 337 1 +test_snappy_framed[kppkn.gtb-cramjam] 569.1570 (8.50) 941.9720 (6.11) 612.0818 (8.46) 48.9091 (5.00) 598.0705 (8.62) 40.8400 (12.80) 145;100 1,633.7685 (0.12) 1518 1 +test_snappy_framed[kppkn.gtb-snappy] 1,418.3590 (21.19) 1,969.5570 (12.77) 1,541.2999 (21.29) 97.1448 (9.92) 1,517.0740 (21.88) 86.4275 (27.10) 132;59 648.8030 (0.05) 648 1 +test_snappy_framed[lcet10.txt-cramjam] 1,760.2000 (26.30) 2,350.9470 (15.24) 1,882.3709 (26.00) 99.5655 (10.17) 1,852.9640 (26.72) 107.8530 (33.82) 110;25 531.2449 (0.04) 473 1 +test_snappy_framed[lcet10.txt-snappy] 3,777.4390 (56.44) 4,641.5480 (30.09) 4,027.3592 (55.63) 167.2311 (17.08) 3,996.9500 (57.64) 213.6315 (66.98) 69;7 248.3017 (0.02) 233 1 +test_snappy_framed[paper-100k.pdf-cramjam] 79.9450 (1.19) 166.5010 (1.08) 87.5469 (1.21) 9.9207 (1.01) 84.0460 (1.21) 3.9595 (1.24) 1020;1387 11,422.4499 (0.83) 9037 1 +test_snappy_framed[paper-100k.pdf-snappy] 541.7030 (8.09) 832.1680 (5.39) 572.2008 (7.90) 37.8356 (3.86) 562.3680 (8.11) 40.0157 (12.55) 178;77 1,747.6383 (0.13) 1701 1 +test_snappy_framed[plrabn12.txt-cramjam] 2,624.9240 (39.22) 3,795.2400 (24.60) 2,784.9409 (38.47) 156.2531 (15.96) 2,745.7185 (39.59) 123.5140 (38.73) 37;29 359.0741 (0.03) 350 1 +test_snappy_framed[plrabn12.txt-snappy] 5,029.2020 (75.15) 6,492.6980 (42.08) 5,319.8571 (73.49) 213.0427 (21.76) 5,263.8185 (75.90) 233.6150 (73.25) 36;8 187.9750 (0.01) 186 1 +test_snappy_framed[urls.10K-cramjam] 2,206.7890 (32.97) 3,433.8090 (22.26) 2,489.4787 (34.39) 195.0847 (19.92) 2,530.7845 (36.49) 303.4110 (95.13) 132;3 401.6905 (0.03) 386 1 +test_snappy_framed[urls.10K-snappy] 5,900.9280 (88.17) 7,273.0480 (47.14) 6,206.1277 (85.73) 222.2105 (22.69) 6,145.8630 (88.62) 200.1830 (62.76) 32;10 161.1311 (0.01) 157 1 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +``` + +`make bench-snappy-raw` ```bash --------------------------------------------------------------------------------------------------------- benchmark: 24 tests -------------------------------------------------------------------------------------------------------- -Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_snappy[Mark.Twain-Tom.Sawyer.txt-cramjam] 69.6919 (4.60) 142.3401 (1.32) 72.4619 (4.52) 6.4226 (2.10) 70.8679 (4.57) 0.6619 (2.97) 413;562 13,800.3567 (0.22) 5500 1 -test_snappy[Mark.Twain-Tom.Sawyer.txt-snappy] 52.7790 (3.48) 107.7750 (1.0) 54.9983 (3.43) 5.1327 (1.68) 53.7359 (3.47) 0.3818 (1.71) 963;1700 18,182.3879 (0.29) 14399 1 -test_snappy[alice29.txt-cramjam] 675.4770 (44.54) 922.2139 (8.56) 694.3961 (43.30) 30.4206 (9.93) 679.6580 (43.86) 20.9867 (94.14) 126;114 1,440.1002 (0.02) 1367 1 -test_snappy[alice29.txt-snappy] 600.0660 (39.57) 959.7030 (8.90) 619.9690 (38.66) 33.7294 (11.01) 604.7899 (39.03) 20.5748 (92.29) 119;117 1,612.9838 (0.03) 1535 1 -test_snappy[asyoulik.txt-cramjam] 595.2580 (39.25) 2,173.1190 (20.16) 618.6623 (38.57) 78.2697 (25.56) 600.8621 (38.77) 19.6720 (88.24) 63;127 1,616.3906 (0.03) 1617 1 -test_snappy[asyoulik.txt-snappy] 533.4230 (35.17) 2,030.4810 (18.84) 551.4113 (34.38) 59.9924 (19.59) 536.2805 (34.61) 19.8210 (88.91) 83;141 1,813.5282 (0.03) 1832 1 -test_snappy[fireworks.jpeg-cramjam] 89.4699 (5.90) 168.3640 (1.56) 92.3831 (5.76) 7.5078 (2.45) 90.2121 (5.82) 0.3100 (1.39) 670;1513 10,824.4885 (0.17) 8857 1 -test_snappy[fireworks.jpeg-snappy] 15.1660 (1.0) 144.0961 (1.34) 16.0380 (1.0) 3.3644 (1.10) 15.4970 (1.0) 0.2229 (1.0) 1358;3544 62,352.1041 (1.0) 37029 1 -test_snappy[geo.protodata-cramjam] 207.6969 (13.69) 372.3470 (3.45) 215.9245 (13.46) 16.9630 (5.54) 210.1055 (13.56) 2.4871 (11.16) 354;941 4,631.2487 (0.07) 4218 1 -test_snappy[geo.protodata-snappy] 143.2810 (9.45) 255.3989 (2.37) 149.7182 (9.34) 11.3959 (3.72) 146.7150 (9.47) 2.2070 (9.90) 537;917 6,679.2132 (0.11) 6094 1 -test_snappy[html-cramjam] 211.1669 (13.92) 377.4320 (3.50) 219.2299 (13.67) 16.2401 (5.30) 213.4370 (13.77) 1.9480 (8.74) 374;1008 4,561.4215 (0.07) 4364 1 -test_snappy[html-snappy] 156.2310 (10.30) 251.9019 (2.34) 161.9493 (10.10) 11.7708 (3.84) 157.8455 (10.19) 1.1481 (5.15) 417;989 6,174.7707 (0.10) 4234 1 -test_snappy[html_x_4-cramjam] 829.1621 (54.67) 2,369.9310 (21.99) 856.4618 (53.40) 65.0095 (21.23) 834.2875 (53.84) 25.6934 (115.25) 74;99 1,167.5944 (0.02) 1164 1 -test_snappy[html_x_4-snappy] 634.9960 (41.87) 1,006.6601 (9.34) 654.3856 (40.80) 35.3621 (11.55) 639.4909 (41.27) 20.0983 (90.15) 119;119 1,528.1509 (0.02) 1535 1 -test_snappy[kppkn.gtb-cramjam] 583.8150 (38.49) 920.7830 (8.54) 602.0537 (37.54) 32.0894 (10.48) 586.6180 (37.85) 21.3237 (95.65) 139;124 1,660.9815 (0.03) 1665 1 -test_snappy[kppkn.gtb-snappy] 504.3701 (33.26) 711.1890 (6.60) 518.6457 (32.34) 29.0123 (9.47) 506.0320 (32.65) 19.3256 (86.69) 142;109 1,928.0986 (0.03) 1939 1 -test_snappy[lcet10.txt-cramjam] 1,779.8330 (117.36) 3,901.6550 (36.20) 1,833.9115 (114.35) 107.2414 (35.02) 1,807.4660 (116.63) 51.8297 (232.49) 49;66 545.2826 (0.01) 547 1 -test_snappy[lcet10.txt-snappy] 1,590.7111 (104.89) 4,109.4429 (38.13) 1,666.3600 (103.90) 156.8394 (51.22) 1,637.2090 (105.65) 71.3950 (320.25) 15;21 600.1104 (0.01) 571 1 -test_snappy[paper-100k.pdf-cramjam] 85.2570 (5.62) 158.4151 (1.47) 88.1079 (5.49) 7.7540 (2.53) 85.9520 (5.55) 0.2950 (1.32) 607;1133 11,349.7140 (0.18) 8483 1 -test_snappy[paper-100k.pdf-snappy] 20.6160 (1.36) 254.8001 (2.36) 21.7089 (1.35) 3.0623 (1.0) 21.1160 (1.36) 0.4601 (2.06) 985;1616 46,064.1370 (0.74) 24429 1 -test_snappy[plrabn12.txt-cramjam] 2,700.6660 (178.07) 3,423.9670 (31.77) 2,828.7554 (176.38) 88.5154 (28.91) 2,796.6120 (180.46) 74.7202 (335.17) 62;49 353.5124 (0.01) 319 1 -test_snappy[plrabn12.txt-snappy] 2,183.2390 (143.96) 2,672.1179 (24.79) 2,244.7869 (139.97) 70.4361 (23.00) 2,216.0790 (143.00) 63.4236 (284.49) 63;55 445.4766 (0.01) 389 1 -test_snappy[urls.10K-cramjam] 2,545.5580 (167.85) 3,284.1910 (30.47) 2,729.0232 (170.16) 91.2420 (29.80) 2,694.1620 (173.85) 73.3960 (329.23) 62;51 366.4315 (0.01) 319 1 -test_snappy[urls.10K-snappy] 1,812.0790 (119.48) 3,354.0629 (31.12) 1,869.3891 (116.56) 102.9202 (33.61) 1,840.3350 (118.75) 46.6825 (209.40) 46;58 534.9341 (0.01) 424 1 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +---------------------------------------------------------------------------------------------------------- benchmark: 24 tests ---------------------------------------------------------------------------------------------------------- +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_snappy_raw[Mark.Twain-Tom.Sawyer.txt-cramjam] 52.4700 (3.41) 115.3440 (2.15) 56.0561 (3.36) 6.2831 (2.30) 53.4030 (3.35) 3.1860 (5.31) 497;515 17,839.2709 (0.30) 4861 1 +test_snappy_raw[Mark.Twain-Tom.Sawyer.txt-snappy] 52.9360 (3.44) 110.8180 (2.07) 56.3054 (3.38) 6.1002 (2.23) 54.0580 (3.40) 3.0085 (5.01) 1021;1043 17,760.2996 (0.30) 11152 1 +test_snappy_raw[alice29.txt-cramjam] 611.5550 (39.69) 960.9500 (17.93) 646.3615 (38.78) 47.3504 (17.32) 634.9345 (39.88) 39.8180 (66.36) 115;86 1,547.1217 (0.03) 1582 1 +test_snappy_raw[alice29.txt-snappy] 599.5750 (38.91) 943.2070 (17.59) 630.4942 (37.82) 37.0993 (13.57) 623.3310 (39.15) 36.2815 (60.47) 104;66 1,586.0575 (0.03) 1433 1 +test_snappy_raw[asyoulik.txt-cramjam] 541.9610 (35.17) 900.4050 (16.80) 570.1357 (34.20) 37.2299 (13.62) 563.6110 (35.40) 35.4925 (59.16) 132;73 1,753.9685 (0.03) 1660 1 +test_snappy_raw[asyoulik.txt-snappy] 532.0560 (34.53) 829.2950 (15.47) 557.7337 (33.46) 31.0406 (11.36) 552.0020 (34.67) 33.1413 (55.24) 163;69 1,792.9705 (0.03) 1805 1 +test_snappy_raw[fireworks.jpeg-cramjam] 40.8240 (2.65) 86.5680 (1.61) 43.1699 (2.59) 4.9490 (1.81) 41.2540 (2.59) 1.9842 (3.31) 768;868 23,164.3098 (0.39) 8581 1 +test_snappy_raw[fireworks.jpeg-snappy] 15.4080 (1.0) 53.6070 (1.0) 16.6693 (1.0) 2.7334 (1.0) 15.9200 (1.0) 0.6000 (1.0) 2163;2428 59,990.6697 (1.0) 37582 1 +test_snappy_raw[geo.protodata-cramjam] 162.4500 (10.54) 319.1430 (5.95) 171.8914 (10.31) 16.4284 (6.01) 164.2120 (10.31) 9.8320 (16.39) 472;475 5,817.6275 (0.10) 5195 1 +test_snappy_raw[geo.protodata-snappy] 142.5780 (9.25) 289.1950 (5.39) 151.9618 (9.12) 14.7480 (5.40) 146.5465 (9.21) 8.7910 (14.65) 496;513 6,580.6001 (0.11) 5564 1 +test_snappy_raw[html-cramjam] 171.6680 (11.14) 353.2230 (6.59) 182.8124 (10.97) 16.0733 (5.88) 177.6490 (11.16) 10.3890 (17.32) 445;445 5,470.0872 (0.09) 4338 1 +test_snappy_raw[html-snappy] 156.5440 (10.16) 289.5440 (5.40) 166.6782 (10.00) 15.6719 (5.73) 159.3490 (10.01) 9.7975 (16.33) 555;545 5,999.5841 (0.10) 5796 1 +test_snappy_raw[html_x_4-cramjam] 687.9800 (44.65) 1,191.3240 (22.22) 732.5265 (43.94) 61.2486 (22.41) 719.6200 (45.20) 37.0050 (61.68) 73;73 1,365.1383 (0.02) 1154 1 +test_snappy_raw[html_x_4-snappy] 635.2680 (41.23) 1,000.9560 (18.67) 665.0148 (39.89) 33.7608 (12.35) 658.7670 (41.38) 38.5295 (64.22) 132;46 1,503.7259 (0.03) 1424 1 +test_snappy_raw[kppkn.gtb-cramjam] 513.0010 (33.29) 876.9870 (16.36) 538.6927 (32.32) 37.5722 (13.75) 529.6960 (33.27) 32.9270 (54.88) 126;76 1,856.3459 (0.03) 1614 1 +test_snappy_raw[kppkn.gtb-snappy] 504.0010 (32.71) 819.8180 (15.29) 531.0207 (31.86) 36.3322 (13.29) 521.1320 (32.73) 33.4015 (55.67) 136;80 1,883.1657 (0.03) 1605 1 +test_snappy_raw[lcet10.txt-cramjam] 1,621.1720 (105.22) 2,280.9690 (42.55) 1,702.4589 (102.13) 88.1738 (32.26) 1,684.9780 (105.84) 74.5165 (124.20) 53;40 587.3857 (0.01) 563 1 +test_snappy_raw[lcet10.txt-snappy] 1,590.3340 (103.21) 2,250.7950 (41.99) 1,675.8838 (100.54) 87.7583 (32.11) 1,661.6440 (104.37) 76.0752 (126.79) 52;35 596.7001 (0.01) 591 1 +test_snappy_raw[paper-100k.pdf-cramjam] 39.0380 (2.53) 83.4620 (1.56) 41.1788 (2.47) 4.3254 (1.58) 39.5485 (2.48) 2.2180 (3.70) 857;895 24,284.3381 (0.40) 10786 1 +test_snappy_raw[paper-100k.pdf-snappy] 20.1120 (1.31) 109.0120 (2.03) 21.8335 (1.31) 3.4175 (1.25) 20.9960 (1.32) 0.8850 (1.48) 1381;1503 45,801.1219 (0.76) 21602 1 +test_snappy_raw[plrabn12.txt-cramjam] 2,218.0820 (143.96) 3,440.0420 (64.17) 2,339.6884 (140.36) 136.2661 (49.85) 2,309.2965 (145.06) 95.7930 (159.66) 35;31 427.4073 (0.01) 354 1 +test_snappy_raw[plrabn12.txt-snappy] 2,316.6310 (150.35) 3,451.9990 (64.39) 2,440.0261 (146.38) 149.6083 (54.73) 2,407.0390 (151.20) 97.8185 (163.03) 30;27 409.8317 (0.01) 417 1 +test_snappy_raw[urls.10K-cramjam] 1,908.7650 (123.88) 2,981.4070 (55.62) 2,040.6282 (122.42) 160.7180 (58.80) 2,011.1260 (126.33) 81.4197 (135.70) 28;40 490.0452 (0.01) 423 1 +test_snappy_raw[urls.10K-snappy] 1,940.1910 (125.92) 3,101.1850 (57.85) 2,052.7867 (123.15) 146.2709 (53.51) 2,021.3160 (126.97) 89.7110 (149.52) 25;25 487.1427 (0.01) 394 1 +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` + ##### Benchmarks for using `compress_into` and `decompress_into` All variants except `lz4` for now, implement a `compress_into` and `decompress_into` This is where, if you have a Python numpy array pre-allocated to the size of the @@ -144,35 +179,34 @@ Again, since basically no variants implement similar functionality as we saw in benchmarks, this benchmark is specific to `cramjam` ```bash ------------------------------------------------------------------------------------------------------------------------- benchmark: 24 tests ------------------------------------------------------------------------------------------------------------------------ -Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_cramjam_snappy_de_compress_into[Mark.Twain-Tom.Sawyer.txt-compress_into] 40.5479 (1.96) 92.6349 (1.63) 42.6280 (1.96) 3.9048 (1.43) 41.5810 (1.98) 0.7870 (3.10) 1078;2036 23,458.7680 (0.51) 15468 1 -test_cramjam_snappy_de_compress_into[Mark.Twain-Tom.Sawyer.txt-decompress_into] 20.7290 (1.0) 56.6919 (1.0) 21.7574 (1.0) 2.7272 (1.0) 20.9850 (1.0) 0.2539 (1.0) 1139;3593 45,961.2983 (1.0) 17873 1 -test_cramjam_snappy_de_compress_into[alice29.txt-compress_into] 465.8590 (22.47) 734.1170 (12.95) 492.3764 (22.63) 36.7509 (13.48) 481.4934 (22.94) 32.5410 (128.16) 215;151 2,030.9665 (0.04) 2002 1 -test_cramjam_snappy_de_compress_into[alice29.txt-decompress_into] 189.1120 (9.12) 366.5941 (6.47) 204.2178 (9.39) 25.1372 (9.22) 195.4500 (9.31) 12.6192 (49.70) 475;586 4,896.7338 (0.11) 4737 1 -test_cramjam_snappy_de_compress_into[asyoulik.txt-compress_into] 412.6530 (19.91) 678.6539 (11.97) 437.5261 (20.11) 40.1624 (14.73) 417.2896 (19.89) 24.7700 (97.56) 239;239 2,285.5782 (0.05) 2214 1 -test_cramjam_snappy_de_compress_into[asyoulik.txt-decompress_into] 166.3340 (8.02) 298.4459 (5.26) 176.6029 (8.12) 16.2447 (5.96) 171.6875 (8.18) 10.1075 (39.81) 510;522 5,662.4205 (0.12) 4348 1 -test_cramjam_snappy_de_compress_into[fireworks.jpeg-compress_into] 38.2040 (1.84) 92.6780 (1.63) 41.3553 (1.90) 5.0435 (1.85) 39.8530 (1.90) 2.1397 (8.43) 2339;2662 24,180.6924 (0.53) 22063 1 -test_cramjam_snappy_de_compress_into[fireworks.jpeg-decompress_into] 27.4120 (1.32) 342.3600 (6.04) 28.8981 (1.33) 4.3700 (1.60) 27.7550 (1.32) 0.8330 (3.28) 1856;2320 34,604.3022 (0.75) 29230 1 -test_cramjam_snappy_de_compress_into[geo.protodata-compress_into] 121.6651 (5.87) 247.4930 (4.37) 132.2174 (6.08) 15.0302 (5.51) 126.7961 (6.04) 8.4882 (33.43) 758;831 7,563.3022 (0.16) 6179 1 -test_cramjam_snappy_de_compress_into[geo.protodata-decompress_into] 64.3190 (3.10) 132.8751 (2.34) 69.9674 (3.22) 8.3110 (3.05) 66.6840 (3.18) 4.1929 (16.51) 1641;2219 14,292.3797 (0.31) 11606 1 -test_cramjam_snappy_de_compress_into[html-compress_into] 129.1960 (6.23) 244.5440 (4.31) 134.7474 (6.19) 11.0703 (4.06) 131.1535 (6.25) 1.8530 (7.30) 599;1200 7,421.2955 (0.16) 7240 1 -test_cramjam_snappy_de_compress_into[html-decompress_into] 64.9870 (3.14) 131.4470 (2.32) 67.4074 (3.10) 6.2745 (2.30) 65.5680 (3.12) 0.3350 (1.32) 1060;2021 14,835.1691 (0.32) 12760 1 -test_cramjam_snappy_de_compress_into[html_x_4-compress_into] 515.1191 (24.85) 776.8241 (13.70) 534.2439 (24.55) 30.4098 (11.15) 519.7320 (24.77) 21.6207 (85.15) 171;139 1,871.8044 (0.04) 1907 1 -test_cramjam_snappy_de_compress_into[html_x_4-decompress_into] 254.3870 (12.27) 414.4181 (7.31) 263.4683 (12.11) 19.0163 (6.97) 255.5045 (12.18) 7.4761 (29.44) 322;571 3,795.5233 (0.08) 3426 1 -test_cramjam_snappy_de_compress_into[kppkn.gtb-compress_into] 363.9220 (17.56) 603.6321 (10.65) 378.2792 (17.39) 23.9013 (8.76) 366.1391 (17.45) 20.6419 (81.30) 233;146 2,643.5504 (0.06) 2690 1 -test_cramjam_snappy_de_compress_into[kppkn.gtb-decompress_into] 195.1340 (9.41) 427.1740 (7.54) 203.5845 (9.36) 14.4862 (5.31) 198.9135 (9.48) 4.6330 (18.25) 466;788 4,911.9653 (0.11) 4846 1 -test_cramjam_snappy_de_compress_into[lcet10.txt-compress_into] 1,223.2770 (59.01) 1,540.7839 (27.18) 1,265.2557 (58.15) 48.1862 (17.67) 1,251.2550 (59.63) 47.2469 (186.08) 95;65 790.3541 (0.02) 790 1 -test_cramjam_snappy_de_compress_into[lcet10.txt-decompress_into] 493.2970 (23.80) 773.3600 (13.64) 510.9751 (23.49) 29.4123 (10.78) 495.8690 (23.63) 22.5334 (88.75) 153;124 1,957.0427 (0.04) 1833 1 -test_cramjam_snappy_de_compress_into[paper-100k.pdf-compress_into] 34.6331 (1.67) 80.5630 (1.42) 37.0645 (1.70) 4.0766 (1.49) 35.3340 (1.68) 1.7233 (6.79) 2131;3151 26,979.9880 (0.59) 22285 1 -test_cramjam_snappy_de_compress_into[paper-100k.pdf-decompress_into] 28.1190 (1.36) 132.0021 (2.33) 29.8972 (1.37) 3.7721 (1.38) 28.4681 (1.36) 0.8692 (3.42) 2683;3118 33,447.9984 (0.73) 26970 1 -test_cramjam_snappy_de_compress_into[plrabn12.txt-compress_into] 1,659.3040 (80.05) 2,075.3290 (36.61) 1,721.3891 (79.12) 58.4017 (21.41) 1,699.4040 (80.98) 63.4862 (250.04) 105;44 580.9262 (0.01) 583 1 -test_cramjam_snappy_de_compress_into[plrabn12.txt-decompress_into] 667.1869 (32.19) 1,016.5200 (17.93) 689.3431 (31.68) 35.4147 (12.99) 670.6540 (31.96) 25.9486 (102.20) 125;95 1,450.6564 (0.03) 1245 1 -test_cramjam_snappy_de_compress_into[urls.10K-compress_into] 1,456.3091 (70.25) 1,919.0491 (33.85) 1,503.5026 (69.10) 55.8022 (20.46) 1,485.0110 (70.77) 52.9042 (208.36) 89;59 665.1136 (0.01) 635 1 -test_cramjam_snappy_de_compress_into[urls.10K-decompress_into] 604.8420 (29.18) 839.7710 (14.81) 624.8297 (28.72) 29.0141 (10.64) 608.5989 (29.00) 27.0902 (106.70) 102;63 1,600.4361 (0.03) 1139 1 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - +------------------------------------------------------------------------------------------------------------------------ benchmark: 24 tests ------------------------------------------------------------------------------------------------------------------------- +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_cramjam_snappy_de_compress_into[Mark.Twain-Tom.Sawyer.txt-compress_into] 39.8240 (1.87) 85.1870 (1.49) 42.1991 (1.85) 4.7949 (1.36) 40.4410 (1.84) 1.5060 (2.22) 1579;1739 23,697.2108 (0.54) 18429 1 +test_cramjam_snappy_de_compress_into[Mark.Twain-Tom.Sawyer.txt-decompress_into] 21.2630 (1.0) 57.2360 (1.0) 22.8284 (1.0) 3.5271 (1.0) 21.9730 (1.0) 0.6780 (1.0) 776;888 43,805.1557 (1.0) 9271 1 +test_cramjam_snappy_de_compress_into[alice29.txt-compress_into] 462.6910 (21.76) 812.9580 (14.20) 508.4205 (22.27) 37.7018 (10.69) 496.9830 (22.62) 26.8805 (39.65) 215;177 1,966.8760 (0.04) 1976 1 +test_cramjam_snappy_de_compress_into[alice29.txt-decompress_into] 190.4900 (8.96) 446.4370 (7.80) 210.2874 (9.21) 25.0303 (7.10) 202.8200 (9.23) 16.8353 (24.83) 274;244 4,755.3978 (0.11) 3229 1 +test_cramjam_snappy_de_compress_into[asyoulik.txt-compress_into] 405.8760 (19.09) 697.8110 (12.19) 430.5061 (18.86) 33.1445 (9.40) 421.2690 (19.17) 25.4420 (37.52) 187;147 2,322.8476 (0.05) 2260 1 +test_cramjam_snappy_de_compress_into[asyoulik.txt-decompress_into] 166.7260 (7.84) 345.2520 (6.03) 175.3985 (7.68) 15.2901 (4.34) 168.3880 (7.66) 9.8003 (14.45) 552;511 5,701.3018 (0.13) 5725 1 +test_cramjam_snappy_de_compress_into[fireworks.jpeg-compress_into] 44.3380 (2.09) 98.5800 (1.72) 47.4147 (2.08) 5.4927 (1.56) 45.6420 (2.08) 2.0510 (3.03) 1932;2103 21,090.4861 (0.48) 19150 1 +test_cramjam_snappy_de_compress_into[fireworks.jpeg-decompress_into] 30.1790 (1.42) 75.7750 (1.32) 32.0674 (1.40) 4.0658 (1.15) 30.7100 (1.40) 1.1880 (1.75) 2248;2630 31,184.3311 (0.71) 28385 1 +test_cramjam_snappy_de_compress_into[geo.protodata-compress_into] 121.2680 (5.70) 400.9240 (7.00) 130.3795 (5.71) 15.5604 (4.41) 125.4560 (5.71) 7.5100 (11.08) 808;1079 7,669.9154 (0.18) 7686 1 +test_cramjam_snappy_de_compress_into[geo.protodata-decompress_into] 61.8930 (2.91) 135.6580 (2.37) 65.3579 (2.86) 7.0477 (2.00) 62.6300 (2.85) 2.0933 (3.09) 1215;1502 15,300.3577 (0.35) 13125 1 +test_cramjam_snappy_de_compress_into[html-compress_into] 128.4720 (6.04) 257.3410 (4.50) 136.2710 (5.97) 12.7074 (3.60) 130.5210 (5.94) 7.5215 (11.09) 854;877 7,338.3187 (0.17) 7303 1 +test_cramjam_snappy_de_compress_into[html-decompress_into] 62.6920 (2.95) 173.2130 (3.03) 70.8147 (3.10) 11.3212 (3.21) 66.6290 (3.03) 6.4820 (9.56) 1799;1844 14,121.3670 (0.32) 12717 1 +test_cramjam_snappy_de_compress_into[html_x_4-compress_into] 511.0130 (24.03) 891.8790 (15.58) 549.7244 (24.08) 41.6672 (11.81) 542.8110 (24.70) 33.1573 (48.90) 150;109 1,819.0935 (0.04) 1447 1 +test_cramjam_snappy_de_compress_into[html_x_4-decompress_into] 258.6400 (12.16) 541.0200 (9.45) 292.8625 (12.83) 37.7322 (10.70) 276.6950 (12.59) 27.8205 (41.03) 338;317 3,414.5720 (0.08) 2543 1 +test_cramjam_snappy_de_compress_into[kppkn.gtb-compress_into] 359.4940 (16.91) 634.0840 (11.08) 385.2166 (16.87) 29.2549 (8.29) 381.5820 (17.37) 28.8383 (42.53) 265;155 2,595.9421 (0.06) 2735 1 +test_cramjam_snappy_de_compress_into[kppkn.gtb-decompress_into] 197.0240 (9.27) 405.3810 (7.08) 209.6119 (9.18) 21.6750 (6.15) 203.1435 (9.25) 12.2140 (18.01) 460;523 4,770.7209 (0.11) 4784 1 +test_cramjam_snappy_de_compress_into[lcet10.txt-compress_into] 1,220.0120 (57.38) 1,754.6500 (30.66) 1,354.5943 (59.34) 97.3167 (27.59) 1,317.4960 (59.96) 82.2825 (121.36) 126;69 738.2284 (0.02) 692 1 +test_cramjam_snappy_de_compress_into[lcet10.txt-decompress_into] 500.1370 (23.52) 904.8270 (15.81) 548.9960 (24.05) 51.5708 (14.62) 534.0950 (24.31) 34.9655 (51.57) 165;156 1,821.5069 (0.04) 1645 1 +test_cramjam_snappy_de_compress_into[paper-100k.pdf-compress_into] 38.5840 (1.81) 253.0600 (4.42) 41.1213 (1.80) 5.1050 (1.45) 39.6210 (1.80) 1.7125 (2.53) 1968;2146 24,318.3137 (0.56) 21464 1 +test_cramjam_snappy_de_compress_into[paper-100k.pdf-decompress_into] 30.2860 (1.42) 75.8210 (1.32) 32.1635 (1.41) 4.2167 (1.20) 30.7870 (1.40) 1.1800 (1.74) 2233;2458 31,091.0992 (0.71) 26565 1 +test_cramjam_snappy_de_compress_into[plrabn12.txt-compress_into] 1,650.9060 (77.64) 2,430.3730 (42.46) 1,770.4333 (77.55) 99.3839 (28.18) 1,755.6610 (79.90) 93.0350 (137.22) 104;35 564.8335 (0.01) 569 1 +test_cramjam_snappy_de_compress_into[plrabn12.txt-decompress_into] 670.7310 (31.54) 1,143.7440 (19.98) 713.4885 (31.25) 64.8742 (18.39) 695.7285 (31.66) 43.1620 (63.66) 72;69 1,401.5642 (0.03) 1078 1 +test_cramjam_snappy_de_compress_into[urls.10K-compress_into] 1,449.2240 (68.16) 2,248.9360 (39.29) 1,585.3133 (69.44) 128.9953 (36.57) 1,550.0310 (70.54) 93.1475 (137.39) 83;54 630.7902 (0.01) 607 1 +test_cramjam_snappy_de_compress_into[urls.10K-decompress_into] 611.5930 (28.76) 1,102.2170 (19.26) 660.0188 (28.91) 50.9816 (14.45) 650.2550 (29.59) 38.4440 (56.70) 133;94 1,515.1083 (0.03) 1110 1 +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` #### Lz4 @@ -180,34 +214,34 @@ test_cramjam_snappy_de_compress_into[urls.10K-decompress_into] `make bench-lz4` ```bash ------------------------------------------------------------------------------------------------------------- benchmark: 24 tests ------------------------------------------------------------------------------------------------------------ -Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_lz4[Mark.Twain-Tom.Sawyer.txt-cramjam] 205.9030 (1.0) 487.6750 (1.16) 235.1050 (1.0) 42.5729 (1.46) 218.6220 (1.0) 21.7648 (1.13) 482;537 4,253.4186 (1.0) 3949 1 -test_lz4[Mark.Twain-Tom.Sawyer.txt-python-lz4] 211.8470 (1.03) 421.1400 (1.0) 258.1943 (1.10) 61.8064 (2.11) 223.0840 (1.02) 60.6820 (3.16) 453;175 3,873.0528 (0.91) 1935 1 -test_lz4[alice29.txt-cramjam] 1,829.0490 (8.88) 2,390.3910 (5.68) 1,939.9762 (8.25) 101.4219 (3.47) 1,913.4190 (8.75) 93.0095 (4.84) 103;34 515.4702 (0.12) 501 1 -test_lz4[alice29.txt-python-lz4] 3,035.4070 (14.74) 3,972.2100 (9.43) 3,263.6920 (13.88) 156.1966 (5.34) 3,215.2680 (14.71) 166.8995 (8.68) 73;16 306.4015 (0.07) 309 1 -test_lz4[asyoulik.txt-cramjam] 1,440.7250 (7.00) 2,176.8530 (5.17) 1,530.6210 (6.51) 86.4176 (2.95) 1,508.6560 (6.90) 75.1295 (3.91) 133;45 653.3296 (0.15) 604 1 -test_lz4[asyoulik.txt-python-lz4] 2,627.0850 (12.76) 4,329.6350 (10.28) 2,802.7557 (11.92) 209.5091 (7.16) 2,757.2175 (12.61) 113.6400 (5.91) 24;26 356.7917 (0.08) 322 1 -test_lz4[fireworks.jpeg-cramjam] 298.4680 (1.45) 534.6860 (1.27) 321.2755 (1.37) 29.2533 (1.0) 307.8030 (1.41) 19.2315 (1.0) 214;195 3,112.5937 (0.73) 1987 1 -test_lz4[fireworks.jpeg-python-lz4] 2,646.1140 (12.85) 3,782.5490 (8.98) 2,811.4612 (11.96) 141.2595 (4.83) 2,785.4975 (12.74) 133.7265 (6.95) 25;9 355.6869 (0.08) 204 1 -test_lz4[geo.protodata-cramjam] 488.7870 (2.37) 840.0170 (1.99) 522.8667 (2.22) 49.3122 (1.69) 508.8640 (2.33) 35.3760 (1.84) 169;150 1,912.5334 (0.45) 1686 1 -test_lz4[geo.protodata-python-lz4] 671.5480 (3.26) 1,211.8090 (2.88) 719.1932 (3.06) 47.4541 (1.62) 706.5320 (3.23) 35.7863 (1.86) 115;89 1,390.4469 (0.33) 1137 1 -test_lz4[html-cramjam] 508.0540 (2.47) 870.8210 (2.07) 556.4302 (2.37) 55.0444 (1.88) 539.5200 (2.47) 40.0038 (2.08) 162;130 1,797.1706 (0.42) 1387 1 -test_lz4[html-python-lz4] 696.9610 (3.38) 1,266.9380 (3.01) 769.8121 (3.27) 89.4869 (3.06) 740.7540 (3.39) 54.1665 (2.82) 94;98 1,299.0183 (0.31) 1085 1 -test_lz4[html_x_4-cramjam] 2,149.2940 (10.44) 4,065.1930 (9.65) 2,412.1607 (10.26) 305.1893 (10.43) 2,325.4890 (10.64) 184.0815 (9.57) 38;39 414.5661 (0.10) 348 1 -test_lz4[html_x_4-python-lz4] 2,932.0430 (14.24) 3,806.8100 (9.04) 3,143.4955 (13.37) 165.2213 (5.65) 3,102.5030 (14.19) 111.0107 (5.77) 40;22 318.1172 (0.07) 231 1 -test_lz4[kppkn.gtb-cramjam] 1,679.8610 (8.16) 2,542.5370 (6.04) 1,789.8313 (7.61) 106.6801 (3.65) 1,768.8005 (8.09) 87.1930 (4.53) 59;27 558.7119 (0.13) 462 1 -test_lz4[kppkn.gtb-python-lz4] 3,209.9940 (15.59) 4,404.1140 (10.46) 3,393.7427 (14.44) 157.5778 (5.39) 3,351.8800 (15.33) 150.1042 (7.81) 31;12 294.6599 (0.07) 275 1 -test_lz4[lcet10.txt-cramjam] 4,852.5360 (23.57) 7,953.2970 (18.89) 5,275.6863 (22.44) 474.4112 (16.22) 5,182.7910 (23.71) 278.4967 (14.48) 9;9 189.5488 (0.04) 199 1 -test_lz4[lcet10.txt-python-lz4] 8,251.3110 (40.07) 12,421.4480 (29.49) 8,792.2057 (37.40) 554.4787 (18.95) 8,662.1010 (39.62) 373.2330 (19.41) 7;7 113.7371 (0.03) 114 1 -test_lz4[paper-100k.pdf-cramjam] 405.3170 (1.97) 673.3370 (1.60) 429.1101 (1.83) 34.4027 (1.18) 419.5350 (1.92) 25.4660 (1.32) 165;148 2,330.4041 (0.55) 2110 1 -test_lz4[paper-100k.pdf-python-lz4] 1,752.0420 (8.51) 2,357.7380 (5.60) 1,847.5694 (7.86) 86.1250 (2.94) 1,821.4480 (8.33) 86.6703 (4.51) 125;34 541.2517 (0.13) 517 1 -test_lz4[plrabn12.txt-cramjam] 5,994.5400 (29.11) 10,160.5840 (24.13) 6,528.1635 (27.77) 479.6425 (16.40) 6,415.1230 (29.34) 244.5412 (12.72) 11;11 153.1824 (0.04) 127 1 -test_lz4[plrabn12.txt-python-lz4] 11,911.5420 (57.85) 19,488.4880 (46.28) 13,580.5948 (57.76) 1,975.2401 (67.52) 12,662.1220 (57.92) 1,491.3778 (77.55) 13;13 73.6345 (0.02) 81 1 -test_lz4[urls.10K-cramjam] 6,125.3900 (29.75) 10,022.6590 (23.80) 6,906.8542 (29.38) 800.5537 (27.37) 6,660.1280 (30.46) 327.7380 (17.04) 15;16 144.7837 (0.03) 153 1 -test_lz4[urls.10K-python-lz4] 9,350.5950 (45.41) 13,417.3250 (31.86) 10,231.2214 (43.52) 722.7599 (24.71) 10,112.5745 (46.26) 622.8635 (32.39) 11;8 97.7400 (0.02) 96 1 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +---------------------------------------------------------------------------------------------------------- benchmark: 24 tests ---------------------------------------------------------------------------------------------------------- +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_lz4[Mark.Twain-Tom.Sawyer.txt-cramjam] 265.4710 (1.29) 1,709.2960 (3.48) 283.5229 (1.30) 36.6529 (1.45) 273.4540 (1.29) 20.1355 (1.62) 288;307 3,527.0517 (0.77) 3672 1 +test_lz4[Mark.Twain-Tom.Sawyer.txt-python-lz4] 206.1390 (1.0) 491.0620 (1.0) 218.6584 (1.0) 25.2966 (1.0) 212.6340 (1.0) 12.4433 (1.0) 71;88 4,573.3440 (1.0) 969 1 +test_lz4[alice29.txt-cramjam] 1,591.6270 (7.72) 2,422.1640 (4.93) 1,670.5685 (7.64) 105.3038 (4.16) 1,641.9690 (7.72) 69.4932 (5.58) 47;41 598.5986 (0.13) 583 1 +test_lz4[alice29.txt-python-lz4] 3,014.7940 (14.63) 5,390.9890 (10.98) 3,217.0799 (14.71) 294.7618 (11.65) 3,150.8370 (14.82) 147.8563 (11.88) 20;26 310.8409 (0.07) 315 1 +test_lz4[asyoulik.txt-cramjam] 1,341.9520 (6.51) 2,609.5330 (5.31) 1,435.5756 (6.57) 147.8273 (5.84) 1,403.8710 (6.60) 73.3415 (5.89) 39;52 696.5847 (0.15) 700 1 +test_lz4[asyoulik.txt-python-lz4] 2,624.6560 (12.73) 4,103.2770 (8.36) 2,832.4582 (12.95) 198.1849 (7.83) 2,785.6950 (13.10) 156.1410 (12.55) 42;25 353.0502 (0.08) 351 1 +test_lz4[fireworks.jpeg-cramjam] 279.7980 (1.36) 1,311.0030 (2.67) 312.3522 (1.43) 38.4700 (1.52) 300.8090 (1.41) 26.9838 (2.17) 231;178 3,201.5137 (0.70) 2419 1 +test_lz4[fireworks.jpeg-python-lz4] 2,608.3740 (12.65) 4,068.8360 (8.29) 2,757.1025 (12.61) 187.2245 (7.40) 2,709.8345 (12.74) 98.5295 (7.92) 26;28 362.6996 (0.08) 340 1 +test_lz4[geo.protodata-cramjam] 540.2170 (2.62) 954.5990 (1.94) 576.6946 (2.64) 59.2219 (2.34) 558.5890 (2.63) 40.7170 (3.27) 135;113 1,734.0201 (0.38) 1662 1 +test_lz4[geo.protodata-python-lz4] 670.0600 (3.25) 1,260.5120 (2.57) 706.9607 (3.23) 64.6132 (2.55) 691.1930 (3.25) 34.7968 (2.80) 62;67 1,414.5058 (0.31) 1371 1 +test_lz4[html-cramjam] 557.7070 (2.71) 952.2150 (1.94) 589.8092 (2.70) 40.2866 (1.59) 580.9055 (2.73) 41.6970 (3.35) 131;55 1,695.4636 (0.37) 1478 1 +test_lz4[html-python-lz4] 693.6250 (3.36) 1,243.8680 (2.53) 731.9760 (3.35) 57.6403 (2.28) 717.0350 (3.37) 42.7220 (3.43) 79;68 1,366.1650 (0.30) 1381 1 +test_lz4[html_x_4-cramjam] 1,834.1350 (8.90) 3,089.3180 (6.29) 1,947.9892 (8.91) 130.8457 (5.17) 1,913.9850 (9.00) 93.6955 (7.53) 41;30 513.3499 (0.11) 480 1 +test_lz4[html_x_4-python-lz4] 2,873.1570 (13.94) 4,251.4200 (8.66) 3,030.2486 (13.86) 164.9341 (6.52) 2,984.9600 (14.04) 127.3245 (10.23) 25;20 330.0059 (0.07) 301 1 +test_lz4[kppkn.gtb-cramjam] 1,491.9130 (7.24) 2,781.5860 (5.66) 1,570.2841 (7.18) 87.5343 (3.46) 1,553.1860 (7.30) 72.3630 (5.82) 49;32 636.8274 (0.14) 571 1 +test_lz4[kppkn.gtb-python-lz4] 3,199.8520 (15.52) 4,217.5750 (8.59) 3,359.6494 (15.36) 124.7223 (4.93) 3,325.6840 (15.64) 118.4477 (9.52) 39;21 297.6501 (0.07) 297 1 +test_lz4[lcet10.txt-cramjam] 4,124.9650 (20.01) 5,752.2620 (11.71) 4,313.8181 (19.73) 200.0237 (7.91) 4,261.9530 (20.04) 164.7980 (13.24) 16;9 231.8132 (0.05) 149 1 +test_lz4[lcet10.txt-python-lz4] 8,179.7110 (39.68) 9,336.4010 (19.01) 8,561.2170 (39.15) 252.1781 (9.97) 8,493.2420 (39.94) 365.4150 (29.37) 27;1 116.8058 (0.03) 115 1 +test_lz4[paper-100k.pdf-cramjam] 430.1570 (2.09) 892.4910 (1.82) 460.2170 (2.10) 50.9308 (2.01) 445.1600 (2.09) 32.3620 (2.60) 109;104 2,172.8878 (0.48) 1670 1 +test_lz4[paper-100k.pdf-python-lz4] 1,705.9020 (8.28) 3,126.1760 (6.37) 1,840.7896 (8.42) 179.6683 (7.10) 1,791.6345 (8.43) 88.1440 (7.08) 53;65 543.2451 (0.12) 570 1 +test_lz4[plrabn12.txt-cramjam] 5,020.0420 (24.35) 6,864.6530 (13.98) 5,292.4698 (24.20) 253.9889 (10.04) 5,205.3250 (24.48) 209.6257 (16.85) 18;16 188.9477 (0.04) 161 1 +test_lz4[plrabn12.txt-python-lz4] 11,864.6600 (57.56) 13,558.0060 (27.61) 12,295.0743 (56.23) 312.6359 (12.36) 12,212.9405 (57.44) 337.8520 (27.15) 19;4 81.3334 (0.02) 78 1 +test_lz4[urls.10K-cramjam] 4,636.7460 (22.49) 5,603.8480 (11.41) 4,867.3892 (22.26) 182.9029 (7.23) 4,814.5570 (22.64) 200.3785 (16.10) 27;6 205.4489 (0.04) 125 1 +test_lz4[urls.10K-python-lz4] 9,835.2580 (47.71) 12,131.1310 (24.70) 10,280.7427 (47.02) 417.8376 (16.52) 10,100.7300 (47.50) 393.7080 (31.64) 14;6 97.2692 (0.02) 74 1 +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` #### Brotli @@ -218,30 +252,30 @@ test_lz4[urls.10K-python-lz4] 9,350.5950 (45.41) 13,417 ----------------------------------------------------------------------------------------------------- benchmark: 24 tests ------------------------------------------------------------------------------------------------------ Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_brotli[Mark.Twain-Tom.Sawyer.txt-brotli] 18.2539 (1.0) 22.8656 (1.14) 18.9863 (1.0) 0.7931 (1.94) 18.8713 (1.0) 0.3620 (1.0) 3;3 52.6695 (1.0) 51 1 -test_brotli[Mark.Twain-Tom.Sawyer.txt-cramjam] 18.4573 (1.01) 19.9932 (1.0) 19.0165 (1.00) 0.4094 (1.0) 18.9207 (1.00) 0.5462 (1.51) 18;0 52.5860 (1.00) 51 1 -test_brotli[alice29.txt-brotli] 225.5098 (12.35) 233.2325 (11.67) 227.9498 (12.01) 3.2069 (7.83) 226.3776 (12.00) 4.0514 (11.19) 1;0 4.3869 (0.08) 5 1 -test_brotli[alice29.txt-cramjam] 233.5171 (12.79) 235.4350 (11.78) 234.6430 (12.36) 0.7237 (1.77) 234.8783 (12.45) 0.8757 (2.42) 2;0 4.2618 (0.08) 5 1 -test_brotli[asyoulik.txt-brotli] 187.4845 (10.27) 197.4504 (9.88) 193.1492 (10.17) 3.7175 (9.08) 194.0205 (10.28) 4.5618 (12.60) 2;0 5.1773 (0.10) 5 1 -test_brotli[asyoulik.txt-cramjam] 185.8101 (10.18) 192.5796 (9.63) 189.0552 (9.96) 2.4759 (6.05) 189.2976 (10.03) 3.5369 (9.77) 2;0 5.2895 (0.10) 6 1 -test_brotli[fireworks.jpeg-brotli] 74.7665 (4.10) 107.0830 (5.36) 82.2891 (4.33) 8.4381 (20.61) 80.2819 (4.25) 8.3648 (23.11) 1;1 12.1523 (0.23) 13 1 -test_brotli[fireworks.jpeg-cramjam] 74.2872 (4.07) 80.4223 (4.02) 76.5993 (4.03) 2.0518 (5.01) 75.4147 (4.00) 3.2772 (9.05) 4;0 13.0549 (0.25) 13 1 -test_brotli[geo.protodata-brotli] 128.6399 (7.05) 130.4648 (6.53) 129.3095 (6.81) 0.6225 (1.52) 129.3085 (6.85) 0.8858 (2.45) 2;0 7.7334 (0.15) 8 1 -test_brotli[geo.protodata-cramjam] 132.1978 (7.24) 135.9461 (6.80) 133.4739 (7.03) 1.3324 (3.25) 132.8753 (7.04) 1.8923 (5.23) 2;0 7.4921 (0.14) 8 1 -test_brotli[html-brotli] 134.8012 (7.38) 145.0839 (7.26) 136.9357 (7.21) 3.3821 (8.26) 135.6421 (7.19) 1.6070 (4.44) 1;1 7.3027 (0.14) 8 1 -test_brotli[html-cramjam] 138.4083 (7.58) 142.2173 (7.11) 139.8572 (7.37) 1.4800 (3.62) 139.2130 (7.38) 2.5172 (6.95) 2;0 7.1501 (0.14) 8 1 -test_brotli[html_x_4-brotli] 171.6389 (9.40) 180.3112 (9.02) 176.3479 (9.29) 3.4427 (8.41) 176.0761 (9.33) 6.0443 (16.70) 3;0 5.6706 (0.11) 6 1 -test_brotli[html_x_4-cramjam] 180.0620 (9.86) 196.8542 (9.85) 188.7382 (9.94) 7.2089 (17.61) 189.4560 (10.04) 15.2504 (42.13) 3;0 5.2983 (0.10) 6 1 -test_brotli[kppkn.gtb-brotli] 447.6251 (24.52) 475.9302 (23.80) 463.3429 (24.40) 10.8179 (26.43) 464.2027 (24.60) 15.2439 (42.12) 2;0 2.1582 (0.04) 5 1 -test_brotli[kppkn.gtb-cramjam] 446.2874 (24.45) 456.8182 (22.85) 450.9967 (23.75) 4.4023 (10.75) 452.4705 (23.98) 6.8669 (18.97) 2;0 2.2173 (0.04) 5 1 -test_brotli[lcet10.txt-brotli] 714.6673 (39.15) 734.2637 (36.73) 723.5614 (38.11) 8.6142 (21.04) 720.8160 (38.20) 15.2477 (42.13) 2;0 1.3821 (0.03) 5 1 -test_brotli[lcet10.txt-cramjam] 736.3283 (40.34) 811.1870 (40.57) 764.1775 (40.25) 30.8189 (75.28) 754.4741 (39.98) 46.2688 (127.83) 1;0 1.3086 (0.02) 5 1 -test_brotli[paper-100k.pdf-brotli] 406.3972 (22.26) 408.4076 (20.43) 407.4876 (21.46) 0.8200 (2.00) 407.7548 (21.61) 1.3093 (3.62) 2;0 2.4541 (0.05) 5 1 -test_brotli[paper-100k.pdf-cramjam] 409.8010 (22.45) 434.1073 (21.71) 421.4300 (22.20) 9.7730 (23.87) 417.9097 (22.15) 14.9918 (41.42) 2;0 2.3729 (0.05) 5 1 -test_brotli[plrabn12.txt-brotli] 794.1873 (43.51) 819.0397 (40.97) 806.7543 (42.49) 10.6155 (25.93) 803.9709 (42.60) 18.2808 (50.51) 2;0 1.2395 (0.02) 5 1 -test_brotli[plrabn12.txt-cramjam] 802.7903 (43.98) 818.8850 (40.96) 811.7899 (42.76) 7.6594 (18.71) 814.7583 (43.17) 14.3045 (39.52) 1;0 1.2318 (0.02) 5 1 -test_brotli[urls.10K-brotli] 1,282.8863 (70.28) 1,329.7179 (66.51) 1,303.8467 (68.67) 19.9188 (48.66) 1,294.4292 (68.59) 32.3713 (89.43) 2;0 0.7670 (0.01) 5 1 -test_brotli[urls.10K-cramjam] 1,300.4646 (71.24) 1,344.7246 (67.26) 1,317.4961 (69.39) 20.5248 (50.14) 1,305.3920 (69.17) 34.9071 (96.44) 1;0 0.7590 (0.01) 5 1 +test_brotli[Mark.Twain-Tom.Sawyer.txt-brotli] 19.3134 (1.05) 27.2349 (1.30) 21.3094 (1.10) 2.6975 (4.93) 19.9170 (1.03) 2.5291 (4.19) 4;2 46.9276 (0.91) 19 1 +test_brotli[Mark.Twain-Tom.Sawyer.txt-cramjam] 18.4340 (1.0) 21.0191 (1.0) 19.3708 (1.0) 0.5474 (1.0) 19.3012 (1.0) 0.6043 (1.0) 19;1 51.6241 (1.0) 49 1 +test_brotli[alice29.txt-brotli] 224.1868 (12.16) 227.8789 (10.84) 225.7909 (11.66) 1.3771 (2.52) 225.6526 (11.69) 1.7266 (2.86) 2;0 4.4289 (0.09) 5 1 +test_brotli[alice29.txt-cramjam] 231.9518 (12.58) 234.0236 (11.13) 233.0551 (12.03) 0.9388 (1.72) 233.5518 (12.10) 1.6091 (2.66) 2;0 4.2908 (0.08) 5 1 +test_brotli[asyoulik.txt-brotli] 179.9550 (9.76) 184.0013 (8.75) 182.1853 (9.41) 1.4951 (2.73) 182.4221 (9.45) 2.1882 (3.62) 2;0 5.4889 (0.11) 6 1 +test_brotli[asyoulik.txt-cramjam] 184.3326 (10.00) 186.8581 (8.89) 185.3278 (9.57) 0.9119 (1.67) 185.2473 (9.60) 1.0866 (1.80) 2;0 5.3958 (0.10) 6 1 +test_brotli[fireworks.jpeg-brotli] 71.5389 (3.88) 75.3505 (3.58) 73.0897 (3.77) 0.9971 (1.82) 72.9035 (3.78) 0.9791 (1.62) 4;1 13.6818 (0.27) 14 1 +test_brotli[fireworks.jpeg-cramjam] 72.5284 (3.93) 79.9106 (3.80) 73.9391 (3.82) 1.8016 (3.29) 73.4397 (3.80) 0.7829 (1.30) 1;1 13.5246 (0.26) 14 1 +test_brotli[geo.protodata-brotli] 126.7453 (6.88) 131.5479 (6.26) 127.8292 (6.60) 1.5408 (2.81) 127.3805 (6.60) 0.6052 (1.00) 1;1 7.8229 (0.15) 8 1 +test_brotli[geo.protodata-cramjam] 130.8282 (7.10) 133.5629 (6.35) 131.9438 (6.81) 0.9135 (1.67) 131.6701 (6.82) 1.2637 (2.09) 3;0 7.5790 (0.15) 8 1 +test_brotli[html-brotli] 134.3089 (7.29) 136.8154 (6.51) 135.5654 (7.00) 0.9767 (1.78) 135.6640 (7.03) 1.7840 (2.95) 4;0 7.3765 (0.14) 8 1 +test_brotli[html-cramjam] 137.2950 (7.45) 141.9890 (6.76) 138.5814 (7.15) 1.6047 (2.93) 137.9258 (7.15) 1.2205 (2.02) 1;1 7.2160 (0.14) 7 1 +test_brotli[html_x_4-brotli] 164.2704 (8.91) 167.2311 (7.96) 165.5355 (8.55) 1.1562 (2.11) 165.5598 (8.58) 2.0072 (3.32) 3;0 6.0410 (0.12) 6 1 +test_brotli[html_x_4-cramjam] 167.8992 (9.11) 172.8526 (8.22) 169.7993 (8.77) 1.6606 (3.03) 169.4014 (8.78) 0.9248 (1.53) 2;1 5.8893 (0.11) 6 1 +test_brotli[kppkn.gtb-brotli] 427.2876 (23.18) 436.4801 (20.77) 431.6189 (22.28) 3.8362 (7.01) 430.7206 (22.32) 6.4873 (10.73) 2;0 2.3169 (0.04) 5 1 +test_brotli[kppkn.gtb-cramjam] 466.2677 (25.29) 497.4289 (23.67) 478.3085 (24.69) 13.1943 (24.10) 475.5884 (24.64) 21.5743 (35.70) 1;0 2.0907 (0.04) 5 1 +test_brotli[lcet10.txt-brotli] 745.3311 (40.43) 848.7560 (40.38) 799.6870 (41.28) 48.8617 (89.26) 809.2787 (41.93) 93.6578 (154.98) 2;0 1.2505 (0.02) 5 1 +test_brotli[lcet10.txt-cramjam] 762.7996 (41.38) 880.4882 (41.89) 798.9608 (41.25) 47.1400 (86.11) 788.5498 (40.85) 44.9762 (74.43) 1;0 1.2516 (0.02) 5 1 +test_brotli[paper-100k.pdf-brotli] 404.2915 (21.93) 409.0611 (19.46) 406.8449 (21.00) 1.9765 (3.61) 407.1359 (21.09) 3.3423 (5.53) 2;0 2.4579 (0.05) 5 1 +test_brotli[paper-100k.pdf-cramjam] 406.3386 (22.04) 410.8830 (19.55) 408.1782 (21.07) 1.8307 (3.34) 407.8163 (21.13) 2.7956 (4.63) 2;0 2.4499 (0.05) 5 1 +test_brotli[plrabn12.txt-brotli] 784.4900 (42.56) 797.3552 (37.93) 789.0102 (40.73) 4.9220 (8.99) 787.9514 (40.82) 4.6725 (7.73) 1;0 1.2674 (0.02) 5 1 +test_brotli[plrabn12.txt-cramjam] 793.1241 (43.02) 845.8696 (40.24) 816.3642 (42.14) 26.2341 (47.92) 802.2609 (41.57) 48.5658 (80.37) 2;0 1.2249 (0.02) 5 1 +test_brotli[urls.10K-brotli] 1,251.5425 (67.89) 1,265.7612 (60.22) 1,257.9783 (64.94) 6.7086 (12.26) 1,256.7268 (65.11) 12.8291 (21.23) 1;0 0.7949 (0.02) 5 1 +test_brotli[urls.10K-cramjam] 1,290.7796 (70.02) 1,365.9914 (64.99) 1,322.5113 (68.27) 33.0967 (60.46) 1,304.2875 (67.58) 54.4744 (90.14) 1;0 0.7561 (0.01) 5 1 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` diff --git a/benchmarks/test_bench.py b/benchmarks/test_bench.py index b7389df2..83fe2cef 100644 --- a/benchmarks/test_bench.py +++ b/benchmarks/test_bench.py @@ -20,18 +20,18 @@ def round_trip(compress, decompress, data, **kwargs): "use_cramjam", (True, False), ids=lambda val: "cramjam" if val else "snappy" ) @pytest.mark.parametrize("file", FILES, ids=lambda val: val.name) -def test_snappy(benchmark, file, use_cramjam: bool): +def test_snappy_raw(benchmark, file, use_cramjam: bool): """ - Uses snappy compression + Uses snappy compression raw """ import snappy - data = file.read_bytes() + data = bytearray(file.read_bytes()) if use_cramjam: benchmark( round_trip, - compress=cramjam.snappy.compress, - decompress=cramjam.snappy.decompress, + compress=cramjam.snappy.compress_raw, + decompress=cramjam.snappy.decompress_raw, data=data, ) else: @@ -43,6 +43,34 @@ def test_snappy(benchmark, file, use_cramjam: bool): ) +@pytest.mark.parametrize( + "use_cramjam", (True, False), ids=lambda val: "cramjam" if val else "snappy" +) +@pytest.mark.parametrize("file", FILES, ids=lambda val: val.name) +def test_snappy_framed(benchmark, file, use_cramjam: bool): + """ + Uses snappy compression framed + """ + import snappy + + data = bytearray(file.read_bytes()) + if use_cramjam: + benchmark( + round_trip, + compress=cramjam.snappy.compress, + decompress=cramjam.snappy.decompress, + data=data, + ) + else: + compressor = snappy.StreamCompressor() + decompressor = snappy.StreamDecompressor() + benchmark( + round_trip, + compress=compressor.compress, + decompress=decompressor.decompress, + data=data, + ) + @pytest.mark.parametrize("op", ("decompress_into", "compress_into")) @pytest.mark.parametrize("file", FILES, ids=lambda val: val.name) def test_cramjam_snappy_de_compress_into(benchmark, op, file): diff --git a/src/brotli.rs b/src/brotli.rs index 55906c90..61dc44fd 100644 --- a/src/brotli.rs +++ b/src/brotli.rs @@ -1,10 +1,11 @@ use crate::exceptions::{CompressionError, DecompressionError}; -use crate::{to_py_err, BytesType, Output}; +use crate::{to_py_err, BytesType, WriteablePyByteArray}; use numpy::PyArray1; use pyo3::prelude::*; -use pyo3::types::{PyByteArray, PyBytes}; +use pyo3::types::PyBytes; use pyo3::wrap_pyfunction; use pyo3::{PyResult, Python}; +use std::io::Cursor; pub fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(compress, m)?)?; @@ -23,42 +24,7 @@ pub fn init_py_module(m: &PyModule) -> PyResult<()> { /// ``` #[pyfunction] pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { - match data { - BytesType::Bytes(input) => match output_len { - Some(len) => { - let pybytes = PyBytes::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; - Ok(()) - })?; - Ok(BytesType::Bytes(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; - Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) - } - }, - BytesType::ByteArray(input) => match output_len { - Some(len) => { - let mut size = 0; - let pybytes = PyByteArray::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - size = to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; - Ok(()) - })?; - pybytes.resize(size)?; - Ok(BytesType::ByteArray(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; - Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) - } - }, - } + crate::generic!(decompress(data), py = py, output_len = output_len) } /// Brotli compression. @@ -75,42 +41,7 @@ pub fn compress<'a>( level: Option, output_len: Option, ) -> PyResult> { - match data { - BytesType::Bytes(input) => match output_len { - Some(len) => { - let pybytes = PyBytes::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; - Ok(()) - })?; - Ok(BytesType::Bytes(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; - Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) - } - }, - BytesType::ByteArray(input) => match output_len { - Some(len) => { - let mut size = 0; - let pybytes = PyByteArray::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - size = to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; - Ok(()) - })?; - pybytes.resize(size)?; - Ok(BytesType::ByteArray(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; - Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) - } - }, - } + crate::generic!(compress(data), py = py, output_len = output_len, level = level) } /// Compress directly into an output buffer @@ -121,59 +52,33 @@ pub fn compress_into<'a>( array: &PyArray1, level: Option, ) -> PyResult { - crate::de_compress_into(data.as_bytes(), array, |bytes, out| { - self::internal::compress(bytes, out, level) - }) + crate::generic_into!(compress(data -> array), level) } /// Decompress directly into an output buffer #[pyfunction] pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyArray1) -> PyResult { - crate::de_compress_into(data.as_bytes(), array, self::internal::decompress) + crate::generic_into!(decompress(data -> array)) } pub(crate) mod internal { - use crate::Output; use brotli2::read::{BrotliDecoder, BrotliEncoder}; use std::io::prelude::*; - use std::io::{Cursor, Error}; + use std::io::Error; /// Decompress via Brotli - pub fn decompress<'a>(data: &[u8], output: Output<'a>) -> Result { - let mut decoder = BrotliDecoder::new(data); - match output { - Output::Slice(slice) => { - let mut n_bytes = 0; - loop { - let count = decoder.read(&mut slice[n_bytes..])?; - if count == 0 { - break; - } - n_bytes += count; - } - Ok(n_bytes) - } - Output::Vector(v) => decoder.read_to_end(v), - } + pub fn decompress(input: &[u8], output: &mut W) -> Result { + let mut decoder = BrotliDecoder::new(input); + let n_bytes = std::io::copy(&mut decoder, output)?; + Ok(n_bytes as usize) } /// Compress via Brotli - pub fn compress<'a>(data: &'a [u8], output: Output<'a>, level: Option) -> Result { + pub fn compress(input: &[u8], output: &mut W, level: Option) -> Result { let level = level.unwrap_or_else(|| 11); - - match output { - Output::Slice(slice) => { - let buffer = Cursor::new(slice); - let mut encoder = brotli2::write::BrotliEncoder::new(buffer, level); - encoder.write_all(data)?; - let buffer = encoder.finish()?; - Ok(buffer.position() as usize) - } - Output::Vector(v) => { - let mut encoder = BrotliEncoder::new(data, level); - encoder.read_to_end(v) - } - } + let mut encoder = BrotliEncoder::new(input, level); + let n_bytes = std::io::copy(&mut encoder, output)?; + Ok(n_bytes as usize) } } diff --git a/src/deflate.rs b/src/deflate.rs index f6d72410..c0a2f5df 100644 --- a/src/deflate.rs +++ b/src/deflate.rs @@ -1,10 +1,11 @@ use crate::exceptions::{CompressionError, DecompressionError}; -use crate::{to_py_err, BytesType, Output}; +use crate::{to_py_err, BytesType, WriteablePyByteArray}; use numpy::PyArray1; use pyo3::prelude::*; -use pyo3::types::{PyByteArray, PyBytes}; +use pyo3::types::PyBytes; use pyo3::wrap_pyfunction; use pyo3::{PyResult, Python}; +use std::io::Cursor; pub fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(compress, m)?)?; @@ -23,42 +24,7 @@ pub fn init_py_module(m: &PyModule) -> PyResult<()> { /// ``` #[pyfunction] pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { - match data { - BytesType::Bytes(input) => match output_len { - Some(len) => { - let pybytes = PyBytes::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; - Ok(()) - })?; - Ok(BytesType::Bytes(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; - Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) - } - }, - BytesType::ByteArray(input) => match output_len { - Some(len) => { - let mut size = 0; - let pybytes = PyByteArray::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - size = to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; - Ok(()) - })?; - pybytes.resize(size)?; - Ok(BytesType::ByteArray(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; - Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) - } - }, - } + crate::generic!(decompress(data), py = py, output_len = output_len) } /// Deflate compression. @@ -75,42 +41,7 @@ pub fn compress<'a>( level: Option, output_len: Option, ) -> PyResult> { - match data { - BytesType::Bytes(input) => match output_len { - Some(len) => { - let pybytes = PyBytes::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; - Ok(()) - })?; - Ok(BytesType::Bytes(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; - Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) - } - }, - BytesType::ByteArray(input) => match output_len { - Some(len) => { - let mut size = 0; - let pybytes = PyByteArray::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - size = to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; - Ok(()) - })?; - pybytes.resize(size)?; - Ok(BytesType::ByteArray(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; - Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) - } - }, - } + crate::generic!(compress(data), py = py, output_len = output_len, level = level) } /// Compress directly into an output buffer @@ -121,60 +52,35 @@ pub fn compress_into<'a>( array: &PyArray1, level: Option, ) -> PyResult { - crate::de_compress_into(data.as_bytes(), array, |bytes, out| { - self::internal::compress(bytes, out, level) - }) + crate::generic_into!(compress(data -> array), level) } /// Decompress directly into an output buffer #[pyfunction] pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyArray1) -> PyResult { - crate::de_compress_into(data.as_bytes(), array, self::internal::decompress) + crate::generic_into!(decompress(data -> array)) } pub(crate) mod internal { - use crate::Output; use flate2::read::{DeflateDecoder, DeflateEncoder}; use flate2::Compression; use std::io::prelude::*; - use std::io::{Cursor, Error}; + use std::io::Error; /// Decompress gzip data - pub fn decompress<'a>(data: &[u8], output: Output<'a>) -> Result { - let mut decoder = DeflateDecoder::new(data); - match output { - Output::Slice(slice) => { - let mut n_bytes = 0; - loop { - let count = decoder.read(&mut slice[n_bytes..])?; - if count == 0 { - break; - } - n_bytes += count; - } - Ok(n_bytes) - } - Output::Vector(v) => decoder.read_to_end(v), - } + pub fn decompress(input: &[u8], output: &mut W) -> Result { + let mut decoder = DeflateDecoder::new(input); + let n_bytes = std::io::copy(&mut decoder, output)?; + Ok(n_bytes as usize) } /// Compress gzip data - pub fn compress<'a>(data: &'a [u8], output: Output<'a>, level: Option) -> Result { + pub fn compress(input: &[u8], output: &mut W, level: Option) -> Result { let level = level.unwrap_or_else(|| 6); - match output { - Output::Slice(slice) => { - let buffer = Cursor::new(slice); - let mut encoder = flate2::write::DeflateEncoder::new(buffer, Compression::new(level)); - encoder.write_all(data)?; - let buffer = encoder.finish()?; - Ok(buffer.position() as usize) - } - Output::Vector(v) => { - let mut encoder = DeflateEncoder::new(data, Compression::new(level)); - encoder.read_to_end(v) - } - } + let mut encoder = DeflateEncoder::new(input, Compression::new(level)); + let n_bytes = std::io::copy(&mut encoder, output)?; + Ok(n_bytes as usize) } } diff --git a/src/gzip.rs b/src/gzip.rs index 5d0b592f..381fb093 100644 --- a/src/gzip.rs +++ b/src/gzip.rs @@ -1,10 +1,11 @@ use crate::exceptions::{CompressionError, DecompressionError}; -use crate::{to_py_err, BytesType, Output}; +use crate::{to_py_err, BytesType, WriteablePyByteArray}; use numpy::PyArray1; use pyo3::prelude::*; -use pyo3::types::{PyByteArray, PyBytes}; +use pyo3::types::PyBytes; use pyo3::wrap_pyfunction; use pyo3::{PyResult, Python}; +use std::io::Cursor; pub fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(compress, m)?)?; @@ -23,42 +24,7 @@ pub fn init_py_module(m: &PyModule) -> PyResult<()> { /// ``` #[pyfunction] pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { - match data { - BytesType::Bytes(input) => match output_len { - Some(len) => { - let pybytes = PyBytes::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; - Ok(()) - })?; - Ok(BytesType::Bytes(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; - Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) - } - }, - BytesType::ByteArray(input) => match output_len { - Some(len) => { - let mut size = 0; - let pybytes = PyByteArray::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - size = to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; - Ok(()) - })?; - pybytes.resize(size)?; - Ok(BytesType::ByteArray(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; - Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) - } - }, - } + crate::generic!(decompress(data), py = py, output_len = output_len) } /// Gzip compression. @@ -75,42 +41,7 @@ pub fn compress<'a>( level: Option, output_len: Option, ) -> PyResult> { - match data { - BytesType::Bytes(input) => match output_len { - Some(len) => { - let pybytes = PyBytes::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; - Ok(()) - })?; - Ok(BytesType::Bytes(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; - Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) - } - }, - BytesType::ByteArray(input) => match output_len { - Some(len) => { - let mut size = 0; - let pybytes = PyByteArray::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - size = to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; - Ok(()) - })?; - pybytes.resize(size)?; - Ok(BytesType::ByteArray(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; - Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) - } - }, - } + crate::generic!(compress(data), py = py, output_len = output_len, level = level) } /// Compress directly into an output buffer @@ -121,62 +52,33 @@ pub fn compress_into<'a>( array: &PyArray1, level: Option, ) -> PyResult { - crate::de_compress_into(data.as_bytes(), array, |bytes, out| { - self::internal::compress(bytes, out, level) - }) + crate::generic_into!(compress(data -> array), level) } /// Decompress directly into an output buffer #[pyfunction] pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyArray1) -> PyResult { - crate::de_compress_into(data.as_bytes(), array, self::internal::decompress) + crate::generic_into!(decompress(data -> array)) } pub(crate) mod internal { - use crate::Output; - use flate2::read::GzDecoder; + use flate2::read::{GzDecoder, GzEncoder}; use flate2::Compression; use std::io::prelude::*; - use std::io::{Cursor, Error}; + use std::io::Error; /// Decompress gzip data - pub fn decompress<'a>(data: &'a [u8], output: Output<'a>) -> Result { - let mut decoder = GzDecoder::new(data); - match output { - Output::Slice(slice) => { - let mut n_bytes = 0; - loop { - let count = decoder.read(&mut slice[n_bytes..])?; - if count == 0 { - break; - } - n_bytes += count; - } - Ok(n_bytes) - } - Output::Vector(v) => decoder.read_to_end(v), - } + pub fn decompress(input: &[u8], output: &mut W) -> Result { + let mut decoder = GzDecoder::new(input); + let n_bytes = std::io::copy(&mut decoder, output)?; + Ok(n_bytes as usize) } /// Compress gzip data - pub fn compress<'a>(data: &'a [u8], output: Output<'a>, level: Option) -> Result { + pub fn compress(input: &[u8], output: &mut W, level: Option) -> Result { let level = level.unwrap_or_else(|| 6); - match output { - Output::Slice(slice) => { - // GzEncoder::read does not output the 'tail' of the gzip encoding. So we need to - // calculate the checksum and the data length manually. - use flate2::write::GzEncoder; - let cursor = Cursor::new(slice); - let mut encoder = GzEncoder::new(cursor, Compression::new(level)); - encoder.write_all(data)?; - let writer = encoder.finish()?; - Ok(writer.position() as usize) - } - Output::Vector(v) => { - use flate2::read::GzEncoder; - let mut encoder = GzEncoder::new(data, Compression::new(level)); - encoder.read_to_end(v) - } - } + let mut encoder = GzEncoder::new(input, Compression::new(level)); + let n_bytes = std::io::copy(&mut encoder, output)?; + Ok(n_bytes as usize) } } diff --git a/src/lib.rs b/src/lib.rs index 85ebfbc4..c5a42c5b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,7 +30,11 @@ use pyo3::prelude::*; use pyo3::types::{PyByteArray, PyBytes}; use exceptions::{CompressionError, DecompressionError}; -use numpy::PyArray1; +use std::io::Write; + +#[cfg(feature = "mimallocator")] +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; #[derive(FromPyObject)] pub enum BytesType<'a> { @@ -41,6 +45,7 @@ pub enum BytesType<'a> { } impl<'a> BytesType<'a> { + #[allow(dead_code)] fn len(&self) -> usize { self.as_bytes().len() } @@ -61,12 +66,47 @@ impl<'a> IntoPy for BytesType<'a> { } } -/// Buffer to de/compression algorithms' output. -/// ::Vector used when the output len cannot be determined, and/or resulting -/// python object cannot be resized to what the actual bytes decoded was. -pub enum Output<'a> { - Slice(&'a mut [u8]), - Vector(&'a mut Vec), +/// A wrapper to PyByteArray, providing the std::io::Write impl +pub struct WriteablePyByteArray<'a> { + array: &'a PyByteArray, + position: usize, +} + +impl<'a> WriteablePyByteArray<'a> { + pub fn new(py: Python<'a>, len: usize) -> Self { + Self { + array: PyByteArray::new_with(py, len, |_| Ok(())).unwrap(), + position: 0, + } + } + pub fn into_inner(mut self) -> PyResult<&'a PyByteArray> { + self.flush() + .map_err(|e| pyo3::exceptions::PyBufferError::new_err(e.to_string()))?; + Ok(self.array) + } +} + +impl<'a> Write for WriteablePyByteArray<'a> { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + if (self.position + buf.len()) > self.array.len() { + self.array.resize(self.position + buf.len()).unwrap() + } + let array_bytes = unsafe { self.array.as_bytes_mut() }; + + //let mut wtr = Cursor::new(&mut array_bytes[self.position..]); + //let n_bytes = wtr.write(buf).unwrap(); + let buf_len = buf.len(); + array_bytes[self.position..self.position + buf_len].copy_from_slice(buf); + + self.position += buf.len(); + Ok(buf.len()) + } + fn flush(&mut self) -> std::io::Result<()> { + if self.array.len() != self.position { + self.array.resize(self.position).unwrap(); + } + Ok(()) + } } /// Expose de/compression_into(data: BytesType<'_>, array: &PyArray1) -> PyResult @@ -74,19 +114,64 @@ pub enum Output<'a> { /// /// This will handle gaining access to the Python's array as a buffer for an underlying de/compression /// function which takes the normal `&[u8]` and `Output` types -pub fn de_compress_into(data: &[u8], array: &PyArray1, func: F) -> PyResult -where - F: for<'a> FnOnce(&'a [u8], Output<'a>) -> std::io::Result, -{ - let mut array_mut = unsafe { array.as_array_mut() }; - - let buffer: &mut [u8] = to_py_err!(DecompressionError -> array_mut.as_slice_mut().ok_or_else(|| { - pyo3::exceptions::PyBufferError::new_err("Failed to get mutable slice from array.") - }))?; - - let output = Output::Slice(buffer); - let size = to_py_err!(DecompressionError -> func(data, output))?; - Ok(size) +#[macro_export] +macro_rules! generic_into { + ($op:ident($input:ident -> $output:ident) $(, $level:ident)?) => { + { + let mut array_mut = unsafe { $output.as_array_mut() }; + + let buffer: &mut [u8] = to_py_err!(DecompressionError -> array_mut.as_slice_mut().ok_or_else(|| { + pyo3::exceptions::PyBufferError::new_err("Failed to get mutable slice from array.") + }))?; + let mut cursor = Cursor::new(buffer); + let size = to_py_err!(DecompressionError -> self::internal::$op($input.as_bytes(), &mut cursor $(, $level)?))?; + Ok(size) + } + } +} + +#[macro_export] +macro_rules! generic { + ($op:ident($input:ident), py=$py:ident, output_len=$output_len:ident $(, level=$level:ident)?) => { + { + let bytes = $input.as_bytes(); + match $input { + BytesType::Bytes(_) => match $output_len { + Some(len) => { + let pybytes = PyBytes::new_with($py, len, |buffer| { + let mut cursor = Cursor::new(buffer); + if stringify!($op) == "compress" { + to_py_err!(CompressionError -> self::internal::$op(bytes, &mut cursor $(, $level)? ))?; + } else { + to_py_err!(DecompressionError -> self::internal::$op(bytes, &mut cursor $(, $level)? ))?; + } + Ok(()) + })?; + Ok(BytesType::Bytes(pybytes)) + } + None => { + let mut buffer = Vec::new(); + if stringify!($op) == "compress" { + to_py_err!(CompressionError -> self::internal::$op(bytes, &mut buffer $(, $level)? ))?; + } else { + to_py_err!(DecompressionError -> self::internal::$op(bytes, &mut buffer $(, $level)? ))?; + } + + Ok(BytesType::Bytes(PyBytes::new($py, &buffer))) + } + }, + BytesType::ByteArray(_) => { + let mut pybytes = WriteablePyByteArray::new($py, $output_len.unwrap_or_else(|| 0)); + if stringify!($op) == "compress" { + to_py_err!(CompressionError -> self::internal::$op(bytes, &mut pybytes $(, $level)? ))?; + } else { + to_py_err!(DecompressionError -> self::internal::$op(bytes, &mut pybytes $(, $level)? ))?; + } + Ok(BytesType::ByteArray(pybytes.into_inner()?)) + } + } + } + } } #[macro_export] @@ -122,7 +207,7 @@ fn cramjam(py: Python, m: &PyModule) -> PyResult<()> { #[cfg(test)] mod tests { - use super::Output; + use std::io::Cursor; // Default testing data fn gen_data() -> Vec { @@ -138,12 +223,31 @@ mod tests { #[test] fn $name() { let data = gen_data(); - let mut compressed = if stringify!($compress_output) == "Slice" { vec![0; $compressed_len] } else { Vec::new() }; - let compressed_size = crate::$variant::internal::compress(&data, Output::$compress_output(&mut compressed) $(, $level)? ).unwrap(); + + let mut compressed = Vec::new(); + + let compressed_size = if stringify!($decompress_output) == "Slice" { + compressed = (0..data.len()).map(|_| 0).collect::>(); + let mut cursor = Cursor::new(compressed.as_mut_slice()); + crate::$variant::internal::compress(&data, &mut cursor $(, $level)?).unwrap() + } else { + + crate::$variant::internal::compress(&data, &mut compressed $(, $level)?).unwrap() + }; + assert_eq!(compressed_size, $compressed_len); + compressed.truncate(compressed_size); + + let mut decompressed = Vec::new(); + + let decompressed_size = if stringify!($decompress_output) == "Slice" { + decompressed = (0..data.len()).map(|_| 0).collect::>(); + let mut cursor = Cursor::new(decompressed.as_mut_slice()); + crate::$variant::internal::decompress(&compressed, &mut cursor).unwrap() + } else { - let mut decompressed = if stringify!($decompress_output) == "Slice" { vec![0; data.len()] } else { Vec::new() }; - let decompressed_size = crate::$variant::internal::decompress(&compressed, Output::$decompress_output(&mut decompressed)).unwrap(); + crate::$variant::internal::decompress(&compressed, &mut decompressed).unwrap() + }; assert_eq!(decompressed_size, data.len()); if &decompressed[..decompressed_size] != &data { panic!("Decompressed and original data do not match! :-(") diff --git a/src/lz4.rs b/src/lz4.rs index 1ba630a4..cc5ff345 100644 --- a/src/lz4.rs +++ b/src/lz4.rs @@ -50,8 +50,6 @@ pub fn compress<'a>( level: Option, output_len: Option, ) -> PyResult> { - let level = level.unwrap_or_else(|| 4); - match data { BytesType::Bytes(input) => { let out = to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), level))?; @@ -74,7 +72,8 @@ pub(crate) mod internal { /// Compress lz4 data // TODO: lz-fear does not yet support level - pub fn compress(data: &[u8], _level: u32) -> Result, Box> { + pub fn compress(data: &[u8], level: Option) -> Result, Box> { + let _ = level.unwrap_or_else(|| 4); let mut buf = vec![]; lz_fear::framed::CompressionSettings::default().compress(data, &mut buf)?; Ok(buf) diff --git a/src/snappy.rs b/src/snappy.rs index 6db0b2e3..fb443f89 100644 --- a/src/snappy.rs +++ b/src/snappy.rs @@ -1,11 +1,11 @@ use crate::exceptions::{CompressionError, DecompressionError}; -use crate::{to_py_err, BytesType, Output}; +use crate::{to_py_err, BytesType, WriteablePyByteArray}; use numpy::PyArray1; use pyo3::prelude::*; use pyo3::types::{PyByteArray, PyBytes}; use pyo3::wrap_pyfunction; use pyo3::{PyResult, Python}; -use snap::raw::max_compress_len; +use std::io::Cursor; pub fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(compress, m)?)?; @@ -27,45 +27,7 @@ pub fn init_py_module(m: &PyModule) -> PyResult<()> { /// ``` #[pyfunction] pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { - let result = match data { - BytesType::Bytes(bytes) => { - let pybytes = match output_len { - Some(len) => PyBytes::new_with(py, len, |output| { - to_py_err!(DecompressionError -> self::internal::decompress(bytes.as_bytes(), Output::Slice(output)))?; - Ok(()) - })?, - None => { - let mut output = Vec::with_capacity(data.len()); - - to_py_err!(DecompressionError -> self::internal::decompress(bytes.as_bytes(), Output::Vector(&mut output)))?; - PyBytes::new(py, &output) - } - }; - BytesType::Bytes(pybytes) - } - BytesType::ByteArray(bytes_array) => { - let bytes = unsafe { bytes_array.as_bytes() }; - match output_len { - Some(len) => { - let mut actual_len = 0; - let pybytes = PyByteArray::new_with(py, len, |output| { - actual_len = - to_py_err!(DecompressionError -> self::internal::decompress(bytes, Output::Slice(output)))?; - Ok(()) - })?; - pybytes.resize(actual_len)?; - BytesType::ByteArray(pybytes) - } - None => { - let mut output = Vec::with_capacity(data.len()); - to_py_err!(DecompressionError -> self::internal::decompress(bytes, Output::Vector(&mut output)))?; - let pybytes = PyByteArray::new(py, &output); - BytesType::ByteArray(pybytes) - } - } - } - }; - Ok(result) + crate::generic!(decompress(data), py = py, output_len = output_len) } /// Snappy compression. @@ -78,41 +40,7 @@ pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { - // Prefer the user's output_len, fallback to estimate the output len - let estimated_len = output_len.unwrap_or_else(|| max_compress_len(data.len())); - - let result = match data { - BytesType::Bytes(bytes) => { - // user provided the exact output len - if output_len.is_some() { - let pybytes = PyBytes::new_with(py, estimated_len, |buffer| { - to_py_err!(CompressionError -> self::internal::compress(bytes.as_bytes(), Output::Slice(buffer)))?; - Ok(()) - })?; - BytesType::Bytes(pybytes) - - // we can use the estimated length, but need to use buffer as we don't know for sure the length - } else { - let mut buffer = Vec::with_capacity(estimated_len); - - to_py_err!(CompressionError -> self::internal::compress(bytes.as_bytes(), Output::Vector(&mut buffer)))?; - - let pybytes = PyBytes::new(py, &buffer); - BytesType::Bytes(pybytes) - } - } - BytesType::ByteArray(bytes_array) => { - let bytes = unsafe { bytes_array.as_bytes() }; - let mut actual_len = 0; - let pybytes = PyByteArray::new_with(py, estimated_len, |output| { - actual_len = to_py_err!(CompressionError -> self::internal::compress(bytes, Output::Slice(output)))?; - Ok(()) - })?; - pybytes.resize(actual_len)?; - BytesType::ByteArray(pybytes) - } - }; - Ok(result) + crate::generic!(compress(data), py = py, output_len = output_len) } /// Snappy decompression, raw @@ -162,21 +90,19 @@ pub fn compress_raw<'a>(py: Python<'a>, data: BytesType<'a>) -> PyResult(_py: Python<'a>, data: BytesType<'a>, array: &PyArray1) -> PyResult { - crate::de_compress_into(data.as_bytes(), array, self::internal::compress) + crate::generic_into!(compress(data -> array)) } /// Decompress directly into an output buffer #[pyfunction] pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyArray1) -> PyResult { - crate::de_compress_into(data.as_bytes(), array, self::internal::decompress) + crate::generic_into!(decompress(data -> array)) } pub(crate) mod internal { use snap::raw::{Decoder, Encoder}; use snap::read::{FrameDecoder, FrameEncoder}; - use std::io::{Error, Cursor}; - - use crate::Output; + use std::io::{Error, Write}; /// Decompress snappy data raw pub fn decompress_raw(data: &[u8]) -> Result, snap::Error> { @@ -191,36 +117,16 @@ pub(crate) mod internal { } /// Decompress snappy data framed - pub fn decompress<'a>(data: &'a [u8], output: Output<'a>) -> Result { - let mut decoder = FrameDecoder::new(data); - match output { - Output::Slice(slice) => { - let mut wtr = Cursor::new(slice); - let n_bytes = std::io::copy(&mut decoder, &mut wtr)?; - Ok(n_bytes as usize) - } - Output::Vector(v) => { - let mut wtr = Cursor::new(v); - let n_bytes = std::io::copy(&mut decoder, &mut wtr)?; - Ok(n_bytes as usize) - }, - } + pub fn decompress(input: &[u8], output: &mut W) -> Result { + let mut decoder = FrameDecoder::new(input); + let n_bytes = std::io::copy(&mut decoder, output)?; + Ok(n_bytes as usize) } /// Decompress snappy data framed - pub fn compress<'a>(data: &'a [u8], output: Output<'a>) -> Result { + pub fn compress(data: &[u8], output: &mut W) -> Result { let mut encoder = FrameEncoder::new(data); - match output { - Output::Slice(slice) => { - let mut wtr = Cursor::new(slice); - let n_bytes = std::io::copy(&mut encoder, &mut wtr)?; - Ok(n_bytes as usize) - } - Output::Vector(v) => { - let mut wtr = Cursor::new(v); - let n_bytes = std::io::copy(&mut encoder, &mut wtr)?; - Ok(n_bytes as usize) - }, - } + let n_bytes = std::io::copy(&mut encoder, output)?; + Ok(n_bytes as usize) } } diff --git a/src/zstd.rs b/src/zstd.rs index d94f325d..0e3c8ea9 100644 --- a/src/zstd.rs +++ b/src/zstd.rs @@ -1,10 +1,11 @@ use crate::exceptions::{CompressionError, DecompressionError}; -use crate::{to_py_err, BytesType, Output}; +use crate::{to_py_err, BytesType, WriteablePyByteArray}; use numpy::PyArray1; use pyo3::prelude::*; -use pyo3::types::{PyByteArray, PyBytes}; +use pyo3::types::PyBytes; use pyo3::wrap_pyfunction; use pyo3::{PyResult, Python}; +use std::io::Cursor; pub fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(compress, m)?)?; @@ -23,42 +24,7 @@ pub fn init_py_module(m: &PyModule) -> PyResult<()> { /// ``` #[pyfunction] pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { - match data { - BytesType::Bytes(input) => match output_len { - Some(len) => { - let pybytes = PyBytes::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; - Ok(()) - })?; - Ok(BytesType::Bytes(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; - Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) - } - }, - BytesType::ByteArray(input) => match output_len { - Some(len) => { - let mut size = 0; - let pybytes = PyByteArray::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - size = to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; - Ok(()) - })?; - pybytes.resize(size)?; - Ok(BytesType::ByteArray(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; - Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) - } - }, - } + crate::generic!(decompress(data), py = py, output_len = output_len) } /// ZSTD compression. @@ -75,42 +41,7 @@ pub fn compress<'a>( level: Option, output_len: Option, ) -> PyResult> { - match data { - BytesType::Bytes(input) => match output_len { - Some(len) => { - let pybytes = PyBytes::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; - Ok(()) - })?; - Ok(BytesType::Bytes(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; - Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) - } - }, - BytesType::ByteArray(input) => match output_len { - Some(len) => { - let mut size = 0; - let pybytes = PyByteArray::new_with(py, len, |buffer| { - let output = Output::Slice(buffer); - size = to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; - Ok(()) - })?; - pybytes.resize(size)?; - Ok(BytesType::ByteArray(pybytes)) - } - None => { - let mut buffer = Vec::with_capacity(data.len() / 10); - let output = Output::Vector(&mut buffer); - to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; - Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) - } - }, - } + crate::generic!(compress(data), py = py, output_len = output_len, level = level) } /// Compress directly into an output buffer @@ -121,56 +52,31 @@ pub fn compress_into<'a>( array: &PyArray1, level: Option, ) -> PyResult { - crate::de_compress_into(data.as_bytes(), array, |bytes, out| { - self::internal::compress(bytes, out, level) - }) + crate::generic_into!(compress(data -> array), level) } /// Decompress directly into an output buffer #[pyfunction] pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyArray1) -> PyResult { - crate::de_compress_into(data.as_bytes(), array, self::internal::decompress) + crate::generic_into!(decompress(data -> array)) } pub(crate) mod internal { - use crate::Output; - use std::io::{Cursor, Error, Read, Write}; + use std::io::{Error, Write}; /// Decompress gzip data - pub fn decompress<'a>(data: &'a [u8], output: Output<'a>) -> Result { - let mut decoder = zstd::stream::read::Decoder::new(data)?; - match output { - Output::Slice(slice) => { - let mut n_bytes = 0; - loop { - let count = decoder.read(&mut slice[n_bytes..])?; - if count == 0 { - break; - } - n_bytes += count; - } - Ok(n_bytes) - } - Output::Vector(v) => decoder.read_to_end(v), - } + pub fn decompress(input: &[u8], output: &mut W) -> Result { + let mut decoder = zstd::stream::read::Decoder::new(input)?; + let n_bytes = std::io::copy(&mut decoder, output)?; + Ok(n_bytes as usize) } /// Compress gzip data - pub fn compress<'a>(data: &'a [u8], output: Output<'a>, level: Option) -> Result { + pub fn compress(input: &[u8], output: &mut W, level: Option) -> Result { let level = level.unwrap_or_else(|| 0); // 0 will use zstd's default, currently 11 - match output { - Output::Slice(slice) => { - let buffer = Cursor::new(slice); - let mut encoder = zstd::stream::write::Encoder::new(buffer, level)?; - encoder.write_all(data)?; - let buffer = encoder.finish()?; - Ok(buffer.position() as usize) - } - Output::Vector(v) => { - let mut encoder = zstd::stream::read::Encoder::new(data, level)?; - encoder.read_to_end(v) - } - } + let mut encoder = zstd::stream::read::Encoder::new(input, level)?; + let n_bytes = std::io::copy(&mut encoder, output)?; + Ok(n_bytes as usize) } }