From 950e3f9944e5479d9fad607e8fc9238e11e5cc03 Mon Sep 17 00:00:00 2001 From: milesgranger Date: Fri, 19 Feb 2021 22:38:28 +0100 Subject: [PATCH] Fix by not estimating decompressed size; another allocation --- Cargo.toml | 2 +- Makefile | 2 +- benchmarks/README.md | 105 ++++++++++++++++++++------------------- benchmarks/test_bench.py | 7 +-- src/snappy.rs | 69 ++++++++++++++----------- tests/test_variants.py | 15 ++++-- 6 files changed, 108 insertions(+), 92 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 71dd1ce2..f428d381 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cramjam" -version = "2.0.1" +version = "2.0.2" authors = ["Miles Granger "] edition = "2018" license-file = "LICENSE" diff --git a/Makefile b/Makefile index 974210d8..01872d75 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ bench: python -m pytest -v --benchmark-only --benchmark-sort name benchmarks/ bench-snappy: - $(BASE_BENCH_CMD) snappy + $(BASE_BENCH_CMD) test_snappy bench-snappy-compress-into: $(BASE_BENCH_CMD) snappy_de_compress_into diff --git a/benchmarks/README.md b/benchmarks/README.md index 6f36dbe8..831fdf3d 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -108,30 +108,30 @@ test_gzip[urls.10K-used-output_len=True-gzip] 41,650.30 -------------------------------------------------------------------------------------------------------- benchmark: 24 tests --------------------------------------------------------------------------------------------------------- Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_snappy[Mark.Twain-Tom.Sawyer.txt-cramjam] 68.0490 (4.44) 236.4670 (3.23) 74.1624 (4.38) 12.2258 (2.78) 70.6645 (4.42) 3.8065 (6.30) 393;654 13,483.9307 (0.23) 4220 1 -test_snappy[Mark.Twain-Tom.Sawyer.txt-snappy] 52.8250 (3.45) 209.7710 (2.86) 57.8734 (3.42) 10.0498 (2.29) 55.1665 (3.45) 2.0325 (3.37) 1153;1990 17,279.1036 (0.29) 13972 1 -test_snappy[alice29.txt-cramjam] 296.4050 (19.34) 513.2750 (7.00) 319.8824 (18.88) 26.2029 (5.96) 314.6850 (19.70) 20.1032 (33.30) 257;192 3,126.1485 (0.05) 2463 1 -test_snappy[alice29.txt-snappy] 601.3360 (39.24) 896.2440 (12.23) 642.0986 (37.90) 43.3138 (9.85) 637.0050 (39.87) 44.1420 (73.11) 176;101 1,557.3933 (0.03) 1583 1 -test_snappy[asyoulik.txt-cramjam] 314.2370 (20.51) 638.6460 (8.72) 337.8341 (19.94) 34.3076 (7.80) 325.6750 (20.38) 22.5742 (37.39) 273;269 2,960.0329 (0.05) 2917 1 -test_snappy[asyoulik.txt-snappy] 532.4610 (34.75) 881.1000 (12.02) 567.8900 (33.52) 43.3191 (9.85) 555.2595 (34.75) 41.0570 (68.00) 214;144 1,760.9043 (0.03) 1830 1 -test_snappy[fireworks.jpeg-cramjam] 41.9580 (2.74) 401.9790 (5.49) 46.1660 (2.73) 9.2259 (2.10) 44.0280 (2.76) 2.2282 (3.69) 1134;2184 21,660.9429 (0.37) 16493 1 -test_snappy[fireworks.jpeg-snappy] 15.3240 (1.0) 125.1180 (1.71) 16.9414 (1.0) 4.3968 (1.0) 15.9770 (1.0) 0.6038 (1.0) 1364;1766 59,027.0559 (1.0) 21891 1 -test_snappy[geo.protodata-cramjam] 109.6360 (7.15) 224.8130 (3.07) 119.6756 (7.06) 13.9476 (3.17) 114.2810 (7.15) 7.8245 (12.96) 765;871 8,355.9254 (0.14) 6107 1 -test_snappy[geo.protodata-snappy] 142.9090 (9.33) 335.9500 (4.58) 157.0690 (9.27) 18.1315 (4.12) 150.7170 (9.43) 10.6040 (17.56) 789;833 6,366.6276 (0.11) 5992 1 -test_snappy[html-cramjam] 148.6780 (9.70) 305.4290 (4.17) 159.0390 (9.39) 15.5382 (3.53) 154.0180 (9.64) 9.1190 (15.10) 702;779 6,287.7660 (0.11) 5853 1 -test_snappy[html-snappy] 156.2010 (10.19) 305.7490 (4.17) 168.3579 (9.94) 17.6330 (4.01) 161.9390 (10.14) 10.3047 (17.07) 563;600 5,939.7280 (0.10) 4239 1 -test_snappy[html_x_4-cramjam] 159.5320 (10.41) 504.7690 (6.89) 168.8123 (9.96) 18.3322 (4.17) 160.9430 (10.07) 9.5325 (15.79) 254;297 5,923.7385 (0.10) 2973 1 -test_snappy[html_x_4-snappy] 633.7830 (41.36) 1,055.7020 (14.41) 683.7402 (40.36) 56.0549 (12.75) 667.5120 (41.78) 45.0975 (74.70) 180;105 1,462.5438 (0.02) 1408 1 -test_snappy[kppkn.gtb-cramjam] 205.8880 (13.44) 391.8670 (5.35) 219.9534 (12.98) 21.6411 (4.92) 212.6530 (13.31) 14.3255 (23.73) 316;301 4,546.4180 (0.08) 2705 1 -test_snappy[kppkn.gtb-snappy] 504.1990 (32.90) 879.5870 (12.00) 546.0574 (32.23) 51.4486 (11.70) 531.9175 (33.29) 42.8270 (70.93) 237;198 1,831.3095 (0.03) 1774 1 -test_snappy[lcet10.txt-cramjam] 286.5250 (18.70) 559.8080 (7.64) 311.1426 (18.37) 33.3866 (7.59) 297.2815 (18.61) 28.1930 (46.70) 356;283 3,213.9606 (0.05) 3010 1 -test_snappy[lcet10.txt-snappy] 1,591.0940 (103.83) 2,195.9160 (29.97) 1,711.3473 (101.02) 106.9863 (24.33) 1,676.0640 (104.90) 107.2650 (177.66) 126;32 584.3349 (0.01) 586 1 -test_snappy[paper-100k.pdf-cramjam] 48.8270 (3.19) 107.3140 (1.46) 53.8996 (3.18) 7.4700 (1.70) 50.9340 (3.19) 3.0750 (5.09) 1894;2399 18,553.0061 (0.31) 12112 1 -test_snappy[paper-100k.pdf-snappy] 19.9350 (1.30) 73.2760 (1.0) 22.8507 (1.35) 4.4256 (1.01) 21.3490 (1.34) 1.7780 (2.94) 4126;4357 43,762.4046 (0.74) 26118 1 -test_snappy[plrabn12.txt-cramjam] 524.0590 (34.20) 988.9050 (13.50) 558.4253 (32.96) 50.0324 (11.38) 542.4060 (33.95) 38.8713 (64.38) 171;133 1,790.7497 (0.03) 1629 1 -test_snappy[plrabn12.txt-snappy] 2,316.0010 (151.14) 3,066.4880 (41.85) 2,449.0969 (144.56) 122.0491 (27.76) 2,421.2130 (151.54) 113.4473 (187.90) 74;30 408.3138 (0.01) 401 1 -test_snappy[urls.10K-cramjam] 480.5240 (31.36) 960.4570 (13.11) 516.9837 (30.52) 53.2631 (12.11) 497.5085 (31.14) 38.3690 (63.55) 178;165 1,934.2969 (0.03) 1622 1 -test_snappy[urls.10K-snappy] 1,934.9160 (126.27) 2,981.1910 (40.68) 2,050.5994 (121.04) 125.9153 (28.64) 2,022.6760 (126.60) 100.3135 (166.15) 48;29 487.6623 (0.01) 501 1 +test_snappy[Mark.Twain-Tom.Sawyer.txt-cramjam] 69.2171 (4.54) 149.0290 (2.72) 73.1161 (4.54) 7.7180 (3.43) 70.4250 (4.51) 1.9299 (10.37) 509;734 13,676.8874 (0.22) 5670 1 +test_snappy[Mark.Twain-Tom.Sawyer.txt-snappy] 52.7661 (3.46) 105.1690 (1.92) 56.7747 (3.53) 5.9326 (2.64) 55.1520 (3.54) 2.1239 (11.42) 1478;1540 17,613.4710 (0.28) 11746 1 +test_snappy[alice29.txt-cramjam] 682.5200 (44.75) 975.6830 (17.79) 703.6449 (43.73) 33.2239 (14.78) 686.4200 (44.00) 23.5155 (126.41) 127;112 1,421.1715 (0.02) 1427 1 +test_snappy[alice29.txt-snappy] 599.2460 (39.29) 882.8780 (16.10) 623.0939 (38.72) 36.4527 (16.22) 604.3134 (38.74) 25.0159 (134.47) 149;136 1,604.8945 (0.03) 1562 1 +test_snappy[asyoulik.txt-cramjam] 601.4019 (39.43) 854.9349 (15.59) 622.4320 (38.68) 33.5336 (14.92) 606.4030 (38.87) 21.9197 (117.83) 132;117 1,606.6013 (0.03) 1605 1 +test_snappy[asyoulik.txt-snappy] 532.4670 (34.91) 736.0550 (13.42) 550.2792 (34.20) 33.5633 (14.93) 535.9120 (34.35) 20.3420 (109.35) 129;123 1,817.2593 (0.03) 1757 1 +test_snappy[fireworks.jpeg-cramjam] 85.6650 (5.62) 167.3910 (3.05) 88.8776 (5.52) 6.6898 (2.98) 86.8581 (5.57) 0.5090 (2.74) 907;2265 11,251.4299 (0.18) 10136 1 +test_snappy[fireworks.jpeg-snappy] 15.2531 (1.0) 54.8310 (1.0) 16.0913 (1.0) 2.2481 (1.0) 15.6009 (1.0) 0.1860 (1.0) 1693;4101 62,145.3748 (1.0) 38244 1 +test_snappy[geo.protodata-cramjam] 213.6840 (14.01) 340.9790 (6.22) 222.0157 (13.80) 15.4238 (6.86) 215.8895 (13.84) 6.1710 (33.17) 393;585 4,504.1852 (0.07) 4188 1 +test_snappy[geo.protodata-snappy] 143.2620 (9.39) 258.2670 (4.71) 149.8566 (9.31) 10.4448 (4.65) 146.8370 (9.41) 2.3311 (12.53) 581;1011 6,673.0481 (0.11) 6194 1 +test_snappy[html-cramjam] 217.7460 (14.28) 362.0189 (6.60) 225.4839 (14.01) 14.4640 (6.43) 220.0430 (14.10) 3.1231 (16.79) 446;802 4,434.9067 (0.07) 4214 1 +test_snappy[html-snappy] 155.9999 (10.23) 279.6430 (5.10) 162.2625 (10.08) 12.9784 (5.77) 157.8589 (10.12) 1.2950 (6.96) 545;1372 6,162.8537 (0.10) 5863 1 +test_snappy[html_x_4-cramjam] 859.3800 (56.34) 1,246.6990 (22.74) 902.2537 (56.07) 58.6088 (26.07) 885.4220 (56.75) 49.6265 (266.76) 118;89 1,108.3357 (0.02) 1131 1 +test_snappy[html_x_4-snappy] 633.2881 (41.52) 915.0940 (16.69) 656.2599 (40.78) 33.9714 (15.11) 637.9770 (40.89) 28.7280 (154.43) 139;106 1,523.7866 (0.02) 1538 1 +test_snappy[kppkn.gtb-cramjam] 592.4229 (38.84) 860.7230 (15.70) 615.7461 (38.27) 36.7628 (16.35) 597.0570 (38.27) 25.1530 (135.21) 145;131 1,624.0461 (0.03) 1644 1 +test_snappy[kppkn.gtb-snappy] 502.8270 (32.97) 681.5820 (12.43) 520.1125 (32.32) 28.6254 (12.73) 505.6889 (32.41) 20.8102 (111.86) 159;143 1,922.6608 (0.03) 1835 1 +test_snappy[lcet10.txt-cramjam] 1,805.6750 (118.38) 2,199.8071 (40.12) 1,863.9462 (115.84) 60.0574 (26.72) 1,846.9085 (118.38) 81.8840 (440.16) 74;14 536.4962 (0.01) 512 1 +test_snappy[lcet10.txt-snappy] 1,589.5769 (104.21) 2,488.7241 (45.39) 1,640.2298 (101.93) 69.9862 (31.13) 1,618.5340 (103.75) 58.5833 (314.91) 63;43 609.6707 (0.01) 605 1 +test_snappy[paper-100k.pdf-cramjam] 87.1780 (5.72) 175.4561 (3.20) 91.7674 (5.70) 9.0491 (4.03) 88.3376 (5.66) 2.5189 (13.54) 864;1374 10,897.1133 (0.18) 8940 1 +test_snappy[paper-100k.pdf-snappy] 20.3040 (1.33) 60.3660 (1.10) 21.4873 (1.34) 2.6823 (1.19) 20.8070 (1.33) 0.4639 (2.49) 1315;2721 46,539.1012 (0.75) 25931 1 +test_snappy[plrabn12.txt-cramjam] 2,857.7200 (187.35) 3,364.1340 (61.35) 3,055.4821 (189.88) 97.3973 (43.32) 3,048.8640 (195.43) 138.2728 (743.28) 96;1 327.2806 (0.01) 299 1 +test_snappy[plrabn12.txt-snappy] 2,185.6411 (143.29) 3,217.7820 (58.69) 2,362.0467 (146.79) 160.2942 (71.30) 2,330.2045 (149.36) 148.5945 (798.76) 64;27 423.3617 (0.01) 384 1 +test_snappy[urls.10K-cramjam] 2,831.2660 (185.62) 4,660.4241 (85.00) 3,266.6863 (203.01) 267.0078 (118.77) 3,213.5800 (205.99) 276.7573 (>1000.0) 45;12 306.1206 (0.00) 265 1 +test_snappy[urls.10K-snappy] 1,812.4740 (118.83) 2,824.8890 (51.52) 1,967.3516 (122.26) 179.3886 (79.80) 1,920.4395 (123.10) 143.2970 (770.28) 36;27 508.2976 (0.01) 406 1 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` @@ -144,34 +144,35 @@ Again, since basically no variants implement similar functionality as we saw in benchmarks, this benchmark is specific to `cramjam` ```bash --------------------------------------------------------------------------------------------------------------------- benchmark: 24 tests --------------------------------------------------------------------------------------------------------------------- -Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS (Kops/s) Rounds Iterations --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_cramjam_snappy_de_compress_into[Mark.Twain-Tom.Sawyer.txt-compress_into] 41.7650 (2.38) 104.0260 (1.76) 46.2884 (2.42) 6.5739 (2.29) 43.9400 (2.39) 1.6860 (4.70) 2130;2781 21.6037 (0.41) 17246 1 -test_cramjam_snappy_de_compress_into[Mark.Twain-Tom.Sawyer.txt-decompress_into] 21.3400 (1.22) 61.8010 (1.04) 23.0667 (1.21) 3.2925 (1.14) 22.2090 (1.21) 0.3585 (1.0) 974;2318 43.3526 (0.83) 13000 1 -test_cramjam_snappy_de_compress_into[alice29.txt-compress_into] 205.7540 (11.72) 510.2070 (8.62) 223.4270 (11.67) 20.4450 (7.11) 214.1430 (11.66) 14.0580 (39.21) 557;474 4.4757 (0.09) 4574 1 -test_cramjam_snappy_de_compress_into[alice29.txt-decompress_into] 83.6290 (4.76) 216.7800 (3.66) 91.1061 (4.76) 12.1865 (4.24) 86.5520 (4.71) 2.8550 (7.96) 880;1660 10.9762 (0.21) 8572 1 -test_cramjam_snappy_de_compress_into[asyoulik.txt-compress_into] 220.0860 (12.54) 616.0500 (10.41) 241.1679 (12.60) 25.5963 (8.90) 229.8620 (12.52) 16.8125 (46.89) 394;349 4.1465 (0.08) 3348 1 -test_cramjam_snappy_de_compress_into[asyoulik.txt-decompress_into] 87.8920 (5.01) 178.8330 (3.02) 95.8756 (5.01) 11.5868 (4.03) 91.4080 (4.98) 3.2860 (9.17) 949;1703 10.4302 (0.20) 8875 1 -test_cramjam_snappy_de_compress_into[fireworks.jpeg-compress_into] 25.6260 (1.46) 63.8190 (1.08) 27.7547 (1.45) 3.7059 (1.29) 26.6040 (1.45) 0.7890 (2.20) 2774;3234 36.0300 (0.69) 28518 1 -test_cramjam_snappy_de_compress_into[fireworks.jpeg-decompress_into] 17.5530 (1.0) 59.1750 (1.0) 19.1416 (1.0) 2.8766 (1.0) 18.3660 (1.0) 0.4600 (1.28) 3385;4030 52.2422 (1.0) 41681 1 -test_cramjam_snappy_de_compress_into[geo.protodata-compress_into] 62.9040 (3.58) 139.3470 (2.35) 68.9734 (3.60) 8.2557 (2.87) 65.8000 (3.58) 2.5645 (7.15) 1312;2267 14.4983 (0.28) 12772 1 -test_cramjam_snappy_de_compress_into[geo.protodata-decompress_into] 35.1860 (2.00) 82.0800 (1.39) 38.0429 (1.99) 4.8740 (1.69) 36.5620 (1.99) 1.1445 (3.19) 2212;2524 26.2861 (0.50) 20812 1 -test_cramjam_snappy_de_compress_into[html-compress_into] 93.2700 (5.31) 192.9050 (3.26) 101.1416 (5.28) 10.3539 (3.60) 97.0850 (5.29) 3.9940 (11.14) 1048;1560 9.8871 (0.19) 9303 1 -test_cramjam_snappy_de_compress_into[html-decompress_into] 45.6410 (2.60) 109.9500 (1.86) 49.4838 (2.59) 5.8214 (2.02) 47.5050 (2.59) 1.6060 (4.48) 2094;2462 20.2087 (0.39) 17202 1 -test_cramjam_snappy_de_compress_into[html_x_4-compress_into] 92.9150 (5.29) 198.6570 (3.36) 101.2886 (5.29) 11.0534 (3.84) 96.9960 (5.28) 3.7150 (10.36) 1016;1618 9.8728 (0.19) 9170 1 -test_cramjam_snappy_de_compress_into[html_x_4-decompress_into] 45.4800 (2.59) 118.9590 (2.01) 49.8765 (2.61) 6.6306 (2.31) 47.4450 (2.58) 1.5590 (4.35) 2276;2776 20.0495 (0.38) 17336 1 -test_cramjam_snappy_de_compress_into[kppkn.gtb-compress_into] 127.1530 (7.24) 223.4660 (3.78) 137.4548 (7.18) 12.9390 (4.50) 131.7830 (7.18) 7.1705 (20.00) 844;886 7.2751 (0.14) 6095 1 -test_cramjam_snappy_de_compress_into[kppkn.gtb-decompress_into] 69.3210 (3.95) 162.5740 (2.75) 76.7793 (4.01) 10.2444 (3.56) 73.8570 (4.02) 2.7220 (7.59) 1118;2146 13.0243 (0.25) 11894 1 -test_cramjam_snappy_de_compress_into[lcet10.txt-compress_into] 191.7940 (10.93) 338.6870 (5.72) 207.6322 (10.85) 18.0247 (6.27) 199.7430 (10.88) 11.4570 (31.96) 587;531 4.8162 (0.09) 5000 1 -test_cramjam_snappy_de_compress_into[lcet10.txt-decompress_into] 77.7750 (4.43) 338.1339 (5.71) 84.4765 (4.41) 10.4472 (3.63) 80.7050 (4.39) 2.7620 (7.70) 984;2034 11.8376 (0.23) 10322 1 -test_cramjam_snappy_de_compress_into[paper-100k.pdf-compress_into] 26.6640 (1.52) 242.2140 (4.09) 29.3447 (1.53) 4.2104 (1.46) 28.1900 (1.53) 0.9080 (2.53) 2437;2960 34.0777 (0.65) 26911 1 -test_cramjam_snappy_de_compress_into[paper-100k.pdf-decompress_into] 21.8910 (1.25) 244.9150 (4.14) 23.8577 (1.25) 3.6118 (1.26) 22.8060 (1.24) 0.8357 (2.33) 2815;3565 41.9152 (0.80) 30571 1 -test_cramjam_snappy_de_compress_into[plrabn12.txt-compress_into] 232.9760 (13.27) 423.5980 (7.16) 252.4664 (13.19) 22.3262 (7.76) 242.1575 (13.19) 16.2451 (45.31) 452;341 3.9609 (0.08) 3832 1 -test_cramjam_snappy_de_compress_into[plrabn12.txt-decompress_into] 93.4720 (5.33) 191.9320 (3.24) 100.7861 (5.27) 10.8995 (3.79) 96.8360 (5.27) 3.2852 (9.16) 680;1188 9.9220 (0.19) 6673 1 -test_cramjam_snappy_de_compress_into[urls.10K-compress_into] 140.6160 (8.01) 267.2590 (4.52) 152.2335 (7.95) 14.4990 (5.04) 146.4630 (7.97) 6.3553 (17.73) 682;938 6.5689 (0.13) 5901 1 -test_cramjam_snappy_de_compress_into[urls.10K-decompress_into] 58.6630 (3.34) 126.4510 (2.14) 63.6568 (3.33) 7.5369 (2.62) 60.9710 (3.32) 1.9845 (5.54) 1022;1536 15.7092 (0.30) 9257 1 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------------------------------------ benchmark: 24 tests ------------------------------------------------------------------------------------------------------------------------ +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_cramjam_snappy_de_compress_into[Mark.Twain-Tom.Sawyer.txt-compress_into] 40.5479 (1.96) 92.6349 (1.63) 42.6280 (1.96) 3.9048 (1.43) 41.5810 (1.98) 0.7870 (3.10) 1078;2036 23,458.7680 (0.51) 15468 1 +test_cramjam_snappy_de_compress_into[Mark.Twain-Tom.Sawyer.txt-decompress_into] 20.7290 (1.0) 56.6919 (1.0) 21.7574 (1.0) 2.7272 (1.0) 20.9850 (1.0) 0.2539 (1.0) 1139;3593 45,961.2983 (1.0) 17873 1 +test_cramjam_snappy_de_compress_into[alice29.txt-compress_into] 465.8590 (22.47) 734.1170 (12.95) 492.3764 (22.63) 36.7509 (13.48) 481.4934 (22.94) 32.5410 (128.16) 215;151 2,030.9665 (0.04) 2002 1 +test_cramjam_snappy_de_compress_into[alice29.txt-decompress_into] 189.1120 (9.12) 366.5941 (6.47) 204.2178 (9.39) 25.1372 (9.22) 195.4500 (9.31) 12.6192 (49.70) 475;586 4,896.7338 (0.11) 4737 1 +test_cramjam_snappy_de_compress_into[asyoulik.txt-compress_into] 412.6530 (19.91) 678.6539 (11.97) 437.5261 (20.11) 40.1624 (14.73) 417.2896 (19.89) 24.7700 (97.56) 239;239 2,285.5782 (0.05) 2214 1 +test_cramjam_snappy_de_compress_into[asyoulik.txt-decompress_into] 166.3340 (8.02) 298.4459 (5.26) 176.6029 (8.12) 16.2447 (5.96) 171.6875 (8.18) 10.1075 (39.81) 510;522 5,662.4205 (0.12) 4348 1 +test_cramjam_snappy_de_compress_into[fireworks.jpeg-compress_into] 38.2040 (1.84) 92.6780 (1.63) 41.3553 (1.90) 5.0435 (1.85) 39.8530 (1.90) 2.1397 (8.43) 2339;2662 24,180.6924 (0.53) 22063 1 +test_cramjam_snappy_de_compress_into[fireworks.jpeg-decompress_into] 27.4120 (1.32) 342.3600 (6.04) 28.8981 (1.33) 4.3700 (1.60) 27.7550 (1.32) 0.8330 (3.28) 1856;2320 34,604.3022 (0.75) 29230 1 +test_cramjam_snappy_de_compress_into[geo.protodata-compress_into] 121.6651 (5.87) 247.4930 (4.37) 132.2174 (6.08) 15.0302 (5.51) 126.7961 (6.04) 8.4882 (33.43) 758;831 7,563.3022 (0.16) 6179 1 +test_cramjam_snappy_de_compress_into[geo.protodata-decompress_into] 64.3190 (3.10) 132.8751 (2.34) 69.9674 (3.22) 8.3110 (3.05) 66.6840 (3.18) 4.1929 (16.51) 1641;2219 14,292.3797 (0.31) 11606 1 +test_cramjam_snappy_de_compress_into[html-compress_into] 129.1960 (6.23) 244.5440 (4.31) 134.7474 (6.19) 11.0703 (4.06) 131.1535 (6.25) 1.8530 (7.30) 599;1200 7,421.2955 (0.16) 7240 1 +test_cramjam_snappy_de_compress_into[html-decompress_into] 64.9870 (3.14) 131.4470 (2.32) 67.4074 (3.10) 6.2745 (2.30) 65.5680 (3.12) 0.3350 (1.32) 1060;2021 14,835.1691 (0.32) 12760 1 +test_cramjam_snappy_de_compress_into[html_x_4-compress_into] 515.1191 (24.85) 776.8241 (13.70) 534.2439 (24.55) 30.4098 (11.15) 519.7320 (24.77) 21.6207 (85.15) 171;139 1,871.8044 (0.04) 1907 1 +test_cramjam_snappy_de_compress_into[html_x_4-decompress_into] 254.3870 (12.27) 414.4181 (7.31) 263.4683 (12.11) 19.0163 (6.97) 255.5045 (12.18) 7.4761 (29.44) 322;571 3,795.5233 (0.08) 3426 1 +test_cramjam_snappy_de_compress_into[kppkn.gtb-compress_into] 363.9220 (17.56) 603.6321 (10.65) 378.2792 (17.39) 23.9013 (8.76) 366.1391 (17.45) 20.6419 (81.30) 233;146 2,643.5504 (0.06) 2690 1 +test_cramjam_snappy_de_compress_into[kppkn.gtb-decompress_into] 195.1340 (9.41) 427.1740 (7.54) 203.5845 (9.36) 14.4862 (5.31) 198.9135 (9.48) 4.6330 (18.25) 466;788 4,911.9653 (0.11) 4846 1 +test_cramjam_snappy_de_compress_into[lcet10.txt-compress_into] 1,223.2770 (59.01) 1,540.7839 (27.18) 1,265.2557 (58.15) 48.1862 (17.67) 1,251.2550 (59.63) 47.2469 (186.08) 95;65 790.3541 (0.02) 790 1 +test_cramjam_snappy_de_compress_into[lcet10.txt-decompress_into] 493.2970 (23.80) 773.3600 (13.64) 510.9751 (23.49) 29.4123 (10.78) 495.8690 (23.63) 22.5334 (88.75) 153;124 1,957.0427 (0.04) 1833 1 +test_cramjam_snappy_de_compress_into[paper-100k.pdf-compress_into] 34.6331 (1.67) 80.5630 (1.42) 37.0645 (1.70) 4.0766 (1.49) 35.3340 (1.68) 1.7233 (6.79) 2131;3151 26,979.9880 (0.59) 22285 1 +test_cramjam_snappy_de_compress_into[paper-100k.pdf-decompress_into] 28.1190 (1.36) 132.0021 (2.33) 29.8972 (1.37) 3.7721 (1.38) 28.4681 (1.36) 0.8692 (3.42) 2683;3118 33,447.9984 (0.73) 26970 1 +test_cramjam_snappy_de_compress_into[plrabn12.txt-compress_into] 1,659.3040 (80.05) 2,075.3290 (36.61) 1,721.3891 (79.12) 58.4017 (21.41) 1,699.4040 (80.98) 63.4862 (250.04) 105;44 580.9262 (0.01) 583 1 +test_cramjam_snappy_de_compress_into[plrabn12.txt-decompress_into] 667.1869 (32.19) 1,016.5200 (17.93) 689.3431 (31.68) 35.4147 (12.99) 670.6540 (31.96) 25.9486 (102.20) 125;95 1,450.6564 (0.03) 1245 1 +test_cramjam_snappy_de_compress_into[urls.10K-compress_into] 1,456.3091 (70.25) 1,919.0491 (33.85) 1,503.5026 (69.10) 55.8022 (20.46) 1,485.0110 (70.77) 52.9042 (208.36) 89;59 665.1136 (0.01) 635 1 +test_cramjam_snappy_de_compress_into[urls.10K-decompress_into] 604.8420 (29.18) 839.7710 (14.81) 624.8297 (28.72) 29.0141 (10.64) 608.5989 (29.00) 27.0902 (106.70) 102;63 1,600.4361 (0.03) 1139 1 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + ``` #### Lz4 diff --git a/benchmarks/test_bench.py b/benchmarks/test_bench.py index 527f8492..b7389df2 100644 --- a/benchmarks/test_bench.py +++ b/benchmarks/test_bench.py @@ -26,10 +26,7 @@ def test_snappy(benchmark, file, use_cramjam: bool): """ import snappy - data = bytearray( - file.read_bytes() - ) # bytearray avoids double allocation in cramjam snappy by default - # Can be slightly faster if passing output_len to compress/decompress ops + data = file.read_bytes() if use_cramjam: benchmark( round_trip, @@ -54,7 +51,7 @@ def test_cramjam_snappy_de_compress_into(benchmark, op, file): """ from cramjam import snappy - data = bytearray(file.read_bytes()) + data = file.read_bytes() compressed_data = cramjam.snappy.compress(data) operation = getattr(snappy, op) diff --git a/src/snappy.rs b/src/snappy.rs index 19670132..cb9cd478 100644 --- a/src/snappy.rs +++ b/src/snappy.rs @@ -5,7 +5,7 @@ use pyo3::prelude::*; use pyo3::types::{PyByteArray, PyBytes}; use pyo3::wrap_pyfunction; use pyo3::{PyResult, Python}; -use snap::raw::{decompress_len, max_compress_len}; +use snap::raw::max_compress_len; pub fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(compress, m)?)?; @@ -27,34 +27,43 @@ pub fn init_py_module(m: &PyModule) -> PyResult<()> { /// ``` #[pyfunction] pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { - let estimated_len = match output_len { - Some(len) => len, - None => to_py_err!(DecompressionError -> decompress_len(data.as_bytes()))?, - }; let result = match data { BytesType::Bytes(bytes) => { - let pybytes = if output_len.is_some() { - PyBytes::new_with(py, estimated_len, |buffer| { - to_py_err!(DecompressionError -> self::internal::decompress(bytes.as_bytes(), Output::Slice(buffer)))?; + let pybytes = match output_len { + Some(len) => PyBytes::new_with(py, len, |output| { + to_py_err!(DecompressionError -> self::internal::decompress(bytes.as_bytes(), Output::Slice(output)))?; Ok(()) - })? - } else { - let mut buffer = Vec::with_capacity(estimated_len); + })?, + None => { + let mut output = Vec::with_capacity(data.len()); - to_py_err!(DecompressionError -> self::internal::decompress(bytes.as_bytes(), Output::Vector(&mut buffer)))?; - PyBytes::new(py, &buffer) + to_py_err!(DecompressionError -> self::internal::decompress(bytes.as_bytes(), Output::Vector(&mut output)))?; + PyBytes::new(py, &output) + } }; BytesType::Bytes(pybytes) } - BytesType::ByteArray(bytes_array) => unsafe { - let mut actual_len = 0; - let pybytes = PyByteArray::new_with(py, estimated_len, |output| { - actual_len = to_py_err!(DecompressionError -> self::internal::decompress(bytes_array.as_bytes(), Output::Slice(output)))?; - Ok(()) - })?; - pybytes.resize(actual_len)?; - BytesType::ByteArray(pybytes) - }, + BytesType::ByteArray(bytes_array) => { + let bytes = unsafe { bytes_array.as_bytes() }; + match output_len { + Some(len) => { + let mut actual_len = 0; + let pybytes = PyByteArray::new_with(py, len, |output| { + actual_len = + to_py_err!(DecompressionError -> self::internal::decompress(bytes, Output::Slice(output)))?; + Ok(()) + })?; + pybytes.resize(actual_len)?; + BytesType::ByteArray(pybytes) + } + None => { + let mut output = Vec::with_capacity(data.len()); + to_py_err!(DecompressionError -> self::internal::decompress(bytes, Output::Vector(&mut output)))?; + let pybytes = PyByteArray::new(py, &output); + BytesType::ByteArray(pybytes) + } + } + } }; Ok(result) } @@ -165,7 +174,7 @@ pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyAr pub(crate) mod internal { use snap::raw::{Decoder, Encoder}; use snap::read::{FrameDecoder, FrameEncoder}; - use std::io::{Cursor, Error, Read, Write}; + use std::io::{Error, Read}; use crate::Output; @@ -186,7 +195,6 @@ pub(crate) mod internal { let mut decoder = FrameDecoder::new(data); match output { Output::Slice(slice) => { - let mut decoder = FrameDecoder::new(data); let mut n_bytes = 0; loop { let count = decoder.read(&mut slice[n_bytes..])?; @@ -206,10 +214,15 @@ pub(crate) mod internal { let mut encoder = FrameEncoder::new(data); match output { Output::Slice(slice) => { - let buffer = Cursor::new(slice); - let mut encoder = snap::write::FrameEncoder::new(buffer); - encoder.write_all(data)?; - Ok(encoder.get_ref().position() as usize) + let mut n_bytes = 0; + loop { + let count = encoder.read(&mut slice[n_bytes..])?; + if count == 0 { + break; + } + n_bytes += count; + } + Ok(n_bytes) } Output::Vector(v) => encoder.read_to_end(v), } diff --git a/tests/test_variants.py b/tests/test_variants.py index 68698f03..5ab8b37e 100644 --- a/tests/test_variants.py +++ b/tests/test_variants.py @@ -3,23 +3,30 @@ import cramjam import hashlib + def same_same(a, b): return hashlib.md5(a).hexdigest() == hashlib.md5(b).hexdigest() + +@pytest.mark.parametrize("is_bytearray", (True, False)) @pytest.mark.parametrize( "variant_str", ("snappy", "brotli", "lz4", "gzip", "deflate", "zstd") ) -def test_variants_simple(variant_str): +def test_variants_simple(variant_str, is_bytearray): variant = getattr(cramjam, variant_str) uncompressed = b"some bytes to compress 123" * 1000 + if is_bytearray: + uncompressed = bytearray(uncompressed) compressed = variant.compress(uncompressed) assert compressed != uncompressed + assert type(compressed) == type(uncompressed) decompressed = variant.decompress(compressed, output_len=len(uncompressed)) assert decompressed == uncompressed + assert type(decompressed) == type(uncompressed) @pytest.mark.parametrize( @@ -28,12 +35,10 @@ def test_variants_simple(variant_str): def test_variants_raise_exception(variant_str): variant = getattr(cramjam, variant_str) with pytest.raises(cramjam.DecompressionError): - variant.decompress(b'sknow') + variant.decompress(b"sknow") -@pytest.mark.parametrize( - "variant_str", ("snappy", "brotli", "gzip", "deflate", "zstd") -) +@pytest.mark.parametrize("variant_str", ("snappy", "brotli", "gzip", "deflate", "zstd")) def test_variants_de_compress_into(variant_str): # TODO: support lz4 de/compress_into