From cbe8d222a82c14485cf416c4b85fd5e58363c0e0 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Mon, 21 Feb 2011 00:56:09 -0500 Subject: [PATCH] Implement sha-1 in standard library. Closes #228 --- src/lib/sha1.rs | 284 ++++++++++++++++++++++++++++++++++ src/lib/std.rc | 1 + src/test/run-pass/lib-sha1.rs | 115 ++++++++++++++ 3 files changed, 400 insertions(+) create mode 100644 src/lib/sha1.rs create mode 100644 src/test/run-pass/lib-sha1.rs diff --git a/src/lib/sha1.rs b/src/lib/sha1.rs new file mode 100644 index 0000000000000..8be969aa5966a --- /dev/null +++ b/src/lib/sha1.rs @@ -0,0 +1,284 @@ +/* + * A SHA-1 implementation derived from Paul E. Jones's reference + * implementation, which is written for clarity, not speed. At some + * point this will want to be rewritten. + */ + +import std._vec; +import std._str; + +export sha1; +export mk_sha1; + +state type sha1 = state obj { + // Provide message input as bytes + fn input(&vec[u8]); + + // Provide message input as string + fn input_str(&str); + + // Read the digest as a vector of 20 bytes. After + // calling this no further input may provided + // until reset is called + fn result() -> vec[u8]; + + // Reset the sha1 state for reuse. This is called + // automatically during construction + fn reset(); +}; + +// Some unexported constants +const uint digest_buf_len = 5; +const uint msg_block_len = 64; + +// Builds a sha1 object +fn mk_sha1() -> sha1 { + + state type sha1state = rec(vec[mutable u32] h, + mutable u32 len_low, + mutable u32 len_high, + vec[mutable u8] msg_block, + mutable uint msg_block_idx, + mutable bool computed); + + impure fn add_input(&sha1state st, &vec[u8] msg) { + // FIXME: Should be typestate precondition + check (!st.computed); + + for (u8 element in msg) { + st.msg_block.(st.msg_block_idx) = element; + st.msg_block_idx += 1u; + + st.len_low += 8u32; + if (st.len_low == 0u32) { + st.len_high += 1u32; + if (st.len_high == 0u32) { + // FIXME: Need better failure mode + fail; + } + } + + if (st.msg_block_idx == msg_block_len) { + process_msg_block(st); + } + } + } + + impure fn process_msg_block(&sha1state st) { + + // FIXME: Make precondition + check (_vec.len[mutable u32](st.h) == digest_buf_len); + + // Constants + auto k = vec(0x5A827999u32, + 0x6ED9EBA1u32, + 0x8F1BBCDCu32, + 0xCA62C1D6u32); + + let int t; // Loop counter + let vec[mutable u32] w = _vec.init_elt[mutable u32](0u32, 80u); + + // Initialize the first 16 words of the vector w + t = 0; + while (t < 16) { + w.(t) = (st.msg_block.(t * 4) as u32) << 24u32; + w.(t) = w.(t) | ((st.msg_block.(t * 4 + 1) as u32) << 16u32); + w.(t) = w.(t) | ((st.msg_block.(t * 4 + 2) as u32) << 8u32); + w.(t) = w.(t) | (st.msg_block.(t * 4 + 3) as u32); + t += 1; + } + + // Initialize the rest of vector w + while (t < 80) { + auto val = w.(t-3) ^ w.(t-8) ^ w.(t-14) ^ w.(t-16); + w.(t) = circular_shift(1u32, val); + t += 1; + } + + auto a = st.h.(0); + auto b = st.h.(1); + auto c = st.h.(2); + auto d = st.h.(3); + auto e = st.h.(4); + + let u32 temp; + + t = 0; + while (t < 20) { + temp = circular_shift(5u32, a) + + ((b & c) | ((~b) & d)) + e + w.(t) + k.(0); + e = d; + d = c; + c = circular_shift(30u32, b); + b = a; + a = temp; + t += 1; + } + + while (t < 40) { + temp = circular_shift(5u32, a) + + (b ^ c ^ d) + e + w.(t) + k.(1); + e = d; + d = c; + c = circular_shift(30u32, b); + b = a; + a = temp; + t += 1; + } + + while (t < 60) { + temp = circular_shift(5u32, a) + + ((b & c) | (b & d) | (c & d)) + e + w.(t) + k.(2); + e = d; + d = c; + c = circular_shift(30u32, b); + b = a; + a = temp; + t += 1; + } + + while (t < 80) { + temp = circular_shift(5u32, a) + + (b ^ c ^ d) + e + w.(t) + k.(3); + e = d; + d = c; + c = circular_shift(30u32, b); + b = a; + a = temp; + t += 1; + } + + st.h.(0) = st.h.(0) + a; + st.h.(1) = st.h.(1) + b; + st.h.(2) = st.h.(2) + c; + st.h.(3) = st.h.(3) + d; + st.h.(4) = st.h.(4) + e; + + st.msg_block_idx = 0u; + } + + fn circular_shift(u32 bits, u32 word) -> u32 { + // FIXME: This is a workaround for a rustboot + // "unrecognized quads" codegen bug + auto bits_hack = bits; + ret (word << bits_hack) | (word >> (32u32 - bits)); + } + + impure fn mk_result(&sha1state st) -> vec[u8] { + if (!st.computed) { + pad_msg(st); + st.computed = true; + } + + let vec[u8] res = vec(); + for (u32 hpart in st.h) { + res += (hpart >> 24u32) & 0xFFu32 as u8; + res += (hpart >> 16u32) & 0xFFu32 as u8; + res += (hpart >> 8u32) & 0xFFu32 as u8; + res += hpart & 0xFFu32 as u8; + } + ret res; + } + + /* + * According to the standard, the message must be padded to an even + * 512 bits. The first padding bit must be a '1'. The last 64 bits + * represent the length of the original message. All bits in between + * should be 0. This function will pad the message according to those + * rules by filling the message_block array accordingly. It will also + * call ProcessMessageBlock() appropriately. When it returns, it + * can be assumed that the message digest has been computed. + */ + impure fn pad_msg(&sha1state st) { + // FIXME: Should be a precondition + check (_vec.len[mutable u8](st.msg_block) == msg_block_len); + + /* + * Check to see if the current message block is too small to hold + * the initial padding bits and length. If so, we will pad the + * block, process it, and then continue padding into a second block. + */ + if (st.msg_block_idx > 55u) { + st.msg_block.(st.msg_block_idx) = 0x80u8; + st.msg_block_idx += 1u; + + while (st.msg_block_idx < msg_block_len) { + st.msg_block.(st.msg_block_idx) = 0u8; + st.msg_block_idx += 1u; + } + + process_msg_block(st); + } else { + st.msg_block.(st.msg_block_idx) = 0x80u8; + st.msg_block_idx += 1u; + } + + while (st.msg_block_idx < 56u) { + st.msg_block.(st.msg_block_idx) = 0u8; + st.msg_block_idx += 1u; + } + + // Store the message length as the last 8 octets + st.msg_block.(56) = (st.len_high >> 24u32) & 0xFFu32 as u8; + st.msg_block.(57) = (st.len_high >> 16u32) & 0xFFu32 as u8; + st.msg_block.(58) = (st.len_high >> 8u32) & 0xFFu32 as u8; + st.msg_block.(59) = st.len_high & 0xFFu32 as u8; + st.msg_block.(60) = (st.len_low >> 24u32) & 0xFFu32 as u8; + st.msg_block.(61) = (st.len_low >> 16u32) & 0xFFu32 as u8; + st.msg_block.(62) = (st.len_low >> 8u32) & 0xFFu32 as u8; + st.msg_block.(63) = st.len_low & 0xFFu32 as u8; + + process_msg_block(st); + } + + state obj sha1(sha1state st) { + + fn reset() { + // FIXME: Should be typestate precondition + check (_vec.len[mutable u32](st.h) == digest_buf_len); + + st.len_low = 0u32; + st.len_high = 0u32; + st.msg_block_idx = 0u; + + st.h.(0) = 0x67452301u32; + st.h.(1) = 0xEFCDAB89u32; + st.h.(2) = 0x98BADCFEu32; + st.h.(3) = 0x10325476u32; + st.h.(4) = 0xC3D2E1F0u32; + + st.computed = false; + } + + fn input(&vec[u8] msg) { + add_input(st, msg); + } + + fn input_str(&str msg) { + add_input(st, _str.bytes(msg)); + } + + fn result() -> vec[u8] { + ret mk_result(st); + } + } + + auto st = rec(h = _vec.init_elt[mutable u32](0u32, digest_buf_len), + mutable len_low = 0u32, + mutable len_high = 0u32, + msg_block = _vec.init_elt[mutable u8](0u8, msg_block_len), + mutable msg_block_idx = 0u, + mutable computed = false); + auto sh = sha1(st); + sh.reset(); + ret sh; +} + +// Local Variables: +// mode: rust; +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// compile-command: "make -k -C .. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; +// End: diff --git a/src/lib/std.rc b/src/lib/std.rc index 102aa4d15161c..3500c4b3d5250 100644 --- a/src/lib/std.rc +++ b/src/lib/std.rc @@ -57,6 +57,7 @@ mod dbg; mod bitv; mod sort; mod path; +mod sha1; // Local Variables: // mode: rust; diff --git a/src/test/run-pass/lib-sha1.rs b/src/test/run-pass/lib-sha1.rs new file mode 100644 index 0000000000000..57e3cdc856b6d --- /dev/null +++ b/src/test/run-pass/lib-sha1.rs @@ -0,0 +1,115 @@ +// -*- rust -*- + +use std; + +import std.sha1; +import std._vec; +import std._str; + +fn main() { + + type test = rec(str input, vec[u8] output); + + fn a_million_letter_a() -> str { + auto i = 0; + auto res = ""; + while (i < 100000) { + res += "aaaaaaaaaa"; + i += 1; + } + ret res; + } + + // Test messages from FIPS 180-1 + let vec[test] fips_180_1_tests = + vec( + rec(input = "abc", + output = vec(0xA9u8, 0x99u8, 0x3Eu8, 0x36u8, 0x47u8, + 0x06u8, 0x81u8, 0x6Au8, 0xBAu8, 0x3Eu8, + 0x25u8, 0x71u8, 0x78u8, 0x50u8, 0xC2u8, + 0x6Cu8, 0x9Cu8, 0xD0u8, 0xD8u8, 0x9Du8) + ), + rec(input = "abcdbcdecdefdefgefghfghighij" + + "hijkijkljklmklmnlmnomnopnopq", + output = vec(0x84u8, 0x98u8, 0x3Eu8, 0x44u8, 0x1Cu8, + 0x3Bu8, 0xD2u8, 0x6Eu8, 0xBAu8, 0xAEu8, + 0x4Au8, 0xA1u8, 0xF9u8, 0x51u8, 0x29u8, + 0xE5u8, 0xE5u8, 0x46u8, 0x70u8, 0xF1u8) + ) + // FIXME: This test is disabled because it takes some + // minutes to run under rustboot+valgrind. It may be + // possible to reenable once things are more optimized. + /*, + rec(input = a_million_letter_a(), + output = vec(0x34u8, 0xAAu8, 0x97u8, 0x3Cu8, 0xD4u8, + 0xC4u8, 0xDAu8, 0xA4u8, 0xF6u8, 0x1Eu8, + 0xEBu8, 0x2Bu8, 0xDBu8, 0xADu8, 0x27u8, + 0x31u8, 0x65u8, 0x34u8, 0x01u8, 0x6Fu8) + ) + */ + ); + + // Examples from wikipedia + let vec[test] wikipedia_tests = + vec( + rec(input = "The quick brown fox jumps over the lazy dog", + output = vec(0x2fu8, 0xd4u8, 0xe1u8, 0xc6u8, 0x7au8, + 0x2du8, 0x28u8, 0xfcu8, 0xedu8, 0x84u8, + 0x9eu8, 0xe1u8, 0xbbu8, 0x76u8, 0xe7u8, + 0x39u8, 0x1bu8, 0x93u8, 0xebu8, 0x12u8) + ), + rec(input = "The quick brown fox jumps over the lazy cog", + output = vec(0xdeu8, 0x9fu8, 0x2cu8, 0x7fu8, 0xd2u8, + 0x5eu8, 0x1bu8, 0x3au8, 0xfau8, 0xd3u8, + 0xe8u8, 0x5au8, 0x0bu8, 0xd1u8, 0x7du8, + 0x9bu8, 0x10u8, 0x0du8, 0xb4u8, 0xb3u8) + ) + ); + + auto tests = fips_180_1_tests + wikipedia_tests; + + fn check_vec_eq(vec[u8] v0, vec[u8] v1) { + check (_vec.len[u8](v0) == _vec.len[u8](v1)); + auto len = _vec.len[u8](v0); + auto i = 0u; + while (i < len) { + auto a = v0.(i); + auto b = v1.(i); + check (a == b); + i += 1u; + } + } + + // Test that it works when accepting the message all at once + auto sh = sha1.mk_sha1(); + for (test t in tests) { + sh.input_str(t.input); + auto out = sh.result(); + check_vec_eq(t.output, out); + sh.reset(); + } + + // Test that it works when accepting the message in pieces + for (test t in tests) { + auto len = _str.byte_len(t.input); + auto left = len; + while (left > 0u) { + auto take = (left + 1u) / 2u; + sh.input_str(_str.substr(t.input, len - left, take)); + left = left - take; + } + auto out = sh.result(); + check_vec_eq(t.output, out); + sh.reset(); + } +} + + +// Local Variables: +// mode: rust; +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// compile-command: "make -k -C .. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; +// End: