LLVM  14.0.0git
MD5.cpp
Go to the documentation of this file.
1 /*
2  * This code is derived from (original license follows):
3  *
4  * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
5  * MD5 Message-Digest Algorithm (RFC 1321).
6  *
7  * Homepage:
8  * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
9  *
10  * Author:
11  * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
12  *
13  * This software was written by Alexander Peslyak in 2001. No copyright is
14  * claimed, and the software is hereby placed in the public domain.
15  * In case this attempt to disclaim copyright and place the software in the
16  * public domain is deemed null and void, then the software is
17  * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
18  * general public under the following terms:
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted.
22  *
23  * There's ABSOLUTELY NO WARRANTY, express or implied.
24  *
25  * (This is a heavily cut-down "BSD license".)
26  *
27  * This differs from Colin Plumb's older public domain implementation in that
28  * no exactly 32-bit integer data type is required (any 32-bit or wider
29  * unsigned integer data type will do), there's no compile-time endianness
30  * configuration, and the function prototypes match OpenSSL's. No code from
31  * Colin Plumb's implementation has been reused; this comment merely compares
32  * the properties of the two independent implementations.
33  *
34  * The primary goals of this implementation are portability and ease of use.
35  * It is meant to be fast, but not as fast as possible. Some known
36  * optimizations are not included to reduce source code size and avoid
37  * compile-time configuration.
38  */
39 
40 #include "llvm/Support/MD5.h"
41 #include "llvm/ADT/ArrayRef.h"
42 #include "llvm/ADT/SmallString.h"
43 #include "llvm/ADT/StringRef.h"
44 #include "llvm/Support/Endian.h"
45 #include "llvm/Support/Format.h"
47 #include <array>
48 #include <cstdint>
49 #include <cstring>
50 
51 // The basic MD5 functions.
52 
53 // F and G are optimized compared to their RFC 1321 definitions for
54 // architectures that lack an AND-NOT instruction, just like in Colin Plumb's
55 // implementation.
56 #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
57 #define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
58 #define H(x, y, z) ((x) ^ (y) ^ (z))
59 #define I(x, y, z) ((y) ^ ((x) | ~(z)))
60 
61 // The MD5 transformation for all four rounds.
62 #define STEP(f, a, b, c, d, x, t, s) \
63  (a) += f((b), (c), (d)) + (x) + (t); \
64  (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
65  (a) += (b);
66 
67 // SET reads 4 input bytes in little-endian byte order and stores them
68 // in a properly aligned word in host byte order.
69 #define SET(n) \
70  (block[(n)] = \
71  (MD5_u32plus) ptr[(n) * 4] | ((MD5_u32plus) ptr[(n) * 4 + 1] << 8) | \
72  ((MD5_u32plus) ptr[(n) * 4 + 2] << 16) | \
73  ((MD5_u32plus) ptr[(n) * 4 + 3] << 24))
74 #define GET(n) (block[(n)])
75 
76 using namespace llvm;
77 
78 /// This processes one or more 64-byte data blocks, but does NOT update
79 ///the bit counters. There are no alignment requirements.
80 const uint8_t *MD5::body(ArrayRef<uint8_t> Data) {
81  const uint8_t *ptr;
82  MD5_u32plus a, b, c, d;
83  MD5_u32plus saved_a, saved_b, saved_c, saved_d;
84  unsigned long Size = Data.size();
85 
86  ptr = Data.data();
87 
88  a = this->a;
89  b = this->b;
90  c = this->c;
91  d = this->d;
92 
93  do {
94  saved_a = a;
95  saved_b = b;
96  saved_c = c;
97  saved_d = d;
98 
99  // Round 1
100  STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
101  STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
102  STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
103  STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
104  STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
105  STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
106  STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
107  STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
108  STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
109  STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
110  STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
111  STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
112  STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
113  STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
114  STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
115  STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
116 
117  // Round 2
118  STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
119  STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
120  STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
121  STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
122  STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
123  STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
124  STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
125  STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
126  STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
127  STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
128  STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
129  STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
130  STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
131  STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
132  STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
133  STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
134 
135  // Round 3
136  STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
137  STEP(H, d, a, b, c, GET(8), 0x8771f681, 11)
138  STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
139  STEP(H, b, c, d, a, GET(14), 0xfde5380c, 23)
140  STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
141  STEP(H, d, a, b, c, GET(4), 0x4bdecfa9, 11)
142  STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
143  STEP(H, b, c, d, a, GET(10), 0xbebfbc70, 23)
144  STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
145  STEP(H, d, a, b, c, GET(0), 0xeaa127fa, 11)
146  STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
147  STEP(H, b, c, d, a, GET(6), 0x04881d05, 23)
148  STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
149  STEP(H, d, a, b, c, GET(12), 0xe6db99e5, 11)
150  STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
151  STEP(H, b, c, d, a, GET(2), 0xc4ac5665, 23)
152 
153  // Round 4
154  STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
155  STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
156  STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
157  STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
158  STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
159  STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
160  STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
161  STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
162  STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
163  STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
164  STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
165  STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
166  STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
167  STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
168  STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
169  STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
170 
171  a += saved_a;
172  b += saved_b;
173  c += saved_c;
174  d += saved_d;
175 
176  ptr += 64;
177  } while (Size -= 64);
178 
179  this->a = a;
180  this->b = b;
181  this->c = c;
182  this->d = d;
183 
184  return ptr;
185 }
186 
187 MD5::MD5() = default;
188 
189 /// Incrementally add the bytes in \p Data to the hash.
191  MD5_u32plus saved_lo;
192  unsigned long used, free;
193  const uint8_t *Ptr = Data.data();
194  unsigned long Size = Data.size();
195 
196  saved_lo = lo;
197  if ((lo = (saved_lo + Size) & 0x1fffffff) < saved_lo)
198  hi++;
199  hi += Size >> 29;
200 
201  used = saved_lo & 0x3f;
202 
203  if (used) {
204  free = 64 - used;
205 
206  if (Size < free) {
207  memcpy(&buffer[used], Ptr, Size);
208  return;
209  }
210 
211  memcpy(&buffer[used], Ptr, free);
212  Ptr = Ptr + free;
213  Size -= free;
214  body(makeArrayRef(buffer, 64));
215  }
216 
217  if (Size >= 64) {
218  Ptr = body(makeArrayRef(Ptr, Size & ~(unsigned long) 0x3f));
219  Size &= 0x3f;
220  }
221 
222  memcpy(buffer, Ptr, Size);
223 }
224 
225 /// Add the bytes in the StringRef \p Str to the hash.
226 // Note that this isn't a string and so this won't include any trailing NULL
227 // bytes.
229  ArrayRef<uint8_t> SVal((const uint8_t *)Str.data(), Str.size());
230  update(SVal);
231 }
232 
233 /// Finish the hash and place the resulting hash into \p result.
234 /// \param Result is assumed to be a minimum of 16-bytes in size.
235 void MD5::final(MD5Result &Result) {
236  unsigned long used, free;
237 
238  used = lo & 0x3f;
239 
240  buffer[used++] = 0x80;
241 
242  free = 64 - used;
243 
244  if (free < 8) {
245  memset(&buffer[used], 0, free);
246  body(makeArrayRef(buffer, 64));
247  used = 0;
248  free = 64;
249  }
250 
251  memset(&buffer[used], 0, free - 8);
252 
253  lo <<= 3;
254  support::endian::write32le(&buffer[56], lo);
255  support::endian::write32le(&buffer[60], hi);
256 
257  body(makeArrayRef(buffer, 64));
258 
259  support::endian::write32le(&Result[0], a);
260  support::endian::write32le(&Result[4], b);
261  support::endian::write32le(&Result[8], c);
262  support::endian::write32le(&Result[12], d);
263 }
264 
266  SmallString<32> Str;
267  raw_svector_ostream Res(Str);
268  for (int i = 0; i < 16; ++i)
269  Res << format("%.2x", Bytes[i]);
270  return Str;
271 }
272 
274  Str = Result.digest();
275 }
276 
277 std::array<uint8_t, 16> MD5::hash(ArrayRef<uint8_t> Data) {
278  MD5 Hash;
279  Hash.update(Data);
280  MD5::MD5Result Res;
281  Hash.final(Res);
282 
283  return Res;
284 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::MD5::update
void update(ArrayRef< uint8_t > Data)
Updates the hash for the byte stream provided.
Definition: MD5.cpp:190
StringRef.h
llvm::MD5::stringifyResult
static void stringifyResult(MD5Result &Result, SmallString< 32 > &Str)
Translates the bytes in Res to a hex string that is deposited into Str.
Definition: MD5.cpp:273
llvm::support::endian::write32le
void write32le(void *P, uint32_t V)
Definition: Endian.h:416
Format.h
llvm::Data
@ Data
Definition: SIMachineScheduler.h:56
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MD5::MD5
MD5()
a
=0.0 ? 0.0 :(a > 0.0 ? 1.0 :-1.0) a
Definition: README.txt:489
llvm::MD5::final
void final(MD5Result &Result)
Finishes off the hash and puts the result in result.
Definition: MD5.cpp:235
MD5.h
SmallString.h
b
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int b
Definition: README.txt:418
llvm::MD5::MD5Result::Bytes
std::array< uint8_t, 16 > Bytes
Definition: MD5.h:56
c
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int int c
Definition: README.txt:418
llvm::SmallString< 32 >
llvm::MD5
Definition: MD5.h:41
I
#define I(x, y, z)
Definition: MD5.cpp:59
ArrayRef.h
G
#define G(x, y, z)
Definition: MD5.cpp:57
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
STEP
#define STEP(f, a, b, c, d, x, t, s)
Definition: MD5.cpp:62
llvm::ArrayRef< uint8_t >
SET
#define SET(n)
Definition: MD5.cpp:69
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
uint32_t
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::MD5::MD5Result
Definition: MD5.h:55
H
#define H(x, y, z)
Definition: MD5.cpp:58
llvm::MD5::hash
static std::array< uint8_t, 16 > hash(ArrayRef< uint8_t > Data)
Computes the hash for a given bytes.
Definition: MD5.cpp:277
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:476
used
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the producing something like xmm1 movaps xmm0 ret saving two instructions The basic idea is that a reload from a spill if only one byte chunk is used
Definition: README-SSE.txt:270
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:656
raw_ostream.h
d
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int int int d
Definition: README.txt:418
GET
#define GET(n)
Definition: MD5.cpp:74
Endian.h
llvm::MD5::MD5Result::digest
SmallString< 32 > digest() const
Definition: MD5.cpp:265