LLVM  14.0.0git
MD5.cpp
Go to the documentation of this file.
1 /*
2  * This code is derived from (original license follows):
3  *
4  * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
5  * MD5 Message-Digest Algorithm (RFC 1321).
6  *
7  * Homepage:
8  * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
9  *
10  * Author:
11  * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
12  *
13  * This software was written by Alexander Peslyak in 2001. No copyright is
14  * claimed, and the software is hereby placed in the public domain.
15  * In case this attempt to disclaim copyright and place the software in the
16  * public domain is deemed null and void, then the software is
17  * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
18  * general public under the following terms:
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted.
22  *
23  * There's ABSOLUTELY NO WARRANTY, express or implied.
24  *
25  * (This is a heavily cut-down "BSD license".)
26  *
27  * This differs from Colin Plumb's older public domain implementation in that
28  * no exactly 32-bit integer data type is required (any 32-bit or wider
29  * unsigned integer data type will do), there's no compile-time endianness
30  * configuration, and the function prototypes match OpenSSL's. No code from
31  * Colin Plumb's implementation has been reused; this comment merely compares
32  * the properties of the two independent implementations.
33  *
34  * The primary goals of this implementation are portability and ease of use.
35  * It is meant to be fast, but not as fast as possible. Some known
36  * optimizations are not included to reduce source code size and avoid
37  * compile-time configuration.
38  */
39 
40 #include "llvm/Support/MD5.h"
41 #include "llvm/ADT/ArrayRef.h"
42 #include "llvm/ADT/SmallString.h"
43 #include "llvm/ADT/StringExtras.h"
44 #include "llvm/ADT/StringRef.h"
45 #include "llvm/Support/Endian.h"
46 #include <array>
47 #include <cstdint>
48 #include <cstring>
49 
50 // The basic MD5 functions.
51 
52 // F and G are optimized compared to their RFC 1321 definitions for
53 // architectures that lack an AND-NOT instruction, just like in Colin Plumb's
54 // implementation.
55 #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
56 #define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
57 #define H(x, y, z) ((x) ^ (y) ^ (z))
58 #define I(x, y, z) ((y) ^ ((x) | ~(z)))
59 
60 // The MD5 transformation for all four rounds.
61 #define STEP(f, a, b, c, d, x, t, s) \
62  (a) += f((b), (c), (d)) + (x) + (t); \
63  (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
64  (a) += (b);
65 
66 // SET reads 4 input bytes in little-endian byte order and stores them
67 // in a properly aligned word in host byte order.
68 #define SET(n) \
69  (InternalState.block[(n)] = (MD5_u32plus)ptr[(n)*4] | \
70  ((MD5_u32plus)ptr[(n)*4 + 1] << 8) | \
71  ((MD5_u32plus)ptr[(n)*4 + 2] << 16) | \
72  ((MD5_u32plus)ptr[(n)*4 + 3] << 24))
73 #define GET(n) (InternalState.block[(n)])
74 
75 using namespace llvm;
76 
77 /// This processes one or more 64-byte data blocks, but does NOT update
78 ///the bit counters. There are no alignment requirements.
79 const uint8_t *MD5::body(ArrayRef<uint8_t> Data) {
80  const uint8_t *ptr;
81  MD5_u32plus a, b, c, d;
82  MD5_u32plus saved_a, saved_b, saved_c, saved_d;
83  unsigned long Size = Data.size();
84 
85  ptr = Data.data();
86 
87  a = InternalState.a;
88  b = InternalState.b;
89  c = InternalState.c;
90  d = InternalState.d;
91 
92  do {
93  saved_a = a;
94  saved_b = b;
95  saved_c = c;
96  saved_d = d;
97 
98  // Round 1
99  STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
100  STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
101  STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
102  STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
103  STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
104  STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
105  STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
106  STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
107  STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
108  STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
109  STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
110  STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
111  STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
112  STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
113  STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
114  STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
115 
116  // Round 2
117  STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
118  STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
119  STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
120  STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
121  STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
122  STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
123  STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
124  STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
125  STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
126  STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
127  STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
128  STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
129  STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
130  STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
131  STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
132  STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
133 
134  // Round 3
135  STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
136  STEP(H, d, a, b, c, GET(8), 0x8771f681, 11)
137  STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
138  STEP(H, b, c, d, a, GET(14), 0xfde5380c, 23)
139  STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
140  STEP(H, d, a, b, c, GET(4), 0x4bdecfa9, 11)
141  STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
142  STEP(H, b, c, d, a, GET(10), 0xbebfbc70, 23)
143  STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
144  STEP(H, d, a, b, c, GET(0), 0xeaa127fa, 11)
145  STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
146  STEP(H, b, c, d, a, GET(6), 0x04881d05, 23)
147  STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
148  STEP(H, d, a, b, c, GET(12), 0xe6db99e5, 11)
149  STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
150  STEP(H, b, c, d, a, GET(2), 0xc4ac5665, 23)
151 
152  // Round 4
153  STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
154  STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
155  STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
156  STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
157  STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
158  STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
159  STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
160  STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
161  STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
162  STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
163  STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
164  STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
165  STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
166  STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
167  STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
168  STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
169 
170  a += saved_a;
171  b += saved_b;
172  c += saved_c;
173  d += saved_d;
174 
175  ptr += 64;
176  } while (Size -= 64);
177 
178  InternalState.a = a;
179  InternalState.b = b;
180  InternalState.c = c;
181  InternalState.d = d;
182 
183  return ptr;
184 }
185 
186 MD5::MD5() = default;
187 
188 /// Incrementally add the bytes in \p Data to the hash.
190  MD5_u32plus saved_lo;
191  unsigned long used, free;
192  const uint8_t *Ptr = Data.data();
193  unsigned long Size = Data.size();
194 
195  saved_lo = InternalState.lo;
196  if ((InternalState.lo = (saved_lo + Size) & 0x1fffffff) < saved_lo)
197  InternalState.hi++;
198  InternalState.hi += Size >> 29;
199 
200  used = saved_lo & 0x3f;
201 
202  if (used) {
203  free = 64 - used;
204 
205  if (Size < free) {
206  memcpy(&InternalState.buffer[used], Ptr, Size);
207  return;
208  }
209 
210  memcpy(&InternalState.buffer[used], Ptr, free);
211  Ptr = Ptr + free;
212  Size -= free;
213  body(makeArrayRef(InternalState.buffer, 64));
214  }
215 
216  if (Size >= 64) {
217  Ptr = body(makeArrayRef(Ptr, Size & ~(unsigned long) 0x3f));
218  Size &= 0x3f;
219  }
220 
221  memcpy(InternalState.buffer, Ptr, Size);
222 }
223 
224 /// Add the bytes in the StringRef \p Str to the hash.
225 // Note that this isn't a string and so this won't include any trailing NULL
226 // bytes.
228  ArrayRef<uint8_t> SVal((const uint8_t *)Str.data(), Str.size());
229  update(SVal);
230 }
231 
232 /// Finish the hash and place the resulting hash into \p result.
233 /// \param Result is assumed to be a minimum of 16-bytes in size.
234 void MD5::final(MD5Result &Result) {
235  unsigned long used, free;
236 
237  used = InternalState.lo & 0x3f;
238 
239  InternalState.buffer[used++] = 0x80;
240 
241  free = 64 - used;
242 
243  if (free < 8) {
244  memset(&InternalState.buffer[used], 0, free);
245  body(makeArrayRef(InternalState.buffer, 64));
246  used = 0;
247  free = 64;
248  }
249 
250  memset(&InternalState.buffer[used], 0, free - 8);
251 
252  InternalState.lo <<= 3;
253  support::endian::write32le(&InternalState.buffer[56], InternalState.lo);
254  support::endian::write32le(&InternalState.buffer[60], InternalState.hi);
255 
256  body(makeArrayRef(InternalState.buffer, 64));
257 
258  support::endian::write32le(&Result[0], InternalState.a);
259  support::endian::write32le(&Result[4], InternalState.b);
260  support::endian::write32le(&Result[8], InternalState.c);
261  support::endian::write32le(&Result[12], InternalState.d);
262 }
263 
265  final(Result);
266  return StringRef(reinterpret_cast<char *>(Result.Bytes.data()),
267  Result.Bytes.size());
268 }
269 
271  auto StateToRestore = InternalState;
272 
273  auto Hash = final();
274 
275  // Restore the state
276  InternalState = StateToRestore;
277 
278  return Hash;
279 }
280 
282  SmallString<32> Str;
283  toHex(Bytes, /*LowerCase*/ true, Str);
284  return Str;
285 }
286 
288  toHex(Result.Bytes, /*LowerCase*/ true, Str);
289 }
290 
291 std::array<uint8_t, 16> MD5::hash(ArrayRef<uint8_t> Data) {
292  MD5 Hash;
293  Hash.update(Data);
294  MD5::MD5Result Res;
295  Hash.final(Res);
296 
297  return Res;
298 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::MD5::b
MD5_u32plus b
Definition: MD5.h:103
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:22
llvm::MD5::update
void update(ArrayRef< uint8_t > Data)
Updates the hash for the byte stream provided.
Definition: MD5.cpp:189
llvm::MD5::stringifyResult
static void stringifyResult(MD5Result &Result, SmallVectorImpl< char > &Str)
Translates the bytes in Res to a hex string that is deposited into Str.
Definition: MD5.cpp:287
StringRef.h
llvm::support::endian::write32le
void write32le(void *P, uint32_t V)
Definition: Endian.h:416
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::MD5::MD5
MD5()
llvm::MD5::d
MD5_u32plus d
Definition: MD5.h:105
a
=0.0 ? 0.0 :(a > 0.0 ? 1.0 :-1.0) a
Definition: README.txt:489
llvm::MD5::final
void final(MD5Result &Result)
Finishes off the hash and puts the result in result.
Definition: MD5.cpp:234
MD5.h
SmallString.h
llvm::MD5::a
MD5_u32plus a
Definition: MD5.h:102
llvm::MD5::MD5Result::Bytes
std::array< uint8_t, 16 > Bytes
Definition: MD5.h:44
llvm::SmallString< 32 >
llvm::MD5::result
StringRef result()
Finishes off the hash, and returns a reference to the 16-byte hash data.
Definition: MD5.cpp:270
llvm::symbolize::toHex
static std::string toHex(uint64_t V)
Definition: DIPrinter.cpp:276
llvm::MD5
Definition: MD5.h:41
I
#define I(x, y, z)
Definition: MD5.cpp:58
StringExtras.h
ArrayRef.h
G
#define G(x, y, z)
Definition: MD5.cpp:56
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
STEP
#define STEP(f, a, b, c, d, x, t, s)
Definition: MD5.cpp:61
llvm::ArrayRef< uint8_t >
SET
#define SET(n)
Definition: MD5.cpp:68
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
uint32_t
llvm::MD5::final
StringRef final()
Finishes off the hash, and returns a reference to the 16-byte hash data.
Definition: MD5.cpp:264
llvm::MD5::MD5Result
Definition: MD5.h:43
H
#define H(x, y, z)
Definition: MD5.cpp:57
llvm::MD5::hash
static std::array< uint8_t, 16 > hash(ArrayRef< uint8_t > Data)
Computes the hash for a given bytes.
Definition: MD5.cpp:291
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:474
used
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the producing something like xmm1 movaps xmm0 ret saving two instructions The basic idea is that a reload from a spill if only one byte chunk is used
Definition: README-SSE.txt:270
llvm::SmallVectorImpl< char >
llvm::MD5::c
MD5_u32plus c
Definition: MD5.h:104
GET
#define GET(n)
Definition: MD5.cpp:73
Endian.h
llvm::MD5::MD5Result::digest
SmallString< 32 > digest() const
Definition: MD5.cpp:281