LLVM  10.0.0svn
DataExtractor.h
Go to the documentation of this file.
1 //===-- DataExtractor.h -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_SUPPORT_DATAEXTRACTOR_H
10 #define LLVM_SUPPORT_DATAEXTRACTOR_H
11 
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/Support/DataTypes.h"
14 
15 namespace llvm {
16 
/// Helper type used when pulling 3-byte (24-bit) quantities out of a buffer.
///
/// The three bytes are stored in the order they were supplied;
/// getAsUint32() combines them into an integer for a given byte order.
struct Uint24 {
  uint8_t Bytes[3];

  /// Fill all three bytes with the same value \p U.
  Uint24(uint8_t U) : Bytes{U, U, U} {}

  /// Build from three individual bytes, stored as Bytes[0], Bytes[1], Bytes[2].
  Uint24(uint8_t U0, uint8_t U1, uint8_t U2) : Bytes{U0, U1, U2} {}

  /// Combine the stored bytes into a 24-bit value returned in a uint32_t.
  ///
  /// \param IsLittleEndian
  /// If true, Bytes[0] is the least significant byte and Bytes[2] the most
  /// significant; if false, the roles of Bytes[0] and Bytes[2] are swapped.
  /// Bytes[1] is always the middle byte.
  uint32_t getAsUint32(bool IsLittleEndian) const {
    const uint32_t Low = IsLittleEndian ? Bytes[0] : Bytes[2];
    const uint32_t Mid = Bytes[1];
    const uint32_t High = IsLittleEndian ? Bytes[2] : Bytes[0];
    return Low | (Mid << 8) | (High << 16);
  }
};
31 
32 using uint24_t = Uint24;
33 static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3");
34 
35 /// Needed by swapByteOrder().
37  return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]);
38 }
39 
42  uint8_t IsLittleEndian;
43  uint8_t AddressSize;
44 public:
45  /// Construct with a buffer that is owned by the caller.
46  ///
47  /// This constructor allows us to use data that is owned by the
48  /// caller. The data must stay around as long as this object is
49  /// valid.
50  DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
51  : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
52 
53  /// Get the data pointed to by this extractor.
54  StringRef getData() const { return Data; }
55  /// Get the endianness for this extractor.
56  bool isLittleEndian() const { return IsLittleEndian; }
57  /// Get the address size for this extractor.
58  uint8_t getAddressSize() const { return AddressSize; }
59  /// Set the address size for this extractor.
60  void setAddressSize(uint8_t Size) { AddressSize = Size; }
61 
62  /// Extract a C string from \a *offset_ptr.
63  ///
64  /// Returns a pointer to a C String from the data at the offset
65  /// pointed to by \a offset_ptr. A variable length NULL terminated C
66  /// string will be extracted and the \a offset_ptr will be
67  /// updated with the offset of the byte that follows the NULL
68  /// terminator byte.
69  ///
70  /// @param[in,out] offset_ptr
71  /// A pointer to an offset within the data that will be advanced
72  /// by the appropriate number of bytes if the value is extracted
73  /// correctly. If the offset is out of bounds or there are not
74  /// enough bytes to extract this value, the offset will be left
75  /// unmodified.
76  ///
77  /// @return
78  /// A pointer to the C string value in the data. If the offset
79  /// pointed to by \a offset_ptr is out of bounds, or if the
80  /// offset plus the length of the C string is out of bounds,
81  /// NULL will be returned.
82  const char *getCStr(uint64_t *offset_ptr) const;
83 
84  /// Extract a C string from \a *offset_ptr.
85  ///
86  /// Returns a StringRef for the C String from the data at the offset
87  /// pointed to by \a offset_ptr. A variable length NULL terminated C
88  /// string will be extracted and the \a offset_ptr will be
89  /// updated with the offset of the byte that follows the NULL
90  /// terminator byte.
91  ///
92  /// \param[in,out] offset_ptr
93  /// A pointer to an offset within the data that will be advanced
94  /// by the appropriate number of bytes if the value is extracted
95  /// correctly. If the offset is out of bounds or there are not
96  /// enough bytes to extract this value, the offset will be left
97  /// unmodified.
98  ///
99  /// \return
100  /// A StringRef for the C string value in the data. If the offset
101  /// pointed to by \a offset_ptr is out of bounds, or if the
102  /// offset plus the length of the C string is out of bounds,
103  /// a default-initialized StringRef will be returned.
104  StringRef getCStrRef(uint64_t *offset_ptr) const;
105 
106  /// Extract an unsigned integer of size \a byte_size from \a
107  /// *offset_ptr.
108  ///
109  /// Extract a single unsigned integer value and update the offset
110  /// pointed to by \a offset_ptr. The size of the extracted integer
111  /// is specified by the \a byte_size argument. \a byte_size should
112  /// have a value greater than or equal to one and less than or equal
113  /// to eight since the return value is 64 bits wide. Any
114  /// \a byte_size values less than 1 or greater than 8 will result in
115  /// nothing being extracted, and zero being returned.
116  ///
117  /// @param[in,out] offset_ptr
118  /// A pointer to an offset within the data that will be advanced
119  /// by the appropriate number of bytes if the value is extracted
120  /// correctly. If the offset is out of bounds or there are not
121  /// enough bytes to extract this value, the offset will be left
122  /// unmodified.
123  ///
124  /// @param[in] byte_size
125  /// The size in bytes of the integer to extract.
126  ///
127  /// @return
128  /// The unsigned integer value that was extracted, or zero on
129  /// failure.
130  uint64_t getUnsigned(uint64_t *offset_ptr, uint32_t byte_size) const;
131 
132  /// Extract a signed integer of size \a size from \a *offset_ptr.
133  ///
134  /// Extract a single signed integer value (sign extending if required)
135  /// and update the offset pointed to by \a offset_ptr. The size of
136  /// the extracted integer is specified by the \a size argument.
137  /// \a size should have a value greater than or equal to one
138  /// and less than or equal to eight since the return value is 64
139  /// bits wide. Any \a size values less than 1 or greater than
140  /// 8 will result in nothing being extracted, and zero being returned.
141  ///
142  /// @param[in,out] offset_ptr
143  /// A pointer to an offset within the data that will be advanced
144  /// by the appropriate number of bytes if the value is extracted
145  /// correctly. If the offset is out of bounds or there are not
146  /// enough bytes to extract this value, the offset will be left
147  /// unmodified.
148  ///
149  /// @param[in] size
150  /// The size in bytes of the integer to extract.
151  ///
152  /// @return
153  /// The sign extended signed integer value that was extracted,
154  /// or zero on failure.
155  int64_t getSigned(uint64_t *offset_ptr, uint32_t size) const;
156 
157  //------------------------------------------------------------------
158  /// Extract an pointer from \a *offset_ptr.
159  ///
160  /// Extract a single pointer from the data and update the offset
161  /// pointed to by \a offset_ptr. The size of the extracted pointer
162  /// is \a getAddressSize(), so the address size has to be
163  /// set correctly prior to extracting any pointer values.
164  ///
165  /// @param[in,out] offset_ptr
166  /// A pointer to an offset within the data that will be advanced
167  /// by the appropriate number of bytes if the value is extracted
168  /// correctly. If the offset is out of bounds or there are not
169  /// enough bytes to extract this value, the offset will be left
170  /// unmodified.
171  ///
172  /// @return
173  /// The extracted pointer value as a 64 integer.
174  uint64_t getAddress(uint64_t *offset_ptr) const {
175  return getUnsigned(offset_ptr, AddressSize);
176  }
177 
178  /// Extract a uint8_t value from \a *offset_ptr.
179  ///
180  /// Extract a single uint8_t from the binary data at the offset
181  /// pointed to by \a offset_ptr, and advance the offset on success.
182  ///
183  /// @param[in,out] offset_ptr
184  /// A pointer to an offset within the data that will be advanced
185  /// by the appropriate number of bytes if the value is extracted
186  /// correctly. If the offset is out of bounds or there are not
187  /// enough bytes to extract this value, the offset will be left
188  /// unmodified.
189  ///
190  /// @return
191  /// The extracted uint8_t value.
192  uint8_t getU8(uint64_t *offset_ptr) const;
193 
194  /// Extract \a count uint8_t values from \a *offset_ptr.
195  ///
196  /// Extract \a count uint8_t values from the binary data at the
197  /// offset pointed to by \a offset_ptr, and advance the offset on
198  /// success. The extracted values are copied into \a dst.
199  ///
200  /// @param[in,out] offset_ptr
201  /// A pointer to an offset within the data that will be advanced
202  /// by the appropriate number of bytes if the value is extracted
203  /// correctly. If the offset is out of bounds or there are not
204  /// enough bytes to extract this value, the offset will be left
205  /// unmodified.
206  ///
207  /// @param[out] dst
208  /// A buffer to copy \a count uint8_t values into. \a dst must
209  /// be large enough to hold all requested data.
210  ///
211  /// @param[in] count
212  /// The number of uint8_t values to extract.
213  ///
214  /// @return
215  /// \a dst if all values were properly extracted and copied,
216  /// NULL otherwise.
217  uint8_t *getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const;
218 
219  //------------------------------------------------------------------
220  /// Extract a uint16_t value from \a *offset_ptr.
221  ///
222  /// Extract a single uint16_t from the binary data at the offset
223  /// pointed to by \a offset_ptr, and update the offset on success.
224  ///
225  /// @param[in,out] offset_ptr
226  /// A pointer to an offset within the data that will be advanced
227  /// by the appropriate number of bytes if the value is extracted
228  /// correctly. If the offset is out of bounds or there are not
229  /// enough bytes to extract this value, the offset will be left
230  /// unmodified.
231  ///
232  /// @return
233  /// The extracted uint16_t value.
234  //------------------------------------------------------------------
235  uint16_t getU16(uint64_t *offset_ptr) const;
236 
237  /// Extract \a count uint16_t values from \a *offset_ptr.
238  ///
239  /// Extract \a count uint16_t values from the binary data at the
240  /// offset pointed to by \a offset_ptr, and advance the offset on
241  /// success. The extracted values are copied into \a dst.
242  ///
243  /// @param[in,out] offset_ptr
244  /// A pointer to an offset within the data that will be advanced
245  /// by the appropriate number of bytes if the value is extracted
246  /// correctly. If the offset is out of bounds or there are not
247  /// enough bytes to extract this value, the offset will be left
248  /// unmodified.
249  ///
250  /// @param[out] dst
251  /// A buffer to copy \a count uint16_t values into. \a dst must
252  /// be large enough to hold all requested data.
253  ///
254  /// @param[in] count
255  /// The number of uint16_t values to extract.
256  ///
257  /// @return
258  /// \a dst if all values were properly extracted and copied,
259  /// NULL otherwise.
260  uint16_t *getU16(uint64_t *offset_ptr, uint16_t *dst, uint32_t count) const;
261 
262  /// Extract a 24-bit unsigned value from \a *offset_ptr and return it
263  /// in a uint32_t.
264  ///
265  /// Extract 3 bytes from the binary data at the offset pointed to by
266  /// \a offset_ptr, construct a uint32_t from them and update the offset
267  /// on success.
268  ///
269  /// @param[in,out] offset_ptr
270  /// A pointer to an offset within the data that will be advanced
271  /// by the 3 bytes if the value is extracted correctly. If the offset
272  /// is out of bounds or there are not enough bytes to extract this value,
273  /// the offset will be left unmodified.
274  ///
275  /// @return
276  /// The extracted 24-bit value represented in a uint32_t.
277  uint32_t getU24(uint64_t *offset_ptr) const;
278 
279  /// Extract a uint32_t value from \a *offset_ptr.
280  ///
281  /// Extract a single uint32_t from the binary data at the offset
282  /// pointed to by \a offset_ptr, and update the offset on success.
283  ///
284  /// @param[in,out] offset_ptr
285  /// A pointer to an offset within the data that will be advanced
286  /// by the appropriate number of bytes if the value is extracted
287  /// correctly. If the offset is out of bounds or there are not
288  /// enough bytes to extract this value, the offset will be left
289  /// unmodified.
290  ///
291  /// @return
292  /// The extracted uint32_t value.
293  uint32_t getU32(uint64_t *offset_ptr) const;
294 
295  /// Extract \a count uint32_t values from \a *offset_ptr.
296  ///
297  /// Extract \a count uint32_t values from the binary data at the
298  /// offset pointed to by \a offset_ptr, and advance the offset on
299  /// success. The extracted values are copied into \a dst.
300  ///
301  /// @param[in,out] offset_ptr
302  /// A pointer to an offset within the data that will be advanced
303  /// by the appropriate number of bytes if the value is extracted
304  /// correctly. If the offset is out of bounds or there are not
305  /// enough bytes to extract this value, the offset will be left
306  /// unmodified.
307  ///
308  /// @param[out] dst
309  /// A buffer to copy \a count uint32_t values into. \a dst must
310  /// be large enough to hold all requested data.
311  ///
312  /// @param[in] count
313  /// The number of uint32_t values to extract.
314  ///
315  /// @return
316  /// \a dst if all values were properly extracted and copied,
317  /// NULL otherwise.
318  uint32_t *getU32(uint64_t *offset_ptr, uint32_t *dst, uint32_t count) const;
319 
320  /// Extract a uint64_t value from \a *offset_ptr.
321  ///
322  /// Extract a single uint64_t from the binary data at the offset
323  /// pointed to by \a offset_ptr, and update the offset on success.
324  ///
325  /// @param[in,out] offset_ptr
326  /// A pointer to an offset within the data that will be advanced
327  /// by the appropriate number of bytes if the value is extracted
328  /// correctly. If the offset is out of bounds or there are not
329  /// enough bytes to extract this value, the offset will be left
330  /// unmodified.
331  ///
332  /// @return
333  /// The extracted uint64_t value.
334  uint64_t getU64(uint64_t *offset_ptr) const;
335 
336  /// Extract \a count uint64_t values from \a *offset_ptr.
337  ///
338  /// Extract \a count uint64_t values from the binary data at the
339  /// offset pointed to by \a offset_ptr, and advance the offset on
340  /// success. The extracted values are copied into \a dst.
341  ///
342  /// @param[in,out] offset_ptr
343  /// A pointer to an offset within the data that will be advanced
344  /// by the appropriate number of bytes if the value is extracted
345  /// correctly. If the offset is out of bounds or there are not
346  /// enough bytes to extract this value, the offset will be left
347  /// unmodified.
348  ///
349  /// @param[out] dst
350  /// A buffer to copy \a count uint64_t values into. \a dst must
351  /// be large enough to hold all requested data.
352  ///
353  /// @param[in] count
354  /// The number of uint64_t values to extract.
355  ///
356  /// @return
357  /// \a dst if all values were properly extracted and copied,
358  /// NULL otherwise.
359  uint64_t *getU64(uint64_t *offset_ptr, uint64_t *dst, uint32_t count) const;
360 
361  /// Extract a signed LEB128 value from \a *offset_ptr.
362  ///
363  /// Extracts a signed LEB128 number from this object's data
364  /// starting at the offset pointed to by \a offset_ptr. The offset
365  /// pointed to by \a offset_ptr will be updated with the offset of
366  /// the byte following the last extracted byte.
367  ///
368  /// @param[in,out] offset_ptr
369  /// A pointer to an offset within the data that will be advanced
370  /// by the appropriate number of bytes if the value is extracted
371  /// correctly. If the offset is out of bounds or there are not
372  /// enough bytes to extract this value, the offset will be left
373  /// unmodified.
374  ///
375  /// @return
376  /// The extracted signed integer value.
377  int64_t getSLEB128(uint64_t *offset_ptr) const;
378 
379  /// Extract an unsigned LEB128 value from \a *offset_ptr.
380  ///
381  /// Extracts an unsigned LEB128 number from this object's data
382  /// starting at the offset pointed to by \a offset_ptr. The offset
383  /// pointed to by \a offset_ptr will be updated with the offset of
384  /// the byte following the last extracted byte.
385  ///
386  /// @param[in,out] offset_ptr
387  /// A pointer to an offset within the data that will be advanced
388  /// by the appropriate number of bytes if the value is extracted
389  /// correctly. If the offset is out of bounds or there are not
390  /// enough bytes to extract this value, the offset will be left
391  /// unmodified.
392  ///
393  /// @return
394  /// The extracted unsigned integer value.
395  uint64_t getULEB128(uint64_t *offset_ptr) const;
396 
397  /// Test the validity of \a offset.
398  ///
399  /// @return
400  /// \b true if \a offset is a valid offset into the data in this
401  /// object, \b false otherwise.
402  bool isValidOffset(uint64_t offset) const { return Data.size() > offset; }
403 
404  /// Test the availability of \a length bytes of data from \a offset.
405  ///
406  /// @return
407  /// \b true if \a offset is a valid offset and there are \a
408  /// length bytes available at that offset, \b false otherwise.
409  bool isValidOffsetForDataOfSize(uint64_t offset, uint64_t length) const {
410  return offset + length >= offset && isValidOffset(offset + length - 1);
411  }
412 
413  /// Test the availability of enough bytes of data for a pointer from
414  /// \a offset. The size of a pointer is \a getAddressSize().
415  ///
416  /// @return
417  /// \b true if \a offset is a valid offset and there are enough
418  /// bytes for a pointer available at that offset, \b false
419  /// otherwise.
420  bool isValidOffsetForAddress(uint64_t offset) const {
421  return isValidOffsetForDataOfSize(offset, AddressSize);
422  }
423 };
424 
425 } // namespace llvm
426 
427 #endif
uint64_t CallInst * C
This class represents lattice values for constants.
Definition: AllocatorList.h:23
uint32_t getAsUint32(bool IsLittleEndian) const
Definition: DataExtractor.h:26
StringRef getData() const
Get the data pointed to by this extractor.
Definition: DataExtractor.h:54
An auxiliary type to facilitate extraction of 3-byte entities.
Definition: DataExtractor.h:18
Uint24(uint8_t U)
Definition: DataExtractor.h:20
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:130
bool isLittleEndian() const
Get the endianness for this extractor.
Definition: DataExtractor.h:56
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1231
uint8_t getAddressSize() const
Get the address size for this extractor.
Definition: DataExtractor.h:58
DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
Construct with a buffer that is owned by the caller.
Definition: DataExtractor.h:50
Uint24 uint24_t
Definition: DataExtractor.h:32
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1146
bool isValidOffsetForDataOfSize(uint64_t offset, uint64_t length) const
Test the availability of length bytes of data from offset.
void setAddressSize(uint8_t Size)
Set the address size for this extractor.
Definition: DataExtractor.h:60
bool isValidOffset(uint64_t offset) const
Test the validity of offset.
uint24_t getSwappedBytes(uint24_t C)
Needed by swapByteOrder().
Definition: DataExtractor.h:36
uint8_t Bytes[3]
Definition: DataExtractor.h:19
Uint24(uint8_t U0, uint8_t U1, uint8_t U2)
Definition: DataExtractor.h:23
uint32_t Size
Definition: Profile.cpp:46
uint64_t getAddress(uint64_t *offset_ptr) const
Extract an pointer from *offset_ptr.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool isValidOffsetForAddress(uint64_t offset) const
Test the availability of enough bytes of data for a pointer from offset.