OpenZWave Library  1.5.0
aesopt.h
Go to the documentation of this file.
1 /*
2 ---------------------------------------------------------------------------
3 Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
4 
5 The redistribution and use of this software (with or without changes)
6 is allowed without the payment of fees or royalties provided that:
7 
8  source code distributions include the above copyright notice, this
9  list of conditions and the following disclaimer;
10 
11  binary distributions include the above copyright notice, this list
12  of conditions and the following disclaimer in their documentation.
13 
14 This software is provided 'as is' with no explicit or implied warranties
15 in respect of its operation, including, but not limited to, correctness
16 and fitness for purpose.
17 ---------------------------------------------------------------------------
18 Issue Date: 20/12/2007
19 
20  This file contains the compilation options for AES (Rijndael) and code
21  that is common across encryption, key scheduling and table generation.
22 
23  OPERATION
24 
25  These source code files implement the AES algorithm Rijndael designed by
26  Joan Daemen and Vincent Rijmen. This version is designed for the standard
27  block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
28  and 32 bytes).
29 
30  This version is designed for flexibility and speed using operations on
31  32-bit words rather than operations on bytes. It can be compiled with
32  either big or little endian internal byte order but is faster when the
33  native byte order for the processor is used.
34 
35  THE CIPHER INTERFACE
36 
37  The cipher interface is implemented as an array of bytes in which lower
38  AES bit sequence indexes map to higher numeric significance within bytes.
39 
40  uint8_t (an unsigned 8-bit type)
41  uint32_t (an unsigned 32-bit type)
42  struct aes_encrypt_ctx (structure for the cipher encryption context)
43  struct aes_decrypt_ctx (structure for the cipher decryption context)
44  AES_RETURN the function return type
45 
46  C subroutine calls:
47 
48  AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
49  AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
50  AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
51  AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out,
52  const aes_encrypt_ctx cx[1]);
53 
54  AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
55  AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
56  AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
57  AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out,
58  const aes_decrypt_ctx cx[1]);
59 
60  IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that
61  you call aes_init() before AES is used so that the tables are initialised.
62 
63  C++ aes class subroutines:
64 
65  Class AESencrypt for encryption
66 
67  Constructors:
68  AESencrypt(void)
69  AESencrypt(const unsigned char *key) - 128 bit key
70  Members:
71  AES_RETURN key128(const unsigned char *key)
72  AES_RETURN key192(const unsigned char *key)
73  AES_RETURN key256(const unsigned char *key)
74  AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const
75 
76  Class AESdecrypt for decryption
77  Constructors:
78  AESdecrypt(void)
79  AESdecrypt(const unsigned char *key) - 128 bit key
80  Members:
81  AES_RETURN key128(const unsigned char *key)
82  AES_RETURN key192(const unsigned char *key)
83  AES_RETURN key256(const unsigned char *key)
84  AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const
85 */
86 
87 #if !defined( _AESOPT_H )
88 #define _AESOPT_H
89 
90 #if defined( __cplusplus )
91 #include "aescpp.h"
92 #else
93 #include "aes.h"
94 #endif
95 
96 /* PLATFORM SPECIFIC INCLUDES */
97 
98 #include "brg_endian.h"
99 
100 /* CONFIGURATION - THE USE OF DEFINES
101 
102  Later in this section there are a number of defines that control the
103  operation of the code. In each section, the purpose of each define is
104  explained so that the relevant form can be included or excluded by
105  setting either 1's or 0's respectively on the branches of the related
106  #if clauses. The following local defines should not be changed.
107 */
108 
/* Bit flags naming the routine groups that can be built from the C sources.
   They are combined with '|' to form EFUNCS_IN_C, DFUNCS_IN_C and FUNCS_IN_C.
   These are local to this header and are not user options. */
#define ENCRYPTION_IN_C 0x01
#define DECRYPTION_IN_C 0x02
#define ENC_KEYING_IN_C 0x04
#define DEC_KEYING_IN_C 0x08

/* number of lookup tables selectable for a round function or key schedule */
#define NO_TABLES   0
#define ONE_TABLE   1
#define FOUR_TABLES 4

/* loop unrolling levels */
#define NONE    0
#define PARTIAL 1
#define FULL    2
120 
121 /* --- START OF USER CONFIGURED OPTIONS --- */
122 
123 /* 1. BYTE ORDER WITHIN 32 BIT WORDS
124 
125  The fundamental data processing units in Rijndael are 8-bit bytes. The
126  input, output and key input are all enumerated arrays of bytes in which
127  bytes are numbered starting at zero and increasing to one less than the
128  number of bytes in the array in question. This enumeration is only used
129  for naming bytes and does not imply any adjacency or order relationship
130  from one byte to another. When these inputs and outputs are considered
131  as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
132  byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
133  In this implementation bits are numbered from 0 to 7 starting at the
134  numerically least significant end of each byte (bit n represents 2^n).
135 
136  However, Rijndael can be implemented more efficiently using 32-bit
137  words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
138  into word[n]. While in principle these bytes can be assembled into words
139  in any positions, this implementation only supports the two formats in
140  which bytes in adjacent positions within words also have adjacent byte
141  numbers. This order is called big-endian if the lowest numbered bytes
142  in words have the highest numeric significance and little-endian if the
143  opposite applies.
144 
145  This code can work in either order irrespective of the order used by the
146  machine on which it runs. Normally the internal byte order will be set
147  to the order of the processor on which the code is to be run but this
148  define can be used to reverse this in special situations.
149 
150  WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
151  This define will hence be redefined later (in section 4) if necessary
152 */
153 
/* Select the byte order used inside the algorithm by setting exactly one of
   the branch conditions below to 1. */
#if 1    /* use the byte order of the platform */
#  define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
#elif 0  /* force little-endian */
#  define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0  /* force big-endian */
#  define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
#else
#  error The algorithm byte order is not defined
#endif
163 
164 /* 2. Intel AES AND VIA ACE SUPPORT */
165 
/* VIA ACE is considered possible for GCC builds targeting 32-bit x86, or for
   32-bit x86 Windows builds that are not Win64, not Windows CE and not made
   with a Microsoft compiler of version 800 or earlier. The grouping below is
   the one that '&&' / '||' precedence already implied; it is only spelled
   out explicitly here. */
#if ( defined( __GNUC__ ) && defined( __i386__ ) ) \
 || ( defined( _WIN32 ) && defined( _M_IX86 ) \
      && !( defined( _WIN64 ) || defined( _WIN32_WCE ) \
            || ( defined( _MSC_VER ) && ( _MSC_VER <= 800 ) ) ) )
#  define VIA_ACE_POSSIBLE
#endif
171 
172 /* Define this option if support for the Intel AESNI is required (not
173  currently available with GCC). If AESNI is known to be present, then
174  defining ASSUME_INTEL_AES_VIA_PRESENT will replace the ordinary
175  encryption/decryption. If USE_INTEL_AES_IF_PRESENT is defined then
176  AESNI will be used if it is detected (both present and enabled).
177 
178  AESNI uses a decryption key schedule with the first decryption
179  round key at the high end of the key schedule with the following
180  round keys at lower positions in memory. So AES_REV_DKS must NOT
181  be defined when AESNI will be used. Although it is unlikely that
182  assembler code will be used with an AESNI build, if it is then
183  AES_REV_DKS must NOT be defined when such assembler files are
184  built
185 */
/* AESNI switch: change the leading 0 to 1 to allow AESNI in builds made with
   the Microsoft compiler for Win64. */
#if 0 && defined( _WIN64 ) && defined( _MSC_VER )
#  define INTEL_AES_POSSIBLE
#endif

/* when AESNI is possible, ask for it to be used if present (unless the
   request has already been made) */
#ifdef INTEL_AES_POSSIBLE
#  ifndef USE_INTEL_AES_IF_PRESENT
#    define USE_INTEL_AES_IF_PRESENT
#  endif
#endif
193 
194 /* Define this option if support for the VIA ACE is required. This uses
195  inline assembler instructions and is only implemented for the Microsoft,
196  Intel and GCC compilers. If VIA ACE is known to be present, then defining
197  ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
198  code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
199  it is detected (both present and enabled) but the normal AES code will
200  also be present.
201 
202  When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
203  aligned; other input/output buffers do not need to be 16 byte aligned
204  but there are very large performance gains if this can be arranged.
205  VIA ACE also requires the decryption key schedule to be in reverse
206  order (which later checks below ensure).
207 
208  AES_REV_DKS must be set for assembler code used with a VIA ACE build
209 */
210 
/* set to 1 to use VIA ACE when it is detected, keeping the normal AES code */
#if 0
#  if defined( VIA_ACE_POSSIBLE ) && !defined( USE_VIA_ACE_IF_PRESENT )
#    define USE_VIA_ACE_IF_PRESENT
#  endif
#endif

/* set to 1 when VIA ACE is known to be present */
#if 0
#  if defined( VIA_ACE_POSSIBLE ) && !defined( ASSUME_VIA_ACE_PRESENT )
#    define ASSUME_VIA_ACE_PRESENT
#  endif
#endif
218 
219 /* 3. ASSEMBLER SUPPORT
220 
221  This define (which can be on the command line) enables the use of the
222  assembler code routines for encryption, decryption and key scheduling
223  as follows:
224 
225  ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
226  encryption and decryption but with key scheduling in C
227  ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for
228  encryption, decryption and key scheduling
229  ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
230  encryption and decryption but with key scheduling in C
231  ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
232  encryption and decryption but with key scheduling in C
233 
234  Change one 'if 0' below to 'if 1' to select the version or define
235  as a compilation option.
236 */
237 
/* Pick at most one assembler variant by changing one leading 0 to 1 (a
   variant may equally be defined on the compiler command line). */
#if 0 && !defined( ASM_X86_V1C )
#  define ASM_X86_V1C
#elif 0 && !defined( ASM_X86_V2 )
#  define ASM_X86_V2
#elif 0 && !defined( ASM_X86_V2C )
#  define ASM_X86_V2C
#elif 0 && !defined( ASM_AMD64_C )
#  define ASM_AMD64_C
#endif

/* The x86 variants need _M_IX86 and the AMD64 variant needs _M_X64; any
   other combination is rejected at compile time. */
#if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ) ) \
    && !defined( _M_IX86 )
#  error Assembler code is only available for x86 and AMD64 systems
#elif defined( ASM_AMD64_C ) && !defined( _M_X64 )
#  error Assembler code is only available for x86 and AMD64 systems
#endif
252 
253 /* 4. FAST INPUT/OUTPUT OPERATIONS.
254 
255  On some machines it is possible to improve speed by transferring the
256  bytes in the input and output arrays to and from the internal 32-bit
257  variables by addressing these arrays as if they are arrays of 32-bit
258  words. On some machines this will always be possible but there may
259  be a large performance penalty if the byte arrays are not aligned on
260  the normal word boundaries. On other machines this technique will
261  lead to memory access errors when such 32-bit word accesses are not
262  properly aligned. The option SAFE_IO avoids such problems but will
263  often be slower on those machines that support misaligned access
264  (especially so if care is taken to align the input and output byte
265  arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
266  assumed that access to byte arrays as if they are arrays of 32-bit
267  words will not cause problems when such accesses are misaligned.
268 */
/* set to 1 to request byte-wise (alignment-safe) input/output; the request
   is ignored for the Microsoft compiler */
#if 1
#  if !defined( _MSC_VER )
#    define SAFE_IO
#  endif
#endif
272 
273 /* 5. LOOP UNROLLING
274 
275  The code for encryption and decryption cycles through a number of rounds
276  that can be implemented either in a loop or by expanding the code into a
277  long sequence of instructions, the latter producing a larger program but
278  one that will often be much faster. The latter is called loop unrolling.
279  There are also potential speed advantages in expanding two iterations in
280  a loop with half the number of iterations, which is called partial loop
281  unrolling. The following options allow partial or full loop unrolling
282  to be set independently for encryption and decryption
283 */
/* unrolling of the encryption rounds: FULL, PARTIAL or NONE */
#if 1
#  define ENC_UNROLL FULL
#elif 0
#  define ENC_UNROLL PARTIAL
#else
#  define ENC_UNROLL NONE
#endif

/* unrolling of the decryption rounds: FULL, PARTIAL or NONE */
#if 1
#  define DEC_UNROLL FULL
#elif 0
#  define DEC_UNROLL PARTIAL
#else
#  define DEC_UNROLL NONE
#endif

/* NOTE(review): ENC_KS_UNROLL / DEC_KS_UNROLL are not used in this header;
   presumably they enable unrolling of the encryption / decryption key
   schedule code - confirm against the key scheduling source. */
#if 1
#  define ENC_KS_UNROLL
#endif

#if 1
#  define DEC_KS_UNROLL
#endif
307 
/* 6. FAST FINITE FIELD OPERATIONS

   When FF_TABLES is defined, tables are used to provide faster finite
   field arithmetic. This has no effect if FIXED_TABLES is defined.
*/
#if 1
#  define FF_TABLES
#endif

/* 7. INTERNAL STATE VARIABLE FORMAT

   The internal state of Rijndael is stored in a number of local 32-bit
   word variables, which can be defined either as an array or as
   individually named variables. Define ARRAYS to store these local
   variables in arrays; otherwise individual local variables are used.
*/
#if 1
#  define ARRAYS
#endif

/* 8. FIXED OR DYNAMIC TABLES

   When FIXED_TABLES is defined, the tables used by the code are compiled
   statically into the binary file. Otherwise the subroutine aes_init()
   must be called to compute them before the code is first used. Fixed
   tables are never selected for Microsoft compilers of version 800 or
   earlier.
*/
#if 1
#  if !( defined( _MSC_VER ) && ( _MSC_VER <= 800 ) )
#    define FIXED_TABLES
#  endif
#endif
337 
/* 9. MASKING OR CASTING FROM LONGER VALUES TO BYTES

   On some systems it is better to mask longer values to extract bytes
   than to use a cast. Set the condition to 1 for the cast form; the
   default (0) selects the masking form.
*/
#if 0
#  define to_byte(x) ((uint8_t)(x))
#else
#  define to_byte(x) ((x) & 0xff)
#endif

/* 10. TABLE ALIGNMENT

   On some systems speed is improved by aligning the large AES lookup
   tables on particular boundaries. TABLE_ALIGN should be a power of two
   giving the desired alignment, and can be left undefined if alignment
   is not needed. The option is specific to the Microsoft VC++ compiler
   and is only set here for compiler versions 1300 and later (it seems to
   sometimes cause trouble for the VC++ version 6 compiler).
*/
#if 1
#  if defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
#    define TABLE_ALIGN 32
#  endif
#endif

/* 11. REDUCE CODE AND TABLE SIZE

   Replaces some expanded macros with function calls. Only takes effect
   if ASM_X86_V2 or ASM_X86_V2C is defined.
*/
#if 0
#  if defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )
#    define REDUCE_CODE_SIZE
#  endif
#endif
371 
372 /* 12. TABLE OPTIONS
373 
374  This cipher proceeds by repeating in a number of cycles known as 'rounds'
375  which are implemented by a round function which can optionally be speeded
376  up using tables. The basic tables are each 256 32-bit words, with either
377  one or four tables being required for each round function depending on
378  how much speed is required. The encryption and decryption round functions
379  are different and the last encryption and decryption round functions are
380  different again making four different round functions in all.
381 
382  This means that:
383  1. Normal encryption and decryption rounds can each use either 0, 1
384  or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
385  2. The last encryption and decryption rounds can also use either 0, 1
386  or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
387 
388  Include or exclude the appropriate definitions below to set the number
389  of tables used by this implementation.
390 */
391 
/* tables for the normal encryption round */
#if 1
#  define ENC_ROUND FOUR_TABLES
#elif 0
#  define ENC_ROUND ONE_TABLE
#else
#  define ENC_ROUND NO_TABLES
#endif

/* tables for the last encryption round */
#if 1
#  define LAST_ENC_ROUND FOUR_TABLES
#elif 0
#  define LAST_ENC_ROUND ONE_TABLE
#else
#  define LAST_ENC_ROUND NO_TABLES
#endif

/* tables for the normal decryption round */
#if 1
#  define DEC_ROUND FOUR_TABLES
#elif 0
#  define DEC_ROUND ONE_TABLE
#else
#  define DEC_ROUND NO_TABLES
#endif

/* tables for the last decryption round */
#if 1
#  define LAST_DEC_ROUND FOUR_TABLES
#elif 0
#  define LAST_DEC_ROUND ONE_TABLE
#else
#  define LAST_DEC_ROUND NO_TABLES
#endif

/* The decryption key schedule can be sped up with tables in the same way
   as the round functions; choose the number of tables for it here. */
#if 1
#  define KEY_SCHED FOUR_TABLES
#elif 0
#  define KEY_SCHED ONE_TABLE
#else
#  define KEY_SCHED NO_TABLES
#endif
435 
436 /* ---- END OF USER CONFIGURED OPTIONS ---- */
437 
/* VIA ACE support is only available for VC++ and GCC, so withdraw any VIA
   ACE request made for another compiler. (#undef of a name that is not
   defined is harmless, so no defined() guards are needed.) */
#if !defined( _MSC_VER ) && !defined( __GNUC__ )
#  undef ASSUME_VIA_ACE_PRESENT
#  undef USE_VIA_ACE_IF_PRESENT
#endif

/* assuming that VIA ACE is present implies using it */
#if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT )
#  define USE_VIA_ACE_IF_PRESENT
#endif

/* Define to reverse the decryption key schedule. The leading 1 requests
   this unconditionally; VIA ACE requires it as well. The parentheses make
   the !defined() test guard the whole condition, so a definition supplied
   earlier (e.g. on the command line) is kept rather than redefined. */
#if ( 1 || defined( USE_VIA_ACE_IF_PRESENT ) ) && !defined( AES_REV_DKS )
#  define AES_REV_DKS
#endif

/* Intel AESNI uses a decryption key schedule in the encryption order */
#if defined( USE_INTEL_AES_IF_PRESENT ) && defined( AES_REV_DKS )
#  undef AES_REV_DKS
#endif

/* Assembler support requires the use of platform byte order */
#if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) \
    && ( ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER )
#  undef ALGORITHM_BYTE_ORDER
#  define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
#endif
470 
471 /* In this implementation the columns of the state array are each held in
472  32-bit words. The state array can be held in various ways: in an array
473  of words, in a number of individual word variables or in a number of
474  processor registers. The following define maps a variable name x and
475  a column number c to the way the state array variable is to be held.
476  The first define below maps the state into an array x[c] whereas the
477  second form maps the state into a number of individual variables x0,
478  x1, etc. Another form could map individual state columns to machine
479  register names.
480 */
481 
/* s(x,c): names column c of state variable x - element x[c] when ARRAYS is
   defined, otherwise the separate variable formed by pasting c onto x */
#ifdef ARRAYS
#  define s(x,c) x[c]
#else
#  define s(x,c) x##c
#endif
487 
488 /* This implementation provides subroutines for encryption, decryption
489  and for setting the three key lengths (separately) for encryption
490  and decryption. Since not all functions are needed, masks are set
491  up here to determine which will be implemented in C
492 */
493 
/* Encryption side: nothing is built in C without AES_ENCRYPT. When VIA ACE
   is assumed present, or an assembler variant with C key scheduling is
   selected, only the key schedule is built in C. ASM_X86_V2 needs no C
   encryption code at all. Otherwise both the cipher and the key schedule
   are built in C. */
#ifndef AES_ENCRYPT
#  define EFUNCS_IN_C 0
#else
#  if defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
      || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
#    define EFUNCS_IN_C ENC_KEYING_IN_C
#  elif defined( ASM_X86_V2 )
#    define EFUNCS_IN_C 0
#  else
#    define EFUNCS_IN_C ( ENCRYPTION_IN_C | ENC_KEYING_IN_C )
#  endif
#endif

/* Decryption side: same rules as above, driven by AES_DECRYPT. */
#ifndef AES_DECRYPT
#  define DFUNCS_IN_C 0
#else
#  if defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
      || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
#    define DFUNCS_IN_C DEC_KEYING_IN_C
#  elif defined( ASM_X86_V2 )
#    define DFUNCS_IN_C 0
#  else
#    define DFUNCS_IN_C ( DECRYPTION_IN_C | DEC_KEYING_IN_C )
#  endif
#endif

/* everything that is implemented in C */
#define FUNCS_IN_C ( EFUNCS_IN_C | DFUNCS_IN_C )

/* END OF CONFIGURATION OPTIONS */

#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))
521 
522 /* Disable or report errors on some combinations of options */
523 
/* the last encryption round may not use more tables than the normal
   encryption round: cap it at the normal round's setting */
#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
#  undef  LAST_ENC_ROUND
#  define LAST_ENC_ROUND NO_TABLES
#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
#  undef  LAST_ENC_ROUND
#  define LAST_ENC_ROUND ONE_TABLE
#endif

/* encryption unrolling is switched off when no encryption tables are used */
#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
#  undef  ENC_UNROLL
#  define ENC_UNROLL NONE
#endif

/* the same cap for the last decryption round */
#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
#  undef  LAST_DEC_ROUND
#  define LAST_DEC_ROUND NO_TABLES
#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
#  undef  LAST_DEC_ROUND
#  define LAST_DEC_ROUND ONE_TABLE
#endif

/* decryption unrolling is switched off when no decryption tables are used */
#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
#  undef  DEC_UNROLL
#  define DEC_UNROLL NONE
#endif
549 
/* aes_sw32(x): reverse the order of the four bytes in a 32-bit word. A
   platform byte-swap macro (bswap32 or bswap_32) is used when one is
   defined; otherwise the swap is built from two rotations.

   brot(x,n): rotate the 32-bit value x left by n bits. n must be in the
   range 1..31 (0 or 32 would give a shift by the full word width, which
   is undefined). n is fully parenthesised so that an expression argument
   such as brot(x, a + b) rotates by a + b. */
#if defined( bswap32 )
#  define aes_sw32 bswap32
#elif defined( bswap_32 )
#  define aes_sw32 bswap_32
#else
#  define brot(x,n) (((uint32_t)(x) << (n)) | ((uint32_t)(x) >> (32 - (n))))
#  define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
#endif
558 
559 /* upr(x,n): rotates bytes within words by n positions, moving bytes to
560  higher index positions with wrap around into low positions
561  ups(x,n): moves bytes by n positions to higher index positions in
562  words but without wrap around
563  bval(x,n): extracts a byte from a word
564 
565  WARNING: The definitions given here are intended only for use with
566  unsigned variables and with shift counts that are compile
567  time constants
568 */
569 
/* little-endian internal order: byte n of a word occupies bits 8n .. 8n+7,
   so moving a byte to a higher index is a left shift */
#if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN )
#  define bytes2word(b0, b1, b2, b3) \
     (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
#  define bval(x,n) to_byte((x) >> (8 * (n)))
#  define ups(x,n)  ((uint32_t) (x) << (8 * (n)))
#  define upr(x,n)  (((uint32_t)(x) << (8 * (n))) | ((uint32_t)(x) >> (32 - 8 * (n))))
#endif

/* big-endian internal order: byte 0 is the most significant byte of a
   word, so moving a byte to a higher index is a right shift */
#if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN )
#  define bytes2word(b0, b1, b2, b3) \
     (((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | ((uint32_t)(b2) << 8) | (b3))
#  define bval(x,n) to_byte((x) >> (24 - 8 * (n)))
#  define ups(x,n)  ((uint32_t) (x) >> (8 * (n)))
#  define upr(x,n)  (((uint32_t)(x) >> (8 * (n))) | ((uint32_t)(x) << (32 - 8 * (n))))
#endif
585 
/* word_in(x,c):    read 32-bit word number c from the byte array x
   word_out(x,c,v): write the value v as 32-bit word number c of x

   With SAFE_IO the transfer is done one byte at a time, so x needs no
   particular alignment. Otherwise x is accessed as an array of uint32_t,
   with a byte swap when the algorithm and platform byte orders differ.

   The word index c is parenthesised in every variant so that an
   expression argument (e.g. word_in(p, i + 1)) selects the intended word.
   Note that the SAFE_IO form of word_out is a brace-enclosed statement,
   while the other forms are expressions. */
#if defined( SAFE_IO )
#  define word_in(x,c) bytes2word( \
     ((const uint8_t*)(x) + 4 * (c))[0], ((const uint8_t*)(x) + 4 * (c))[1], \
     ((const uint8_t*)(x) + 4 * (c))[2], ((const uint8_t*)(x) + 4 * (c))[3])
#  define word_out(x,c,v) { \
     ((uint8_t*)(x) + 4 * (c))[0] = bval(v,0); ((uint8_t*)(x) + 4 * (c))[1] = bval(v,1); \
     ((uint8_t*)(x) + 4 * (c))[2] = bval(v,2); ((uint8_t*)(x) + 4 * (c))[3] = bval(v,3); }
#elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER )
#  define word_in(x,c) (*((uint32_t*)(x)+(c)))
#  define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v))
#else
#  define word_in(x,c) aes_sw32(*((uint32_t*)(x)+(c)))
#  define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = aes_sw32(v))
#endif
598 
599 /* the finite field modular polynomial and elements */
600 
/* the field polynomial x^8 + x^4 + x^3 + x + 1 as a 9-bit value (WPOLY)
   and with the x^8 term dropped (BPOLY) */
#define WPOLY 0x011b
#define BPOLY 0x1b

/* masks selecting the top bit (gf_c1) and the low seven bits (gf_c2) of
   each of the four bytes in a 32-bit word */
#define gf_c1 0x80808080
#define gf_c2 0x7f7f7f7f

/* gf_mulx(x): multiply four bytes in GF(2^8) by 'x' {02} in parallel. The
   top bit of each byte is moved to bit 0 of that byte and multiplied by
   BPOLY to give the reduction term, which is then XORed with the low
   seven bits of each byte shifted left by one place. */
#define gf_mulx(x) (((((x) & gf_c1) >> 7) * BPOLY) ^ (((x) & gf_c2) << 1))
609 
610 /* The following defines provide alternative definitions of gf_mulx that might
611  give improved performance if a fast 32-bit multiply is not available. Note
612  that a temporary variable u needs to be defined where gf_mulx is used.
613 
614 #define gf_mulx(x) (u = (x) & gf_c1, u |= (u >> 1), ((x) & gf_c2) << 1) ^ ((u >> 3) | (u >> 6))
615 #define gf_c4 (0x01010101 * BPOLY)
616 #define gf_mulx(x) (u = (x) & gf_c1, ((x) & gf_c2) << 1) ^ ((u - (u >> 7)) & gf_c4)
617 */
618 
619 /* Work out which tables are needed for the different options */
620 
/* Work out which tables are needed for the different options.

   ASM_X86_V1C (assembler with large tables) forces every round function
   and the key schedule to use four tables, whatever was configured above.
   #undef of an undefined name is harmless, so each setting is simply
   undefined and then redefined; KEY_SCHED is now forced in the same
   unconditional way as the four round settings. */
#if defined( ASM_X86_V1C )
#  undef  ENC_ROUND
#  define ENC_ROUND      FOUR_TABLES
#  undef  LAST_ENC_ROUND
#  define LAST_ENC_ROUND FOUR_TABLES
#  undef  DEC_ROUND
#  define DEC_ROUND      FOUR_TABLES
#  undef  LAST_DEC_ROUND
#  define LAST_DEC_ROUND FOUR_TABLES
#  undef  KEY_SCHED
#  define KEY_SCHED      FOUR_TABLES
#endif
643 
/* Tables for encryption, needed when encryption is built in C or the
   ASM_X86_V1C assembler is used: FT* for the normal round, FL* for the
   last round, and the plain S-box (SBX) where no table is selected. */
#if defined( ASM_X86_V1C ) || ( FUNCS_IN_C & ENCRYPTION_IN_C )
#  if ENC_ROUND == ONE_TABLE
#    define FT1_SET
#  elif ENC_ROUND == FOUR_TABLES
#    define FT4_SET
#  else
#    define SBX_SET
#  endif
#  if LAST_ENC_ROUND == ONE_TABLE
#    define FL1_SET
#  elif LAST_ENC_ROUND == FOUR_TABLES
#    define FL4_SET
#  elif !defined( SBX_SET )
#    define SBX_SET
#  endif
#endif

/* Tables for decryption, selected in the same way: IT* for the normal
   round, IL* for the last round, and the inverse S-box (ISB) otherwise. */
#if defined( ASM_X86_V1C ) || ( FUNCS_IN_C & DECRYPTION_IN_C )
#  if DEC_ROUND == ONE_TABLE
#    define IT1_SET
#  elif DEC_ROUND == FOUR_TABLES
#    define IT4_SET
#  else
#    define ISB_SET
#  endif
#  if LAST_DEC_ROUND == ONE_TABLE
#    define IL1_SET
#  elif LAST_DEC_ROUND == FOUR_TABLES
#    define IL4_SET
#  elif !defined( ISB_SET )
#    define ISB_SET
#  endif
#endif

/* Tables for key scheduling in C (skipped when REDUCE_CODE_SIZE is used
   with ASM_X86_V2 or ASM_X86_V2C). An LS* table is only requested when
   no suitable FL* table has already been selected above. */
#if !defined( REDUCE_CODE_SIZE ) || ( !defined( ASM_X86_V2 ) && !defined( ASM_X86_V2C ) )
#  if ( FUNCS_IN_C & ( ENC_KEYING_IN_C | DEC_KEYING_IN_C ) )
#    if KEY_SCHED == ONE_TABLE
#      if !defined( FL1_SET ) && !defined( FL4_SET )
#        define LS1_SET
#      endif
#    elif KEY_SCHED == FOUR_TABLES
#      if !defined( FL4_SET )
#        define LS4_SET
#      endif
#    elif !defined( SBX_SET )
#      define SBX_SET
#    endif
#  endif
   /* the decryption key schedule also needs the IM* tables */
#  if ( FUNCS_IN_C & DEC_KEYING_IN_C )
#    if KEY_SCHED == ONE_TABLE
#      define IM1_SET
#    elif KEY_SCHED == FOUR_TABLES
#      define IM4_SET
#    elif !defined( SBX_SET )
#      define SBX_SET
#    endif
#  endif
#endif
702 
703 /* generic definitions of Rijndael macros that use tables */
704 
/* Generic definitions of the Rijndael macros that use tables.

   In each macro x is the input, vf(x,r,c) selects the word holding the
   byte for row r of output column c, and rf(r,c) gives the position of
   that byte within the selected word.

   no_table:    substitute each of the four bytes through 'box' and pack
                the results with bytes2word
   one_table:   look all four bytes up in the single table 'tab', applying
                op(value, r) to the entries for rows 1..3 before XORing
   four_tables: look byte r up in tab[r] and XOR the four entries */

#define no_table(x,box,vf,rf,c) bytes2word( \
    box[bval(vf(x,0,c),rf(0,c))], \
    box[bval(vf(x,1,c),rf(1,c))], \
    box[bval(vf(x,2,c),rf(2,c))], \
    box[bval(vf(x,3,c),rf(3,c))])

#define one_table(x,op,tab,vf,rf,c) \
    ( tab[bval(vf(x,0,c),rf(0,c))] \
    ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
    ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
    ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))

#define four_tables(x,tab,vf,rf,c) \
    ( tab[0][bval(vf(x,0,c),rf(0,c))] \
    ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
    ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
    ^ tab[3][bval(vf(x,3,c),rf(3,c))])

/* vf1: the input word itself is used for every row and column
   rf1: byte position equals the row number
   rf2: byte position (r - c) mod 4; the added 8 keeps the value
        non-negative before masking. r and c are parenthesised so that
        expression arguments are combined as written. */
#define vf1(x,r,c) (x)
#define rf1(r,c) (r)
#define rf2(r,c) ((8 + (r) - (c)) & 3)
726 
727 /* perform forward and inverse column mix operation on four bytes in long word x in */
728 /* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */
729 
/* The macros in this group are not provided when REDUCE_CODE_SIZE is used
   together with ASM_X86_V2 or ASM_X86_V2C. */
#if !defined( REDUCE_CODE_SIZE ) || ( !defined( ASM_X86_V2 ) && !defined( ASM_X86_V2C ) )

/* fwd_mcol(x): forward column mix. Without tables it needs the temporary
   declared by dec_fmvars to be in scope. */
#if defined( FM4_SET ) /* not currently used */
#  define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0)
#elif defined( FM1_SET ) /* not currently used */
#  define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0)
#else
#  define dec_fmvars uint32_t g2
#  define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
#endif

/* inv_mcol(x): inverse column mix. Without tables it needs the temporaries
   declared by dec_imvars to be in scope. */
#if defined( IM4_SET )
#  define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0)
#elif defined( IM1_SET )
#  define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0)
#else
#  define dec_imvars uint32_t g2, g4, g9
#  define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
     (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
#endif

/* ls_box(x,c): uses the last-round encryption tables (FL*) when they are
   selected, otherwise the LS* tables, otherwise the plain S-box */
#if defined( FL4_SET )
#  define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c)
#elif defined( LS4_SET )
#  define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c)
#elif defined( FL1_SET )
#  define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c)
#elif defined( LS1_SET )
#  define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c)
#else
#  define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c)
#endif

#endif
764 
/* with the ASM_X86_V1C assembler, decryption always requires ISB_SET */
#if defined( ASM_X86_V1C ) && defined( AES_DECRYPT )
#  ifndef ISB_SET
#    define ISB_SET
#  endif
#endif
768 
769 #endif