
/* This is an independent implementation of the RIJNDAEL algorithm  */
/* designed by Joan Daemen and Vincent Rijmen and submitted as a    */
/* candidate for the NIST AES activity.                             */
/*                                                                  */
/* Copyright in this implementation is held by Dr B R Gladman but   */
/* I hereby give permission for its free direct or derivative use   */
/* subject to acknowledgment of its origin and compliance with any  */
/* conditions that the algorithm originators place on its use.      */
/*                                                                  */
/* Dr Brian Gladman (gladman@seven77.demon.co.uk) 18th July 1998    */
/* Timing data:

Algorithm: rijndael (rijndael3.c)
128 bit key:
Key Setup:    2063 cycles
Encrypt:       442 cycles =    57.9 mbits/sec
Decrypt:       445 cycles =    57.6 mbits/sec
Mean:          443 cycles =    57.7 mbits/sec
192 bit key:
Key Setup:    2433 cycles
Encrypt:       517 cycles =    49.6 mbits/sec
Decrypt:       506 cycles =    50.6 mbits/sec
Mean:          511 cycles =    50.1 mbits/sec
256 bit key:
Key Setup:    2925 cycles
Encrypt:       588 cycles =    43.5 mbits/sec
Decrypt:       568 cycles =    45.1 mbits/sec
Mean:          578 cycles =    44.3 mbits/sec

*/

#include "../std_defs.h"

/* algorithm name followed by the name of this implementation file */

static char *alg_name[] = { "rijndael", "rijndael3.c" };

/* Return the two entry name table above: element 0 is the  */
/* algorithm name and element 1 the implementation name.    */
/* The table is static storage and must not be freed.       */

char **cipher_name(void)
{
    return alg_name;
}

/* lookup tables, all filled in by gen_tabs()               */

u1byte  pow_tab[256];       /* powers of the field generator        */
u1byte  log_tab[256];       /* inverse of pow_tab (log_tab[0] is    */
                            /* never looked up by ff_mult)          */
u1byte  sbx_tab[256];       /* the S-box                            */
u1byte  isb_tab[256];       /* the inverse S-box                    */
u4byte  rco_tab[ 10];       /* key schedule round constants         */
u4byte  ft_tab[4][256];     /* forward round tables; tables 1..3    */
                            /* are table 0 rotated by 8, 16, 24     */
u4byte  it_tab[4][256];     /* inverse round tables, same layout    */
u4byte  tab_gen = 0;        /* set to 1 once gen_tabs() has run     */

/* key state, filled in by set_key()                        */

u4byte  k_len;              /* key length in 32-bit words           */

/* The schedules hold 64 words: a 256-bit key is expanded   */
/* in groups of 8 words and the last group ends at word 63, */
/* although the cipher only reads words 0..59. With only 60 */
/* words the tail of that last group would be written past  */
/* the end of the arrays.                                   */

u4byte  e_key[64];          /* encryption key schedule              */
u4byte  d_key[64];          /* decryption key schedule              */

/* multiply a and b in GF(2**8) using the log and power     */
/* tables; the result is 0 when either operand is 0, since  */
/* zero has no logarithm. Both arguments are evaluated more */
/* than once, so they must be free of side effects.         */

#define ff_mult(a,b)    ((a) && (b) ? pow_tab[(log_tab[(a)] + log_tab[(b)]) % 255] : 0)

/* apply the S-box to each of the four bytes of the 32-bit  */
/* word x, keeping every byte in its own position. The      */
/* expansion is fully parenthesised so that it can be used  */
/* next to any operator; x is evaluated four times.         */

#define ls_box(x)                                   \
   (((u4byte)sbx_tab[ (x) & 255] <<  0) ^           \
    ((u4byte)sbx_tab[((x) >>  8) & 255] <<  8) ^    \
    ((u4byte)sbx_tab[((x) >> 16) & 255] << 16) ^    \
    ((u4byte)sbx_tab[((x) >> 24) & 255] << 24))

/* one column of a normal forward round: bo[i] becomes the  */
/* XOR of four ft_tab lookups, taking byte 0 from bi[i],    */
/* byte 1 from bi[i + 1], byte 2 from bi[i + 2] and byte 3  */
/* from bi[i + 3], with the column index reduced mod 4.     */
/* The round key is not added here. byte() is supplied by   */
/* std_defs.h. bo and bi must be different arrays.          */

#define f_rn(bo, bi, i)                                 \
    (bo)[(i)] =  ft_tab[0][byte((bi)[(i)],0)] ^             \
                 ft_tab[1][byte((bi)[((i) + 1) & 3],1)] ^   \
                 ft_tab[2][byte((bi)[((i) + 2) & 3],2)] ^   \
                 ft_tab[3][byte((bi)[((i) + 3) & 3],3)]

/* one column of the last forward round: the same byte      */
/* selection as f_rn, but each byte goes through sbx_tab    */
/* only and is moved to its position with rotl (8, 16, 24). */
/* The round key is not added here. byte() and rotl() are   */
/* supplied by std_defs.h.                                  */

#define f_rl(bo, bi, i)                                             \
    (bo)[(i)] = (u4byte)sbx_tab[byte((bi)[(i)],0)] ^                \
        rotl(((u4byte)sbx_tab[byte((bi)[((i) + 1) & 3],1)]),  8) ^  \
        rotl(((u4byte)sbx_tab[byte((bi)[((i) + 2) & 3],2)]), 16) ^  \
        rotl(((u4byte)sbx_tab[byte((bi)[((i) + 3) & 3],3)]), 24)

/* one column of a normal inverse round: bo[i] becomes the  */
/* XOR of four it_tab lookups, taking byte 0 from bi[i],    */
/* byte 1 from bi[i + 3], byte 2 from bi[i + 2] and byte 3  */
/* from bi[i + 1], with the column index reduced mod 4.     */
/* The round key is not added here. byte() is supplied by   */
/* std_defs.h. bo and bi must be different arrays.          */

#define i_rn(bo, bi, i)                                 \
    (bo)[(i)] =  it_tab[0][byte((bi)[(i)],0)] ^             \
                 it_tab[1][byte((bi)[((i) + 3) & 3],1)] ^   \
                 it_tab[2][byte((bi)[((i) + 2) & 3],2)] ^   \
                 it_tab[3][byte((bi)[((i) + 1) & 3],3)]

/* one column of the last inverse round: the same byte      */
/* selection as i_rn, but each byte goes through isb_tab    */
/* only and is moved to its position with rotl (8, 16, 24). */
/* The round key is not added here. byte() and rotl() are   */
/* supplied by std_defs.h.                                  */

#define i_rl(bo, bi, i)                                             \
    (bo)[(i)] = (u4byte)isb_tab[byte((bi)[(i)],0)] ^                \
        rotl(((u4byte)isb_tab[byte((bi)[((i) + 3) & 3],1)]),  8) ^  \
        rotl(((u4byte)isb_tab[byte((bi)[((i) + 2) & 3],2)]), 16) ^  \
        rotl(((u4byte)isb_tab[byte((bi)[((i) + 1) & 3],3)]), 24)

void gen_tabs(void)
{   u4byte  i, t;
    u1byte  p, q;

    /* log and power tables for GF(2**8) finite field with  */
    /* 0x11b as modular polynomial - the simplest prmitive  */
    /* root is 0x11, used here to generate the tables       */

    for(i = 0,p = 1; i < 256; ++i)
    {
        pow_tab[i] = p; log_tab[p] = i;

        p = p ^ (p << 1) ^ (p & 0x80 ? 0x01b : 0);
    }

    log_tab[1] = 0; p = 1;

    for(i = 0; i < 10; ++i)
    {
        rco_tab[i] = p; 

        p = (p << 1) ^ (p & 0x80 ? 0x1b : 0);
    }

    /* note that the affine byte transformation matrix in   */
    /* rijndael specification is in big endian format with  */
    /* bit 0 as the most significant bit. In the remainder  */
    /* of the specification the bits are numbered from the  */
    /* least significant end of a byte.                     */

    for(i = 0; i < 256; ++i)
    {   
        p = (i ? pow_tab[255 - log_tab[i]] : 0); q = p; 
        q = (q >> 7) | (q << 1); p ^= q; 
        q = (q >> 7) | (q << 1); p ^= q; 
        q = (q >> 7) | (q << 1); p ^= q; 
        q = (q >> 7) | (q << 1); p ^= q ^ 0x63; 
        sbx_tab[i] = p; isb_tab[p] = i;
    }

    for(i = 0; i < 256; ++i)
    {
        p = sbx_tab[i];

        t = ((u4byte)ff_mult(2, p)) |
            ((u4byte)p <<  8) |
            ((u4byte)p << 16) |
            ((u4byte)ff_mult(3, p) << 24);
        
        ft_tab[0][i] = t;
        ft_tab[1][i] = rotl(t,  8);
        ft_tab[2][i] = rotl(t, 16);
        ft_tab[3][i] = rotl(t, 24);

        p = isb_tab[i];

        t = ((u4byte)ff_mult(14, p)) |
            ((u4byte)ff_mult( 9, p) <<  8) |
            ((u4byte)ff_mult(13, p) << 16) |
            ((u4byte)ff_mult(11, p) << 24);
        
        it_tab[0][i] = t; 
        it_tab[1][i] = rotl(t,  8); 
        it_tab[2][i] = rotl(t, 16); 
        it_tab[3][i] = rotl(t, 24); 
    }

    tab_gen = 1;
};

/* the top bit of each of the four bytes in a 32-bit word   */

#define mask        0x80808080

/* r = x with each of its four bytes doubled in GF(2**8):   */
/* the top bits are stripped before the shift and every     */
/* byte that had its top bit set has 0x1b XORed in. Uses a  */
/* u4byte variable t that must be declared where the macro  */
/* is expanded. Wrapped in do/while so that it behaves as a */
/* single statement.                                        */

#define star_x(r,x)                                         \
    do { t = (x) & mask;                                    \
         (r) = (((x) ^ t) << 1) ^ ((t >> 7) * 0x1b);        \
    } while(0)

/* replace the word x by its inverse column mix. With u = x */
/* and v, w, z its 2, 4 and 8 multiples, the terms below    */
/* are the 14, 11, 13 and 9 multiples of x. Uses u4byte     */
/* variables t, u, v, w and z that must be declared where   */
/* the macro is expanded; x is read once and written once.  */
/* rotr() is supplied by std_defs.h.                        */

#define imix_col(x)                             \
    do {                                        \
        u   = (x); star_x(v, u);                \
              star_x(w, v); star_x(z, w);       \
       (x)  = z ^ w ^ v ^ rotr(z ^ v ^ u, 8)    \
            ^ rotr(z ^ w ^ u, 16)               \
            ^ rotr(z ^ u, 24);                  \
    } while(0)

/* initialise the key schedule from the user supplied key   */
/*                                                          */
/* in_key   the user key; it is not referenced directly     */
/*          here and is presumably read by the get_key()    */
/*          macro from std_defs.h - confirm against that    */
/*          header                                          */
/* key_len  the key length in bits; it is rounded up to a   */
/*          whole number of 32-bit words (k_len). No check  */
/*          is made: k_len must be from 4 to 8 (128 to 256  */
/*          bit keys), since lk holds 8 words, rco_tab      */
/*          holds 10 constants and k_len is used as a       */
/*          divisor                                         */
/*                                                          */
/* Builds the lookup tables on first use, then fills e_key  */
/* (encryption schedule) and d_key (decryption schedule)    */
/* and sets k_len. Returns a pointer to e_key.              */

u4byte *set_key(const u4byte in_key[], const u4byte key_len)
{   u4byte  i, j, lk[8], t, u, v, w, z, n_words;

    if(!tab_gen)

        gen_tabs();

    get_key(lk, key_len); k_len = (key_len + 31) / 32;

    /* the cipher reads schedule words 0 .. 4 * k_len + 27  */

    n_words = 4 * k_len + 28;

    for(i = 0; i < k_len; ++i)
    {
        e_key[i] = lk[i];
        d_key[i] = lk[i];
    }

    /* lk always holds the most recent k_len schedule words */
    /* and is updated in place to give the next k_len words */

    for(i = 1; i < 5 + 27 / k_len; ++i)
    {
        /* first word: rotate and substitute the last word  */
        /* of the previous group and add the round constant */

        t = rotr(lk[k_len - 1],  8);

        lk[0] ^= ls_box(t) ^ rco_tab[i - 1];

        for(j = 1; j < k_len / 2; j++)

            lk[j] ^= lk[j - 1];

        /* middle word: 8 word keys substitute the previous */
        /* word here, other sizes use it unchanged          */

        t = lk[k_len / 2 - 1];

        lk[j] ^= (k_len == 8 ? ls_box(t) : lk[j - 1]);

        for(++j; j < k_len; ++j)

            lk[j] ^= lk[j - 1];

        /* store the group, but never beyond the last word  */
        /* the cipher uses: groups are k_len words long and */
        /* the final one can run past that point (for an 8  */
        /* word key it would reach word 63, outside a 60    */
        /* word schedule)                                   */

        for(j = 0; j < k_len && i * k_len + j < n_words; ++j)
        {
            e_key[i * k_len + j] = lk[j];
            d_key[i * k_len + j] = lk[j];
        }
    }

    /* apply the inverse column mix to every round key of   */
    /* d_key except the first and the last, which decrypt() */
    /* combines with it_tab based rounds                    */

    for(i = 1; i < k_len + 6; ++i)
    {
        imix_col(d_key[4 * i]);
        imix_col(d_key[4 * i + 1]);
        imix_col(d_key[4 * i + 2]);
        imix_col(d_key[4 * i + 3]);
    }

    return e_key;
}

/* encrypt a block of text                                  */
/*                                                          */
/* in_blk   the four 32-bit words of the input block        */
/* out_blk  receives the four words of the result           */
/*                                                          */
/* Uses the schedule in e_key and the key size in k_len as  */
/* left by set_key(). The input is copied into local state  */
/* before out_blk is written.                               */

void encrypt(const u4byte in_blk[4], u4byte out_blk[4])
{   u4byte  i, b0[4], b1[4];
    const u4byte *rk = e_key;   /* current round key        */

    /* initial key addition                                 */

    b0[0] = in_blk[0] ^ rk[0];
    b0[1] = in_blk[1] ^ rk[1];
    b0[2] = in_blk[2] ^ rk[2];
    b0[3] = in_blk[3] ^ rk[3];

    /* k_len + 5 normal rounds, each followed by the        */
    /* addition of the next four schedule words             */

    for(i = 1; i < k_len + 6; ++i)
    {
        rk += 4;

        f_rn(b1, b0, 0); f_rn(b1, b0, 1);
        f_rn(b1, b0, 2); f_rn(b1, b0, 3);

        b0[0] = b1[0] ^ rk[0];
        b0[1] = b1[1] ^ rk[1];
        b0[2] = b1[2] ^ rk[2];
        b0[3] = b1[3] ^ rk[3];
    }

    /* last round: rk now moves on to e_key[4 * k_len + 24] */

    rk += 4;

    f_rl(b1, b0, 0); f_rl(b1, b0, 1);
    f_rl(b1, b0, 2); f_rl(b1, b0, 3);

    out_blk[0] = b1[0] ^ rk[0];
    out_blk[1] = b1[1] ^ rk[1];
    out_blk[2] = b1[2] ^ rk[2];
    out_blk[3] = b1[3] ^ rk[3];
}

/* decrypt a block of text                                  */
/*                                                          */
/* in_blk   the four 32-bit words of the input block        */
/* out_blk  receives the four words of the result           */
/*                                                          */
/* Uses the schedules in e_key and d_key and the key size   */
/* in k_len as left by set_key(). The input is copied into  */
/* local state before out_blk is written.                   */

void decrypt(const u4byte in_blk[4], u4byte out_blk[4])
{   u4byte  i, b0[4], b1[4];
    const u4byte *rk = e_key + 4 * k_len + 24;  /* last round key */

    /* remove the last round key, taken from e_key          */

    b0[0] = in_blk[0] ^ rk[0];
    b0[1] = in_blk[1] ^ rk[1];
    b0[2] = in_blk[2] ^ rk[2];
    b0[3] = in_blk[3] ^ rk[3];

    /* k_len + 5 normal inverse rounds, walking the d_key   */
    /* round keys from the highest index down to index 1    */

    for(i = k_len + 5; i > 0; --i)
    {
        rk = d_key + 4 * i;

        i_rn(b1, b0, 0); i_rn(b1, b0, 1);
        i_rn(b1, b0, 2); i_rn(b1, b0, 3);

        b0[0] = b1[0] ^ rk[0];
        b0[1] = b1[1] ^ rk[1];
        b0[2] = b1[2] ^ rk[2];
        b0[3] = b1[3] ^ rk[3];
    }

    /* last inverse round, then the first four d_key words  */

    i_rl(b1, b0, 0); i_rl(b1, b0, 1);
    i_rl(b1, b0, 2); i_rl(b1, b0, 3);

    out_blk[0] = b1[0] ^ d_key[0];
    out_blk[1] = b1[1] ^ d_key[1];
    out_blk[2] = b1[2] ^ d_key[2];
    out_blk[3] = b1[3] ^ d_key[3];
}
