So I have been attempting to implement AES-256 in C. After lots of reading around and following the wikipedia page to implement it, I managed to code it. Only when I tested it, it seemed to give the wrong output.
Code:
/*
AES-256
(c) 2017 Daniel Gee
*/
#include <stdio.h>
#include <stdlib.h>
unsigned char rcon[256] = {
0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a,
0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39,
0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a,
0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8,
0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef,
0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc,
0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b,
0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3,
0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94,
0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20,
0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35,
0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f,
0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04,
0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63,
0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd,
0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d
};
unsigned char sbox[256] = {
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
};
void rotate(unsigned char *w){
unsigned char t;
t = w[0];
w[0] = w[1];
w[1] = w[2];
w[2] = w[3];
w[3] = t;
}
void key_schedule_core(unsigned char *w, unsigned char i){
unsigned char j;
rotate(w);
for(j = 0; j < 4; j++){
w[j] = sbox[w[j]];
}
w[0] ^= rcon[i];
}
unsigned char *key_schedule(unsigned char *key){
unsigned char n = 32, b = 240, *e = malloc(sizeof(unsigned char) * b), i = 1, j, k, t[4];
for(k = 0; k < n; k++){
e[k] = key[k];
}
j = 32;
while(j < b){
t[0] = e[j - 4];
t[1] = e[j - 3];
t[2] = e[j - 2];
t[3] = e[j - 1];
key_schedule_core(t, i);
i++;
t[0] ^= e[j - n];
t[1] ^= e[j - n + 1];
t[2] ^= e[j - n + 2];
t[3] ^= e[j - n + 3];
e[j] = t[0];
e[j + 1] = t[1];
e[j + 2] = t[2];
e[j + 3] = t[3];
j += 4;
for(k = 0; k < 3; k++){
t[0] = e[j - 4];
t[1] = e[j - 3];
t[2] = e[j - 2];
t[3] = e[j - 1];
t[0] ^= e[j - n];
t[1] ^= e[j - n + 1];
t[2] ^= e[j - n + 2];
t[3] ^= e[j - n + 3];
e[j] = t[0];
e[j + 1] = t[1];
e[j + 2] = t[2];
e[j + 3] = t[3];
j += 4;
}
t[0] = e[j - 4];
t[1] = e[j - 3];
t[2] = e[j - 2];
t[3] = e[j - 1];
t[0] = sbox[t[0]];
t[1] = sbox[t[1]];
t[2] = sbox[t[2]];
t[3] = sbox[t[3]];
t[0] ^= e[j - n];
t[1] ^= e[j - n + 1];
t[2] ^= e[j - n + 2];
t[3] ^= e[j - n + 3];
e[j] = t[0];
e[j + 1] = t[1];
e[j + 2] = t[2];
e[j + 3] = t[3];
j += 4;
if(j > b){
break;
}
for(k = 0; k < 3; k++){
t[0] = e[j - 4];
t[1] = e[j - 3];
t[2] = e[j - 2];
t[3] = e[j - 1];
t[0] ^= e[j - n];
t[1] ^= e[j - n + 1];
t[2] ^= e[j - n + 2];
t[3] ^= e[j - n + 3];
e[j] = t[0];
e[j + 1] = t[1];
e[j + 2] = t[2];
e[j + 3] = t[3];
j += 4;
}
}
return e;
}
void shift_rows(unsigned char *state){
unsigned char t;
t = state[4];
state[4] = state[5];
state[5] = state[6];
state[6] = state[7];
state[7] = t;
t = state[8];
state[10] = t;
t = state[9];
state[11] = t;
t = state[12];
state[12] = state[15];
state[15] = state[14];
state[14] = state[13];
state[13] = t;
}
void mix_columns(unsigned char *state){
unsigned char a[4], b[4], c, j;
for(j = 0; j < 4; j++){
for(c = 0; c < 4; c++){
a[c] = state[(j * 4) + c];
b[c] = state[(j * 4) + c] << 1;
if(state[(j * 4) + c] & 0x80){
b[c] ^= 0x1b;
}
}
state[(j * 4) + 0] = b[0] ^ a[3] ^ a[2] ^ b[1] ^ a[1];
state[(j * 4) + 1] = b[1] ^ a[0] ^ a[3] ^ b[2] ^ a[2];
state[(j * 4) + 2] = b[2] ^ a[1] ^ a[0] ^ b[3] ^ a[3];
state[(j * 4) + 3] = b[3] ^ a[2] ^ a[1] ^ b[0] ^ a[0];
}
}
void encrypt(unsigned char *key, unsigned char *state){
unsigned char *e = key_schedule(key), i, j;
for(i = 0; i < 14; i++){
if(i == 0){
for(j = 0; j < 16; j++){
state[j] ^= e[(i * 16) + j];
}
}else if(i == 13){
for(j = 0; j < 16; j++){
state[j] ^= sbox[state[j]];
}
shift_rows(state);
for(j = 0; j < 16; j++){
state[j] ^= e[(i * 16) + j];
}
}else{
for(j = 0; j < 16; j++){
state[j] ^= sbox[state[j]];
}
shift_rows(state);
mix_columns(state);
for(j = 0; j < 16; j++){
state[j] ^= e[(i * 16) + j];
}
}
}
}
int main(){
unsigned char key[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
message[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
encrypt(key, message);
for(int i = 0; i < 16; i++){
printf("%02x ", message[i]);
}
printf("\n");
}
Example:
key = 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
message = 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
output = c2 2a 26 68 a8 4a 1e f3 ac 40 23 05 25 50 00 02
The book "The Design of Rijndael" (ISBN 3-540-42580-2) has a great list of test vectors in appendix D for all the intermediate steps of an AES 128 encryption. Step through your code and compare your results with those from the book. It should be easy to change your code temporarily to AES-128 to find the bugs and then switch it back to AES-256 afterwards.
After that, throw away your implementation and use an established implementation that has been thoroughly tested, since yours is vulnerable to timing attacks (because of the if(state[(j * 4) + c] & 0x80)
), which allows an attacker to find out the secret key. To avoid this and other implementation bugs, consult a book about implementing cryptography and try to use as much preexisting code as possible instead of writing your own.