I'm currently trying to implement algorithm SHA-3 on a smartcard . I managed to succeed in implementation but it seems that the output of the algorithm doesn't match the Test Vectors from NIST. My entire source code is below.
final static byte CLA_KECCAK = (byte) 0x80;
final static byte INS_KECCAKF = (byte) 0x10;
short[] ram_B = JCSystem.makeTransientShortArray((short) 5, JCSystem.CLEAR_ON_DESELECT);
short[] ram_tmp = JCSystem.makeTransientShortArray((short) 1, JCSystem.CLEAR_ON_DESELECT);
final static short c = (short) 256;
final static short r = (short) (400 - c);
final static short rbyte = (short) (r / 8);
final static short outputLength = (short) 224;
private static short rot(short n, short i) {
return (short) ((n << i) | (n >>> ((short) (16 - i))));
}
private static final short[] roundConstants = { (short) 0x0001, (short) 0x8082, (short) 0x808A, (short) 0x8000,
(short) 0x808B, (short) 0x0001, (short) 0x8081, (short) 0x8009, (short) 0x008A, (short) 0x0088,
(short) 0x8009, (short) 0x000A, (short) 0x808B, (short) 0x008B, (short) 0x8089, (short) 0x8003,
(short) 0x8002, (short) 0x0080, (short) 0x800A, (short) 0x000A };
private static final short rotationConstants[] = { 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 27, 41, 56, 8, 25,
43, 62, 18, 39, 61, 20, 44 };
private static final short indexPi[] = { 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20, 14,
22, 9, 6, 1 };
private static final short index[] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4 };
private TestSha() {
}
public static void install(byte bArray[], short bOffset, byte bLength) throws ISOException {
new TestSha().register();
}
public void process(APDU apdu) throws ISOException {
if (selectingApplet()) {
return;
}
byte[] buf = apdu.getBuffer();
byte[] hash;
switch (buf[ISO7816.OFFSET_INS]) {
case INS_KECCAKF:
short lgth = apdu.setIncomingAndReceive();
if (lgth == 0) {
ISOException.throwIt(ISO7816.SW_WRONG_LENGTH);
}
byte[] data = new byte[lgth];
Util.arrayCopy(buf, ISO7816.OFFSET_CDATA, data, (short) 0, lgth);
hash = KeccakF(data);
Util.arrayCopyNonAtomic(hash, (short) 0, buf, (short) 0, (short) hash.length);
apdu.setOutgoingAndSend((short) 0, (short) hash.length);
break;
default:
ISOException.throwIt(ISO7816.SW_INS_NOT_SUPPORTED);
}
}
private static short[] byteToShort(byte[] b) {
short[] c = new short[(short) (b.length / 2)];
byte aux[] = new byte[2];
for (short i = 0; i < c.length; i++) {
for (short j = 0; j < 2; j++) {
aux[j] = b[(short) (2 * i + j)];
}
c[i] = (short) ((((short) aux[1]) & 0xFF) | ((((short) aux[0]) & 0xFF) << 8));
}
return c;
}
private static byte[] shortToByte(short[] b) {
byte[] c = new byte[(short) (b.length * 2)];
short y = 0;
for (short x = 0; x < c.length; x += 2) {
c[(short) (x + 1)] = (byte) (b[y] & 0xFF);
c[x] = (byte) ((b[y] >> 8) & 0xFF);
y++;
}
return c;
}
public byte[] KeccakF(byte[] M) {
short i = 0;
short x = 0;
short Mlen = (short) M.length; // padding
x = (short) (rbyte - (Mlen % (rbyte)));
byte[] P = new byte[(short) (Mlen + x)];
Util.arrayCopy(M, (short) 0, P, (short) 0, Mlen);
if ((Mlen % rbyte) != 0) {
P[Mlen] = (byte) 0x01;
P[(short) (P.length - 1)] |= (byte) 0x80;
}
// absorbing
short S[] = new short[25];
byte[] Pi = new byte[rbyte];
short[] tmp = new short[rbyte / 2];
short Plen = (short) P.length;
for (i = 0; i < (short) (Plen / rbyte); i++) {
Util.arrayCopy(P, (short) (i * Plen), Pi, (short) 0, rbyte);
tmp = byteToShort(Pi);
for (x = 0; x < (short) (rbyte / 2); x++) {
S[x] ^= tmp[x];
}
S = Keccak400(S);
}
x = outputLength;
short[] Z = new short[outputLength / 16];
short Zlen = (short) Z.length;
while (x > 0) {
for (i = 0; i < Zlen; i++) {
Z[i] = S[i];
}
x -= r;
if (x > 0) {
S = Keccak400(S);
}
}
return shortToByte(Z);
}
private short[] Keccak400(short[] A) {
short x = 0;
short y = 0;
short round = 0;
for (round = 0; round < 20; round++) {
// Theta Step
for (x = 0; x < 5; x++) {
ram_B[x] = (short) (A[x] ^ A[(short) (5 + x)] ^ A[(short) (10 + x)] ^ A[(short) (15 + x)]
^ A[(short) (20 + x)]);
}
for (x = 0; x < 5; x++) {
ram_tmp[0] = (short) (ram_B[index[(short) (x + 4)]] ^ rot(ram_B[index[(short) (x + 1)]], (short) 1));
for (y = 0; y < 25; y += 5) {
A[(short) (x + y)] ^= ram_tmp[0];
}
}
// Rho and Pi Steps
ram_tmp[0] = A[1];
short t = 0;
for (x = 0; x < 24; x++) {
t = indexPi[x];
ram_B[0] = A[t];
A[t] = rot(ram_tmp[0], rotationConstants[x]);
ram_tmp[0] = ram_B[0];
}
for (y = 0; y < 25; y += 5) {
ram_B[0] = A[(short) (y + 0)];
ram_B[1] = A[(short) (y + 1)];
ram_B[2] = A[(short) (y + 2)];
ram_B[3] = A[(short) (y + 3)];
ram_B[4] = A[(short) (y + 4)];
for (x = 0; x < 5; x++) {
A[(short) (y + x)] = (short) (ram_B[x]
^ ((~ram_B[index[(short) (x + 1)]]) & ram_B[index[(short) (x + 2)]]));
}
}
// Iota Step
A[0] ^= roundConstants[round];
}
return A;
}
The code is not optimized and the SHA-3 variance is Keccak-224.
If your test vectors don't match then either your testing is invalid or - more likely - your algorithm doesn't yet work. You should try and validate intermediate values against another implementation. You could use BouncyCastle for the intermediate values and jCardSim to help with the debugging.