I want to perform an XOR operation before and after AES encryption (whitening, like DESX) using additional keys, but the XOR operation takes too much time.
How can I reduce the XOR operation time?
Here is my code:
// XORs every byte of `value` with the bytes of `key`, restarting at the
// beginning of `key` whenever its end is reached, and returns the result.
//
// value: the data to mask (taken by value, so the caller's string is untouched).
// key:   the mask bytes; an empty key leaves `value` unchanged.
//
// Lengths and indices use string::size_type. Narrower counters (such as
// unsigned short) wrap at 65535 and would leave the tail of a long input
// unmasked.
string XOR(string value, string key)
{
    const string::size_type klen = key.length();
    if (klen == 0)
        return value;

    const string::size_type vlen = value.length();
    // `value` is already a private copy, so it is modified in place and
    // returned; no second copy of the buffer is made.
    for (string::size_type v = 0, k = 0; v < vlen; ++v)
    {
        value[v] ^= key[k];
        if (++k == klen)
            k = 0;
    }
    return value;
}
int main(int argc, char* argv[])
{
AutoSeededRandomPool prng;
byte key1[AES::DEFAULT_KEYLENGTH];
prng.GenerateBlock(key1, sizeof(key1));
byte key[AES::DEFAULT_KEYLENGTH];
prng.GenerateBlock(key, sizeof(key));
byte key2[AES::DEFAULT_KEYLENGTH];
prng.GenerateBlock(key2, sizeof(key2));
byte iv[AES::BLOCKSIZE];
prng.GenerateBlock(iv, sizeof(iv));
string plain = "AESX CBC Mode Test";
string cipher,encoded, encodediv, encodedkey1, encodedkey,
encodedkey2, recovered, prerecovered, postrecovered,
prewhiten, postwhiten;
// Pretty print key1
StringSource(key1, sizeof(key1), true,
new HexEncoder(
new StringSink(encodedkey1)
) // HexEncoder
); // StringSource
cout << "key1: " << encodedkey1 << endl;
// Pretty print iv
StringSource(iv, sizeof(iv), true,
new HexEncoder(
new StringSink(encodediv)
) // HexEncoder
); // StringSource
cout << "iv: " << encodediv << endl;
// Pretty print key
StringSource(key, sizeof(key), true,
new HexEncoder(
new StringSink(encodedkey)
) // HexEncoder
); // StringSource
cout << "key: " << encodedkey << endl;
// Pretty print key2
StringSource(key2, sizeof(key2), true,
new HexEncoder(
new StringSink(encodedkey2)
) // HexEncoder
); // StringSource
cout << "key2: " << encodedkey2 << endl;
cout << "plain text: " << plain << endl;
prewhiten = XOR(plain, encodedkey1);
try
{
cout << "pre whiten text: " << prewhiten << endl;
CBC_Mode< AES >::Encryption e;
e.SetKeyWithIV(key, sizeof(key), iv);
// The StreamTransformationFilter removes
// padding as required.
StringSource s(prewhiten, true,
new StreamTransformationFilter(e,
new StringSink(cipher)
) // StreamTransformationFilter
); // StringSource
}
catch(const CryptoPP::Exception& e)
{
cerr << e.what() << endl;
exit(1);
}
/*********************************\
\*********************************/
// Pretty print
encoded.clear();
StringSource(cipher, true,
new HexEncoder(
new StringSink(encoded)
) // HexEncoder
); // StringSource
cout << "cipher text: " << encoded << endl;
postwhiten = XOR(encoded, encodedkey2);
cout << "post whiten text: " << postwhiten << endl;
//decryption
prerecovered = XOR(postwhiten, encodedkey2);
encoded.clear();
StringSource(prerecovered, true,
new HexEncoder(
new StringSink(encoded)
) // HexEncoder
); // StringSource
cout << "pre recovered text: " << encoded << endl;
try
{
CBC_Mode< AES >::Decryption d;
d.SetKeyWithIV(key, sizeof(key), iv);
// The StreamTransformationFilter removes
// padding as required.
StringSource s(prerecovered, true,
new HexDecoder(
new StreamTransformationFilter(d,
new StringSink(recovered)
) // StreamTransformationFilter
)//HexDecoder
); // StringSource
cout << "recovered text: " << recovered << endl;
}
catch(const CryptoPP::Exception& e)
{
cerr << e.what() << endl;
exit(1);
}
postrecovered = XOR(recovered, encodedkey1);
cout << "post recovered text: " << postrecovered << endl;
return 0;
}
Any help would be appreciated.
How can I do XOR operation in Crypto++?
There are two ways to use the library to perform an XOR. First, there are two `xorbuf` functions in `misc.h`. The first implementation is shown below; it uses a single in/out buffer with a mask:
// XORs `count` bytes of `mask` into `buf` in place.
//
// When both pointers are suitably aligned the bulk of the work is done a
// word at a time (64-bit words first, unless CRYPTOPP_BOOL_SLOW_WORD64 is
// set, then 32-bit words); whatever is left over is handled byte by byte.
void xorbuf(byte *buf, const byte *mask, size_t count)
{
    if (IsAligned<word32>(buf) && IsAligned<word32>(mask))
    {
        if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsAligned<word64>(buf) && IsAligned<word64>(mask))
        {
            // 64-bit pass.
            const size_t quads = count / 8;
            word64 *dst64 = static_cast<word64*>(static_cast<void*>(buf));
            const word64 *src64 = static_cast<const word64*>(static_cast<const void*>(mask));
            for (size_t q = 0; q < quads; ++q)
                dst64[q] ^= src64[q];

            count -= 8 * quads;
            if (count == 0)
                return;
            buf += 8 * quads;
            mask += 8 * quads;
        }

        // 32-bit pass over what the 64-bit pass did not cover.
        const size_t dwords = count / 4;
        word32 *dst32 = static_cast<word32*>(static_cast<void*>(buf));
        const word32 *src32 = static_cast<const word32*>(static_cast<const void*>(mask));
        for (size_t d = 0; d < dwords; ++d)
            dst32[d] ^= src32[d];

        count -= 4 * dwords;
        if (count == 0)
            return;
        buf += 4 * dwords;
        mask += 4 * dwords;
    }

    // Byte-wise tail (or the whole buffer when the pointers are unaligned).
    for (size_t b = 0; b < count; ++b)
        buf[b] ^= mask[b];
}
There's a second xorbuf(byte *output, const byte *input, const byte *mask, size_t count)
that uses separate in and out buffers with a mask.
The second way to XOR is to use an `ArrayXorSink` from `filters.h`. Internally, `ArrayXorSink` calls `xorbuf` for you. You would use this if you prefer pipelines.
// XORs incoming bytes into the sink's buffer at the current write position
// and advances that position. At most as many bytes are consumed as still
// fit between m_total and m_size.
//
// Returns the number of input bytes that were NOT consumed.
size_t ArrayXorSink::Put2(const byte *begin, size_t length, int messageEnd, bool blocking)
{
    // Never hand a NULL pointer to xorbuf; nothing is consumed in that case.
    if (!m_buf || !begin)
        return length;

    const size_t copied = STDMIN(length, SaturatingSubtract(m_size, m_total));
    xorbuf(m_buf+m_total, begin, copied);
    m_total += copied;

    return length - copied;
}
// XORs every byte of `value` with the bytes of `key`, cycling through `key`
// as often as needed, and returns the masked string.
//
// value: the data to mask (a by-value copy, so the caller's string is untouched).
// key:   the mask bytes; an empty key leaves `value` unchanged.
//
// Counters are string::size_type: an unsigned short counter wraps at 65535
// and would stop masking a long input part-way through.
string XOR(string value, string key)
{
    const string::size_type klen = key.length();
    if (klen == 0)
        return value;

    const string::size_type vlen = value.length();
    // Work directly on the by-value parameter instead of copying it again.
    for (string::size_type v = 0, k = 0; v < vlen; ++v)
    {
        value[v] ^= key[k];
        if (++k == klen)
            k = 0;
    }
    return value;
}
For this, you could do something like the following. It asks the compiler to inline the function, and it passes `value` and `key` by constant reference to avoid the copies.
inline string XOR(const string& value, const string& key)
{
ASSERT(key.length() == value.length());
string retval(value);
xorbuf(&retval[0], &key[0], retval.length());
return retval;
}
The trick is that you have to take the address of element 0 to get the non-const pointer and avoid potential undefined behavior. You may need to cast to a `byte*`.
How can I reduce the XOR operation time?
You probably want to use an operand size larger than 1 byte when it's feasible. The library's `xorbuf` uses `word32` and `word64` when available.
In addition, if you have AVX (or AVX-512), then you can operate on vectors of up to 512 bits. If you keep your buffers aligned, then GCC will try to use the larger operand sizes at `-O3` and above. `-O3` is significant because that's when GCC starts aggressive vectorization and using features provided by AVX and AVX2.
The Crypto++ library endured a non-trivial amount of pain a couple of years ago because its buffers were not aligned as GCC expected, and it was causing SEGFAULTs at `-O3` and above. See, for example, "Crash on Cygwin i386 with -O3".
The alignment problem was not limited to Cygwin; Cygwin happened to demonstrate it. The problem surfaced on occasion under other platforms and CPUs, like ARM when NEON was enabled. We believe all the issues have been cleared.