I am using MATLAB to look up the value in two vectors OCT_EXP
, OCT_LOG
, from two input values u,v
and output is val
as condition
if (( u == 0 )||( v == 0 ))
val = 0;
else
val = OCT_EXP( OCT_LOG(u) + OCT_LOG(v) + 1);
I tried to use three ways: normal way (no_vectorized way), vectorized way, and mex way. I expected that mex way will be the best way, then vectorized way. However, when I measure time consumption, the first way (no vectorized way) is best, the vectorized way is worst way. What is happen in my code? Thank all
I want to consider the speed up of the function because it will be called many time: 300.000 times
The first way:
function val = gfmult_no_vec( u, v )
OCT_EXP = [ 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38,...
76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157,...
39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35,...
70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222,...
161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60,...
120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163,...
91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52,...
104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59,...
118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218,...
169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85,...
170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198,...
145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171,...
75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25,...
50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81,...
162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9,...
18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11,...
22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71,...
142, 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38,...
76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192,...
157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159,...
35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111,...
222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30,...
60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223,...
163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26,...
52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147,...
59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218,...
169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85,...
170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198,...
145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171,...
75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25,...
50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81,...
162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9,...
18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11,...
22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71,...
142 ];
OCT_LOG = [ 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4,...
100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5,...
138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69,...
29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114,... end
166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145,...
34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92,...
131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40,...
84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212,...
229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103,...
74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180,...
124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188,...
207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171,...
20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216,...
183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161,...
59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203,...
89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215,...
79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80,...
88, 175 ];
if (( u == 0 )||( v == 0 ))
val = 0;
else
val = OCT_EXP( OCT_LOG(u) + OCT_LOG(v) + 1);
The second way: Vectorized way
function val = gfmult_vec( u, v )
OCT_EXP = [ 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38,...
76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157,...
39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35,...
70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222,...
161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60,...
120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163,...
91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52,...
104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59,...
118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218,...
169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85,...
170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198,...
145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171,...
75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25,...
50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81,...
162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9,...
18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11,...
22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71,...
142, 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38,...
76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192,...
157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159,...
35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111,...
222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30,...
60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223,...
163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26,...
52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147,...
59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218,...
169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85,...
170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198,...
145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171,...
75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25,...
50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81,...
162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9,...
18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11,...
22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71,...
142 ];
OCT_LOG = [ 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4,...
100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5,...
138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69,...
29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114,...
166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145,...
34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92,...
131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40,...
84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212,...
229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103,...
74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180,...
124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188,...
207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171,...
20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216,...
183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161,...
59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203,...
89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215,...
79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80,...
88, 175 ];
uv0 = (~(( u == 0 )|( v == 0 )));
val = zeros(size(u));
val(uv0) = OCT_EXP( OCT_LOG(u(uv0)) + OCT_LOG(v(uv0)) + 1);
end
The last way: mex code
#include "mex.h"
double look_up(double u, double v)
{
double OCT_EXP [510] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38,
76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157,
39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35,
70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222,
161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60,
120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163,
91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52,
104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59,
118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218,
169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85,
170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198,
145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171,
75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25,
50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81,
162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9,
18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11,
22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71,
142, 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38,
76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192,
157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159,
35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111,
222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30,
60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223,
163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26,
52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147,
59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218,
169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85,
170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198,
145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171,
75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25,
50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81,
162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9,
18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11,
22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71,
142 };
double OCT_LOG[255] = { 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4,
100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5,
138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69,
29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114,
166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145,
34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92,
131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40,
84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212,
229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103,
74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180,
124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188,
207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171,
20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216,
183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161,
59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203,
89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215,
79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80,
88, 175 };
if (( u == 0 )||( v == 0 ))
return 0;
else
{
int index=OCT_LOG[int(u-1)] + OCT_LOG[int(v-1)] + 1;
return OCT_EXP [index-1];
}
}
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
double *u, *v, *uv;
int mrows, ncols;
plhs[0] = mxCreateDoubleMatrix(1,1, mxREAL);
/* Assign pointers to each input and output. */
u = mxGetPr(prhs[0]);
v = mxGetPr(prhs[1]);
uv = mxGetPr(plhs[0]);
*uv = look_up(*u, *v);
}
The measurement code
function main
function test1()
for i=1:200
for j=1:200
gfmult_no_vec(i,j);
end
end
end
function test2()
for i=1:200
for j=1:200
gfmult_vec(i,j);
end
end
end
function test3()
for i=1:200
for j=1:200
gfmult_mex(i,j);
end
end
end
f1=@()test1();
t1=timeit(f1)
f2=@()test2();
t2=timeit(f2)
f3=@()test3();
t3=timeit(f3)
end
Report time: t1 = 0.1934, t2 = 1.1739, t3 = 0.3584
This solution takes 0.0006 second in my computer (and it is vectorized):
u = randi(5,200,1)-1; % some arbitrary data including zeros
v = randi(5,200,1)-1; % some arbitrary data including zeros
[U,V] = ndgrid(u(u~=0),v(v~=0)); % make all possible combinations of u and v
val = zeros(length(u),length(v)); % initialize the output size, in case the last value in u or v is zero.
f = @(u,v) OCT_EXP(OCT_LOG(u)+OCT_LOG(v)+1);
val((u~=0),(v~=0)) = f(U,V);
Now val(u,v) = OCT_EXP(OCT_LOG(u)+OCT_LOG(v)+1)
if u
and v
are both not zeros, otherwise val(u,v) = 0
.
If you want gfmult
to have a scalar input, then your first method seems to be the fastest way. However, I would define OCT_EXP
and OCT_LOG
outside the function and pass them to it, instead of assigning this values over and over:
function val = gfmult(OCT_EXP,OCT_LOG,u,v)
if (u==0)||(v==0)
val = 0;
else
val = OCT_EXP(OCT_LOG(u)+OCT_LOG(v)+1);
end
end
in my computer it reduces running time from 0.21444 (with your version) to 0.158 second for 100K iteration, which is not such a big improvement (0.05644 second), but if you have millions of those, it may be significant.