to do the the eigendecomposition in C++, I use the routine "zhpev". This routine is embedded in a dll file of a larger piece of software and is exhaustively used during the run time. After ~5000 calls of "zhpev" I measure the run time. Everything is fine for the first 900 run time evaluations. The run time is around 0.7 seconds with little variation. However, 900 run time evaluations, the run time suddenly increases from 0.7 seconds up to 2.7 seconds with heavy variation.
I made the following observations:
Sorry that I could not post any code since the project I'm working with is too large.
I would appreciate any hint which helps me to stop this strange behaviour!
Edit The routine "zhpev" works on complex Hermitian matrices with size 32x32 in double precision. So, the chunks of data processed at a time are rather small.
Update 1) Paging is not the problem here. I disabled the paging file in the system options. The run time issue is still not fixed. 2) Running the application on a different Windows computer also leads to the same run time issues. However, the starting of the run time increase occurs now later, after 1400 run time evaluations.
Update I found out that the run time problems occur only if I call "zhpev" inside a thread. With this I can create a small code example where I run into the same problems.
Let me explain my code
This is my code
#include <windows.h>
#include <tchar.h>
#include <strsafe.h>
#include "stdafx.h"
#include "mkl_lapack.h"
#include "mkl_service.h"
#include <time.h>
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <stdlib.h>
#include <iostream>
using namespace std;
#define CACHE_LINE 32
#define CACHE_ALIGN __declspec(align(CACHE_LINE))
#define MAX_THREADS 2
#define BUF_SIZE 255
DWORD WINAPI MyThreadFunction( LPVOID lpParam );
void ErrorHandler(LPTSTR lpszFunction);
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// This is the critical function.
void Eigendecomposition();
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
typedef struct MyData {
int val1;
int val2;
} MYDATA, *PMYDATA;
int _tmain()
{
PMYDATA pDataArray[MAX_THREADS];
DWORD dwThreadIdArray[MAX_THREADS];
HANDLE hThreadArray[MAX_THREADS];
std::ofstream ofs;
double tstart;
double tend;
double proc_time_pure;
for(int j=0;j<10000;j++){
// Start one iteration
tstart = clock();
// Create MAX_THREADS worker threads.
for( int i=0; i<MAX_THREADS; i++ )
{
pDataArray[i] = (PMYDATA) HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
sizeof(MYDATA));
if( pDataArray[i] == NULL )
{
ExitProcess(2);
}
pDataArray[i]->val1 = i;
pDataArray[i]->val2 = i+100;
// Create the thread to begin execution on its own.
hThreadArray[i] = CreateThread(
NULL, // default security attributes
0, // use default stack size
MyThreadFunction, // thread function name
pDataArray[i], // argument to thread function
0, // use default creation flags
&dwThreadIdArray[i]); // returns the thread identifier
if (hThreadArray[i] == NULL)
{
ErrorHandler(TEXT("CreateThread"));
ExitProcess(3);
}
} // End of main thread creation loop.
// Wait until all threads have terminated.
WaitForMultipleObjects(MAX_THREADS, hThreadArray, TRUE, INFINITE);
for(int i=0; i<MAX_THREADS; i++)
{
CloseHandle(hThreadArray[i]);
if(pDataArray[i] != NULL)
{
HeapFree(GetProcessHeap(), 0, pDataArray[i]);
pDataArray[i] = NULL; // Ensure address is not reused.
}
}
tend = clock();
proc_time_pure = tend-tstart;
// Print processing time into console and write it into a file
printf(" Processing time: %4.3f \n", proc_time_pure/1000.0);
ofs.open ("Processing_time.txt", std::ofstream::out | std::ofstream::app);
ofs << proc_time_pure/1000.0 << " ";
ofs.close();
}
return 0;
}
DWORD WINAPI MyThreadFunction( LPVOID lpParam )
{
HANDLE hStdout;
PMYDATA pDataArray;
TCHAR msgBuf[BUF_SIZE];
size_t cchStringSize;
DWORD dwChars;
hStdout = GetStdHandle(STD_OUTPUT_HANDLE);
if( hStdout == INVALID_HANDLE_VALUE )
return 1;
pDataArray = (PMYDATA)lpParam;
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Critical function
Eigendecomposition();
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
return 0;
}
void ErrorHandler(LPTSTR lpszFunction)
{
// Retrieve the system error message for the last-error code.
LPVOID lpMsgBuf;
LPVOID lpDisplayBuf;
DWORD dw = GetLastError();
FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS,
NULL,
dw,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
(LPTSTR) &lpMsgBuf,
0, NULL );
// Display the error message.
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
(lstrlen((LPCTSTR) lpMsgBuf) + lstrlen((LPCTSTR) lpszFunction) + 40) * sizeof(TCHAR));
StringCchPrintf((LPTSTR)lpDisplayBuf,
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
TEXT("%s failed with error %d: %s"),
lpszFunction, dw, lpMsgBuf);
MessageBox(NULL, (LPCTSTR) lpDisplayBuf, TEXT("Error"), MB_OK);
// Free error-handling buffer allocations.
LocalFree(lpMsgBuf);
LocalFree(lpDisplayBuf);
}
void Eigendecomposition(){
const int M = 32;
typedef MKL_Complex16 double_complex;
const char jobz = 'V';
const char uplo = 'L'; // lower triangular part of input matrix is used
const MKL_INT dim = M;
const MKL_INT ldz = M;
const MKL_INT LWORK = (2*M-1);
const MKL_INT LRWORK = (3*M-2);
MKL_INT info = 0;
double_complex A_H_MKL[(M*M+M)/2];
CACHE_ALIGN double_complex work[LWORK];
CACHE_ALIGN double rwork[LRWORK];
double D[M];
double_complex U[M][M];
for(int i=0;i<500;i++ ){
// Create the input matrix
for (int tmp=0; tmp < (M*M+M)/2; tmp++){
A_H_MKL[tmp].real = 1 ;
A_H_MKL[tmp].imag = 0;}
// This is the mkl function
zhpev(&jobz, // const char* jobz,
&uplo, // const char* uplo,
&dim, // const MKL_INT* n,
(double_complex *)&A_H_MKL[0], // double_complex* ap,
(double *)&D[0], // double* w,
(double_complex *)&U[0][0], // double_complex* z,
&ldz, // const MKL_INT* ldz,
work, // double_complex* work,
rwork, // double* rwork,
&info); // MKL_INT* info
}
}
I found the bug in my code. I run the routine for eigendecomposition inside a thread that was created with the windows function CreatThread. However, there was no function to end the thread as for example the WaitForMultipleObjects-routine. For all the other parts of my application this was not a problem but the eigendecomposition run into difficulties.