I'm trying to programmatically determine the source file name and line number of a function definition, using the function's address and the PDB database file that Visual Studio generates for the module in which the function is defined.
For instance, I have a function `Lua::asset::get_supported_export_file_extensions` defined in the module `shared.dll`, and I want to determine the source code location for it.
To get the relative virtual address (rva) of the function, I subtract the module base address from the absolute virtual function address like so:
// Returns the MODULEENTRY32 record of the module named `ModuleName` in the
// process identified by `ProcessID`.
//
// The name is compared with strcmp, i.e. case-sensitively, against the
// szModule field of each snapshot entry.
//
// Returns a zero-initialised MODULEENTRY32 (modBaseAddr == nullptr) when
// `ModuleName` is null, when the snapshot cannot be created, or when no
// module with that name is found.
static MODULEENTRY32 GetModuleInfo(std::uint32_t ProcessID, const char* ModuleName)
{
	MODULEENTRY32 Mod32 = {0};
	if (ModuleName == nullptr)
		return Mod32;

	const HANDLE hSnap = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, ProcessID);
	if (hSnap == INVALID_HANDLE_VALUE)
		return Mod32;

	// dwSize has to be filled in before the first enumeration call.
	Mod32.dwSize = sizeof(MODULEENTRY32);

	// The enumeration must be started with Module32First; Module32Next only
	// continues an enumeration that is already in progress.
	bool found = false;
	if (Module32First(hSnap, &Mod32))
	{
		do
		{
			if (!strcmp(ModuleName, Mod32.szModule))
			{
				found = true;
				break;
			}
		} while (Module32Next(hSnap, &Mod32));
	}

	CloseHandle(hSnap);
	if (found)
		return Mod32;
	return {0};
}
// Computes the relative virtual address (RVA) of
// Lua::asset::get_supported_export_file_extensions within shared.dll.
int main(int argc,char *argv[])
{
// Module record of shared.dll as loaded in the current process.
auto dllInfo = GetModuleInfo(GetCurrentProcessId(),"shared.dll");
// RVA = absolute address of the function minus the module base address,
// truncated to a DWORD.
// NOTE(review): GetModuleInfo returns a zeroed record when the module is not
// found, so modBaseAddr would be null here and the result would not be an RVA.
auto rva = (DWORD)((uint64_t)&Lua::asset::get_supported_export_file_extensions -(uint64_t)dllInfo.modBaseAddr);
[...]
}
This gives me the rva 0x44508 for the function.
To confirm the address, I've tried using the dbh debugging tool from the Windows SDK to look up the function's address (as well as the base address for the module):
shared [1000000]: enum shared!*get_supported_export_file_extensions*
index address name
1 1e376c0 : `Lua::asset::get_supported_export_file_extensions'::`1'::dtor$0
3 18409c0 : Lua::asset::get_supported_export_file_extensions
shared [1000000]: scope 18409c0
name : path\to\object_files\lasset.obj
addr : 0
size : 0
flags : 0
type : 0
modbase : 1000000
value : 0
reg : 0
scope : SymTagNull (0)
tag : SymTagCompiland (2)
index : 6
I would expect to get the same RVA when I subtract the base address from the function address, but instead I get an RVA of 0x18409c0 - 0x1000000 = 0x8409c0.
For convenience I will be referring to the addresses as:
0x44508 = calculated address
0x8409c0 = dbh address
I then used the Debug Interface Access SDK to look up both addresses in the pdb to determine why I'm getting differing results:
// Opens the PDB at "path/to/shared.pdb" through the DIA SDK, looks up the
// symbol of kind `tag` located at `rva`, and prints its name followed by the
// source file and line number of up to five line records covering it.
//
// rva: relative virtual address to look up.
// tag: symbol kind to search for (e.g. SymTagPublicSymbol, SymTagFunction).
//
// Returns FALSE if COM cannot be initialised, the DIA data source cannot be
// created, the PDB path cannot be converted or loaded, or no session can be
// opened. Returns TRUE otherwise, including when no symbol or no line
// information is found at `rva`.
static BOOL find_function_in_pdb(DWORD rva,enum SymTagEnum tag)
{
	// Balances a successful CoInitializeEx with CoUninitialize. Declared before
	// every COM smart pointer so it is destroyed only after all of them have
	// released their interfaces.
	struct ComScope
	{
		HRESULT hr;
		ComScope() : hr(CoInitializeEx(nullptr,COINIT_MULTITHREADED)) {}
		~ComScope() { if(SUCCEEDED(hr)) CoUninitialize(); }
		ComScope(const ComScope&) = delete;
		ComScope &operator=(const ComScope&) = delete;
	} com;
	if(FAILED(com.hr))
		return FALSE;

	CComPtr<IDiaDataSource> pSource;
	if(FAILED(pSource.CoCreateInstance(CLSID_DiaSource,nullptr,CLSCTX_INPROC_SERVER)))
		return FALSE;

	const std::string pdbFilePath = "path/to/shared.pdb";
	wchar_t wszFilename[_MAX_PATH];
	const size_t capacity = sizeof(wszFilename) /sizeof(wszFilename[0]);
	const size_t converted = mbstowcs(wszFilename,pdbFilePath.c_str(),capacity);
	// mbstowcs writes no terminator when the result fills the whole buffer,
	// and returns (size_t)-1 on an invalid multibyte sequence.
	if(converted == static_cast<size_t>(-1) || converted >= capacity)
		return FALSE;
	if(FAILED(pSource->loadDataFromPdb(wszFilename)))
		return FALSE;

	CComPtr<IDiaSession> session;
	if(FAILED(pSource->openSession(&session)))
		return FALSE;

	CComPtr<IDiaSymbol> symbol;
	if(session->findSymbolByRVA(rva,tag,&symbol) != S_OK)
		return TRUE;

	CComBSTR name;
	if(symbol->get_name(&name) == S_OK)
		std::cout<<"Name: "<<ConvertBSTRToMBS(name.m_str)<<'\n';

	ULONGLONG length = 0;
	if(symbol->get_length(&length) != S_OK)
		return TRUE;

	// findLinesByRVA yields a single enumerator covering [rva, rva + length).
	CComPtr<IDiaEnumLineNumbers> lines;
	if(session->findLinesByRVA(rva,static_cast<DWORD>(length),&lines) != S_OK)
		return TRUE;

	constexpr int kMaxLines = 5;
	for(int i=0;i<kMaxLines;++i)
	{
		// Fetch exactly one record per call: the first argument of Next is the
		// number of elements to retrieve, and only one slot is provided.
		CComPtr<IDiaLineNumber> line;
		ULONG fetched = 0;
		if(lines->Next(1,&line,&fetched) != S_OK || fetched != 1)
			break;

		CComPtr<IDiaSourceFile> srcFile;
		if(line->get_sourceFile(&srcFile) == S_OK)
		{
			CComBSTR fileName;
			if(srcFile->get_fileName(&fileName) == S_OK)
				std::cout<<"File: "<<ConvertBSTRToMBS(fileName.m_str)<<'\n';
		}

		DWORD lineNumber = 0;
		if(line->get_lineNumber(&lineNumber) == S_OK)
			std::cout<<"Line: "<<lineNumber<<'\n';
	}
	return TRUE;
}
// Looks up both candidate RVAs in the PDB to compare what each one resolves to.
int main(int argc,char *argv[])
{
// RVA computed at runtime from the function pointer, searched as a public symbol.
find_function_in_pdb(0x44508 /* calculated address */,SymTagEnum::SymTagPublicSymbol);
// RVA derived from the dbh output, searched as a function symbol.
find_function_in_pdb(0x8409c0 /* dbh address */,SymTagEnum::SymTagFunction);
[...]
}
It does actually find both addresses, and both point to a symbol with a name matching my function. However, the symbol at the calculated address is a `SymTagPublicSymbol`, while the symbol at the dbh address is a `SymTagFunction`.
I'm guessing that means that the calculated address is for the public symbol and the dbh address for the private symbol? (https://learn.microsoft.com/en-us/windows-hardware/drivers/debugger/public-and-private-symbols)
So far so good. The problem is that the public symbol does not have any source code information associated with it, but the private symbol does. Assuming I'm correct so far (which I'm not quite sure about), my question boils down to:
How can I get the private symbol/address from the public symbol/address? I need a solution that I can implement programmatically.
After some more experimentation I found the solution:
IDiaSymbol *publicSymbol;
DWORD publicRva = 0x44508;
if(session->findSymbolByRVA(publicRva,SymTagEnum::SymTagPublicSymbol,&publicSymbol) == S_OK)
{
DWORD privateRva;
IDiaSymbol *privateSymbol;
if(
publicSymbol->get_targetRelativeVirtualAddress(&privateRva) == S_OK &&
session->findSymbolByRVA(privateRva,SymTagEnum::SymTagFunction,&privateSymbol) == S_OK
)
{
// Do stuff with private symbol
}
}
`get_targetRelativeVirtualAddress` gives me 0x8409c0, the address of the private symbol, which contains the source code information.
As for why this works, I have no idea. According to the documentation, `get_targetRelativeVirtualAddress` is only supposed to be valid for `SymTagThunk` symbols and returns the RVA of a "thunk target". I don't think the public symbol is a thunk target, but it works without errors and gives me exactly what I need.