I want to detect instructions such as mov dword ptr [rbp-0x28], 0x7 (that is, every instruction of the form mov dword ptr [rbp-0xXX], XX) using Intel Pin, mainly to gather information about array writes. In unoptimized code, this should catch most stores to local variables.
I can do:
if (INS_Opcode(ins) == XED_ICLASS_MOV)
instruction detection;
to detect the mov instruction. However, this also matches other instructions, such as mov eax, 0x0. I want to detect only the instructions that carry the dword ptr size directive.
I checked the pin instruction inspection API and pin xed-iclass-enum. Using that documentation I tried something like:
if ((INS_Opcode(ins) == XED_ICLASS_MOV) && INS_OperandIsMemory(ins, 0))
instruction detection;
which matches the instructions I want, but it also reports instructions such as mov esi, eax (which I do not want).
My code:
#include <fstream>
#include <iostream>
#include "pin.H"
#include <stack>
#include <unordered_map>
// Additional library calls go here
// Stack allocation
// One entry of mainStack: holds a single immediate operand value that
// printmaindisas() captured from an ADD/SUB instruction.
struct Node
{
int value; // the captured immediate, stored as a plain int
};
// Stack of captured immediates, most recent on top. printmaindisas() pushes to
// it and reads top(); nothing in the visible code pops from it.
std::stack<Node> mainStack;
// Ins object mapping
// Thin wrapper that keeps a copy of a Pin INS handle so that it can be stored
// in a container keyed by instruction address.
//
// NOTE(review): Pin INS handles are meant to be used while the instrumentation
// callback that received them is running. Confirm against the Pin
// documentation before querying a stored handle from an analysis routine.
class Insr
{
public:
    // explicit: an INS should never convert to an Insr silently.
    explicit Insr(INS insob) : insobject(insob) {}

    // Returns a copy of the wrapped handle; does not modify the wrapper.
    INS get_insobject() const
    {
        return insobject;
    }

private:
    INS insobject; // handle captured at construction time
};
// Instruction address -> heap-allocated Insr wrapper. Filled by Instruction()
// at instrumentation time and looked up by printmaindisas() at analysis time.
// The wrappers are never freed in the visible code.
static std::unordered_map<ADDRINT, Insr*> insstack;
// Output file stream, opened in main() with the file name from KnobOutputFile.
// Nothing in the visible code writes to it or closes it (Fini is commented out).
ofstream OutFile;
//static uint64_t counter = 0;
// Not referenced anywhere else in the visible code.
std::string rtin = "";
// Print gate: printmaindisas() returns early while this is 0. mutex_unlock()
// sets it to 1 and mutex_lock() resets it to 0.
// Original note: "Make this lock if you want to print from _start" --
// presumably meaning the gate should start open (non-zero) to trace from
// _start onwards; confirm with the author.
uint32_t key = 0;
void printmaindisas(uint64_t addr, std::string disassins)
{
std::stringstream tempstream;
tempstream << std::hex << addr;
std::string address = tempstream.str();
// if (addr > 0x700000000000)
// return;
if (addr > 0x700000000000)
return;
if (!key)
return;
// if (insstack[addr]->get_opcode() == XED_ICLASS_ADD || insstack[addr]->get_opcode()
// == XED_ICLASS_SUB)
INS ins = insstack[addr]->get_insobject();
if((INS_Opcode(ins) == XED_ICLASS_ADD || INS_Opcode(ins) == XED_ICLASS_SUB)
&&(INS_OperandIsImmediate(ins, 1)))
{
int value = INS_OperandImmediate(ins, 1);
std::cout << "value: " << value << '\n';
Node node{value};
mainStack.push(node);
std::cout << "stack top: " << mainStack.top().value << '\n';
}
if ((INS_Opcode(ins) == XED_ICLASS_MOV) && INS_OperandIsMemory(ins, 0))
{
std::cout << "yes!" << '\n';
}
std::cout<<address<<"\t"<<disassins<<std::endl;
}
// Closes the print gate: clears `key`, so printmaindisas() returns early, and
// prints "out". Routine() attaches this at IPOINT_AFTER of the routine named
// "main". Despite the name, no mutex or other synchronization is involved.
void mutex_lock()
{
key = 0;
std::cout<<"out\n";
}
// Opens the print gate: sets `key` to 1, so printmaindisas() starts reporting,
// and prints "in". Routine() attaches this at IPOINT_BEFORE of the routine
// named "main". Despite the name, no mutex or other synchronization is involved.
void mutex_unlock()
{
key = 1;
std::cout<<"in\n";
}
// Instruction-level instrumentation callback (registered in main() through
// INS_AddInstrumentFunction). For each instruction it records an Insr wrapper
// in insstack and schedules printmaindisas() to run before the instruction,
// passing the instruction's address and its disassembly text.
//
//   ins  instruction being instrumented
//   v    user value given at registration (unused)
void Instruction(INS ins, VOID *v)
{
    const ADDRINT addr = INS_Address(ins);

    // Pin can present the same address to this callback more than once.
    // insert() keeps the existing entry in that case, so allocate only for a
    // new address; otherwise the freshly allocated wrapper would be leaked.
    if (insstack.find(addr) == insstack.end())
        insstack.insert(std::make_pair(addr, new Insr(ins)));

    // The disassembly string is intentionally never freed: the analysis call
    // may use it for as long as the instrumented code can run.
    INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)printmaindisas,
                   IARG_ADDRINT, addr,
                   IARG_PTR, new std::string(INS_Disassemble(ins)),
                   IARG_END);
}
// Routine-level instrumentation callback (registered in main() through
// RTN_AddInstrumentFunction). For the routine named "main" it attaches
// mutex_unlock() before the routine and mutex_lock() after it, so that
// printmaindisas() only reports while main is executing. Every other routine
// is left untouched.
//
//   rtn  routine being instrumented
//   V    user value given at registration (unused)
void Routine(RTN rtn, VOID *V)
{
    // Nothing to do for any routine other than main.
    if (RTN_Name(rtn) != "main")
        return;

    RTN_Open(rtn);
    RTN_InsertCall(rtn, IPOINT_BEFORE, (AFUNPTR)mutex_unlock, IARG_END);
    // NOTE(review): Pin describes IPOINT_AFTER on routines as best effort;
    // confirm it fires on every exit path that matters here.
    RTN_InsertCall(rtn, IPOINT_AFTER, (AFUNPTR)mutex_lock, IARG_END);
    RTN_Close(rtn);
}
// Command-line knob "o" of the "pintool" family: name of the output file that
// main() opens. Defaults to "mytool.out".
KNOB<string> KnobOutputFile(KNOB_MODE_WRITEONCE, "pintool", "o", "mytool.out", "specify output file name");
/*
VOID Fini(INT32 code, VOID *v)
{
// Write to a file since cout and cerr maybe closed by the application
OutFile.setf(ios::showbase);
OutFile << "Count " << count << endl;
OutFile.close();
}
*/
// Prints a one-line banner, a blank line and the summary of all registered
// knobs to stderr. Always returns -1; main() returns this value when PIN_Init
// reports a command-line error.
int32_t Usage()
{
    std::cerr << "This is my custom tool" << std::endl
              << std::endl
              << KNOB_BASE::StringKnobSummary() << std::endl;
    return -1;
}
// Tool entry point: initialises Pin, opens the output file, registers the
// routine-level and instruction-level instrumentation callbacks, and hands
// control to Pin. Returns Usage()'s result if PIN_Init reports an error.
int main(int argc, char * argv[])
{
// Load symbol information; Routine() matches routines by name (RTN_Name),
// which needs symbols. This is called before PIN_Init.
PIN_InitSymbols();
// Initialize Pin and parse the command line; a non-zero result means the
// arguments were rejected. PIN_Init must be called before PIN_StartProgram,
// as mentioned in the documentation.
if (PIN_Init(argc, argv)) return Usage();
// Open the output file named by KnobOutputFile.
// NOTE(review): nothing in the visible code writes to or closes OutFile
// (the Fini callback is commented out).
OutFile.open(KnobOutputFile.Value().c_str());
// Use Intel syntax for the disassembly text produced by INS_Disassemble.
// This selects an output format; it is unrelated to the vendor of the host CPU.
PIN_SetSyntaxIntel();
// Add the routine instrumentation callback
RTN_AddInstrumentFunction(Routine, 0);
// Add the instruction instrumentation callback
INS_AddInstrumentFunction(Instruction, 0);
//PIN_AddFiniFunction(Fini, 0);
// Start the program here.
// NOTE(review): PIN_StartProgram is documented as never returning, so the
// return statement below is presumably only there to satisfy the signature.
PIN_StartProgram();
return 0;
}
And the output I'm getting:
in
40051e push rbp
value: -128
stack top: -128
40051f mov rbp, rsp
400522 add rsp, 0xffffffffffffff80
yes!
400526 mov dword ptr [rbp-0x28], 0x7
yes!
40052d mov dword ptr [rbp-0x64], 0x9
400534 mov eax, 0x0
400539 call 0x4004e6
4004e6 push rbp
value: 64
stack top: 64
4004e7 mov rbp, rsp
4004ea sub rsp, 0x40
yes!
4004ee mov dword ptr [rbp-0xc], 0x4
4004f5 lea rax, ptr [rbp-0xc]
yes!
4004f9 mov qword ptr [rbp-0x8], rax
4004fd mov rax, qword ptr [rbp-0x8]
400501 mov eax, dword ptr [rax]
yes!
400503 mov esi, eax
400505 mov edi, 0x4005d0
40050a mov eax, 0x0
40050f call 0x4003f0
4003f0 jmp qword ptr [rip+0x200c22]
4003f6 push 0x0
4003fb jmp 0x4003e0
4003e0 push qword ptr [rip+0x200c22]
4003e6 jmp qword ptr [rip+0x200c24]
4
yes!
400514 mov dword ptr [rbp-0x3c], 0x3
40051b nop
40051c leave
40051d ret
40053e mov eax, 0x0
400543 leave
out
Is this the correct way to do that (without any false positives)?
If you want to accept all of the following instructions:
mov [rbp + disp], reg/imm
mov [rbp*scale + disp], reg/imm
mov [reg + rbp*scale], reg/imm
mov [rbp + reg*scale + disp], reg/imm
then you need to perform the following checks:
// Matches a 32-bit MOV store whose memory operand uses RBP as base or index.
// The comparison uses REG_RBP: in a 64-bit Pin build REG_EBP names the 32-bit
// register and is a different enumerator, so it never equals the base/index
// register of an [rbp...] operand.
if (INS_Opcode(ins) == XED_ICLASS_MOV && // Check that the instruction is MOV.
    INS_OperandIsMemory(ins, 0) && // Check that the destination operand is a memory operand.
    INS_OperandWidth(ins, 0) == 32 && // Check that the size of the operand is 32 bits.
    (INS_OperandMemoryBaseReg(ins, 0) == REG_RBP ||
     INS_OperandMemoryIndexReg(ins, 0) == REG_RBP)) // Check that the base or index register is RBP.
{
...
}
Note that these checks accept both MOV instructions with displacement (including a displacement of zero) and MOV instructions without displacement (which is semantically equivalent to a displacement of zero but the encoding is different).
I assumed that you want to accept RBP both as a base register or as an index register (potentially with a scale larger than 1). Note that in case RBP is used as a base register, the encoding of the instruction will always include a displacement. See: Why are rbp and rsp called general purpose registers?.
If you want to accept all of the following instructions, where RBP is used as the base register:
mov [rbp + disp], reg/imm
mov [rbp + reg*scale + disp], reg/imm
then you need to perform the following checks:
// Matches a 32-bit MOV store whose memory operand uses RBP as the base
// register (with or without an index register).
// The comparison uses REG_RBP: in a 64-bit Pin build REG_EBP names the 32-bit
// register and is a different enumerator.
if (INS_Opcode(ins) == XED_ICLASS_MOV && // Check that the instruction is MOV.
    INS_OperandIsMemory(ins, 0) && // Check that the destination operand is a memory operand.
    INS_OperandWidth(ins, 0) == 32 && // Check that the size of the operand is 32 bits.
    INS_OperandMemoryBaseReg(ins, 0) == REG_RBP) // Check that the base is RBP.
{
...
}
If you want to accept only the following instruction:
mov [rbp + disp], reg/imm
then you need to perform the following checks:
// Matches only a 32-bit MOV store of the form [rbp + disp]: RBP is the base
// register and there is no index register.
// The comparison uses REG_RBP: in a 64-bit Pin build REG_EBP names the 32-bit
// register and is a different enumerator.
if (INS_Opcode(ins) == XED_ICLASS_MOV && // Check that the instruction is MOV.
    INS_OperandIsMemory(ins, 0) && // Check that the destination operand is a memory operand.
    INS_OperandWidth(ins, 0) == 32 && // Check that the size of the operand is 32 bits.
    INS_OperandMemoryBaseReg(ins, 0) == REG_RBP && // Check that the base is RBP.
    INS_OperandMemoryIndexReg(ins, 0) == REG_INVALID()) // Check that there is no index register.
{
...
}
If you want to check whether the displacement is a negative number, use the following check:
INS_OperandMemoryDisplacement(ins, 0) < 0
Note that INS_OperandMemoryDisplacement does not distinguish between a memory operand that has no displacement and one that has a displacement of zero. If there is no displacement, it simply returns zero. If you want to determine whether the instruction encoding actually includes a displacement field, you should use the XED API instead.