This post belongs to the series “Reversing C++ binaries”. First post here.


The goal of this episode is to understand the lifecycle of objects when using
global, local and dynamically allocated instances. At the end, the in-memory layout of the fields of a simple object is analyzed.

As a first example, we take into account the following class:

// Minimal class used by the examples: a single int field plus a constructor,
// a destructor and an accessor. The mangled symbol of each member function is
// noted above it.
class TestClass
{
    int stuff; // the only field of the object

public:
    // _ZN9TestClassC1Ev
    // Constructor: sets the field to 1.
    TestClass() : stuff(1)
    {
    }

    // _ZN9TestClassD1Ev
    // Destructor: resets the field to 0.
    ~TestClass()
    {
        this->stuff = 0;
    }

    // _ZN9TestClass8GetStuffEv
    // Returns the current value of the field.
    int GetStuff()
    {
        return this->stuff;
    }
};

A common approach when compiling a class is to emit ordinary global functions for its constructors, destructors and methods, and to allocate only enough space for its fields. Each of these functions operates on this, which is a pointer to the object. Depending on the compiler, this pointer is passed either in a register or on the stack. I’m currently using g++, which, in this 32-bit x86 build, passes this on the stack as the first parameter.

Local Objects

The following function initializes an object on the stack and calls its GetStuff method:

// _Z7OnStackv
// Creates a TestClass with automatic storage, calls GetStuff() on it and
// returns the value obtained.
int OnStack()
{
// Local object: constructed here, destroyed when the function exits.
TestClass t1;
// The result of GetStuff() is computed before t1 is destroyed.
return t1.GetStuff();
}

804862d: push   %ebp
804862e: mov %esp,%ebp
8048630: push %ebx
8048631: lea -0x1034(%esp),%esp
8048638: orl $0x0,(%esp)
804863c: lea 0x1010(%esp),%esp
8048643: lea -0xc(%ebp),%eax ; <- this
8048646: mov %eax,(%esp) ; as first parameter
8048649: call 804876e ; Constructor
804864e: lea -0xc(%ebp),%eax ; <- this
8048651: mov %eax,(%esp) ; as first parameter
8048654: call 80487ae ; Method
8048659: mov %eax,%ebx
804865b: lea -0xc(%ebp),%eax ; <- this
804865e: mov %eax,(%esp) ; as first parameter
8048661: call 804878e ; Destructor on exit
8048666: mov %ebx,%eax
8048668: add $0x24,%esp
804866b: pop %ebx
804866c: pop %ebp
804866d: ret

Dynamically Allocated Objects

A heap-based allocation implies the use of the new and delete operators, which respectively trigger the class constructor and destructor. The new operator, which resides in a shared library, performs a malloc, while the delete operator performs a free.

// _Z6OnHeapv
// Allocates a TestClass on the heap, reads its value through GetStuff(),
// destroys the object and returns the value that was read.
int OnHeap()
{
// new: _Znwj@plt (operator new(unsigned int), called through the PLT)
// con: _ZN9TestClassC1Ev (constructor, run on the pointer returned by new)
TestClass* t1 = new TestClass();

// Read the value while the object is still alive.
int ret = t1->GetStuff();

// des: _ZN9TestClassD1Ev (destructor, run before the memory is released)
// delete: _ZdlPv@plt (operator delete(void*), called through the PLT)
delete t1;
return ret;
}
804866e: push   %ebp
804866f: mov %esp,%ebp
8048671: push %ebx
8048672: lea -0x1034(%esp),%esp
8048679: orl $0x0,(%esp)
804867d: lea 0x1010(%esp),%esp
8048684: movl $0x4,(%esp) ; the size to allocate
804868b: call 80484e0 ; the new operator
8048690: mov %eax,%ebx ; return the 'this' reference
8048692: mov %ebx,(%esp)
8048695: call 804876e
804869a: mov %ebx,-0xc(%ebp)
804869d: mov -0xc(%ebp),%eax
80486a0: mov %eax,(%esp)
80486a3: call 80487ae
80486a8: mov %eax,-0x10(%ebp)
80486ab: mov -0xc(%ebp),%ebx
80486ae: test %ebx,%ebx ; If the pointer is null
80486b0: je 80486c2 ; ...skip the destructor and delete
80486b2: mov %ebx,(%esp)
80486b5: call 804878e
80486ba: mov %ebx,(%esp) ; Value of this as first parameter
80486bd: call 80484a0 ; of the delete operator
80486c2: mov -0x10(%ebp),%eax
80486c5: add $0x24,%esp
80486c8: pop %ebx
80486c9: pop %ebp
80486ca: ret

Global Objects

Both static and global objects are initialized before main is called,
by an initialization routine. The same routine saves each object's address and
its destructor's address into a global table, so that the objects are destroyed by
the __run_exit_handlers routine, which is executed after the main function.

// One instance with external linkage and one with internal linkage.
TestClass tg;
static TestClass ts;

// Returns the sum of the values reported by the global and the static instance.
int GlobalTc()
{
    const int fromGlobal = tg.GetStuff();
    const int fromStatic = ts.GetStuff();
    return fromGlobal + fromStatic;
}

08048793 :
...
80487e3: movl $0x804a00c,(%esp)
80487ea: call 8048860 ; Global initialization
80487ef: movl $0x804a004,0x8(%esp)
...
804880b: movl $0x804a014,(%esp)
8048812: call 8048860 ; Static initialization
...
8048834: ret

Memory Layout

For a better understanding of the memory layout, let’s use a class with a few more fields.

// Example class with six fields of mixed types (int, char, pointers) spread
// over the public, protected and private sections. It is used to study how
// the fields are placed in memory.
class WithFieldsClass
{
public:
// _ZN15WithFieldsClassC1Ev
// Assigns every field once. Two of the fields are pointers to other fields
// of the same object.
WithFieldsClass()
{
publicS1 = 1;
publicS2 = &protectedS1; // points at this object's own protectedS1
protectedS1 = 'c';
protectedS2 = 2;
privateS1 = &publicS1; // points at this object's own publicS1
privateS2 = 'd';
}

// Offsets below are those observed in the disassembled 32-bit g++ build.
int publicS1; // this+0x00
char* publicS2; // this+0x04

protected:
char protectedS1; // this+0x08
int protectedS2; // this+0x0c

private:
int* privateS1; // this+0x10
char privateS2; // this+0x14
};

The constructor is the function to analyze in order to understand which fields are present in the object and, possibly, their types.

08048b02 :
8048b02: push %ebp
8048b03: mov %esp,%ebp
8048b05: lea -0x1010(%esp),%esp
8048b0c: orl $0x0,(%esp)
8048b10: lea 0x1010(%esp),%esp
8048b17: mov 0x8(%ebp),%eax
8048b1a: movl $0x1,(%eax) ; this->publicS1 = 1
8048b20: mov 0x8(%ebp),%eax
8048b23: lea 0x8(%eax),%edx
8048b26: mov 0x8(%ebp),%eax
8048b29: mov %edx,0x4(%eax) ; this->publicS2 = &protectedS1
8048b2c: mov 0x8(%ebp),%eax
8048b2f: movb $0x63,0x8(%eax) ; this->protectedS1 = 'c'
8048b33: mov 0x8(%ebp),%eax
8048b36: movl $0x2,0xc(%eax) ; this->protectedS2 = 2
8048b3d: mov 0x8(%ebp),%edx
8048b40: mov 0x8(%ebp),%eax
8048b43: mov %edx,0x10(%eax) ; this->privateS1 = &publicS1
8048b46: mov 0x8(%ebp),%eax
8048b49: movb $0x64,0x14(%eax) ; this->privateS2 = 'd'
8048b4d: pop %ebp
8048b4e: ret

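From this analysis, we can infer that the fields are laid out sequentially, in declaration order, with padding inserted for alignment (protectedS1 is a one-byte char at 0x08, yet protectedS2 starts at 0x0c):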

  this+0x00: publicS1
this+0x04: publicS2
this+0x08: protectedS1
this+0x0c: protectedS2
this+0x10: privateS1
this+0x14: privateS2

Advertisements