Daniele Bellavista's Blog

Security, IT, Projects

Reversing C++ binaries 4: Inheritance — 2014-10-20

Reversing C++ binaries 4: Inheritance

The previous tutorial was about virtual members. Now we can use that knowledge to analyze inheritance. We will use the usual TestClass, but now it extends BaseTestClass:

class BaseTestClass
// _ZN13BaseTestClassC1Ev
baseStuff = 1;
// _ZN13BaseTestClassD*Ev
virtual ~BaseTestClass()
baseStuff = 0;
// _ZN13BaseTestClass8GetStuffEv
virtual int GetStuff()
return baseStuff;
int baseStuff;
class TestClass : public BaseTestClass
// _ZN9TestClassC1Ev
stuff = 2;
// _ZN9TestClassD*Ev
virtual ~TestClass()
stuff = 0;
// _ZN9TestClass8GetStuffEv
virtual int GetStuff()
return BaseTestClass::GetStuff() + stuff;
// _ZN9TestClass13AnotherMethodEv
virtual int AnotherMethod()
return baseStuff;
int stuff;
int DoIt(BaseTestClass* btc)
return btc->GetStuff();
int main()
BaseTestClass* t1 = new BaseTestClass();
TestClass* t2 = new TestClass();
int a = DoIt(t1) + DoIt(t2) + t2->AnotherMethod();
delete t1;
delete t2;
return a;

This time we’ll go directly to the constructors to analyze the virtual tables:

080487ee :
80487ee: push %ebp
80487ef: mov %esp,%ebp
80487f1: lea -0x1010(%esp),%esp
80487f8: orl $0x0,(%esp)
80487fc: lea 0x1010(%esp),%esp
8048803: mov 0x8(%ebp),%eax ; this pointer
8048806: movl $0x8048ad8,(%eax) ; vtable = 0x8048ad8
804880c: mov 0x8(%ebp),%eax
804880f: movl $0x1,0x4(%eax) ; baseStuff = 1
8048816: pop %ebp
8048817: ret

080488a2 :
80488a2: push %ebp
80488a3: mov %esp,%ebp
80488a5: lea -0x1028(%esp),%esp
80488ac: orl $0x0,(%esp)
80488b0: lea 0x1010(%esp),%esp
80488b7: mov 0x8(%ebp),%eax
80488ba: mov %eax,(%esp) ; this pointer
80488bd: call 80487ee ; Call to base constructor
80488c2: mov 0x8(%ebp),%eax
80488c5: movl $0x8048ac0,(%eax) ; vtable = 0x8048ac0 (overwrite)
80488cb: mov 0x8(%ebp),%eax
80488ce: movl $0x2,0x8(%eax) ; stuff = 2
80488d5: leave
80488d6: ret
80488d7: nop

Let’s review what happened. The first action done by the TestClass constructor is to call the base constructor, which sets the virtual table and the field baseStuff:

  this+0x0: 0x8048ad8 (BaseTestClass vtable)
this+0x4: 1 (BaseTestClass::baseStuff)

Then, the vtable pointer is overwritten and the field stuff is placed after baseStuff:

  this+0x0: 0x8048ac0 (TestClass vtable)
this+0x4: 1 (BaseTestClass::baseStuff)
this+0x8: 2 (TestClass::stuff)

Vtables are:

Contents of section .rodata:
8048ab0 03000000 01000200 00000000 f08a0408 ................
8048ac0 4c890408 96890408 c4890408 ee890408 L...............
8048ad0 00000000 0c8b0408 8c880408 ca880408 ................
8048ae0 f8880408 39546573 74436c61 73730000 ....9TestClass..
8048af0 68a00408 e48a0408 0c8b0408 31334261 h...........13Ba
8048b00 73655465 7374436c 61737300 28a00408 seTestClass.(...
8048b10 fc8a0408 ....

// BaseTestClass:
0x8048ad8: _ZN13BaseTestClassD1Ev
0x8048adc: _ZN13BaseTestClassD0Ev
0x8048ae0: _ZN13BaseTestClass8GetStuffEv

// TestClass:
0x8048ac0: _ZN9TestClassD1Ev
0x8048ac4: _ZN9TestClassD0Ev
0x8048ac8: _ZN9TestClass8GetStuffEv
0x8048acc: _ZN9TestClass13AnotherMethodEv

Multiple Inheritance

C++ supports multiple inheritance, so, let’s see how it’s implemented:
class Base2
// _ZN5Base2C1Ev
b2 = 1;
// _ZN5Base2D*Ev
virtual ~Base2()
b2 = 0;
// _ZN5Base29GetStuff2Ev
virtual int GetStuff2()
return b2;
int b2;


class Base1
// _ZN5Base1C1Ev
baseStuff = 1;
// _ZN5Base1D*Ev
virtual ~Base1()
baseStuff = 0;
// _ZN5Base18GetStuffEv
virtual int GetStuff()
return baseStuff;
int baseStuff;


class TestClass : public Base1, public Base2
// _ZN9TestClassC1Ev
stuff = 2;
// _ZN9TestClassD*Ev
// _ZThn8_N9TestClassD*Ev
virtual ~TestClass()
stuff = 0;
// _ZN9TestClass9GetStuff2Ev
// _ZThn8_N9TestClass9GetStuff2Ev
virtual int GetStuff2()
return Base2::GetStuff2() + stuff;
// _ZN9TestClass8GetStuffEv
virtual int GetStuff()
return Base1::GetStuff() + stuff;

int stuff;

// _Z4DoItP5Base1P5Base2
int DoIt(Base1* bt1, Base2* bt2)
return bt1->GetStuff() + bt2->GetStuff2();

int main()
TestClass* t1 = new TestClass();
return DoIt(t1, t1);

For each method, I wrote its mangled name as a comment. As you may have noticed, the methods TestClass::GetStuff2() and TestClass::~TestClass() have two mangled names, that is, they are mapped to two different functions. The first is the usual method implementation, while the second moves the this reference and jumps to the right method:

08048a7b :
8048a7b: subl $0x8,0x4(%esp)
8048a80: jmp 8048a4e

The reason is simple, but it will be clearer after we see the memory layout, so let’s analyze the constructors:

080488ac :
80488ac: push %ebp
80488ad: mov %esp,%ebp
80488af: lea -0x1010(%esp),%esp
80488b6: orl $0x0,(%esp)
80488ba: lea 0x1010(%esp),%esp
80488c1: mov 0x8(%ebp),%eax ; this pointer
80488c4: movl $0x8048bd8,(%eax) ; Base1 vtable (0x8048bd8)
80488ca: mov 0x8(%ebp),%eax
80488cd: movl $0x1,0x4(%eax) ; baseStuff = 1
80488d4: pop %ebp
80488d5: ret

080487f8 :
80487f8: push %ebp
80487f9: mov %esp,%ebp
80487fb: lea -0x1010(%esp),%esp
8048802: orl $0x0,(%esp)
8048806: lea 0x1010(%esp),%esp
804880d: mov 0x8(%ebp),%eax ; this pointer
8048810: movl $0x8048bf0,(%eax) ; Base2 vtable (0x8048bf0)
8048816: mov 0x8(%ebp),%eax
8048819: movl $0x1,0x4(%eax) ; b2 = 1
8048820: pop %ebp
8048821: ret

08048982 :
8048982: push %ebp
8048983: mov %esp,%ebp
8048985: lea -0x1028(%esp),%esp
804898c: orl $0x0,(%esp)
8048990: lea 0x1010(%esp),%esp
8048997: mov 0x8(%ebp),%eax ; this pointer
804899a: mov %eax,(%esp)
804899d: call 80488ce ; Base1 constructor
80489a2: mov 0x8(%ebp),%eax
80489a5: add $0x8,%eax ; this + 8
80489a8: mov %eax,(%esp)
80489ab: call 8048838 ; Base2 constructor
80489b0: mov 0x8(%ebp),%eax
80489b3: movl $0x8048ba8,(%eax) ; Base1 vtable overwrite (0x8048ba8)
80489b9: mov 0x8(%ebp),%eax
80489bc: movl $0x8048bc0,0x8(%eax) ; Base2 vtable overwrite (0x8048bc0)
80489c3: mov 0x8(%ebp),%eax
80489c6: movl $0x2,0x10(%eax) ; stuff = 2
80489cd: leave
80489ce: ret

Note the presence of two vtables. The memory layout is the following after calling the two base constructors:

  this+0x0: 0x8048bd8 (Base1 vtable)
this+0x4: 1 (Base1::baseStuff)
this+0x8: 0x8048bf0 (Base2 vtable)
this+0xc: 1 (Base2::b2)

Then, the constructor overwrites both the vtable references and writes its variable:
  this+0x0: 0x8048ba8 (Base1 vtable overwrite)
this+0x4: 1 (Base1::baseStuff)
this+0x8: 0x8048bc0 (Base2 vtable overwrite)
this+0xc: 1 (Base2::b2)
this+0x10: 2 (TestClass::stuff)


Contents of section .rodata:
8048b80 03000000 01000200 00000000 00000000 ................
8048b90 00000000 00000000 00000000 00000000 ................
8048ba0 00000000 208c0408 ce890408 3a8a0408 .... .......:...
8048bb0 a28a0408 6e8a0408 f8ffffff 208c0408 ....n....... ...
8048bc0 2f8a0408 678a0408 9b8a0408 00000000 /...g...........
8048bd0 00000000 488c0408 f6880408 34890408 ....H.......4...
8048be0 62890408 00000000 00000000 588c0408 b...........X...
8048bf0 42880408 80880408 ae880408 39546573 B...........9Tes
8048c00 74436c61 73730000 00000000 00000000 tClass..........
8048c10 00000000 00000000 00000000 00000000 ................
8048c20 68b00408 fc8b0408 00000000 02000000 h...............
8048c30 488c0408 02000000 588c0408 02080000 H.......X.......
8048c40 35426173 65310000 28b00408 408c0408 5Base1..(...@...
8048c50 35426173 65320000 28b00408 508c0408 5Base2..(...P...

// Base1:
0x8048bd8: _ZN5Base1D1Ev
0x8048bdc: _ZN5Base1D0Ev
0x8048be0: _ZN5Base18GetStuffEv

// Base2:
0x8048bf0: _ZN5Base2D1Ev
0x8048bf4: _ZN5Base2D0Ev
0x8048bf8: _ZN5Base29GetStuff2Ev

// TestClass vtable1:
0x8048ba8: _ZN9TestClassD1Ev
0x8048bac: _ZN9TestClassD0Ev
0x8048bb0: _ZN9TestClass8GetStuffEv
// TestClass vtable2:
0x8048bc0: _ZThn8_N9TestClassD1Ev
0x8048bc4: _ZThn8_N9TestClassD0Ev
0x8048bc8: _ZThn8_N9TestClass9GetStuff2Ev

By looking at the memory layout and the vtables, the reason for the duplicated functions, such as _ZThn8_N9TestClass9GetStuff2Ev, and for the two vtables becomes obvious:

08048a7b :
8048a7b: subl $0x8,0x4(%esp)
8048a80: jmp 8048a4e

The wrapper function adjusts the this reference so that the compiler can reuse the implementation of TestClass::GetStuff2. If you cast TestClass* to Base2*, the compiler moves the this reference so that it points to the second vtable. However, since the method is virtual and can be overridden, by calling Base2::GetStuff2 you are actually calling TestClass::GetStuff2, but the this reference has been moved. The goal of the wrapper is to undo that move, restoring this to the original location.

For instance, let’s see the main and DoIt disassembly:

08048741 :
804877b: mov 0x1c(%esp),%eax ; TestClass* tc -> eax
804877f: add $0x8,%eax ; Casting: this = this+0x8 (2nd vtable)
8048789: mov %eax,0x4(%esp) ; Second parameter of DoIt
804878d: mov 0x1c(%esp),%eax ; TestClass* tc -> eax
8048791: mov %eax,(%esp) ; First parameter of DoIt
8048794: call 80486fd

080486fd :
8048713: mov 0x8(%ebp),%eax ; Base1* bt1 -> eax
8048716: mov (%eax),%eax
8048718: add $0x8,%eax
804871b: mov (%eax),%eax ; GetStuff from vtable
804871d: mov 0x8(%ebp),%edx
8048720: mov %edx,(%esp)
8048723: call *%eax
8048725: mov %eax,%ebx
8048727: mov 0xc(%ebp),%eax ; Base2* bt2 -> eax
804872a: mov (%eax),%eax
804872c: add $0x8,%eax
804872f: mov (%eax),%eax ; _ZThn8_N9TestClass9GetStuff2Ev from vtable
8048731: mov 0xc(%ebp),%edx
8048734: mov %edx,(%esp)
8048737: call *%eax ; Actual call to _ZThn8_N9TestClass9GetStuff2Ev

08048a9b :
8048a9b: subl $0x8,0x4(%esp) ; 'this' adjustment
8048aa0: jmp 8048a6e

Calling the wrapper causes this to point to the original TestClass pointer again, so the function _ZN9TestClass9GetStuff2Ev can be called with consistent data.

Reversing C++ binaries 3: Virtual members — 2014-10-17

Reversing C++ binaries 3: Virtual members

Now that we have a better understanding of how classes are compiled, we can analyze polymorphism. We can expect virtual members to be handled differently, because a class instance may have a different implementation: the compiler simply can’t know at compile time which function to call.

I modified the old TestClass to include a virtual method:

class TestClass
// _ZN9TestClassC1Ev
stuff = 1;

// _ZN9TestClassD1Ev
virtual ~TestClass()
stuff = 0;

// _ZN9TestClass8GetStuffEv
virtual int GetStuff()
return stuff;

int stuff;

// _Z4DoItP9TestClass
int DoIt(TestClass* t1)
return t1->GetStuff();

int main()
TestClass* t1 = new TestClass();
int r = DoIt(t1);
delete t1;
return r;

Let’s look at the disassembly of the function DoIt:

0804869d :
804869d: push %ebp
804869e: mov %esp,%ebp
80486a0: lea -0x1028(%esp),%esp
80486a7: orl $0x0,(%esp)
80486ab: lea 0x1010(%esp),%esp
80486b2: mov 0x8(%ebp),%eax ; t1 = ebp + 8
80486b5: mov (%eax),%eax ; obj = *t1
80486b7: add $0x8,%eax ;
80486ba: mov (%eax),%eax ; GetStuff = obj[8]
80486bc: mov 0x8(%ebp),%edx
80486bf: mov %edx,(%esp)
80486c2: call *%eax ; GetStuff(this)
80486c4: leave
80486c5: ret

Unlike previous usages, the DoIt disassembly doesn’t contain an explicit call to GetStuff. Instead, there is a call through register eax, which is initialized by dereferencing a field of TestClass. This particular field is not present in the C++ code, thus we must look at the disassembled constructor:

080487a6 :
80487a6: push %ebp
80487a7: mov %esp,%ebp
80487a9: lea -0x1010(%esp),%esp
80487b0: orl $0x0,(%esp)
80487b4: lea 0x1010(%esp),%esp
80487bb: mov 0x8(%ebp),%eax
80487be: movl $0x8048910,(%eax) ; this[0] = 0x8048910
80487c4: mov 0x8(%ebp),%eax
80487c7: movl $0x1,0x4(%eax) ; this->stuff = 1;
80487ce: pop %ebp
80487cf: ret

The address 0x8048910 resides in the .rodata section and points to the Virtual Table of TestClass. The vtable contains references to all virtual methods present in TestClass:

Contents of section .rodata:
8048900 03000000 01000200 00000000 28890408 ............(...
8048910 d0870408 0e880408 3c880408 39546573 ........<...9Tes
8048920 74436c61 73730000 28a00408 1c890408 tClass..(.......

// TestClass virtual table:

0x8048910 + 0x00: 080487d0 ; _ZN9TestClassD1Ev (Complete Object destructor)
0x8048910 + 0x04: 0804880e ; _ZN9TestClassD0Ev (Deleting destructor)
0x8048910 + 0x08: 0804883c ; _ZN9TestClass8GetStuffEv (GetStuff)

To explain the difference between the two destructors, I quote the Itanium C++ ABI reference:

  1. Base object destructor of a class T: A function that runs the destructors for non-static data members of T and non-virtual direct base classes of T. Mangled with suffix D2.
  2. Complete object destructor of a class T: a function that, in addition to the actions required of a base object destructor, runs the destructors for the virtual base classes of T. Mangled with suffix D1.
  3. Deleting destructor of a class T: a function that, in addition to the actions required of a complete object destructor, calls the appropriate deallocation function (i.e,. operator delete) for T. Mangled with suffix D0.

Reversing C++ binaries 2: Objects lifecycle and structure —

Reversing C++ binaries 2: Objects lifecycle and structure

This post belongs to the series “Reversing C++ binaries”. First post here.

The goal of this episode is to understand the lifecycle of objects when using
global, local and dynamically allocated instances. In the end, the memory layout of the fields of a simple object is analyzed.

As a first example, we take into account the following class:

class TestClass
// _ZN9TestClassC1Ev
stuff = 1;

// _ZN9TestClassD1Ev
stuff = 0;

// _ZN9TestClass8GetStuffEv
int GetStuff()
return stuff;

int stuff;

A common approach when compiling a class is to create global functions for constructors, destructors and methods and allocate only enough space for fields. Each global function operates on this, which is a pointer to the object. Depending on the compiler, the reference can be passed in a register or on the stack. I’m currently using g++, which uses the stack to pass this as the first parameter.

Local Objects

The following function initializes an object on the stack and calls the method GetStuff:

// _Z7OnStackv
int OnStack()
TestClass t1;
return t1.GetStuff();

804862d: push   %ebp
804862e: mov %esp,%ebp
8048630: push %ebx
8048631: lea -0x1034(%esp),%esp
8048638: orl $0x0,(%esp)
804863c: lea 0x1010(%esp),%esp
8048643: lea -0xc(%ebp),%eax ; <- this
8048646: mov %eax,(%esp) ; as first parameter
8048649: call 804876e ; Constructor
804864e: lea -0xc(%ebp),%eax ; <- this
8048651: mov %eax,(%esp) ; as first parameter
8048654: call 80487ae ; Method
8048659: mov %eax,%ebx
804865b: lea -0xc(%ebp),%eax ; <- this
804865e: mov %eax,(%esp) ; as first parameter
8048661: call 804878e ; Destructor on exit
8048666: mov %ebx,%eax
8048668: add $0x24,%esp
804866b: pop %ebx
804866c: pop %ebp
804866d: ret

Dynamic Allocated Object

A heap-based initialization implies the use of the new and delete operators, which respectively trigger the class constructor and destructor. The new operator, which resides in a shared library, performs a malloc, while the delete operator performs a free.

// _Z6OnHeapv
int OnHeap()
// new: _Znwj@plt
// con: _ZN9TestClassC1Ev
TestClass* t1 = new TestClass();

int ret = t1->GetStuff();

// des: _ZN9TestClassD1Ev
// delete: _ZdlPv@plt
delete t1;
return ret;
804866e: push   %ebp
804866f: mov %esp,%ebp
8048671: push %ebx
8048672: lea -0x1034(%esp),%esp
8048679: orl $0x0,(%esp)
804867d: lea 0x1010(%esp),%esp
8048684: movl $0x4,(%esp) ; the size to allocate
804868b: call 80484e0 ; the new operator
8048690: mov %eax,%ebx ; return the 'this' reference
8048692: mov %ebx,(%esp)
8048695: call 804876e
804869a: mov %ebx,-0xc(%ebp)
804869d: mov -0xc(%ebp),%eax
80486a0: mov %eax,(%esp)
80486a3: call 80487ae
80486a8: mov %eax,-0x10(%ebp)
80486ab: mov -0xc(%ebp),%ebx
80486ae: test %ebx,%ebx ; If the reference is null
80486b0: je 80486c2 ; ...return
80486b2: mov %ebx,(%esp)
80486b5: call 804878e
80486ba: mov %ebx,(%esp) ; Value of this as first parameter
80486bd: call 80484a0 ; of the delete operator
80486c2: mov -0x10(%ebp),%eax
80486c5: add $0x24,%esp
80486c8: pop %ebx
80486c9: pop %ebp
80486ca: ret

Global Objects

Both static and global objects are initialized before the main is called,
by an initialization routine. The same routine saves the new references and
the destructor address into a global table, where they will be destroyed by
the __run_exit_handlers routine, executed after the main function.

TestClass tg;
static TestClass ts;

int GlobalTc()
return tg.GetStuff() + ts.GetStuff();

08048793 :
80487e3: movl $0x804a00c,(%esp)
80487ea: call 8048860 ; Global initialization
80487ef: movl $0x804a004,0x8(%esp)
804880b: movl $0x804a014,(%esp)
8048812: call 8048860 ; Static initialization
8048834: ret

Memory Layout

For a better understanding of the memory layout, let’s use a class with a few more fields.

class WithFieldsClass
// _ZN15WithFieldsClassC1Ev
publicS1 = 1;
publicS2 = &protectedS1;
protectedS1 = 'c';
protectedS2 = 2;
privateS1 = &publicS1;
privateS2 = 'd';

int publicS1;
char* publicS2;

char protectedS1;
int protectedS2;

int* privateS1;
char privateS2;

The constructor is the function to analyze to understand which fields are present in the object and maybe also their types.

08048b02 :
8048b02: push %ebp
8048b03: mov %esp,%ebp
8048b05: lea -0x1010(%esp),%esp
8048b0c: orl $0x0,(%esp)
8048b10: lea 0x1010(%esp),%esp
8048b17: mov 0x8(%ebp),%eax
8048b1a: movl $0x1,(%eax) ; this->publicS1 = 1
8048b20: mov 0x8(%ebp),%eax
8048b23: lea 0x8(%eax),%edx
8048b26: mov 0x8(%ebp),%eax
8048b29: mov %edx,0x4(%eax) ; this->publicS2 = &protectedS1
8048b2c: mov 0x8(%ebp),%eax
8048b2f: movb $0x63,0x8(%eax) ; this->protectedS1 = 'c'
8048b33: mov 0x8(%ebp),%eax
8048b36: movl $0x2,0xc(%eax) ; this->protectedS2 = 2
8048b3d: mov 0x8(%ebp),%edx
8048b40: mov 0x8(%ebp),%eax
8048b43: mov %edx,0x10(%eax) ; this->privateS1 = &publicS1
8048b46: mov 0x8(%ebp),%eax
8048b49: movb $0x64,0x14(%eax) ; this->privateS2 = 'd'
8048b4d: pop %ebp
8048b4e: ret

From this analysis, we can infer that the memory layout is sequential:

  this+0x00: publicS1
this+0x04: publicS2
this+0x08: protectedS1
this+0x0c: protectedS2
this+0x10: privateS1
this+0x14: privateS2

Reversing C++ binaries 1: name mangling and global/static functions — 2014-10-15

Reversing C++ binaries 1: name mangling and global/static functions

Binary reversing is an essential skill for malware analysis and solving wargame challenges. Programs written in C are common and there are various tutorials about reversing them (calling conventions, dynamic libraries, stack, variables and so on). Once the assembly language is learned, it’s just a matter of patience to reverse an application (anti-reversing techniques aside, of course).

However, the assembly generated from C++ code is harder to analyze, due to object-oriented constructs. These tutorials aim to study how high-level constructs, such as namespaces, operators, classes and their relationships, are converted into assembly code and how to reverse them when analyzing a binary.


First of all, functions memory addresses are renamed with a name suitable for the compiler and the linker. This process is called name mangling (see below for references).

// _ZN12_GLOBAL__N_17ScroogeEv
int Scrooge()
return 5;

// _Z11GlobalPlutov
int GlobalPluto()
return 4;

// _ZL11GoofyStaticv
static int GoofyStatic()
return 3;

namespace Donald
// _ZN6Donald12GlobalDonaldEv
int GlobalDonald()
return 1;
// _ZN6DonaldL12StaticDonaldEv
int StaticDonald()
return 2;

Thus by reading the following code:

push   %ebp
mov %esp,%ebp
lea -0x1018(%esp),%esp
orl $0x0,(%esp)
lea 0x1010(%esp),%esp
call 80487fd
call 80487c1
call 8048749
call 8048785
call 804870d
mov $0x0,%eax

We can say that the function calls two functions outside any namespace, called GoofyStatic and GlobalPluto, two functions inside the ‘Donald’ namespace and finally a function residing in an anonymous namespace (mangled as _GLOBAL__N_1). Finally, GDB offers an automatic demangling utility:

gdb> set print asm-demangle on
gdb> disass main
Dump of assembler code for function main:
0x0804873f : push %ebp
0x08048740 : mov %esp,%ebp
0x08048742 : and $0xfffffff0,%esp
0x08048745 : lea -0x1010(%esp),%esp
0x0804874c : orl $0x0,(%esp)
0x08048750 : lea 0x1010(%esp),%esp
0x08048757 : call 0x8048659
0x0804875c : call 0x80485e9
0x08048761 : call 0x8048707
0x08048766 : call 0x8048621
0x0804876b : call 0x8048723
0x08048770 : mov $0x0,%eax
0x08048775 : leave
0x08048776 : ret