For details of ELF dynamic linking, read John Levine's "Linkers and Loaders" (http://www.iecc.com/linker/), Chapter 10 (and previous chapters as needed to understand the data structures). See The Grugq's "Cheating the ELF" (http://grugq.github.io/docs/subversiveld.pdf) for offensive uses. More on ELF hackery, if you feel like it: http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html http://althing.cs.dartmouth.edu/local/reverse-talk.pdf --------------------------------------------------------------------------- sergey@toy32:~$ cat exec.c #include <stdio.h> #include <unistd.h> int main() { char * args[] = {"/bin/ls", NULL}; printf("pid: %d\n", getpid()); //sleep(60); execv("/bin/ls", args); } sergey@toy32:~$ ls -l exec-* -rwxrwxr-x 1 sergey sergey 7272 2013-01-16 14:40 exec-dyn -rwxrwxr-x 1 sergey sergey 644618 2013-01-11 13:27 exec-stat sergey@toy32:~$ gdb ./exec-stat GNU gdb (Ubuntu/Linaro 7.3-0ubuntu2) 7.3-2011.08 Copyright (C) 2011 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. Type "show copying" and "show warranty" for details. This GDB was configured as "i686-linux-gnu". For bug reporting instructions, please see: ... Reading symbols from /home/sergey/exec-stat...(no debugging symbols found)...done. 
(gdb) b main Breakpoint 1 at 0x8048cb3 (gdb) r Starting program: /home/sergey/exec-stat Breakpoint 1, 0x08048cb3 in main () (gdb) disas main Dump of assembler code for function main: 0x08048cb0 <+0>: push %ebp 0x08048cb1 <+1>: mov %esp,%ebp => 0x08048cb3 <+3>: and $0xfffffff0,%esp 0x08048cb6 <+6>: sub $0x20,%esp 0x08048cb9 <+9>: movl $0x80af468,0x18(%esp) 0x08048cc1 <+17>: movl $0x0,0x1c(%esp) 0x08048cc9 <+25>: call 0x80536d0 0x08048cce <+30>: mov $0x80af470,%edx 0x08048cd3 <+35>: mov %eax,0x4(%esp) 0x08048cd7 <+39>: mov %edx,(%esp) 0x08048cda <+42>: call 0x80497e0 0x08048cdf <+47>: movl $0x3c,(%esp) 0x08048ce6 <+54>: call 0x80533c0 0x08048ceb <+59>: lea 0x18(%esp),%eax 0x08048cef <+63>: mov %eax,0x4(%esp) 0x08048cf3 <+67>: movl $0x80af468,(%esp) 0x08048cfa <+74>: call 0x80536a0 0x08048cff <+79>: leave 0x08048d00 <+80>: ret ---Type <return> to continue, or q <return> to quit--- End of assembler dump. (gdb) disas execv Dump of assembler code for function execv: 0x080536a0 <+0>: sub $0xc,%esp 0x080536a3 <+3>: mov 0x80d7570,%eax 0x080536a8 <+8>: mov %eax,0x8(%esp) 0x080536ac <+12>: mov 0x14(%esp),%eax 0x080536b0 <+16>: mov %eax,0x4(%esp) 0x080536b4 <+20>: mov 0x10(%esp),%eax 0x080536b8 <+24>: mov %eax,(%esp) 0x080536bb <+27>: call 0x8077430 0x080536c0 <+32>: add $0xc,%esp 0x080536c3 <+35>: ret End of assembler dump. 
(gdb) disas execve Dump of assembler code for function execve: 0x08077430 <+0>: push %ebx 0x08077431 <+1>: mov 0x10(%esp),%edx 0x08077435 <+5>: mov 0xc(%esp),%ecx 0x08077439 <+9>: mov 0x8(%esp),%ebx 0x0807743d <+13>: mov $0xb,%eax <---- exec() syscall # 0x08077442 <+18>: call *0x80d60bc <---- call to VDSO's location 0x08077448 <+24>: cmp $0xfffff000,%eax 0x0807744d <+29>: ja 0x8077451 0x0807744f <+31>: pop %ebx 0x08077450 <+32>: ret 0x08077451 <+33>: mov $0xffffffe8,%edx 0x08077457 <+39>: neg %eax 0x08077459 <+41>: mov %gs:0x0,%ecx 0x08077460 <+48>: mov %eax,(%ecx,%edx,1) 0x08077463 <+51>: or $0xffffffff,%eax 0x08077466 <+54>: pop %ebx 0x08077467 <+55>: ret End of assembler dump. The "x" (or "x/x") command displays the memory at the given address as a 32-bit word (here, a pointer). (gdb) x 0x80d60bc 0x80d60bc <_dl_sysinfo>: 0x00110414 But notice how the bytes are actually stored on the (emulated) little-endian x86, least significant byte first (the x/4b command gives bytes in increasing address order, so LSB first, MSB last): (gdb) x/4b 0x80d60bc 0x80d60bc <_dl_sysinfo>: 0x14 0x04 0x11 0x00 And now we are looking at the "VDSO" page, whose address is stored at 0x80d60bc: (gdb) disas *0x80d60bc Dump of assembler code for function __kernel_vsyscall: 0x00110414 <+0>: int $0x80 <---- so int 0x80 was faster on boot 0x00110416 <+2>: ret End of assembler dump. Now let's run the code and see the process id (PID): (gdb) s Single stepping until exit from function main, which has no line number information. pid: 3788 ^Z Program received signal SIGTSTP, Stopped (user). 0x00110416 in __kernel_vsyscall () (gdb) ^Z [1]+ Stopped gdb ./exec-stat ...and see memory maps for that PID in the /proc pseudo-filesystem (in reality, any read of a file in /proc gets redirected via the VFS to kernel functions that walk the kernel's process descriptor table): sergey@toy32:~$ cat /proc/3788/maps | less sergey@toy32:~$ fg gdb ./exec-stat ^CQuit (gdb) q A debugging session is active. Inferior 1 [process 3788] will be killed. Quit anyway? 
(y or n) y OK, let's try this again: sergey@toy32:~$ gdb ./exec-stat GNU gdb (Ubuntu/Linaro 7.3-0ubuntu2) 7.3-2011.08 Copyright (C) 2011 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. Type "show copying" and "show warranty" for details. This GDB was configured as "i686-linux-gnu". For bug reporting instructions, please see: ... Reading symbols from /home/sergey/exec-stat...(no debugging symbols found)...done. (gdb) b main Breakpoint 1 at 0x8048cb3 (gdb) s The program is not being run. (gdb) r Starting program: /home/sergey/exec-stat Breakpoint 1, 0x08048cb3 in main () (gdb) s Single stepping until exit from function main, which has no line number information. pid: 3801 ^Z Program received signal SIGTSTP, Stopped (user). 0x00110416 in __kernel_vsyscall () (gdb) ^Z [1]+ Stopped gdb ./exec-stat So now we see the process' (virtual address space) memory map: sergey@toy32:~$ cat /proc/3801/maps 00110000-00111000 r-xp 00000000 00:00 0 [vdso] 08048000-080d5000 r-xp 00000000 08:01 130598 /home/sergey/exec-stat 080d5000-080d7000 rw-p 0008c000 08:01 130598 /home/sergey/exec-stat 080d7000-080fb000 rw-p 00000000 00:00 0 [heap] b7fff000-b8000000 rw-p 00000000 00:00 0 bffdf000-c0000000 rw-p 00000000 00:00 0 [stack] ========================================================================= And now let's look at a dynamically linked file: sergey@toy32:~$ xxd ./exec-dyn | less sergey@toy32:~$ readelf -a ./exec-dyn | less sergey@toy32:~$ objdump -d ./exec-dyn | less sergey@toy32:~$ gdb ./exec-dyn GNU gdb (Ubuntu/Linaro 7.3-0ubuntu2) 7.3-2011.08 Copyright (C) 2011 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. Type "show copying" and "show warranty" for details. 
This GDB was configured as "i686-linux-gnu". For bug reporting instructions, please see: ... Reading symbols from /home/sergey/exec-dyn...(no debugging symbols found)...done. (gdb) disas No frame selected. (gdb) disas main Dump of assembler code for function main: 0x08048474 <+0>: push %ebp 0x08048475 <+1>: mov %esp,%ebp 0x08048477 <+3>: and $0xfffffff0,%esp 0x0804847a <+6>: sub $0x20,%esp 0x0804847d <+9>: movl $0x80485a0,0x18(%esp) 0x08048485 <+17>: movl $0x0,0x1c(%esp) 0x0804848d <+25>: call 0x8048380 0x08048492 <+30>: mov $0x80485a8,%edx 0x08048497 <+35>: mov %eax,0x4(%esp) 0x0804849b <+39>: mov %edx,(%esp) 0x0804849e <+42>: call 0x8048360 0x080484a3 <+47>: movl $0x3c,(%esp) 0x080484aa <+54>: call 0x8048370 0x080484af <+59>: lea 0x18(%esp),%eax 0x080484b3 <+63>: mov %eax,0x4(%esp) 0x080484b7 <+67>: movl $0x80485a0,(%esp) 0x080484be <+74>: call 0x80483b0 0x080484c3 <+79>: leave 0x080484c4 <+80>: ret ---Type <return> to continue, or q <return> to quit--- End of assembler dump. (gdb) b sleep Breakpoint 1 at 0x8048370 (Although the sleep(60) line is commented out in the exec.c listing, this binary was built with it enabled: note the movl $0x3c -- i.e. 60 -- followed by a call at main+47 and main+54.) This is the PLT stub for getpid: (gdb) disas 0x8048380 Dump of assembler code for function getpid@plt: 0x08048380 <+0>: jmp *0x804a008 0x08048386 <+6>: push $0x10 0x0804838b <+11>: jmp 0x8048350 End of assembler dump. And this is getpid's GOT slot: (gdb) x 0x804a008 0x804a008 : 0x08048386 Note how it's initially pointing right back at the next instruction in the stub? The dynamic linker will change this GOT slot's value to the actual address at which libc's getpid() gets loaded. (gdb) r Starting program: /home/sergey/exec-dyn pid: 3849 OK, getpid() has now been called once (the pid got printed) and we stop at the sleep() breakpoint; now let's see the GOT slot: Breakpoint 1, 0x001cc250 in sleep () from /lib/i386-linux-gnu/libc.so.6 (gdb) x 0x804a008 0x804a008 : 0x001cd380 Different! The GOT slot got changed to an address within libc (check with /proc/<pid>/maps on your system!) 
(gdb) disas *0x804a008 Dump of assembler code for function getpid: 0x001cd380 <+0>: mov %gs:0x6c,%edx 0x001cd387 <+7>: cmp $0x0,%edx 0x001cd38a <+10>: jle 0x1cd390 0x001cd38c <+12>: mov %edx,%eax 0x001cd38e <+14>: repz ret 0x001cd390 <+16>: jne 0x1cd3a2 0x001cd392 <+18>: mov %gs:0x68,%eax 0x001cd398 <+24>: test %eax,%eax 0x001cd39a <+26>: lea 0x0(%esi),%esi 0x001cd3a0 <+32>: jne 0x1cd38e 0x001cd3a2 <+34>: mov $0x14,%eax <---- getpid syscall # 0x001cd3a7 <+39>: call *%gs:0x10 <---- call to VDSO, off GS 0x001cd3ae <+46>: test %edx,%edx 0x001cd3b0 <+48>: mov %eax,%ecx 0x001cd3b2 <+50>: jne 0x1cd38e 0x001cd3b4 <+52>: mov %ecx,%gs:0x68 0x001cd3bb <+59>: nop 0x001cd3bc <+60>: lea 0x0(%esi,%eiz,1),%esi 0x001cd3c0 <+64>: ret ---Type <return> to continue, or q <return> to quit---q Quit (gdb) For more background on the various ways Linux system calls are dispatched on 32-bit vs 64-bit platforms: http://www.win.tue.nl/~aeb/linux/lk/lk-4.html (see also 2013/syscalls.txt in the course directory; it has questions and answers from last year).