在了解了上面的规律后,现在开始来解决本章一开头的问题:
(gdb) bt #0 0x4365b569 in vfprintf () from /lib/libc.so.6 #1 0x436629ff in printf () from /lib/libc.so.6 #2 0x080485b9 in main ()
看一下main函数的汇编:
(gdb) disassemble main Dump of assembler code for function main: 0x08048500 <+0>: push %ebp 0x08048501 <+1>: mov %esp,%ebp 0x08048503 <+3>: and $0xfffffff0,%esp 0x08048506 <+6>: sub $0x20,%esp 0x08048509 <+9>: movl $0x0,0x1c(%esp) 0x08048511 <+17>: jmp 0x80485bf <main+191> 0x08048516 <+22>: mov 0x1c(%esp),%eax 0x0804851a <+26>: lea 0x0(,%eax,4),%edx 0x08048521 <+33>: mov 0xc(%ebp),%eax 0x08048524 <+36>: add %edx,%eax 0x08048526 <+38>: mov (%eax),%eax 0x08048528 <+40>: mov %eax,(%esp) 0x0804852b <+43>: call 0x80483d0 <strlen@plt> 0x08048530 <+48>: mov %eax,0x18(%esp) 0x08048534 <+52>: mov 0x18(%esp),%eax 0x08048538 <+56>: cmp $0x1,%eax 0x0804853b <+59>: je 0x8048570 <main+112> 0x0804853d <+61>: cmp $0x2,%eax 0x08048540 <+64>: je 0x804858f <main+143> 0x08048542 <+66>: test %eax,%eax 0x08048544 <+68>: jne 0x80485a5 <main+165> 0x08048546 <+70>: mov 0x1c(%esp),%eax 0x0804854a <+74>: lea 0x0(,%eax,4),%edx 0x08048551 <+81>: mov 0xc(%ebp),%eax 0x08048554 <+84>: add %edx,%eax 0x08048556 <+86>: mov (%eax),%eax 0x08048558 <+88>: movzbl (%eax),%eax 0x0804855b <+91>: movsbl %al,%eax 0x0804855e <+94>: mov %eax,0x4(%esp) 0x08048562 <+98>: movl $0x8048674,(%esp) 0x08048569 <+105>: call 0x80483e0 <printf@plt> 0x0804856e <+110>: jmp 0x80485ba <main+186> 0x08048570 <+112>: mov 0x1c(%esp),%eax 0x08048574 <+116>: add $0x1,%eax 0x08048577 <+119>: lea 0x0(,%eax,4),%edx 0x0804857e <+126>: mov 0xc(%ebp),%eax 0x08048581 <+129>: add %edx,%eax 0x08048583 <+131>: mov (%eax),%eax 0x08048585 <+133>: mov %eax,(%esp) 0x08048588 <+136>: call 0x80483f0 <puts@plt> 0x0804858d <+141>: jmp 0x80485ba <main+186> 0x0804858f <+143>: mov 0x1c(%esp),%eax 0x08048593 <+147>: mov %eax,0x4(%esp) 0x08048597 <+151>: movl $0x8048678,(%esp) 0x0804859e <+158>: call 0x80483e0 <printf@plt> 0x080485a3 <+163>: jmp 0x80485ba <main+186> 0x080485a5 <+165>: mov 0x1c(%esp),%eax 0x080485a9 <+169>: mov %eax,0x4(%esp) 0x080485ad <+173>: movl $0x804867c,(%esp) 0x080485b4 <+180>: call 0x80483e0 <printf@plt> 0x080485b9 <+185>: nop 
0x080485ba <+186>: addl $0x1,0x1c(%esp) 0x080485bf <+191>: mov 0x1c(%esp),%eax 0x080485c3 <+195>: cmp 0x8(%ebp),%eax 0x080485c6 <+198>: setl %al 0x080485c9 <+201>: test %al,%al 0x080485cb <+203>: jne 0x8048516 <main+22> 0x080485d1 <+209>: mov $0x0,%eax 0x080485d6 <+214>: leave 0x080485d7 <+215>: ret End of assembler dump.
由
0x080485cb <+203>: jne 0x8048516 <main+22>
可知,0x8048516到0x080485cb构成一个循环。
而0x080485cb的判断条件:
0x080485bf <+191>: mov 0x1c(%esp),%eax 0x080485c3 <+195>: cmp 0x8(%ebp),%eax 0x080485c6 <+198>: setl %al 0x080485c9 <+201>: test %al,%al
里,用到了ebp+8。由于main函数的第一个参数是argc(这在第三章“函数参数”有提及),所以ebp+8处存放的就是argc。setl在前面的cmp结果为“小于”时把al置1,因此上面的语句是判断esp+0x1c的值是否小于argc:如果小于,就继续循环,否则跳出循环。假定esp+0x1c这个变量命名为cnt。
又由
0x080485ba <+186>: addl $0x1,0x1c(%esp)
可翻译成 cnt++,可知,
cnt一开始应该是小于argc的。每次递增之后都要再判断一次。
又由
0x08048509 <+9>: movl $0x0,0x1c(%esp) 0x08048511 <+17>: jmp 0x80485bf <main+191>
可知,cnt的初始值为0,且一初始化之后就跳转到0x080485bf和argc比较。
而又由
0x080485d1 <+209>: mov $0x0,%eax 0x080485d6 <+214>: leave 0x080485d7 <+215>: ret
结合第三章“返回值”那一节可知,main函数无论什么情况都返回0。
所以,整个main函数可以翻译成这样:
int main( int argc, char* argv[] ) { int cnt = 0; while ( cnt < argc ) { 0x08048516 <+22>: mov 0x1c(%esp),%eax 0x0804851a <+26>: lea 0x0(,%eax,4),%edx 0x08048521 <+33>: mov 0xc(%ebp),%eax 0x08048524 <+36>: add %edx,%eax 0x08048526 <+38>: mov (%eax),%eax 0x08048528 <+40>: mov %eax,(%esp) 0x0804852b <+43>: call 0x80483d0 <strlen@plt> 0x08048530 <+48>: mov %eax,0x18(%esp) 0x08048534 <+52>: mov 0x18(%esp),%eax 0x08048538 <+56>: cmp $0x1,%eax 0x0804853b <+59>: je 0x8048570 <main+112> 0x0804853d <+61>: cmp $0x2,%eax 0x08048540 <+64>: je 0x804858f <main+143> 0x08048542 <+66>: test %eax,%eax 0x08048544 <+68>: jne 0x80485a5 <main+165> 0x08048546 <+70>: mov 0x1c(%esp),%eax 0x0804854a <+74>: lea 0x0(,%eax,4),%edx 0x08048551 <+81>: mov 0xc(%ebp),%eax 0x08048554 <+84>: add %edx,%eax 0x08048556 <+86>: mov (%eax),%eax 0x08048558 <+88>: movzbl (%eax),%eax 0x0804855b <+91>: movsbl %al,%eax 0x0804855e <+94>: mov %eax,0x4(%esp) 0x08048562 <+98>: movl $0x8048674,(%esp) 0x08048569 <+105>: call 0x80483e0 <printf@plt> 0x0804856e <+110>: jmp 0x80485ba <main+186> 0x08048570 <+112>: mov 0x1c(%esp),%eax 0x08048574 <+116>: add $0x1,%eax 0x08048577 <+119>: lea 0x0(,%eax,4),%edx 0x0804857e <+126>: mov 0xc(%ebp),%eax 0x08048581 <+129>: add %edx,%eax 0x08048583 <+131>: mov (%eax),%eax 0x08048585 <+133>: mov %eax,(%esp) 0x08048588 <+136>: call 0x80483f0 <puts@plt> 0x0804858d <+141>: jmp 0x80485ba <main+186> 0x0804858f <+143>: mov 0x1c(%esp),%eax 0x08048593 <+147>: mov %eax,0x4(%esp) 0x08048597 <+151>: movl $0x8048678,(%esp) 0x0804859e <+158>: call 0x80483e0 <printf@plt> 0x080485a3 <+163>: jmp 0x80485ba <main+186> 0x080485a5 <+165>: mov 0x1c(%esp),%eax 0x080485a9 <+169>: mov %eax,0x4(%esp) 0x080485ad <+173>: movl $0x804867c,(%esp) 0x080485b4 <+180>: call 0x80483e0 <printf@plt> 0x080485b9 <+185>: nop cnt++; } return 0; }
分析一下上面汇编块的跳转语句
由
0x08048538 <+56>: cmp $0x1,%eax 0x0804853b <+59>: je 0x8048570 <main+112>
可知,
0x08048570 <+112>: mov 0x1c(%esp),%eax 0x08048574 <+116>: add $0x1,%eax 0x08048577 <+119>: lea 0x0(,%eax,4),%edx 0x0804857e <+126>: mov 0xc(%ebp),%eax 0x08048581 <+129>: add %edx,%eax 0x08048583 <+131>: mov (%eax),%eax 0x08048585 <+133>: mov %eax,(%esp) 0x08048588 <+136>: call 0x80483f0 <puts@plt> 0x0804858d <+141>: jmp 0x80485ba <main+186>
是在eax等于1的情况下的代码块。
同样分析
0x0804853d <+61>: cmp $0x2,%eax 0x08048540 <+64>: je 0x804858f <main+143> 0x08048542 <+66>: test %eax,%eax 0x08048544 <+68>: jne 0x80485a5 <main+165>
可知
0x0804858f <+143>: mov 0x1c(%esp),%eax 0x08048593 <+147>: mov %eax,0x4(%esp) 0x08048597 <+151>: movl $0x8048678,(%esp) 0x0804859e <+158>: call 0x80483e0 <printf@plt> 0x080485a3 <+163>: jmp 0x80485ba <main+186>
是在eax为2的情况下的代码块。
0x08048546 <+70>: mov 0x1c(%esp),%eax 0x0804854a <+74>: lea 0x0(,%eax,4),%edx 0x08048551 <+81>: mov 0xc(%ebp),%eax 0x08048554 <+84>: add %edx,%eax 0x08048556 <+86>: mov (%eax),%eax 0x08048558 <+88>: movzbl (%eax),%eax 0x0804855b <+91>: movsbl %al,%eax 0x0804855e <+94>: mov %eax,0x4(%esp) 0x08048562 <+98>: movl $0x8048674,(%esp) 0x08048569 <+105>: call 0x80483e0 <printf@plt> 0x0804856e <+110>: jmp 0x80485ba <main+186>
是在eax为0的情况下的代码块。
0x080485a5 <+165>: mov 0x1c(%esp),%eax 0x080485a9 <+169>: mov %eax,0x4(%esp) 0x080485ad <+173>: movl $0x804867c,(%esp) 0x080485b4 <+180>: call 0x80483e0 <printf@plt> 0x080485b9 <+185>: nop
是在eax不为0,1,2这三种情况下的代码块。由于这几个判断都是特定的整数,所以最好用switch结构来还原。
而对于
0x0804856e <+110>: jmp 0x80485ba <main+186>
这条指令所跳转的地方,刚好是
0x080485ba <+186>: addl $0x1,0x1c(%esp)
即cnt++;
而eax的值则是由
0x0804852b <+43>: call 0x80483d0 <strlen@plt> 0x08048530 <+48>: mov %eax,0x18(%esp) 0x08048534 <+52>: mov 0x18(%esp),%eax
得来的,根据第三章“返回值”那一节,可以知道eax应该是strlen函数的返回值,命名为len。
而在
0x08048516 <+22>: mov 0x1c(%esp),%eax 0x0804851a <+26>: lea 0x0(,%eax,4),%edx 0x08048521 <+33>: mov 0xc(%ebp),%eax 0x08048524 <+36>: add %edx,%eax 0x08048526 <+38>: mov (%eax),%eax 0x08048528 <+40>: mov %eax,(%esp)
由于esp+0x1c已经命名为cnt了,ebp+0xC为main函数第二个参数argv。那么这一段汇编的意思是取argv[cnt]的值,并把它放到栈顶(esp指向的位置),作为接下来调用strlen的参数。
所以main函数又会变成这样:
int main( int argc, char* argv[] ) { int cnt = 0; while ( cnt < argc ) { size_t len = strlen( argv[cnt] ); switch ( len ) { case 0: { 0x08048546 <+70>: mov 0x1c(%esp),%eax 0x0804854a <+74>: lea 0x0(,%eax,4),%edx 0x08048551 <+81>: mov 0xc(%ebp),%eax 0x08048554 <+84>: add %edx,%eax 0x08048556 <+86>: mov (%eax),%eax 0x08048558 <+88>: movzbl (%eax),%eax 0x0804855b <+91>: movsbl %al,%eax 0x0804855e <+94>: mov %eax,0x4(%esp) 0x08048562 <+98>: movl $0x8048674,(%esp) 0x08048569 <+105>: call 0x80483e0 <printf@plt> break; } case 1: { 0x08048570 <+112>: mov 0x1c(%esp),%eax 0x08048574 <+116>: add $0x1,%eax 0x08048577 <+119>: lea 0x0(,%eax,4),%edx 0x0804857e <+126>: mov 0xc(%ebp),%eax 0x08048581 <+129>: add %edx,%eax 0x08048583 <+131>: mov (%eax),%eax 0x08048585 <+133>: mov %eax,(%esp) 0x08048588 <+136>: call 0x80483f0 <puts@plt> break; } case 2: { 0x0804858f <+143>: mov 0x1c(%esp),%eax 0x08048593 <+147>: mov %eax,0x4(%esp) 0x08048597 <+151>: movl $0x8048678,(%esp) 0x0804859e <+158>: call 0x80483e0 <printf@plt> break; } default: { 0x080485a5 <+165>: mov 0x1c(%esp),%eax 0x080485a9 <+169>: mov %eax,0x4(%esp) 0x080485ad <+173>: movl $0x804867c,(%esp) 0x080485b4 <+180>: call 0x80483e0 <printf@plt> 0x080485b9 <+185>: nop break; } } cnt++; } return 0; }
看一下case 0情况的汇编:
0x08048546 <+70>: mov 0x1c(%esp),%eax 0x0804854a <+74>: lea 0x0(,%eax,4),%edx 0x08048551 <+81>: mov 0xc(%ebp),%eax 0x08048554 <+84>: add %edx,%eax 0x08048556 <+86>: mov (%eax),%eax 0x08048558 <+88>: movzbl (%eax),%eax 0x0804855b <+91>: movsbl %al,%eax 0x0804855e <+94>: mov %eax,0x4(%esp) 0x08048562 <+98>: movl $0x8048674,(%esp) 0x08048569 <+105>: call 0x80483e0 <printf@plt>
由于printf的第一个参数是格式字符串,那么看一下0x8048674存放着什么内容:
(gdb) x /s 0x8048674 0x8048674 <__dso_handle+4>: "%c\n"
而
0x08048546 <+70>: mov 0x1c(%esp),%eax 0x0804854a <+74>: lea 0x0(,%eax,4),%edx 0x08048551 <+81>: mov 0xc(%ebp),%eax 0x08048554 <+84>: add %edx,%eax
又在分析strlen时已经知道是取argv[cnt]的地址(这里没有add $0x1,%eax,下标是cnt而不是cnt+1),所以,
0x08048556 <+86>: mov (%eax),%eax 0x08048558 <+88>: movzbl (%eax),%eax 0x0804855b <+91>: movsbl %al,%eax
就是取argv[cnt][0]的值。
那么,case 0的汇编可翻译成
printf( "%c\n", argv[cnt][0] );
也就是说,main函数可以变成这样:
int main( int argc, char* argv[] ) { int cnt = 0; while ( cnt < argc ) { size_t len = strlen( argv[cnt] ); switch ( len ) { case 0: { printf( “%c\n”, argv[cnt][0] ); break; } case 1: { 0x08048570 <+112>: mov 0x1c(%esp),%eax 0x08048574 <+116>: add $0x1,%eax 0x08048577 <+119>: lea 0x0(,%eax,4),%edx 0x0804857e <+126>: mov 0xc(%ebp),%eax 0x08048581 <+129>: add %edx,%eax 0x08048583 <+131>: mov (%eax),%eax 0x08048585 <+133>: mov %eax,(%esp) 0x08048588 <+136>: call 0x80483f0 <puts@plt> break; } case 2: { 0x0804858f <+143>: mov 0x1c(%esp),%eax 0x08048593 <+147>: mov %eax,0x4(%esp) 0x08048597 <+151>: movl $0x8048678,(%esp) 0x0804859e <+158>: call 0x80483e0 <printf@plt> break; } default: { 0x080485a5 <+165>: mov 0x1c(%esp),%eax 0x080485a9 <+169>: mov %eax,0x4(%esp) 0x080485ad <+173>: movl $0x804867c,(%esp) 0x080485b4 <+180>: call 0x80483e0 <printf@plt> 0x080485b9 <+185>: nop break; } } cnt++; } return 0; }
按照case 0的情况可以分析出其它几个如下:
case 1: { puts( argv[cnt+1] ); break; } case 2: { printf( "%d\n", cnt ); break; } default: { printf( "%s\n", cnt ); break; }
即整个main函数如下:
/*
 * main() as reconstructed from the disassembly of the crashing binary.
 *
 * Walks argv[0] .. argv[argc-1] and, depending on the length of the current
 * argument, prints a character, the next argument, or the loop counter.
 *
 * The array indices follow the disassembly: only the length-1 path adds 1 to
 * cnt before indexing argv (the "add $0x1,%eax" at main+116); the strlen
 * argument and the length-0 path index argv[cnt] directly.
 *
 * Returns 0 on every path.
 */
int main( int argc, char* argv[] )
{
    int cnt = 0;

    while ( cnt < argc ) {
        size_t len = strlen( argv[cnt] );

        switch ( len ) {
        case 0: {
            /* Length 0: the first byte is the string terminator. */
            printf( "%c\n", argv[cnt][0] );
            break;
        }
        case 1: {
            /* Prints the argument AFTER the current one. */
            puts( argv[cnt+1] );
            break;
        }
        case 2: {
            printf( "%d\n", cnt );
            break;
        }
        default: {
            /*
             * Kept exactly as the binary behaves: an int is passed where
             * "%s" expects a char pointer. This is the call that the
             * backtrace points at, i.e. the crash being diagnosed, so it is
             * intentionally NOT corrected in this reconstruction.
             */
            printf( "%s\n", cnt );
            break;
        }
        }

        cnt++;
    }

    return 0;
}
由于bt里main这一帧的地址0x080485b9是返回地址(即0x080485b4处call printf的下一条指令),它落在下面这一段中
default: { 0x080485a5 <+165>: mov 0x1c(%esp),%eax 0x080485a9 <+169>: mov %eax,0x4(%esp) 0x080485ad <+173>: movl $0x804867c,(%esp) 0x080485b4 <+180>: call 0x80483e0 <printf@plt> 0x080485b9 <+185>: nop break; }
可知,是由
default: { printf( "%s\n", cnt ); break; }
导致崩溃的。
对比一下源代码,可见非常吻合。
/*
 * Original source of the sample program whose crash is analysed above.
 *
 * For every command-line argument it calls strlen() and switches on the
 * length. The default branch deliberately contains the defect that the
 * backtrace (vfprintf <- printf <- main) leads to; the code is left
 * unmodified so it can be compared with the reconstruction.
 */
#include <stdio.h>
#include <string.h>

int main(int argc, char* argv[] ) {
    for ( int i = 0; i < argc; i++ ) {
        int len = strlen( argv[i] );
        switch ( len ) {
        /* Empty argument: argv[i][0] is the terminating '\0'. */
        case 0: printf( "%c\n", argv[i][0] ); break;
        /*
         * Prints the NEXT argument. In the disassembly this call appears as
         * puts@plt (main+136) rather than printf@plt.
         * NOTE(review): for the last argument this reads argv[argc] - confirm
         * that is intended.
         */
        case 1: printf( "%s\n", argv[i+1] ); break;
        /* Two-character argument: print the loop index. */
        case 2: printf( "%d\n", i ); break;
        /*
         * Defect under study: "%s" is given the int i instead of a string
         * pointer. This is the printf call at main+180 whose return address
         * 0x080485b9 shows up in frame #2 of the backtrace.
         */
        default: printf( "%s\n", i ); break;
        }
    }
    return 0;
}