本文介绍QEMU创建虚拟机后,如何为虚拟机创建vcpu.
QEMU
在qemu/target/i386/cpu.c中,x86_cpu_common_class_init()函数里有如下代码:
...
device_class_set_parent_realize(dc, x86_cpu_realizefn, &xcc->parent_realize);
device_class_set_parent_unrealize(dc, x86_cpu_unrealizefn, &xcc->parent_unrealize);
...
其中x86_cpu_realizefn()函数主要做了以下几件事:
- x86_cpu_expand_features(cpu, &local_err);
- x86_cpu_filter_features(cpu);
- cpu->check_cpuid || cpu->enforce_cpuid的检查
- guest physical bits set
- Cache information initialization
- cpu_exec_realizefn(cs, &local_err);
- mce_init(cpu);
- qemu_init_vcpu(cs);
- x86_cpu_apic_realize(cpu, &local_err);
- cpu_reset(cs);
- xcc->parent_realize(dev, &local_err);
其中第8项qemu_init_vcpu()函数如下:
32void qemu_init_vcpu(CPUState *cpu)
{
cpu->nr_cores = smp_cores;
cpu->nr_threads = smp_threads;
cpu->stopped = true;
if (!cpu->as) {
/* If the target cpu hasn't set up any address spaces itself,
* give it the default one.
*/
cpu->num_ases = 1;
cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
}
if (kvm_enabled()) {
qemu_kvm_start_vcpu(cpu);
} else if (hax_enabled()) {
qemu_hax_start_vcpu(cpu);
} else if (hvf_enabled()) {
qemu_hvf_start_vcpu(cpu);
} else if (tcg_enabled()) {
qemu_tcg_init_vcpu(cpu);
} else if (whpx_enabled()) {
qemu_whpx_start_vcpu(cpu);
} else {
qemu_dummy_start_vcpu(cpu);
}
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
- 首先根据qemu-system-x86_64的启动参数-smp指定的cores、threads初始化cpu相关的结构;
- 然后初始化其address space;
- 然后根据不同的平台支持调用对应的start_vcpu()函数,在kvm平台中会调用qemu_kvm_start_vcpu(cpu)函数;
- 最后在qemu_cpu_cond上等待,直到cpu->created被置为true。
下面我们来看qemu_kvm_start_vcpu()函数的实现:
12static void qemu_kvm_start_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
cpu->cpu_index);
qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
}
可见其调用了qemu_thread_create()创建了qemu thread,这些qemu threads会运行qemu_kvm_cpu_thread_fn()函数。qemu_thread_create()函数在POSIX平台上使用的是pthread,感兴趣的可以去util/qemu-thread-posix.c文件看其实现;在Windows平台上则使用HANDLE hThread(util/qemu-thread-win32.c)。qemu_kvm_cpu_thread_fn()函数如下:
42static void *qemu_kvm_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
int r;
rcu_register_thread();
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->can_do_io = 1;
current_cpu = cpu;
r = kvm_init_vcpu(cpu);
if (r < 0) {
error_report("kvm_init_vcpu failed: %s", strerror(-r));
exit(1);
}
kvm_init_cpu_signals(cpu);
/* signal CPU creation */
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
do {
if (cpu_can_run(cpu)) {
r = kvm_cpu_exec(cpu);
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
}
}
qemu_wait_io_event(cpu);
} while (!cpu->unplug || cpu_can_run(cpu));
qemu_kvm_destroy_vcpu(cpu);
cpu->created = false;
qemu_cond_signal(&qemu_cpu_cond);
qemu_mutex_unlock_iothread();
rcu_unregister_thread();
return NULL;
}
qemu_kvm_cpu_thread_fn()线程函数首先会调用kvm_init_vcpu()函数初始化/创建vcpu,然后把cpu->created置为true并对qemu_cpu_cond发送信号,用来唤醒在qemu_init_vcpu()中等待cpu->created的线程;然后在do-while循环中,只要cpu_can_run(cpu)为真就调用kvm_cpu_exec(cpu),
如果返回结果是EXCP_DEBUG,那么调用cpu_handle_guest_debug()进行userspace debug。
当从循环中退出后,需要调用qemu_kvm_destroy_vcpu()函数来destroy之前创建的vcpu。同样也要把cpu->created置为false并发送信号,最后调用rcu_unregister_thread()函数unregister该线程。
下面,我们来分析kvm_init_vcpu()函数是如何初始化/创建vcpu的:
42int kvm_init_vcpu(CPUState *cpu)
{
KVMState *s = kvm_state;
long mmap_size;
int ret;
DPRINTF("kvm_init_vcpu\n");
ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
if (ret < 0) {
DPRINTF("kvm_create_vcpu failed\n");
goto err;
}
cpu->kvm_fd = ret;
cpu->kvm_state = s;
cpu->vcpu_dirty = true;
mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size < 0) {
ret = mmap_size;
DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
goto err;
}
cpu->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
cpu->kvm_fd, 0);
if (cpu->kvm_run == MAP_FAILED) {
ret = -errno;
DPRINTF("mmap'ing vcpu state failed\n");
goto err;
}
if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
s->coalesced_mmio_ring =
(void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE;
}
ret = kvm_arch_init_vcpu(cpu);
err:
return ret;
}
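kvm_init_vcpu()函数首先调用kvm_get_vcpu()得到vcpu对应的fd,然后通过KVM_GET_VCPU_MMAP_SIZE查询映射大小,并把该fd mmap到cpu->kvm_run,必要时设置coalesced_mmio_ring,
最后调用kvm_arch_init_vcpu()函数初始化vcpu,其具体为: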
- 设置env->tsc_khz,使用kvm_arch_set_tsc_khz()函数或者kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ);
- 设置pv CPUIDs,(if(hyperv_enabled(cpu)));
- if(cpu->expose_kvm),则设置guest中KVM related CPUID的值(0x40000000);
- 填充cpuid_data结构,最后调用KVM_SET_CPUID2向guest设置cpuid。