Linode Xen 下 grsecurity >= 4.3 崩潰問題
自從 Linux 4.3 開始,在 Linode 上使用 PaX/grsecurity 時,內核會在被 pv-grub 執行后不久立即崩潰。由于崩潰是在啟動后極早期立刻發生的,沒有任何可以用來調試的日志,同時公司也不是蓋子開的,也沒有辦法得到母機上有意義的調試信息。這導致了蓋子的 VPS 內核從去年 12 月開始被鎖定在 4.2.7。由于不知什么時候產生了 Linode 東京機房會在 2016 年 6 月從 Xen 遷移到 KVM 的錯覺,也沒有花精力去嘗試調試這個問題。
然而今年 Linode 周年慶時硬件全部翻倍,惟獨東京機房除外。而根據官方最新的說法,新機房樂觀估計要第四季度上線。解決內核問題就不得不提上了蓋子的日程,首先是手工修復了不少 CVE 高危漏洞,隨后又祭出 diff 折騰半天,內核始終會在啟動后立刻死亡。而由于 grsecurity 并不提供 git 源,所以 git bisect 也是不可能的,唯一可用的工具只有 Linux 4.2.7 / 補丁文件,與 Linux 4.3.3 / 補丁文件。
在閱讀代碼差異時,一個很大的挑戰是如何區分上游內核的修改與下游 PaX/grsecurity 補丁的修改。直接比較補丁文件會導致代碼上下文丟失,讓代碼的意圖不可理解。最后蓋子打算編寫一個名為 metadiff 的工具,自動比較并去除在上游中出現的代碼段,以便僅僅對 PaX/grsecurity 的代碼進行比較,就連名字都想好了就叫 metadiff,但一直沒有動手。
直到上個月和 Shawn 聊天時,提到了自己裝個 Xen 也不是不可行;于是周六終于動手在 VirtualBox 虛擬機里裝了個 Debian + Xen,又在 Xen 里啟動了一個虛擬機,果然很快就得到了內核崩潰的 traceback。
rip: ffffffff8100b2b0 pmu_msr_read+0x10 flags: 00000282 i s nz rsp: ffffffff81aeff30 rax: 8000000000000000 rcx: 0000000000000001 rdx: ffffffff81aeffcc rbx: 00000000c0000080 rsi: ffffffff81aeffa0 rdi: 00000000c0000080 rbp: ffffffff81aeffa0 r8: 0000000000000001 r9: 00000000ffffffff r10: ffffffff81cf9000 r11: 0000000000000000 r12: ffffffff81aeffcc r13: ffffffff81aeffc4 r14: ffffffff81aeffc0 r15: 6f73b764afec1c9d cs: e033 ss: e02b ds: 0000 es: 0000 fs: 0000 @ 0000000000000000 gs: 0000 @ 0000000000000000/0000000000000000 Code (instr addr ffffffff8100b2b0) 00 00 00 00 00 41 54 49 89 d4 55 48 89 f5 53 89 fb 48 83 ec 10 <65> 48 8b 04 25 28 00 00 00 48 89 Stack: 0000000000000001 0000000000000000 0000000000000000 ffffffff8100b2b0 000000010000e030 0000000000010082 ffffffff81aeff70 000000000000e02b 0000000000000000 0000000000000000 00000000c0000080 ffffffff81aeffcc ffffffff81aeffc8 ffffffff810041c8 ffffffff81aeffc8 ffffffff81aeffcc Call Trace: [<ffffffff8100b2b0>] pmu_msr_read+0x10 <-- [<ffffffff8100b2b0>] pmu_msr_read+0x10 [<ffffffff810041c8>] xen_read_msr_safe+0x18 [<ffffffff81be93eb>] xen_start_kernel+0x1b9
哦?可見內核在 xen_start_kernel 不久就崩潰了,這是 /* First C function to be called on Xen boot */,在如此早期就崩潰,什么錯誤日志都看不到也就不奇怪了。來看看 xen_read_msr_safe 和 pmu_msr_read 在 4.2 和 4.3 之間有什么改變:
--- ../../4.2.7/linux-4.2.7/arch/x86/xen/enlighten.c 2016-09-11 00:44:12.010022936 +0800 +++ arch/x86/xen/enlighten.c 2015-12-15 13:41:43.000000000 +0800 @@ -1030,6 +1034,9 @@ static u64 xen_read_msr_safe(unsigned in { u64 val; + if (pmu_msr_read(msr, &val, err)) + return val; + val = native_read_msr_safe(msr, err); switch (msr) { case MSR_IA32_APICBASE: @@ -1074,9 +1081,11 @@ static int xen_write_msr_safe(unsigned i /* Fast syscall setup is all done in hypercalls, so these are all ignored. Stub them out here to stop Xen console noise. */ + break; default: - ret = native_write_msr_safe(msr, low, high); + if (!pmu_msr_write(msr, low, high, &ret)) + ret = native_write_msr_safe(msr, low, high); } return ret;
可見 pmu_msr_read 完全是個新東西,使用 git blame 繼續追查。
xen/PMU: Initialization code for Xen PMU 65d0cf0be79feebeb19e7626fd3ed41ae73f642d
xen/PMU: Describe vendor-specific PMU registers e27b72df01109c689062caeba1defa013b759e0e
xen/PMU: Intercept PMU-related MSR and APIC accesses 6b08cd6328c58a2ae190c5ee03a2ffcab5ef828e
xen/PMU: PMU emulation code bf6dfb154d935725c9a2005033ca33017b9df439
發現 PMU 是 Xen 在 4.3 進入主線內核的新特性,于是解決方法就很簡單了,把 bf6dfb 和 6b08cd 都撤銷就好,接下來的事情就讓 PaX Team 和 spender 去追查吧。最后的補丁是:
diff -uprN linux-4.7.3-hardened/arch/x86/xen/apic.c linux-4.7.3-hardened.good/arch/x86/xen/apic.c --- linux-4.7.3-hardened/arch/x86/xen/apic.c 2016-07-24 19:23:50.000000000 +0000 +++ linux-4.7.3-hardened.good/arch/x86/xen/apic.c 2016-09-10 20:05:21.450647009 +0000 @@ -7,7 +7,6 @@ #include <xen/xen.h> #include <xen/interface/physdev.h> #include "xen-ops.h" -#include "pmu.h" #include "smp.h" static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) @@ -73,10 +72,8 @@ static u32 xen_apic_read(u32 reg) static void xen_apic_write(u32 reg, u32 val) { - if (reg == APIC_LVTPC) { - (void)pmu_apic_update(reg); + if (reg == APIC_LVTPC) return; - } /* Warn to see if there's any stray references */ WARN(1,"register: %x, value: %x\n", reg, val); diff -uprN linux-4.7.3-hardened/arch/x86/xen/enlighten.c linux-4.7.3-hardened.good/arch/x86/xen/enlighten.c --- linux-4.7.3-hardened/arch/x86/xen/enlighten.c 2016-09-10 19:59:29.237313676 +0000 +++ linux-4.7.3-hardened.good/arch/x86/xen/enlighten.c 2016-09-10 20:06:49.683980342 +0000 @@ -1031,9 +1031,6 @@ static u64 xen_read_msr_safe(unsigned in { u64 val; - if (pmu_msr_read(msr, &val, err)) - return val; - val = native_read_msr_safe(msr, err); switch (msr) { case MSR_IA32_APICBASE: @@ -1081,13 +1078,17 @@ static int xen_write_msr_safe(unsigned i break; default: - if (!pmu_msr_write(msr, low, high, &ret)) - ret = native_write_msr_safe(msr, low, high); + ret = native_write_msr_safe(msr, low, high); } return ret; } +unsigned long long xen_read_pmc(int counter) +{ + return 0; +} + static u64 xen_read_msr(unsigned int msr) { /* diff -uprN linux-4.7.3-hardened/arch/x86/xen/pmu.c linux-4.7.3-hardened.good/arch/x86/xen/pmu.c --- linux-4.7.3-hardened/arch/x86/xen/pmu.c 2016-07-24 19:23:50.000000000 +0000 +++ linux-4.7.3-hardened.good/arch/x86/xen/pmu.c 2016-09-10 20:05:21.450647009 +0000 @@ -13,20 +13,11 @@ /* x86_pmu.handle_irq definition */ #include "../events/perf_event.h" -#define XENPMU_IRQ_PROCESSING 1 -struct xenpmu { - /* Shared 
page between hypervisor and domain */ - struct xen_pmu_data *xenpmu_data; - uint8_t flags; -}; -static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared); -#define get_xenpmu_data() (this_cpu_ptr(&xenpmu_shared)->xenpmu_data) -#define get_xenpmu_flags() (this_cpu_ptr(&xenpmu_shared)->flags) - -/* Macro for computing address of a PMU MSR bank */ -#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \ - (uintptr_t)ctxt->field)) +/* Shared page between hypervisor and domain */ +static DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared); +#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id()) + /* AMD PMU */ #define F15H_NUM_COUNTERS 6 @@ -60,8 +51,6 @@ static __read_mostly int amd_num_counter /* Alias registers (0x4c1) for full-width writes to PMCs */ #define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0)) -#define INTEL_PMC_TYPE_SHIFT 30 - static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters; @@ -178,232 +167,6 @@ static int is_intel_pmu_msr(u32 msr_inde } } -static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type, - int index, bool is_read) -{ - uint64_t *reg = NULL; - struct xen_pmu_intel_ctxt *ctxt; - uint64_t *fix_counters; - struct xen_pmu_cntr_pair *arch_cntr_pair; - struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); - uint8_t xenpmu_flags = get_xenpmu_flags(); - - - if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) - return false; - - ctxt = &xenpmu_data->pmu.c.intel; - - switch (msr) { - case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - reg = &ctxt->global_ovf_ctrl; - break; - case MSR_CORE_PERF_GLOBAL_STATUS: - reg = &ctxt->global_status; - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - reg = &ctxt->global_ctrl; - break; - case MSR_CORE_PERF_FIXED_CTR_CTRL: - reg = &ctxt->fixed_ctrl; - break; - default: - switch (type) { - case MSR_TYPE_COUNTER: - fix_counters = field_offset(ctxt, fixed_counters); - reg = &fix_counters[index]; - break; - case MSR_TYPE_ARCH_COUNTER: - arch_cntr_pair = 
field_offset(ctxt, arch_counters); - reg = &arch_cntr_pair[index].counter; - break; - case MSR_TYPE_ARCH_CTRL: - arch_cntr_pair = field_offset(ctxt, arch_counters); - reg = &arch_cntr_pair[index].control; - break; - default: - return false; - } - } - - if (reg) { - if (is_read) - *val = *reg; - else { - *reg = *val; - - if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL) - ctxt->global_status &= (~(*val)); - } - return true; - } - - return false; -} - -static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read) -{ - uint64_t *reg = NULL; - int i, off = 0; - struct xen_pmu_amd_ctxt *ctxt; - uint64_t *counter_regs, *ctrl_regs; - struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); - uint8_t xenpmu_flags = get_xenpmu_flags(); - - if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) - return false; - - if (k7_counters_mirrored && - ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3))) - msr = get_fam15h_addr(msr); - - ctxt = &xenpmu_data->pmu.c.amd; - for (i = 0; i < amd_num_counters; i++) { - if (msr == amd_ctrls_base + off) { - ctrl_regs = field_offset(ctxt, ctrls); - reg = &ctrl_regs[i]; - break; - } else if (msr == amd_counters_base + off) { - counter_regs = field_offset(ctxt, counters); - reg = &counter_regs[i]; - break; - } - off += amd_msr_step; - } - - if (reg) { - if (is_read) - *val = *reg; - else - *reg = *val; - - return true; - } - return false; -} - -bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err) -{ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - if (is_amd_pmu_msr(msr)) { - if (!xen_amd_pmu_emulate(msr, val, 1)) - *val = native_read_msr_safe(msr, err); - return true; - } - } else { - int type, index; - - if (is_intel_pmu_msr(msr, &type, &index)) { - if (!xen_intel_pmu_emulate(msr, val, type, index, 1)) - *val = native_read_msr_safe(msr, err); - return true; - } - } - - return false; -} - -bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err) -{ - uint64_t val = ((uint64_t)high << 32) | low; - - if 
(boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - if (is_amd_pmu_msr(msr)) { - if (!xen_amd_pmu_emulate(msr, &val, 0)) - *err = native_write_msr_safe(msr, low, high); - return true; - } - } else { - int type, index; - - if (is_intel_pmu_msr(msr, &type, &index)) { - if (!xen_intel_pmu_emulate(msr, &val, type, index, 0)) - *err = native_write_msr_safe(msr, low, high); - return true; - } - } - - return false; -} - -static unsigned long long xen_amd_read_pmc(int counter) -{ - struct xen_pmu_amd_ctxt *ctxt; - uint64_t *counter_regs; - struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); - uint8_t xenpmu_flags = get_xenpmu_flags(); - - if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) { - uint32_t msr; - int err; - - msr = amd_counters_base + (counter * amd_msr_step); - return native_read_msr_safe(msr, &err); - } - - ctxt = &xenpmu_data->pmu.c.amd; - counter_regs = field_offset(ctxt, counters); - return counter_regs[counter]; -} - -static unsigned long long xen_intel_read_pmc(int counter) -{ - struct xen_pmu_intel_ctxt *ctxt; - uint64_t *fixed_counters; - struct xen_pmu_cntr_pair *arch_cntr_pair; - struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); - uint8_t xenpmu_flags = get_xenpmu_flags(); - - if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) { - uint32_t msr; - int err; - - if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) - msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff); - else - msr = MSR_IA32_PERFCTR0 + counter; - - return native_read_msr_safe(msr, &err); - } - - ctxt = &xenpmu_data->pmu.c.intel; - if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) { - fixed_counters = field_offset(ctxt, fixed_counters); - return fixed_counters[counter & 0xffff]; - } - - arch_cntr_pair = field_offset(ctxt, arch_counters); - return arch_cntr_pair[counter].counter; -} - -unsigned long long xen_read_pmc(int counter) -{ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return xen_amd_read_pmc(counter); - else - return xen_intel_read_pmc(counter); -} - -int 
pmu_apic_update(uint32_t val) -{ - int ret; - struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); - - if (!xenpmu_data) { - pr_warn_once("%s: pmudata not initialized\n", __func__); - return -EINVAL; - } - - xenpmu_data->pmu.l.lapic_lvtpc = val; - - if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING) - return 0; - - ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL); - - return ret; -} - /* perf callbacks */ static int xen_is_in_guest(void) { @@ -476,37 +239,26 @@ static void xen_convert_regs(const struc irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) { - int err, ret = IRQ_NONE; + int ret = IRQ_NONE; struct pt_regs regs; const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); - uint8_t xenpmu_flags = get_xenpmu_flags(); if (!xenpmu_data) { pr_warn_once("%s: pmudata not initialized\n", __func__); return ret; } - this_cpu_ptr(&xenpmu_shared)->flags = - xenpmu_flags | XENPMU_IRQ_PROCESSING; xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs, xenpmu_data->pmu.pmu_flags); if (x86_pmu.handle_irq(&regs)) ret = IRQ_HANDLED; - /* Write out cached context to HW */ - err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL); - this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags; - if (err) { - pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err); - return IRQ_NONE; - } - return ret; } bool is_xen_pmu(int cpu) { - return (get_xenpmu_data() != NULL); + return (per_cpu(xenpmu_shared, cpu) != NULL); } void xen_pmu_init(int cpu) @@ -536,8 +288,7 @@ void xen_pmu_init(int cpu) if (err) goto fail; - per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data; - per_cpu(xenpmu_shared, cpu).flags = 0; + per_cpu(xenpmu_shared, cpu) = xenpmu_data; if (cpu == 0) { perf_register_guest_info_callbacks(&xen_guest_cbs); @@ -565,6 +316,6 @@ void xen_pmu_finish(int cpu) (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp); - free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0); - per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL; + free_pages((unsigned long)per_cpu(xenpmu_shared, cpu), 0); +
per_cpu(xenpmu_shared, cpu) = NULL; } diff -uprN linux-4.7.3-hardened/arch/x86/xen/pmu.h linux-4.7.3-hardened.good/arch/x86/xen/pmu.h --- linux-4.7.3-hardened/arch/x86/xen/pmu.h 2016-07-24 19:23:50.000000000 +0000 +++ linux-4.7.3-hardened.good/arch/x86/xen/pmu.h 2016-09-10 20:05:21.453980342 +0000 @@ -7,9 +7,5 @@ irqreturn_t xen_pmu_irq_handler(int irq, void xen_pmu_init(int cpu); void xen_pmu_finish(int cpu); bool is_xen_pmu(int cpu); -bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err); -bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err); -int pmu_apic_update(uint32_t reg); -unsigned long long xen_read_pmc(int counter); #endif /* __XEN_PMU_H */
打好補丁再編譯內核,被智子鎖定版本的內核果然升級成功了。
$ uname -r 4.7.3-hardened
更新:官方已在 grsecurity-3.1-4.7.4-201609152234.patch 中修復問題,不再需要此 workaround。
相關文章
在Xen虛擬機全虛擬化環境中安裝Windows2003系統
本文主要介紹了Xen虛擬機全虛擬化環境中安裝Windows2003系統,Xen虛擬機支持兩種客戶機的安裝方式。一種是半虛擬化,一種是全虛擬化。Win2003系統要求在全虛擬化的環境下安裝和運行。 2016-10-10 Linode Xen 下 grsecurity >= 4.3 崩潰問題
本文給大家分享的是在Linode Xen 下 grsecurity >= 4.3 崩潰問題的個人解決辦法,雖然官方已修復此問題,但還是分享給大家,就當給大家學習個思路吧 2017-01-01 Xen虛擬機在CentOS系統中的安裝和使用方法
相信大家都知道XEN是目前Linux上的最佳的虛擬化解決方案,特別適合于服務器應用,所以這篇文章給大家分享了在CentOS系統中Xen虛擬機的安裝和使用方法,文中通過圖文介紹的很詳細,相信對大家學習使用Xen虛擬機具有一定參考借鑒價值,有需要的朋友們下面來一起看看吧。2016-10-10