限于作者能力水平,本文可能存在谬误,因此而给读者带来的损失,作者不做任何承诺。
本文基于 ARMv7 架构 + linux-4.14.132内核代码
进行分析。对涉及的 ATF(Arm Trusted Firmware) 以及 ARMv7 CPU HYP 模式
知识不做展开,读者可自行阅读相关资料进行了解。
ARMv7
架构下,SoC
的一般启动流程大概如下:
上电 --> SoC Boot ROM --> SPL --> U-BOOT --> Linux 内核
在进入 Linux 内核
之前,通常只启动了一个 BOOT CPU
(通常是 CPU 0
),而其它的 CPU 核处于待机状态。我们的分析,直接从 Linux 内核
入口开始,我们也不会讨论 Linux 内核
的解压过程。
从内核链接脚本 arch/arm/kernel/vmlinux.lds.S
的如下片段:
/* include/asm-generic/vmlinux.lds.h *//* Section used for early init (in .S files) */
#define HEAD_TEXT *(.head.text)
/* arch/arm/kernel/vmlinux.lds.S */...
OUTPUT_ARCH(arm)
ENTRY(stext) /* 内核入口 */...SECTIONS
{
	...
	. = PAGE_OFFSET + TEXT_OFFSET;
	.head.text : {
		_text = .;
		HEAD_TEXT
	}
	...
}
了解到内核的入口在 arch/arm/kernel/head.S
中:
/* include/linux/init.h *//* For assembly routines */
#define __HEAD .section ".head.text","ax"
/* arch/arm/kernel/head.S *//** Kernel startup entry point.* ---------------------------** This is normally called from the decompressor code. The requirements* are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0,* r1 = machine nr, r2 = atags or dtb pointer.** This code is mostly position independent, so if you link the kernel at* 0xc0008000, you call this at __pa(0xc0008000).** See linux/arch/arm/tools/mach-types for the complete list of machine* numbers for r1.** We're trying to keep crap to a minimum; DO NOT add any machine specific* crap here - that's what the boot loader (or in extreme, well justified* circumstances, zImage) is for.*/.arm__HEAD
ENTRY(stext)
ARM_BE8(setend be ) @ ensure we are in BE8 mode
#ifdef CONFIG_ARM_VIRT_EXT
	bl __hyp_stub_install
#endif@ ensure svc mode and all interrupts maskedsafe_svcmode_maskall r9mrc p15, 0, r9, c0, c0 @ get processor idbl __lookup_processor_type @ r5=procinfo r9=cpuidmovs r10, r5 @ invalid processor (r5=0)?...#ifdef CONFIG_ARM_LPAEmrc p15, 0, r3, c0, c1, 4 @ read ID_MMFR0and r3, r3, #0xf @ extract VMSA supportcmp r3, #5 @ long-descriptor translation table format?...
#endifldr r8, =PLAT_PHYS_OFFSET @ always constant in this case/** r1 = machine no, r2 = atags or dtb,* r8 = phys_offset, r9 = cpuid, r10 = procinfo*/bl __vet_atags
#ifdef CONFIG_SMP_ON_UPbl __fixup_smp
#endif
#ifdef CONFIG_ARM_PATCH_PHYS_VIRTbl __fixup_pv_table
#endifbl __create_page_tables /* 建立内核页表 */ldr r13, =__mmap_switched @ address to jump to after@ mmu has been enabledbadr lr, 1f @ return (PIC) address
#ifdef CONFIG_ARM_LPAEmov r5, #0 @ high TTBR0mov r8, r4, lsr #12 @ TTBR1 is swapper_pg_dir pfn
#elsemov r8, r4 @ set TTBR1 to swapper_pg_dir
#endifldr r12, [r10, #PROCINFO_INITFUNC]add r12, r12, r10ret r12
/** The following fragment of code is executed with the MMU on in MMU mode,* and uses absolute addresses; this is not position independent.** r0 = cp#15 control register* r1 = machine ID* r2 = atags/dtb pointer* r9 = processor ID*/__INIT
__mmap_switched:adr r3, __mmap_switched_dataldmia r3!, {r4, r5, r6, r7}cmp r4, r5 @ Copy data segment if needed
1: cmpne r5, r6ldrne fp, [r4], #4strne fp, [r5], #4bne 1bmov fp, #0 @ Clear BSS (and zero fp)
1: cmp r6, r7strcc fp, [r6],#4bcc 1bARM( ldmia r3, {r4, r5, r6, r7, sp})THUMB( ldmia r3, {r4, r5, r6, r7} )THUMB( ldr sp, [r3, #16] )str r9, [r4] @ Save processor IDstr r1, [r5] @ Save machine typestr r2, [r6] @ Save atags pointercmp r7, #0strne r0, [r7] @ Save control register valuesb start_kernel /* start_kernel() */
ENDPROC(__mmap_switched)
内核流程从汇编代码进入了 C 入口 start_kernel()
:
/* init/main.c */asmlinkage __visible void __init start_kernel(void)
{...pr_notice("%s", linux_banner);setup_arch(&command_line);...sched_init();.../* Do the rest non-__init'ed, we're now alive */rest_init();
}
在 setup_arch()
中 解析 CPU DTS 配置, 以及 PSCI(Power State Coordination Interface)
初始化。看 CPU 相关的 DTS 配置:
/ {cpus {#address-cells = <1>;#size-cells = <0>;cpu0: cpu@0 {compatible = "arm,cortex-a7";device_type = "cpu";reg = <0>;clocks = <&ccu CLK_CPUX>;clock-latency = <244144>; /* 8 32k periods */clock-frequency = <1200000000>;};cpu@1 {compatible = "arm,cortex-a7";device_type = "cpu";reg = <1>;clock-frequency = <1200000000>;};cpu@2 {compatible = "arm,cortex-a7";device_type = "cpu";reg = <2>;clock-frequency = <1200000000>;};cpu@3 {compatible = "arm,cortex-a7";device_type = "cpu";reg = <3>;clock-frequency = <1200000000>;};};...
};
/* arch/arm/kernel/psci_smp.c */const struct smp_operations psci_smp_ops __initconst = {.smp_boot_secondary = psci_boot_secondary,
#ifdef CONFIG_HOTPLUG_CPU.cpu_disable = psci_cpu_disable,.cpu_die = psci_cpu_die,.cpu_kill = psci_cpu_kill,
#endif
};
/* arch/arm/kernel/setup.c */void __init setup_arch(char **cmdline_p)
{
	...
	/* 解析 "cpus" DTS 配置 */
	arm_dt_init_cpu_maps();
	/* ARM PSCI(Power State Coordination Interface) 初始化 */
	psci_dt_init();
#ifdef CONFIG_SMP
	if (is_smp()) {
		if (!mdesc->smp_init || !mdesc->smp_init()) {
			if (psci_smp_available()) /* 如果 PSCI(Power State Coordination Interface) 可用 */
				smp_set_ops(&psci_smp_ops); /* 使用 PSCI 的 smp_operations */
			else if (mdesc->smp)
				...
		}
		smp_init_cpus();
		smp_build_mpidr_hash();
	}
#endif
}
ARM PSCI
用来管理 CPU 的 启动、关闭、休眠、重启等工作。我们先看 PSCI
配置相关的 DTS 配置:
/{cpus {...};.../* PSCI 配置 */psci {compatible = "arm,psci-1.0";method = "smc";};
};
有时候,PSCI
DTS 配置可能是由 U-BOOT
动态插入的,所以你无法在内核的 DTS 中找到它。继续看 PSCI
的初始化:
/* drivers/firmware/psci.c */
static const struct of_device_id psci_of_match[] __initconst = {
	{ .compatible = "arm,psci", .data = psci_0_1_init},
	{ .compatible = "arm,psci-0.2", .data = psci_0_2_init},
	{ .compatible = "arm,psci-1.0", .data = psci_0_2_init},
	{},
};int __init psci_dt_init(void)
{struct device_node *np;np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);/* 没有配置 "psci" 节点 */if (!np || !of_device_is_available(np))return -ENODEV;init_fn = (psci_initcall_t)matched_np->data;return init_fn(np); /* psci_0_1_init() */
}static int __init psci_0_1_init(struct device_node *np)
{
	int err;

	err = get_set_conduit_method(np); /* 设置 发起 PSCI 功能接口 请求方式 (SMC) */
	...
	pr_info("Using PSCI v0.1 Function IDs from DT\n");

	if (!of_property_read_u32(np, "cpu_suspend", &id)) {
		psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
		psci_ops.cpu_suspend = psci_cpu_suspend;
	}
	if (!of_property_read_u32(np, "cpu_off", &id)) {
		psci_function_id[PSCI_FN_CPU_OFF] = id;
		psci_ops.cpu_off = psci_cpu_off;
	}
	if (!of_property_read_u32(np, "cpu_on", &id)) {
		psci_function_id[PSCI_FN_CPU_ON] = id;
		psci_ops.cpu_on = psci_cpu_on;
	}
	if (!of_property_read_u32(np, "migrate", &id)) {
		psci_function_id[PSCI_FN_MIGRATE] = id;
		psci_ops.migrate = psci_migrate;
	}
	...
	return err;
}static int get_set_conduit_method(struct device_node *np)
{
	const char *method;

	pr_info("probing for conduit method from DT.\n");

	if (of_property_read_string(np, "method", &method)) {
		pr_warn("missing \"method\" property\n");
		return -ENXIO;
	}

	if (!strcmp("hvc", method)) {
		set_conduit(PSCI_CONDUIT_HVC);
	} else if (!strcmp("smc", method)) { /* 我们的 DTS 配置通过 SMC 指令发起 PSCI 功能请求 */
		set_conduit(PSCI_CONDUIT_SMC);
	} else {
		pr_warn("invalid \"method\" property: %s\n", method);
		return -EINVAL;
	}
	return 0;
}static void set_conduit(enum psci_conduit conduit)
{
	switch (conduit) {
	...
	case PSCI_CONDUIT_SMC:
		invoke_psci_fn = __invoke_psci_fn_smc;
		break;
	...
	}

	psci_ops.conduit = conduit;
}
接下来看每 CPU idle
进程的初始化工作:
/* kernel/sched/core.c */void __init sched_init(void)
{.../** Make us the idle thread. Technically, schedule() should not be* called from this thread, however somewhere below it might be,* but because we are the idle thread, we just pick up running again* when this runqueue becomes "idle".*//* 初始化当前 CPU 的 idle 进程 */init_idle(current, smp_processor_id());...
}/* 初始化 @cpu 的 idle 进程 */
void init_idle(struct task_struct *idle, int cpu)
{
	struct rq *rq = cpu_rq(cpu); /* @cpu 的运行队列 */
	...
	__sched_fork(0, idle);
	idle->state = TASK_RUNNING;
	idle->se.exec_start = sched_clock();
	idle->flags |= PF_IDLE;

#ifdef CONFIG_SMP
	set_cpus_allowed_common(idle, cpumask_of(cpu)); /* 限定 idle 进程到 @cpu 上运行 */
#endif
	__set_task_cpu(idle, cpu);
	...
	rq->curr = rq->idle = idle; /* 设置 @cpu 运行队列当前进程为 idle */
	idle->on_rq = TASK_ON_RQ_QUEUED;
#ifdef CONFIG_SMP
	idle->on_cpu = 1;
#endif
	init_idle_preempt_count(idle, cpu); /* 开启 @cpu 的抢占 */
	idle->sched_class = &idle_sched_class;
	...
#ifdef CONFIG_SMP/* 设置 idle 进程名为 "swapper/%d" */sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
#endif
}
BOOT CPU
的启动过程接近尾声了,接下来就是其它 非 BOOT CPU
的启动的前期准备工作:
/* init/main.c */static noinline void __ref rest_init(void)
{
	/* 做内核剩余初始化工作的内核线程:其它非 BOOT CPU 将从 kernel_init() 拉起 */
	pid = kernel_thread(kernel_init, NULL, CLONE_FS);
	...
	/* init 进程(上面创建的 kernel_init 线程)的 CPU 亲和性设置:暂时限定到 BOOT CPU 上运行 */
	rcu_read_lock();
	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
	set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id()));
	rcu_read_unlock();
	/* 创建并唤醒【用于创建内核线程的内核线程 kthreadd 】 */
	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
	...
	system_state = SYSTEM_SCHEDULING;
	complete(&kthreadd_done);
	schedule_preempt_disabled();
	cpu_startup_entry(CPUHP_ONLINE); /* BOOT CPU 进入其 idle 进程 */
}
/* kernel/sched/idle.c *//* 在 BOOT CPU 上启动其 idle 进程 */
void cpu_startup_entry(enum cpuhp_state state)
{...while (1)do_idle();
}
到此,BOOT CPU
已经启动完毕,进入了其 idle
进程。
前面我们看到,从 BOOT CPU
启动了一个入口为 kernel_init()
的内核线程,它负责完成内核中剩余的初始化工作,其中就包括 非 BOOT CPU
的启动工作。我们来看 非 BOOT CPU
启动的细节。
kernel_init()kernel_init_freeable()smp_init() /* 启动其它 非 BOOT CPU */
/* kernel/smp.c *//* Called by boot processor to activate the rest. */
void __init smp_init(void)
{
	idle_threads_init(); /* 为系统中的所有 CPU 创建每 CPU 的 idle 线程数据(task_struct等) 并初始化 */
	cpuhp_threads_init(); /* 为系统中的所有 CPU 创建每 CPU 的 热插拔管理 内核线程 */

	pr_info("Bringing up secondary CPUs ...\n");

	/* 启动所有非 BOOT CPU ,逐个按顺序启动 */
	for_each_present_cpu(cpu) {
		if (num_online_cpus() >= setup_max_cpus)
			break;
		if (!cpu_online(cpu)) /* CPU 尚未启动 */
			cpu_up(cpu); /* 启动 CPU @cpu */
	}
	...
}
用 cpu_up()
启动一个 CPU:
/* kernel/cpu.c */int cpu_up(unsigned int cpu)
{return do_cpu_up(cpu, CPUHP_ONLINE);
}static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
{int err = 0;...err = _cpu_up(cpu, 0, target);...return err;
}static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
{int ret = 0;struct task_struct *idle;struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);...if (st->state == CPUHP_OFFLINE) { /* 如果 @cpu 处于离线关闭状态 */idle = idle_thread_get(cpu);...}cpuhp_set_state(st, target); /* 标记 @cpu 为目标状态 @target: st->target = CPUHP_ONLINE */target = min((int)target, CPUHP_BRINGUP_CPU);ret = cpuhp_up_callbacks(cpu, st, target);...return ret;
}/* 设置 CPU 目标状态,返回 CPU 的当前状态 */
static inline enum cpuhp_state
cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
{enum cpuhp_state prev_state = st->state;st->rollback = false;st->last = NULL;st->target = target; /* 设置 CPU 目标状态为 @target */st->single = false;/** 如果 CPU 的 【目标状态 @target > 当前状态 @st->state】,设为 true ,表示是 CPU 启动正向过程; * 如果 CPU 的 【目标状态 @target <= 当前状态 @st->state】,设为 false ,表示是 CPU 关闭反向过程。*/st->bringup = st->state < target;return prev_state; /* 返回 CPU 的当前状态 */
}static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,enum cpuhp_state target)
{enum cpuhp_state prev_state = st->state;int ret = 0;/* 逐个调用状态区间 [CPUHP_OFFLINE, CPUHP_BRINGUP_CPU] 所有热插拔状态的回调 */while (st->state < target) {st->state++;ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);...}return ret;
}static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,bool bringup, struct hlist_node *node,struct hlist_node **lastp)
{struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);struct cpuhp_step *step = cpuhp_get_step(state); /* 获取 @state 的回调接口 */if (!step->multi_instance) {.../* 我们只关心和分析相关的状态回调 bringup_cpu() */ret = cb(cpu); /* ..., bringup_cpu() */...return ret;}
}/* Boot processor state steps */
/* * 我们只关注状态 CPUHP_BRINGUP_CPU 的回调,其它的状态回调对我们* 的分析没有本质影响。*/
static struct cpuhp_step cpuhp_bp_states[] = {...
#ifdef CONFIG_SMP
	/* Kicks the plugged cpu into life */
	[CPUHP_BRINGUP_CPU] = {
		.name = "cpu:bringup",
		.startup.single = bringup_cpu,
		.teardown.single = NULL,
		.cant_stop = true,
	},
	...
#else
#endif
};static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
{struct cpuhp_step *sp;sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;return sp + state;
}static bool cpuhp_is_ap_state(enum cpuhp_state state)
{/** The extra check for CPUHP_TEARDOWN_CPU is only for documentation* purposes as that state is handled explicitly in cpu_down.*/return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
}
看 CPU 热插拔状态 CPUHP_BRINGUP_CPU
回调 bringup_cpu()
:
/* kernel/cpu.c */
static int bringup_cpu(unsigned int cpu)
{
	struct task_struct *idle = idle_thread_get(cpu);
	int ret;
	...
	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu, idle); /* 进入 CPU 启动架构相关的流程 */
	...
	/*
	 * 非 BOOT CPU 启动最后,CPU 进入 CPUHP_AP_ONLINE_IDLE 状态时被唤醒:
	 * secondary_start_kernel()
	 *   cpu_startup_entry(CPUHP_AP_ONLINE_IDLE)
	 *     cpuhp_online_idle(state)
	 *       st->state = CPUHP_AP_ONLINE_IDLE;
	 *       complete_ap_thread(st, true);
	 *     while (1)
	 *       do_idle();
	 */
	return bringup_wait_for_ap(cpu); /* 等待 CPU 启动完成(进入 CPUHP_AP_ONLINE_IDLE 态) */
}
CPU 启动 ARM 架构相关的流程:
/* arch/arm/kernel/smp.c */
int __cpu_up(unsigned int cpu, struct task_struct *idle)
{/* 配置 @cpu 的内核栈空间 */secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;...#ifdef CONFIG_MMU/* 配置 @cpu 的页表 */secondary_data.pgdir = virt_to_phys(idmap_pgd);secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
#endif
	sync_cache_w(&secondary_data); /* cache 同步 */
	ret = smp_ops.smp_boot_secondary(cpu, idle); /* psci_boot_secondary() */
	if (ret == 0) {
		/*
		 * CPU was successfully started, wait for it
		 * to come online or time out.
		 */
		/*
		 * 等待 CPU 成功启动:
		 * secondary_start_kernel() -> complete(&cpu_running)
		 */
		wait_for_completion_timeout(&cpu_running, msecs_to_jiffies(1000));
		if (!cpu_online(cpu)) { /* CPU 应该已经处于在线状态 */
			pr_crit("CPU%u: failed to come online\n", cpu);
			ret = -EIO;
		}
	} else {
		...
	}
	/* secondary_data 数据是所有非 BOOT CPU 共享的,每个 CPU 启动时都要重新设置 */
	memset(&secondary_data, 0, sizeof(secondary_data));
	return ret;
}
从这里开始,通过 PSCI
接口 psci_boot_secondary()
来启动 CPU :
/* arch/arm/kernel/psci_smp.c */static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle)
{if (psci_ops.cpu_on) /* psci_cpu_on() */return psci_ops.cpu_on(cpu_logical_map(cpu),virt_to_idmap(&secondary_startup));return -ENODEV;
}
/* drivers/firmware/psci.c */static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
{
	int err;
	u32 fn; /* PSCI 请求功能号 */

	fn = psci_function_id[PSCI_FN_CPU_ON]; /* 启用 CPU 的 PSCI 功能号 */
	// 中转调用请求给实现了 PSCI 接口的固件 ATF 。
	// ATF 处理 @fn 请求:让目标 CPU(@cpuid)从 @entry_point
	// (即 secondary_startup)开始执行;发起请求的当前 CPU
	// 则从固件调用返回此处继续执行。
	// 通过 SMC 指令发起 PSCI 功能请求。
	err = invoke_psci_fn(fn, cpuid, entry_point, 0); /* __invoke_psci_fn_smc() */
	return psci_to_linux_errno(err);
}
进入 ATF 固件中的 PSCI 启动 CPU 功能接口
后,该接口会让被启动的目标 CPU 跳转到 secondary_startup()
继续执行:
/* arch/arm/kernel/head.S */ENTRY(secondary_startup)
#ifdef CONFIG_ARM_VIRT_EXTbl __hyp_stub_install_secondary
#endifsafe_svcmode_maskall r9mrc p15, 0, r9, c0, c0 @ get processor idbl __lookup_processor_typemovs r10, r5 @ invalid processor?moveq r0, #'p' @ yes, error 'p'beq __error_p/** Use the page tables supplied from __cpu_up.*/adr r4, __secondary_dataldmia r4, {r5, r7, r12} @ address to jump to aftersub lr, r4, r5 @ mmu has been enabledadd r3, r7, lrldrd r4, [r3, #0] @ get secondary_data.pgdir
ARM_BE8(eor r4, r4, r5) @ Swap r5 and r4 in BE:
ARM_BE8(eor r5, r4, r5) @ it can be done in 3 steps
ARM_BE8(eor r4, r4, r5) @ without using a temp reg.
	ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir
	/* 将 CPU 初始化接口(如 __v7_ca7mp_setup)的调用返回地址设为 __enable_mmu */
	badr lr, __enable_mmu @ return address
	/* r13 = __secondary_switched 的链接虚拟地址, __enable_mmu 后跳转到此处执行 */
	mov r13, r12 @ __secondary_switched address
	ldr r12, [r10, #PROCINFO_INITFUNC]
	add r12, r12, r10 @ initialise processor
	@ (return control reg)
	/*
	 * 这里流程有点绕:
	 * __v7_ca7mp_setup // CPU 初始化接口
	 * __enable_mmu // 从 __v7_ca7mp_setup 返回到 __enable_mmu
	 * // __enable_mmu 执行完成跳到 __secondary_switched 处继续执行
	 */
	ret r12 /* 跳转到 CPU 初始化接口(如 __v7_ca7mp_setup),最终经 __secondary_switched 进入 secondary_start_kernel() */
ENDPROC(secondary_startup)/** r6 = &secondary_data*/
ENTRY(__secondary_switched)ldr sp, [r7, #12] @ get secondary_data.stackmov fp, #0b secondary_start_kernel /* 跳转到 secondary_start_kernel() */
ENDPROC(__secondary_switched).../** r6 = &secondary_data*/
ENTRY(__secondary_switched)ldr sp, [r7, #12] @ get secondary_data.stackmov fp, #0b secondary_start_kernel
ENDPROC(__secondary_switched)
	.align
	.type __secondary_data, %object
__secondary_data:
	.long .
	.long secondary_data
	.long __secondary_switched
/* arch/arm/kernel/smp.c */asmlinkage void secondary_start_kernel(void)
{struct mm_struct *mm = &init_mm;unsigned int cpu;secondary_biglittle_init();cpu_switch_mm(mm->pgd, mm);local_flush_bp_all();enter_lazy_tlb(mm, current);local_flush_tlb_all();cpu = smp_processor_id();mmgrab(mm);current->active_mm = mm;cpumask_set_cpu(cpu, mm_cpumask(mm));cpu_init();preempt_disable(); /* 禁用当前CPU抢占 *//* * CPU 热插拔上线前的所有准备工作: * 触发所有状态 CPUHP_AP_ONLINE 之前的 cpu 热插拔回调* (CPUHP_BRINGUP_CPU + 1 -> CPUHP_AP_ONLINE)*/notify_cpu_starting(cpu);...set_cpu_online(cpu, true);// __cpu_up() -> wait_for_completion_timeout(&cpu_running, ...)complete(&cpu_running);/** OK, it's off to the idle thread for us*/cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); /* 进入当前 CPU 的idle 进程 */
}
/* kernel/cpu.c */void notify_cpu_starting(unsigned int cpu)
{struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);int ret;st->booted_once = true;/* 进入 CPU CPUHP_AP_ONLINE 态的所有 @cpu 热插拔状态的工作:* 触发状态区间 [CPUHP_BRINGUP_CPU + 1, CPUHP_AP_ONLINE] 回* 调。*/while (st->state < target) {st->state++;ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); /* 如 gic_starting_cpu(), ... *//** STARTING must not fail!*/WARN_ON_ONCE(ret);}
}
CPU 已启动完毕,然后最终进入 idle 状态:
void cpu_startup_entry(enum cpuhp_state state)
{...cpuhp_online_idle(state);while (1)do_idle();
}void cpuhp_online_idle(enum cpuhp_state state)
{...st->state = CPUHP_AP_ONLINE_IDLE;// bringup_cpu() -> bringup_wait_for_ap(cpu)complete_ap_thread(st, true);
}
我们可以看到,系统 BOOT 阶段,非 BOOT CPU 是逐个、按严格的先后顺序启动的:只有前一 CPU 进入 idle 循环后,后一个 CPU 的启动工作,才会开始。
在不支持或没有实现 PSCI
固件功能的 ARMv7
架构平台上,各 CPU 的启动流程稍有不同,下面我们以全志 sun8i SoC
为例,来说明 CPU 的启动流程。
start_kernel()
	...
	setup_arch(&command_line)
		...
		arm_dt_init_cpu_maps()
#ifdef CONFIG_SMP
		if (is_smp()) {
			if (!mdesc->smp_init || !mdesc->smp_init()) {
				if (psci_smp_available())
					...
				else if (mdesc->smp)
					smp_set_ops(mdesc->smp); /* sun8i_smp_ops */
			}
		}
		...
#endif
	sched_init()
	...
	rest_init()
		pid = kernel_thread(kernel_init, NULL, CLONE_FS);
		...
		cpu_startup_entry(CPUHP_ONLINE);
kernel_init()kernel_init_freeable()smp_init()cpu_up(cpu)do_cpu_up(cpu, CPUHP_ONLINE)_cpu_up(cpu, 0, target)...bringup_cpu()__cpu_up(cpu, idle)sun8i_smp_boot_secondary()
static int sun8i_smp_boot_secondary(unsigned int cpu,struct task_struct *idle)
{u32 reg;if (!(prcm_membase && cpucfg_membase))return -EFAULT;spin_lock(&cpu_lock);/* Set CPU boot address *//* 设置非 BOOT CPU 的启动地址为 secondary_startup */writel(__pa_symbol(secondary_startup),cpucfg_membase + CPUCFG_PRIVATE0_REG);/* Assert the CPU core in reset */writel(0, cpucfg_membase + CPUCFG_CPU_RST_CTRL_REG(cpu));/* Assert the L1 cache in reset */reg = readl(cpucfg_membase + CPUCFG_GEN_CTRL_REG);writel(reg & ~BIT(cpu), cpucfg_membase + CPUCFG_GEN_CTRL_REG);/* Clear CPU power-off gating */reg = readl(prcm_membase + PRCM_CPU_PWROFF_REG);writel(reg & ~BIT(cpu), prcm_membase + PRCM_CPU_PWROFF_REG);mdelay(1);/* Deassert the CPU core reset */writel(3, cpucfg_membase + CPUCFG_CPU_RST_CTRL_REG(cpu));spin_unlock(&cpu_lock);return 0;
}
看看 sun8i_smp_boot_secondary()
的逻辑,就是把 非 BOOT CPU
的启动地址设置为 secondary_startup
,即 CPU 启动时从 secondary_startup
开始执行,后面的流程就和 PSCI 一样了:
secondary_startupsecondary_start_kernel()...notify_cpu_starting(cpu)...cpu_startup_entry(CPUHP_AP_ONLINE_IDLE)
start_kernel()rest_init()pid = kernel_thread(kernel_init, NULL, CLONE_FS)kernel_init()kernel_init_freeable()smp_init() /* 启动所有 CPU */...do_basic_setup()...do_initcalls()for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++)do_initcall_level(level)for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)do_one_initcall(*fn)ret = fn() /* topology_init() */
/* include/linux/cpu.h */struct cpu { /* CPU 设备抽象*/int node_id; /* The node which contains the CPU *//* CPU 是否可以热插拔: BOOT CPU 不允许热插拔 */int hotpluggable; /* creates sysfs control file if hotpluggable */struct device dev;
};/* arch/arm/include/asm/cpu.h */struct cpuinfo_arm { /* ARM CPU 设备抽象*/struct cpu cpu;u32 cpuid;
#ifdef CONFIG_SMPunsigned int loops_per_jiffy;
#endif
}/* arch/arm/kernel/setup.c */DEFINE_PER_CPU(struct cpuinfo_arm, cpu_data); /* ARM 平台每 CPU 的信息数据 */static int __init topology_init(void)
{int cpu;for_each_possible_cpu(cpu) {struct cpuinfo_arm *cpuinfo = &per_cpu(cpu_data, cpu);cpuinfo->cpu.hotpluggable = platform_can_hotplug_cpu(cpu);register_cpu(&cpuinfo->cpu, cpu);}return 0;
}
/* drivers/base/cpu.c */static DEFINE_PER_CPU(struct device *, cpu_sys_devices);struct bus_type cpu_subsys = {.name = "cpu",.dev_name = "cpu",.match = cpu_subsys_match,
#ifdef CONFIG_HOTPLUG_CPU/* 用来处理 CPU 热插拔。热插拔细节在章节 4 展开 */.online = cpu_subsys_online,.offline = cpu_subsys_offline,
#endif
};int register_cpu(struct cpu *cpu, int num)
{int error;cpu->node_id = cpu_to_node(num);memset(&cpu->dev, 0x00, sizeof(struct device));cpu->dev.id = num; /* Linux CPU 编号 */cpu->dev.bus = &cpu_subsys;...error = device_register(&cpu->dev); /* 注册 CPU 设备到 driver core */...per_cpu(cpu_sys_devices, num) = &cpu->dev;register_cpu_under_node(num, cpu_to_node(num));...return 0;
}
每个 CPU 都有一个热插拔处理线程,前面的流程中,我们没有仔细分析它们,现在来看一下:
kernel_init()kernel_init_freeable()smp_init()...cpuhp_threads_init(); /* 为系统中的所有 CPU 创建每 CPU 的 热插拔管理 内核线程 */...for_each_present_cpu(cpu) {...if (!cpu_online(cpu)) /* CPU 尚未启动 */cpu_up(cpu); /* 启动 CPU @cpu */}
/* kernel/cpu.c *//* 每 CPU 的热插拔[状态、内核线程等]数据 */
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {.fail = CPUHP_INVALID,
};static struct smp_hotplug_thread cpuhp_threads = {/** store 指向每cpu的热插拔管理数据 cpuhp_state 的 thread: * smpboot_register_percpu_thread(&cpuhp_threads) 调用过程中,* 会设定到创建的热插拔线程对应的 task_struct */.store = &cpuhp_state.thread,.create = &cpuhp_create,.thread_should_run = cpuhp_should_run,.thread_fn = cpuhp_thread_fun,.thread_comm = "cpuhp/%u",.selfparking = true,
};void __init cpuhp_threads_init(void)
{BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads)); /* 为系统中的每个 CPU 都创建一个热插拔处理内核线程 */kthread_unpark(this_cpu_read(cpuhp_state.thread)); /* 启动当前 CPU 的热插拔处理内核线程 */
}
/* kernel/smpboot.c */int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,const struct cpumask *cpumask)
{unsigned int cpu;int ret = 0;if (!alloc_cpumask_var(&plug_thread->cpumask, GFP_KERNEL))return -ENOMEM;cpumask_copy(plug_thread->cpumask, cpumask); /* 设置所有要创建热插拔管理线程的 CPU 掩码 */for_each_online_cpu(cpu) { /* 为当前在线的 CPU 创建内核线程:当前只有 BOOT CPU 在线 *//* 创建 @cpu 的热插拔管理内核线程:创建但不启动它 */ret = __smpboot_create_thread(plug_thread, cpu);...if (cpumask_test_cpu(cpu, cpumask)) /* 如果 @cpu 在 @cpumask 中 */smpboot_unpark_thread(plug_thread, cpu); /* 则启动 @cpu 的内核线程 */}list_add(&plug_thread->list, &hotplug_threads);...return ret;
}
因为当前只有 BOOT CPU
在线,所以只为 BOOT CPU
创建了1个热插拔内核线程。
在 非 BOOT CPU
启动过程中,在进入 CPUHP_BRINGUP_CPU
状态拉起 CPU 之前,会经过 CPUHP_CREATE_THREADS
状态 ,此时会触发回调 smpboot_create_threads()
,建立当前启动 CPU 的热插拔管理线程:
/* kernel/smpboot.c */int smpboot_create_threads(unsigned int cpu)
{struct smp_hotplug_thread *cur;int ret = 0;mutex_lock(&smpboot_threads_lock);list_for_each_entry(cur, &hotplug_threads, list) {/* 创建 @cpu 的各内核线程(包括 @cpu 的热插拔管理内核线程) */ret = __smpboot_create_thread(cur, cpu);if (ret)break;}mutex_unlock(&smpboot_threads_lock);return ret;
}
系统启动后,我们可以查看到各 CPU 热插拔管理内核线程:
root@qemu-ubuntu:~# ps -ef | grep cpuhp | grep -v grep
root 13 2 0 03:00 ? 00:00:00 [cpuhp/0]
root 14 2 0 03:00 ? 00:00:00 [cpuhp/1]
root 20 2 0 03:00 ? 00:00:00 [cpuhp/2]
root 26 2 0 03:00 ? 00:00:00 [cpuhp/3]
本小节给出 CPU 热插拔过程的概述,由于涉及的细节太多,限于篇幅,将不做深入展开。
以一条 shell 命令发起 CPU offline 过程:
# echo 0 > /sys/devices/system/cpu/cpuN/online
这将触发接口 cpu_subsys_offline()
:
device_offline()dev->bus->offline(dev) = cpu_subsys_offline(dev)cpu_down(dev->id)do_cpu_down(cpu, CPUHP_OFFLINE)cpu_down_maps_locked(cpu, target)_cpu_down(cpu, 0, target)
BOOT CPU 是不支持 offline 的,我们查看 BOOT CPU 的 sysfs 接口:
root@qemu-ubuntu:~# ls -l /sys/devices/system/cpu/cpu0
total 0
-rw-r--r-- 1 root root 4096 Mar 26 06:04 cpu_capacity
-r-------- 1 root root 4096 Mar 26 06:04 crash_notes
-r-------- 1 root root 4096 Mar 26 06:04 crash_notes_size
drwxr-xr-x 2 root root 0 Mar 26 06:04 hotplug
lrwxrwxrwx 1 root root 0 Mar 26 06:04 of_node -> ../../../../firmware/devicetree/base/cpus/cpu@0
drwxr-xr-x 2 root root 0 Mar 26 06:04 power
lrwxrwxrwx 1 root root 0 Mar 26 06:04 subsystem -> ../../../../bus/cpu
drwxr-xr-x 2 root root 0 Mar 26 06:04 topology
-rw-r--r-- 1 root root 4096 Mar 26 06:04 uevent
我们看到,cpu0
没有 online
属性导出,自然也就不支持 offline
和 online
操作。
以一条 shell 命令发起 CPU online 过程:
# echo 1 > /sys/devices/system/cpu/cpuN/online
这将触发接口 cpu_subsys_online()
:
device_online()dev->bus->online(dev) = cpu_subsys_online(dev)cpu_up(cpuid)do_cpu_up(cpu, CPUHP_ONLINE)
从上两小节的描述,我们看不出 CPU 热插拔(offline/online
)和其热插拔管理线程有什么关系,我们在这里以 offline
过程为例,展开其细节:
device_offline()...do_cpu_down(cpu, CPUHP_OFFLINE)..._cpu_down(cpu, 0, target)
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,enum cpuhp_state target)
{...if (st->state > CPUHP_TEARDOWN_CPU) {st->target = max((int)target, CPUHP_TEARDOWN_CPU);/* 唤醒 CPU 热插拔管理线程,处理 CPU offline 过程前期部分 */ret = cpuhp_kick_ap_work(cpu); ...if (st->state > CPUHP_TEARDOWN_CPU)goto out;st->target = target;}/* 处理剩余的 CPU offline 过程: 调用状态区间 [..., CPUHP_TEARDOWN_CPU] 各回调 */ ret = cpuhp_down_callbacks(cpu, st, target);...out:...return ret;
}
随着 CPU 的热插拔管理线程被唤醒,将处理 CPU offline
过程前期部分:
/* kernel/smpboot.c */static int smpboot_thread_fn(void *data)
{struct smpboot_thread_data *td = data;struct smp_hotplug_thread *ht = td->ht;while (1) {...if (!ht->thread_should_run(td->cpu)) {...} else {__set_current_state(TASK_RUNNING);preempt_enable();ht->thread_fn(td->cpu); /* cpuhp_thread_fun() */}}
}
/* kernel/cpu.c */static void cpuhp_thread_fun(unsigned int cpu)
{...if (st->single) {...} else {if (bringup) {...} else {state = st->state;st->state--; /* 更新状态 */st->should_run = (st->state > st->target);}}.../* CPU offline 状态回调:每次调用 1 个状态回调,直到达到目标状态 @st->target 为止 */st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);...
}
CPU online
过程中,唤醒热插拔管理线程的过程类似,在此不再赘述,感兴趣的读者可自行阅读代码分析。
《DEN0013D_cortex_a_series_PG.pdf》
《learn_the_architecture_-_trustzone_for_aarch64_102418_0101_01_en.pdf》
/
本文发布于:2024-01-29 06:35:25,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170648133013390.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |