2021 Linux technology summary: kernel

Having traced the U-Boot startup process, we have finally arrived at the Linux kernel. Next, we analyze the startup process of the Linux kernel and how to do a simple port to a new board.

1, kernel startup process

The linker script arch/arm/kernel/vmlinux.lds shows that the entry point of the Linux kernel is stext. stext is defined in arch/arm/kernel/head.S, and its code is as follows:

/*
 * stext - kernel entry point (ARM).
 *
 * Entry conditions, as stated for this entry point:
 *   MMU = off, D-cache = off, I-cache = dont care,
 *   r0 = 0, r1 = machine nr, r2 = atags or dtb pointer.
 *
 * Flow: force SVC mode with interrupts masked, look up the processor
 * type, work out the physical offset, validate the atags/dtb pointer,
 * build the initial page tables, run the CPU specific init function
 * and finally branch to __enable_mmu.  r13 is loaded with the address
 * of __mmap_switched, the address to jump to once the MMU is on.
 */
ENTRY(stext)
 ARM_BE8(setend	be )			@ ensure we are in BE8 mode

 THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
 THUMB(	bx	r9		)			@ If this is a Thumb-2 kernel,
 THUMB(	.thumb			)		@ switch to Thumb now.
 THUMB(1:			)

#ifdef CONFIG_ARM_VIRT_EXT
	bl	__hyp_stub_install
#endif
	@ ensure svc mode and all interrupts masked
	safe_svcmode_maskall r9

	mrc	p15, 0, r9, c0, c0			@ get processor id
	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
	movs	r10, r5					@ invalid processor (r5=0)?
 THUMB( it	eq )					@ force fixup-able long branch encoding
	beq	__error_p					@ yes, error 'p'

#ifdef CONFIG_ARM_LPAE
	@ LPAE kernels need the long-descriptor format; bail out if absent
	mrc	p15, 0, r3, c0, c1, 4		@ read ID_MMFR0
	and	r3, r3, #0xf				@ extract VMSA support
	cmp	r3, #5						@ long-descriptor translation table format?
 THUMB( it	lo )					@ force fixup-able long branch encoding
	blo	__error_lpae				@ only classic page table format
#endif

	/*
	 * Compute the physical offset into r8.  In the non-XIP case it is
	 * derived at run time from the two words at local label 2 (defined
	 * outside this excerpt - presumably a link-time address and
	 * PAGE_OFFSET; TODO confirm against the full file).
	 */
#ifndef CONFIG_XIP_KERNEL
	adr	r3, 2f
	ldmia	r3, {r4, r8}
	sub	r4, r3, r4			@ (PHYS_OFFSET - PAGE_OFFSET)
	add	r8, r8, r4			@ PHYS_OFFSET
#else
	ldr	r8, =PLAT_PHYS_OFFSET		@ always constant in this case
#endif

	/*
	 * r1 = machine no, r2 = atags or dtb,
	 * r8 = phys_offset, r9 = cpuid, r10 = procinfo
	 */
	bl	__vet_atags
#ifdef CONFIG_SMP_ON_UP
	bl	__fixup_smp
#endif
#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
	bl	__fixup_pv_table
#endif
	bl	__create_page_tables

	/*
	 * The following calls CPU specific code in a position independent
	 * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
	 * xxx_proc_info structure selected by __lookup_processor_type
	 * above.  On return, the CPU will be ready for the MMU to be
	 * turned on, and r0 will hold the CPU control register value.
	 */
	ldr	r13, =__mmap_switched		@ address to jump to after
									@ mmu has been enabled
	adr	lr, BSYM(1f)				@ return (PIC) address
	mov	r8, r4						@ set TTBR1 to swapper_pg_dir
	@ r12 = value stored at PROCINFO_INITFUNC + procinfo base (r10)
	ldr	r12, [r10, #PROCINFO_INITFUNC]
	add	r12, r12, r10
	ret	r12
	@ the init function returns here through lr
1:	b	__enable_mmu
ENDPROC(stext)

From the comments in head.S we know the prerequisites for entering the kernel: MMU = off, D-cache = off, I-cache = don't care, r0 = 0, r1 = machine nr, r2 = atags or dtb pointer. In other words, the MMU and D-cache must be off, the I-cache may be either on or off, r0 must be 0, r1 holds the correct machine ID, and r2 holds the address of the device tree blob (or of the ATAGS list on boards that boot without a device tree).
The stext flow is then:

  • Call safe_svcmode_maskall to enter SVC mode and mask all interrupts;
  • Call __lookup_processor_type to determine whether the kernel supports this CPU. If it does, obtain the procinfo information;
  • Call __vet_atags to validate the ATAGS or device tree pointer;
  • Call __create_page_tables to create the initial page tables;
  • Load the address of the __mmap_switched function into r13; this is the address to jump to once the MMU has been enabled;
  • Finally, branch to the __enable_mmu function to turn on the MMU.

A word about the __mmap_switched function, because it is the one that eventually calls the start_kernel function. Its code is in arch/arm/kernel/head-common.S, as follows:

/*
 * __mmap_switched - the address stext loads into r13 to jump to after
 * the MMU has been enabled.
 *
 * Copies the data segment if needed, clears the BSS, loads sp, stores
 * the processor ID (r9), machine type (r1), atags pointer (r2) and,
 * when r7 is non-zero, the control register value (r0) through the
 * pointers read from __mmap_switched_data, then branches to
 * start_kernel.  The layout of __mmap_switched_data is defined outside
 * this excerpt.
 */
__mmap_switched:
	adr	r3, __mmap_switched_data

	@ first four table words -> r4..r7; r3 is advanced past them
	ldmia	r3!, {r4, r5, r6, r7}
	cmp	r4, r5				@ Copy data segment if needed
	@ word copy from [r4] to [r5] until r5 reaches r6
1:	cmpne	r5, r6
	ldrne	fp, [r4], #4
	strne	fp, [r5], #4
	bne	1b

	mov	fp, #0				@ Clear BSS (and zero fp)
	@ store zero words from r6 up to (not including) r7
1:	cmp	r6, r7
	strcc	fp, [r6],#4
	bcc	1b

	@ next table words: four save pointers and the value for sp
 ARM(	ldmia	r3, {r4, r5, r6, r7, sp})
 THUMB(	ldmia	r3, {r4, r5, r6, r7}	)
 THUMB(	ldr	sp, [r3, #16]		)
	str	r9, [r4]			@ Save processor ID
	str	r1, [r5]			@ Save machine type
	str	r2, [r6]			@ Save atags pointer
	cmp	r7, #0
	strne	r0, [r7]			@ Save control register values
	b	start_kernel
ENDPROC(__mmap_switched)

On the penultimate line, it branches to the start_kernel function. start_kernel is defined in the file init/main.c and calls many functions to complete Linux startup. The code is shown first, followed by an explanation:

/*
 * start_kernel() - C entry point of the kernel, reached from the
 * architecture boot code (on ARM, the final branch in __mmap_switched).
 *
 * Interrupts are disabled near the top with local_irq_disable() and stay
 * off until local_irq_enable() further down, after the scheduler, IRQ and
 * timer initialisation calls have run.  The remaining subsystems are then
 * initialised and the function ends by calling rest_init().
 */
asmlinkage __visible void __init start_kernel(void)
{
	char *command_line;
	char *after_dashes;

	/*
	 * Need to run as early as possible, to initialize the
	 * lockdep hash:
	 */
	lockdep_init();
	set_task_stack_end_magic(&init_task);
	smp_setup_processor_id();
	debug_objects_early_init();

	/*
	 * Set up the the initial canary ASAP:
	 */
	boot_init_stack_canary();

	cgroup_init_early();

	local_irq_disable();
	early_boot_irqs_disabled = true;

/*
 * Interrupts are still disabled. Do necessary setups, then
 * enable them
 */
	boot_cpu_init();
	page_address_init();
	pr_notice("%s", linux_banner);
	/* architecture-specific setup; hands back the command line */
	setup_arch(&command_line);
	mm_init_cpumask(&init_mm);
	setup_command_line(command_line);
	setup_nr_cpu_ids();
	setup_per_cpu_areas();
	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */

	build_all_zonelists(NULL, NULL);
	page_alloc_init();

	pr_notice("Kernel command line: %s\n", boot_command_line);
	parse_early_param();
	/*
	 * Parse the kernel parameters; a valid non-NULL return value is
	 * parsed again with set_init_arg as the handler.
	 */
	after_dashes = parse_args("Booting kernel",
				  static_command_line, __start___param,
				  __stop___param - __start___param,
				  -1, -1, &unknown_bootoption);
	if (!IS_ERR_OR_NULL(after_dashes))
		parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
			   set_init_arg);

	jump_label_init();

	/*
	 * These use large bootmem allocations and must precede
	 * kmem_cache_init()
	 */
	setup_log_buf(0);
	pidhash_init();
	vfs_caches_init_early();
	sort_main_extable();
	trap_init();
	mm_init();

	/*
	 * Set up the scheduler prior starting any interrupts (such as the
	 * timer interrupt). Full topology setup happens at smp_init()
	 * time - but meanwhile we still have a functioning scheduler.
	 */
	sched_init();
	/*
	 * Disable preemption - early bootup scheduling is extremely
	 * fragile until we cpu_idle() for the first time.
	 */
	preempt_disable();
	if (WARN(!irqs_disabled(),
		 "Interrupts were enabled *very* early, fixing it\n"))
		local_irq_disable();
	idr_init_cache();
	rcu_init();

	/* trace_printk() and trace points may be used after this */
	trace_init();

	context_tracking_init();
	radix_tree_init();
	/* init some links before init_ISA_irqs() */
	early_irq_init();
	init_IRQ();
	tick_init();
	rcu_init_nohz();
	init_timers();
	hrtimers_init();
	softirq_init();
	timekeeping_init();
	time_init();
	sched_clock_postinit();
	perf_event_init();
	profile_init();
	call_function_init();
	/* end of the interrupts-off phase: warn if something enabled them early */
	WARN(!irqs_disabled(), "Interrupts were enabled early\n");
	early_boot_irqs_disabled = false;
	local_irq_enable();

	kmem_cache_init_late();

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	console_init();
	if (panic_later)
		panic("Too many boot %s vars at `%s'", panic_later,
		      panic_param);

	lockdep_info();

	/*
	 * Need to run this when irqs are enabled, because it wants
	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
	 * too:
	 */
	locking_selftest();

#ifdef CONFIG_BLK_DEV_INITRD
	/* drop the initrd when its first page lies below min_low_pfn */
	if (initrd_start && !initrd_below_start_ok &&
	    page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
		pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
		    page_to_pfn(virt_to_page((void *)initrd_start)),
		    min_low_pfn);
		initrd_start = 0;
	}
#endif
	page_ext_init();
	debug_objects_mem_init();
	kmemleak_init();
	setup_per_cpu_pageset();
	numa_policy_init();
	/* late_time_init is an optional hook; call it only when set */
	if (late_time_init)
		late_time_init();
	sched_clock_init();
	calibrate_delay();
	pidmap_init();
	anon_vma_init();
	acpi_early_init();
#ifdef CONFIG_X86
	if (efi_enabled(EFI_RUNTIME_SERVICES))
		efi_enter_virtual_mode();
#endif
#ifdef CONFIG_X86_ESPFIX64
	/* Should be run before the first non-init thread is created */
	init_espfix_bsp();
#endif
	thread_info_cache_init();
	cred_init();
	fork_init();
	proc_caches_init();
	buffer_init();
	key_init();
	security_init();
	dbg_late_init();
	vfs_caches_init(totalram_pages);
	signals_init();
	/* rootfs populating might need page-writeback */
	page_writeback_init();
	proc_root_init();
	nsfs_init();
	cpuset_init();
	cgroup_init();
	taskstats_init_early();
	delayacct_init();

	check_bugs();

	acpi_subsystem_init();
	sfi_init_late();

	if (efi_enabled(EFI_RUNTIME_SERVICES)) {
		efi_late_init();
		efi_free_boot_services();
	}

	ftrace_init();

	/* Do the rest non-__init'ed, we're now alive */
	rest_init();
}

At the very end, start_kernel calls the rest_init function. rest_init is also defined in the file init/main.c, and it is not long:

/*
 * rest_init() - final stage of start_kernel().
 *
 * Creates the kernel_init thread and then the kthreadd thread, records
 * the kthreadd task in kthreadd_task, signals kthreadd_done (which
 * kernel_init_freeable() waits on), and then turns the calling context
 * into the idle loop via cpu_startup_entry().
 */
static noinline void __init_refok rest_init(void)
{
	int pid;

	rcu_scheduler_starting();
	smpboot_thread_init();
	/*
	 * We need to spawn init first so that it obtains pid 1, however
	 * the init task will end up wanting to create kthreads, which, if
	 * we schedule it before we create kthreadd, will OOPS.
	 */
	kernel_thread(kernel_init, NULL, CLONE_FS);
	numa_default_policy();
	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
	/* look up the task for the pid just returned, under the RCU read lock */
	rcu_read_lock();
	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
	rcu_read_unlock();
	/* kthreadd exists now; release the waiter in kernel_init_freeable() */
	complete(&kthreadd_done);

	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	init_idle_bootup_task(current);
	schedule_preempt_disabled();
	/* Call into cpu_idle with preempt disabled */
	cpu_startup_entry(CPUHP_ONLINE);
}

Its process is:

  • Call rcu_scheduler_starting to start the RCU scheduler;
  • Call kernel_thread to create the kernel_init thread (PID 1), which later executes the user-space init program and so takes the system from kernel space to user space;
  • Call kernel_thread again to create the kthreadd kernel thread, with a PID of 2. kthreadd is responsible for creating and managing all other kernel threads;
  • Finally, call cpu_startup_entry to enter the idle loop. The idle thread is very similar to standby: it occupies the CPU when nothing else is runnable and hands the CPU over to other tasks as soon as there is work to do.
    The kernel_init function is the body of the init process. The code is as follows:
/*
 * kernel_init() - thread function started by rest_init().
 *
 * Finishes initialisation via kernel_init_freeable(), frees the init
 * memory, marks rodata read-only, sets system_state to SYSTEM_RUNNING
 * and then tries to start an init program.
 *
 * @unused: not referenced.
 *
 * Return: 0 once an init program has been started successfully;
 * otherwise panic() is called.
 */
static int __ref kernel_init(void *unused)
{
	int ret;

	kernel_init_freeable();
	/* need to finish all async __init code before freeing the memory */
	async_synchronize_full();
	free_initmem();
	mark_rodata_ro();
	system_state = SYSTEM_RUNNING;
	numa_default_policy();

	flush_delayed_fput();

	/*
	 * First choice: the ramdisk init command.  A failure here is only
	 * logged and the fallbacks below are still tried.
	 */
	if (ramdisk_execute_command) {
		ret = run_init_process(ramdisk_execute_command);
		if (!ret)
			return 0;
		pr_err("Failed to execute %s (error %d)\n",
		       ramdisk_execute_command, ret);
	}

	/*
	 * We try each of these until one succeeds.
	 *
	 * The Bourne shell can be used instead of init if we are
	 * trying to recover a really broken machine.
	 */
	/* an explicitly requested init that fails is fatal: no fallback */
	if (execute_command) {
		ret = run_init_process(execute_command);
		if (!ret)
			return 0;
		panic("Requested init %s failed (error %d).",
		      execute_command, ret);
	}
	if (!try_to_run_init_process("/sbin/init") ||
	    !try_to_run_init_process("/etc/init") ||
	    !try_to_run_init_process("/bin/init") ||
	    !try_to_run_init_process("/bin/sh"))
		return 0;

	panic("No working init found.  Try passing init= option to kernel. "
	      "See Linux Documentation/init.txt for guidance.");
}
  • Call the kernel_init_freeable function to complete the remaining initialization for the init process, which is described below;
  • Call the async_synchronize_full function, which returns only after the async_running and async_pending lists have been emptied, i.e. after all asynchronous __init work (used to speed up Linux startup) has finished;
  • Call the free_initmem function to release the memory occupied by the kernel's initialization code and data, which is no longer needed once boot has completed;
  • Call the mark_rodata_ro function to mark the kernel's read-only data as read-only;
  • Call the numa_default_policy function to restore the memory policy of the current process to the default state;
  • If ramdisk_execute_command or execute_command is set, call the run_init_process function to start that program as the user-space init process; otherwise try /sbin/init, /etc/init, /bin/init and /bin/sh in turn.
    Finally, take a look at the kernel_init_freeable() function:
/*
 * kernel_init_freeable() - first step of kernel_init().
 *
 * Waits for rest_init() to signal kthreadd_done, lifts the allocation,
 * memory-node and CPU restrictions for the current task, brings up SMP,
 * runs do_basic_setup(), opens /dev/console and duplicates it twice,
 * and decides whether an early user-space init (ramdisk_execute_command,
 * default "/init") is available; if not, prepare_namespace() is called.
 */
static noinline void __init kernel_init_freeable(void)
{
	/*
	 * Wait until kthreadd is all set-up.
	 */
	wait_for_completion(&kthreadd_done);

	/* Now the scheduler is fully set up and can do blocking allocations */
	gfp_allowed_mask = __GFP_BITS_MASK;

	/*
	 * init can allocate pages on any node
	 */
	set_mems_allowed(node_states[N_MEMORY]);
	/*
	 * init can run on any cpu.
	 */
	set_cpus_allowed_ptr(current, cpu_all_mask);

	cad_pid = task_pid(current);

	smp_prepare_cpus(setup_max_cpus);

	do_pre_smp_initcalls();
	lockup_detector_init();

	smp_init();
	sched_init_smp();

	do_basic_setup();

	/* Open the /dev/console on the rootfs, this should never fail */
	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
		pr_err("Warning: unable to open an initial console.\n");

	/* duplicate descriptor 0 twice (presumably yielding fds 1 and 2) */
	(void) sys_dup(0);
	(void) sys_dup(0);
	/*
	 * check if there is an early userspace init.  If yes, let it do all
	 * the work
	 */

	if (!ramdisk_execute_command)
		ramdisk_execute_command = "/init";

	/* no accessible early init: clear the command and set up the namespace */
	if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
		ramdisk_execute_command = NULL;
		prepare_namespace();
	}

	/*
	 * Ok, we have completed the initial bootup, and
	 * we're essentially up and running. Get rid of the
	 * initmem segments and start the user-mode stuff..
	 *
	 * rootfs is available now, try loading the public keys
	 * and default modules
	 */

	integrity_load_keys();
	load_default_modules();
}
  • Call the wait_for_completion function to wait for kthreadd_done, i.e. until kthreadd has been created;
  • Call the set_mems_allowed function so that the init process can allocate memory pages on any node;
  • Call the set_cpus_allowed_ptr function with cpu_all_mask. This function sets the CPU mask that restricts which processors a task may run on; passing cpu_all_mask lets the init process run on any CPU;
  • Call the smp_prepare_cpus function with setup_max_cpus, the maximum number of CPUs to be brought up;
  • Call the do_pre_smp_initcalls function, which walks the table of early initcall functions and executes each one in sequence through do_one_initcall(fn);
  • Call the do_basic_setup function to complete the initialization of the Linux device drivers and of the driver model subsystem;
  • Call the prepare_namespace function to mount the root file system (this happens only when no early user-space init is found).

2, kernel migration

Porting the kernel only requires porting the defconfig configuration file and the device tree file, which are under arch/arm/configs and arch/arm/boot/dts respectively. After copying them, register the new device tree in the Makefile of arch/arm/boot/dts by adding the copied file's .dtb name to the existing list of i.MX6ULL device trees.
The porting script is shown below (note: it is based on the NXP imx6ullevk port):

#!/bin/bash
#
# Port the Alientek i.MX6ULL eMMC kernel configuration to a new board name.
#
# Run from the top of the kernel source tree.  The script:
#   1. writes ${board}_building.sh, a helper that runs distclean, the board
#      defconfig, menuconfig and a full build;
#   2. copies imx_alientek_emmc_defconfig to imx_${board}_emmc_defconfig;
#   3. copies imx6ull-alientek-emmc.dts to imx6ull-${board}-emmc.dts;
#   4. adds the new .dtb to arch/arm/boot/dts/Makefile directly after the
#      line containing "CONFIG_SOC_IMX6ULL) += ", unless it is already listed.
#
# Requires GNU sed (-i and the one-line "a\text" form) and GNU grep (-m).

board=Xport_Alientek
BOARD=XPORT_ALIENTEK	# not referenced below; kept from the original script

build_script=${board}_building.sh
make_cmd="make ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf-"

# Print an error message and abort.
die()
{
	echo "$*" >&2
	exit 1
}

# Print the line number of the first Makefile line that contains the fixed
# string $1; print nothing when no line matches.
line_of()
{
	grep -n -m1 -F -- "$1" Makefile | cut -d: -f1
}

# Adding a compilation script
cat > "${build_script}" <<EOF || die "cannot write ${build_script}"
#!/bin/bash

${make_cmd} distclean
${make_cmd} imx_${board}_emmc_defconfig
${make_cmd} menuconfig
${make_cmd} all -j8
EOF
# Executable, but not world-writable.
chmod 755 "${build_script}" || die "cannot chmod ${build_script}"

# Stop on a failed cd/cp: continuing would edit files in the wrong directory.
cd arch/arm/configs || die "arch/arm/configs not found; run from the kernel tree root"
cp imx_alientek_emmc_defconfig "imx_${board}_emmc_defconfig" || die "cannot copy defconfig"

cd ../../../arch/arm/boot/dts || die "arch/arm/boot/dts not found"
cp imx6ull-alientek-emmc.dts "imx6ull-${board}-emmc.dts" || die "cannot copy device tree source"

# A plain non-empty test; the former "[ ... > 0 ]" was a redirection that
# created a stray file named "0" in this directory.
if [ -n "$(line_of "imx6ull-${board}-emmc.dtb ")" ]
then
	echo "The development board : $board already exists"
else
	anchor=$(line_of "CONFIG_SOC_IMX6ULL) += ")
	[ -n "${anchor}" ] || die "no \"CONFIG_SOC_IMX6ULL) += \" line found in Makefile"
	# Append "<TAB>imx6ull-<board>-emmc.dtb \" after the anchor line.
	sed -i "${anchor}a\	imx6ull-${board}-emmc.dtb "'\\' Makefile || die "cannot update Makefile"
	echo "The new linux-kernel : $board is added"
fi

Keywords: Linux kernel

Added by iyia12co on Thu, 13 Jan 2022 10:07:53 +0200