Start of kthreadd and init processes

*** Based on the analysis by IngresGe

1, kthreadd

/bsp/kernel/kernel4.14/kernel/kthread.c

/*
 * kthreadd - body of the kernel thread that creates other kernel threads.
 * @unused: not referenced by this function.
 *
 * Names the current task "kthreadd", makes it ignore signals, lets it run on
 * every CPU and memory node, and marks it PF_NOFREEZE. It then loops forever:
 * it sleeps while kthread_create_list is empty and otherwise removes each
 * queued kthread_create_info from the list and passes it to create_kthread().
 *
 * The for (;;) loop has no exit, so the trailing "return 0" is never reached.
 */
int kthreadd(void *unused)
{
	struct task_struct *tsk = current;

	/* Setup a clean context for our children to inherit. */
	set_task_comm(tsk, "kthreadd");
	ignore_signals(tsk);
	
	/* Allow kthreadd to run on any CPU and on any memory node. */
	set_cpus_allowed_ptr(tsk, cpu_all_mask);
	set_mems_allowed(node_states[N_MEMORY]);

	current->flags |= PF_NOFREEZE;
	cgroup_init_kthreadd();

	for (;;) {
		/*
		 * Mark the task as sleeping *before* testing the list, and only
		 * give up the CPU if there is nothing queued.
		 * NOTE(review): presumably this ordering ensures that a wakeup
		 * issued between the test and schedule() is not lost - confirm.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (list_empty(&kthread_create_list))
			schedule();
		__set_current_state(TASK_RUNNING);

		/*
		 * Drain the request list. The lock protects the list only: it is
		 * dropped around create_kthread() and re-taken before the list
		 * is examined again.
		 */
		spin_lock(&kthread_create_lock);
		while (!list_empty(&kthread_create_list)) {
			struct kthread_create_info *create;

			/* Take the first pending request off the list. */
			create = list_entry(kthread_create_list.next,
					    struct kthread_create_info, list);
			list_del_init(&create->list);
			spin_unlock(&kthread_create_lock);

			create_kthread(create);

			spin_lock(&kthread_create_lock);
		}
		spin_unlock(&kthread_create_lock);
	}

	return 0;
}

2, init

After kernel_init starts, it first completes the remaining initialization work and then tries to execute ramdisk_execute_command. If that is unset or fails, it tries execute_command (and panics if execute_command is set but cannot be run). If neither command is set, it falls back to trying /sbin/init, /etc/init, /bin/init and /bin/sh in that order. As soon as one of these programs is started successfully, the others are not tried.

Android generally places an init executable in the root directory, so once kernel initialization is complete, the kernel's init process directly executes that init file.

/*
 * kernel_init - body of the kernel's init thread.
 * @unused: not referenced by this function.
 *
 * Runs kernel_init_freeable(), frees the init memory, switches system_state
 * to SYSTEM_RUNNING and then tries to start a user-space init program:
 * ramdisk_execute_command first, then execute_command, then a fixed list of
 * fallback paths.
 *
 * Returns 0 as soon as one program has been started successfully. If
 * execute_command is set but fails, or no fallback can be started, the
 * function panics instead of returning.
 */
static int __ref kernel_init(void *unused)
{
	int ret;
	/* Perform the part of the init work done by kernel_init_freeable(). */
	kernel_init_freeable();
	/* need to finish all async __init code before freeing the memory */
	async_synchronize_full();
	ftrace_free_init_mem();
	/* Free the memory occupied by the init sections. */
	free_initmem();
	mark_readonly();
	/* From here on the system is considered to be running. */
	system_state = SYSTEM_RUNNING;
	/* Set the default NUMA memory policy. */
	numa_default_policy();
	/* NOTE(review): presumably tells RCU that in-kernel boot has ended - confirm. */
	rcu_end_inkernel_boot();

	pr_emerg("run init\n");
	/*
	 * kernel_init_freeable() defaults ramdisk_execute_command to "/init"
	 * and resets it to NULL when that path is not accessible.
	 */
	if (ramdisk_execute_command) {
		ret = run_init_process(ramdisk_execute_command);
		/* A zero return means the init program was started. */
		if (!ret)
			return 0;
		pr_err("Failed to execute %s (error %d)\n",
		       ramdisk_execute_command, ret);
	}

	/*
	 * We try each of these until one succeeds.
	 *
	 * The Bourne shell can be used instead of init if we are
	 * trying to recover a really broken machine.
	 */
	/*
	 * If execute_command is set, it must work: a failure to run it is
	 * fatal and the fallback list below is not tried.
	 */
	if (execute_command) {
		ret = run_init_process(execute_command);
		if (!ret)
			return 0;
		panic("Requested init %s failed (error %d).",
		      execute_command, ret);
	}
	/*
	 * Neither ramdisk_execute_command nor execute_command started an
	 * init program: try /sbin/init, /etc/init, /bin/init and /bin/sh in
	 * that order and stop at the first one that starts.
	 */
	if (!try_to_run_init_process("/sbin/init") ||
	    !try_to_run_init_process("/etc/init") ||
	    !try_to_run_init_process("/bin/init") ||
	    !try_to_run_init_process("/bin/sh"))
		return 0;

	panic("No working init found.  Try passing init= option to kernel. "
	      "See Linux Documentation/admin-guide/init.rst for guidance.");
}

Perform some initialization operations of init process

/*
 * kernel_init_freeable - the __init part of the init thread's start-up work.
 *
 * Waits for kthreadd to be ready, brings up SMP and the remaining kernel
 * subsystems (including do_basic_setup()), opens /dev/console and duplicates
 * file descriptor 0 twice, and finally decides which early user-space init to
 * use: ramdisk_execute_command defaults to "/init" and is reset to NULL,
 * followed by prepare_namespace(), when that path is not accessible.
 */
static noinline void __init kernel_init_freeable(void)
{
	/*
	 * Wait until kthreadd is all set-up.
	 */
	wait_for_completion(&kthreadd_done);

	/* Now the scheduler is fully set up and can do blocking allocations */
	gfp_allowed_mask = __GFP_BITS_MASK;

	/*
	 * init can allocate pages on any node
	 */
	set_mems_allowed(node_states[N_MEMORY]);

	cad_pid = task_pid(current);

	smp_prepare_cpus(setup_max_cpus);

	workqueue_init();

	init_mm_internals();

	do_pre_smp_initcalls();
	lockup_detector_init();

	smp_init();
	sched_init_smp();

	page_alloc_init_late();
	/* Initialize page ext after all struct pages are initialized. */
	page_ext_init();

	do_basic_setup();

	/* NOTE(review): not defined in this excerpt; purpose unknown from here - confirm. */
	test_executor_init();

	/* Open the /dev/console on the rootfs, this should never fail */
	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
		pr_err("Warning: unable to open an initial console.\n");

	/*
	 * Duplicate descriptor 0 twice.
	 * NOTE(review): presumably this yields descriptors 1 and 2 (stdout and
	 * stderr) on the console - confirm.
	 */
	(void) sys_dup(0);
	(void) sys_dup(0);
	/*
	 * check if there is an early userspace init.  If yes, let it do all
	 * the work
	 */

	if (!ramdisk_execute_command)
		ramdisk_execute_command = "/init";

	/* No accessible early init: forget it and call prepare_namespace() instead. */
	if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
		ramdisk_execute_command = NULL;
		prepare_namespace();
	}

	/*
	 * Ok, we have completed the initial bootup, and
	 * we're essentially up and running. Get rid of the
	 * initmem segments and start the user-mode stuff..
	 *
	 * rootfs is available now, try loading the public keys
	 * and default modules
	 */

	integrity_load_keys();
	load_default_modules();
}
/*
 * Ok, the machine is now initialized. None of the devices
 * have been touched yet, but the CPU subsystem is up and
 * running, and memory and process management works.
 *
 * Now we can finally start doing some real work..
 *
 * do_basic_setup() is called from kernel_init_freeable() and runs the steps
 * below strictly in this order.
 */
static void __init do_basic_setup(void)
{
	/* On SMP systems, initialize the cpuset subsystem of the kernel control groups. */
	cpuset_init_smp();
	/* Initialize shared memory. */
	shmem_init();
	/* Initialize the device driver model. */
	driver_init();
	/* Create the /proc/irq directory and a subdirectory for each interrupt in the system. */
	init_irq_proc();
	/* Run the kernel constructors. */
	do_ctors();
	/* Enable the usermodehelper. */
	usermodehelper_enable();
	/*
	 * Walk the initcall_levels array and call every initcall function in
	 * it. This is where most devices, drivers and file systems get
	 * initialized; keeping the functions in an array makes the mechanism
	 * easy to extend.
	 */
	do_initcalls();
}

The above covers how the kernel starts init. Next, let's look at what init does once it is running, starting with its main function.

3, Init process entry

system/core/init/main.cpp

/*
 * Entry point of the init binary. The same executable serves several roles;
 * which one runs is selected from the program name and the first argument:
 *
 *   1. basename(argv[0]) is "ueventd"  -> ueventd_main()
 *   2. argv[1] is "subcontext"         -> InitLogging() and SubcontextMain()
 *   3. argv[1] is "selinux_setup"      -> SetupSelinux()
 *   4. argv[1] is "second_stage"       -> SecondStageMain()
 *   5. anything else (or no argument)  -> FirstStageMain()
 *
 * argc is the number of arguments and argv is the argument list. The value
 * returned by the selected entry point becomes the return value of main().
 */
int main(int argc, char** argv) {
    // The program-name check takes precedence over any argument.
    const bool invoked_as_ueventd = strcmp(basename(argv[0]), "ueventd") == 0;
    if (invoked_as_ueventd) {
        return ueventd_main(argc, argv);
    }

    // Without an explicit stage argument, run the first stage of init.
    if (argc <= 1) {
        return FirstStageMain(argc, argv);
    }

    const char* stage = argv[1];

    if (strcmp(stage, "subcontext") == 0) {
        // The subcontext role initializes logging to the kernel logger first.
        android::base::InitLogging(argv, &android::base::KernelLogger);
        const BuiltinFunctionMap function_map;
        return SubcontextMain(argc, argv, &function_map);
    }

    if (strcmp(stage, "selinux_setup") == 0) {
        return SetupSelinux(argv);
    }

    if (strcmp(stage, "second_stage") == 0) {
        return SecondStageMain(argc, argv);
    }

    // An unrecognized argument also falls back to the first stage.
    return FirstStageMain(argc, argv);
}
3.1 ueventd_main

system/core/init/ueventd.cpp

// Entry point of ueventd: sets up logging and SELinux labelling, parses the ueventd.rc
// configuration files, performs the cold boot pass if it has not been done yet, and then
// handles uevents in a polling loop.
// NOTE: this listing is abridged - hardware, uevent_listener and uevent_handlers are not
// declared in this excerpt.
int ueventd_main(int argc, char** argv) {
    // Clear the umask so that the mode requested when a file is created is applied unmodified
    // (the umask removes permission bits, so it works in the opposite direction to chmod).
    umask(000); 
 
    // Send log output to the kernel log. logd and logcat are not running yet at this point,
    // so the kernel log is used instead; it can be read with "cat /dev/kmsg".
    android::base::InitLogging(argv, &android::base::KernelLogger);
 
    // Register the SELinux callbacks used for printing logs.
    SelinuxSetupKernelLogging(); 
    SelabelInitialize();
 
    // Parse the ueventd.rc configuration files, including the hardware-specific
    // /ueventd.<hardware>.rc file.
    auto ueventd_configuration = ParseConfig({"/ueventd.rc", "/vendor/ueventd.rc",
                                              "/odm/ueventd.rc", "/ueventd." + hardware + ".rc"});
 
    // Cold boot: run it only if the COLDBOOT_DONE marker does not exist yet.
    if (access(COLDBOOT_DONE, F_OK) != 0) {
        ColdBoot cold_boot(uevent_listener, uevent_handlers);
        cold_boot.Run();
    }
    for (auto& uevent_handler : uevent_handlers) {
        uevent_handler->ColdbootDone();
    }
 
    // Ignore the child-termination signal.
    signal(SIGCHLD, SIG_IGN);
    // Reap any pending children that exited between the last call to waitpid() and setting
    // SIG_IGN for SIGCHLD above.
    while (waitpid(-1, nullptr, WNOHANG) > 0) {
    }
 
    // Listen for uevents from the drivers and dispatch each one to every handler ("hot plug").
    uevent_listener.Poll([&uevent_handlers](const Uevent& uevent) {
        for (auto& uevent_handler : uevent_handlers) {
            uevent_handler->HandleUevent(uevent); // Hot plug: handle the event, e.g. create the device node
        }
        return ListenerAction::kContinue;
    });
    return 0;
}

ueventd is a child process started by init. It creates device node files in two ways:

  1. "Cold plug": based on predefined device information, the device node files are created in one pass after ueventd starts. Device node files of this type are also called static node files.
  2. "Hot plug": when a device is plugged in while the system is running (for example into a USB port), ueventd receives the event and dynamically creates a device node file for the new device. Device node files of this type are also called dynamic node files.

The main operations are as follows:

  • Set the file creation mask (umask)
  • Initialize the kernel log (/dev/kmsg). You can read the kernel log with cat /dev/kmsg
  • Register the SELinux callback functions used for printing logs
  • Parse the ueventd.rc files, including the hardware-specific rc file for the SoC vendor
  • Perform the cold boot (cold plug) pass
  • Listen for hot-plug events and handle them
3.2 FirstStageMain

The first stage mainly completed:

  1. Mount partition
  2. Create device nodes and key directories
  3. Initialize logging system
  4. Start selinux security policy
system/core/init/first_stage_init.cpp
// First stage of init: resets the environment, mounts the early pseudo file systems, creates the
// essential device nodes, mounts the required partitions and finally re-executes
// /system/bin/init with the "selinux_setup" argument.
// NOTE: this listing is abridged (see the "..." below). CHECKCALL, old_root_dir, old_root_info
// and start_time are not defined in this excerpt.
// The function only reaches "return 1" if the final execv() fails.
int FirstStageMain(int argc, char** argv) {
    // Reboot to the bootloader if init crashes.
    // NOTE(review): presumably this installs handlers for fatal signals such as SIGABRT and
    // SIGBUS that reboot the device - confirm against InstallRebootSignalHandlers().
    if (REBOOT_BOOTLOADER_ON_PANIC) {
        InstallRebootSignalHandlers();
    }
    // Clear the umask so that the modes passed to mkdir()/mknod() below are applied unmodified.
    umask(0);
 
    CHECKCALL(clearenv());
    CHECKCALL(setenv("PATH", _PATH_DEFPATH, 1));
 
    // Set up the basic file system in RAM; the rest is set up later by the rc files.
    CHECKCALL(mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755"));
    CHECKCALL(mkdir("/dev/pts", 0755));
    CHECKCALL(mkdir("/dev/socket", 0755));
    CHECKCALL(mount("devpts", "/dev/pts", "devpts", 0, NULL));
#define MAKE_STR(x) __STRING(x)
    CHECKCALL(mount("proc", "/proc", "proc", 0, "hidepid=2,gid=" MAKE_STR(AID_READPROC)));
#undef MAKE_STR
 
    // Restrict /proc/cmdline to mode 0440 so that unprivileged processes cannot read the
    // kernel command line.
    CHECKCALL(chmod("/proc/cmdline", 0440));
    gid_t groups[] = {AID_READPROC};
    CHECKCALL(setgroups(arraysize(groups), groups));
    CHECKCALL(mount("sysfs", "/sys", "sysfs", 0, NULL));
    CHECKCALL(mount("selinuxfs", "/sys/fs/selinux", "selinuxfs", 0, NULL));
 
    CHECKCALL(mknod("/dev/kmsg", S_IFCHR | 0600, makedev(1, 11)));
 
    if constexpr (WORLD_WRITABLE_KMSG) {
        CHECKCALL(mknod("/dev/kmsg_debug", S_IFCHR | 0622, makedev(1, 11)));
    }
 
    CHECKCALL(mknod("/dev/random", S_IFCHR | 0666, makedev(1, 8)));
    CHECKCALL(mknod("/dev/urandom", S_IFCHR | 0666, makedev(1, 9)));
 
 
    // This is required for the log wrapper, which is called before ueventd runs.
    CHECKCALL(mknod("/dev/ptmx", S_IFCHR | 0666, makedev(5, 2)));
    CHECKCALL(mknod("/dev/null", S_IFCHR | 0666, makedev(1, 3)));
 
 
    // Mount a tmpfs on /mnt and create /mnt/vendor and /mnt/product in it. Other partitions do
    // not need to be mounted in the first stage; they are mounted in the second stage when the
    // rc files are parsed.
    CHECKCALL(mount("tmpfs", "/mnt", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
                    "mode=0755,uid=0,gid=1000"));
    
    // Create the /mnt/vendor directory (mode 0755).
    CHECKCALL(mkdir("/mnt/vendor", 0755));
    // /mnt/product is used to mount product-specific partitions that can not be
    // part of the product partition, e.g. because they are mounted read-write.
    CHECKCALL(mkdir("/mnt/product", 0755));
 
    // Mount a tmpfs on /apex. APEX was introduced in Android 10 to reduce fragmentation: it is
    // a modular mechanism that strengthens Treble, so that system components can be updated
    // individually (much like upgrading an APK) without upgrading the entire system.
    CHECKCALL(mount("tmpfs", "/apex", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
                    "mode=0755,uid=0,gid=0"));
 
    // /debug_ramdisk is used to preserve additional files from the debug ramdisk
    CHECKCALL(mount("tmpfs", "/debug_ramdisk", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
                    "mode=0755,uid=0,gid=0"));
#undef CHECKCALL
 
    // Redirect standard input, standard output and standard error to /dev/null.
    SetStdioToDevNull(argv);
    // Initialize kernel logging now that tmpfs is mounted on /dev and /dev/kmsg exists,
    // so that init can print logs.
    InitKernelLogging(argv);
 
    ...
 
    /* Mount the partitions that are required early.
     * This mainly parses /proc/device-tree/firmware/android/fstab and obtains the mount
     * information for "/system", "/vendor" and "/odm".
     */
    if (!DoFirstStageMount()) {
        LOG(FATAL) << "Failed to mount required partitions early ...";
    }
 
    // If the new root cannot be stat()ed, drop old_root_dir so that the ramdisk is not freed.
    struct stat new_root_info;
    if (stat("/", &new_root_info) != 0) {
        PLOG(ERROR) << "Could not stat(\"/\"), not freeing ramdisk";
        old_root_dir.reset();
    }
 
    // Free the ramdisk only when the root file system is now on a different device.
    if (old_root_dir && old_root_info.st_dev != new_root_info.st_dev) {
        FreeRamdisk(old_root_dir.get(), old_root_info.st_dev);
    }
 
    SetInitAvbVersionInRecovery();
 
    // Export the init start time, converted from nanoseconds to milliseconds.
    static constexpr uint32_t kNanosecondsPerMillisecond = 1e6;
    uint64_t start_ms = start_time.time_since_epoch().count() / kNanosecondsPerMillisecond;
    setenv("INIT_STARTED_AT", std::to_string(start_ms).c_str(), 1);
 
    // Re-execute init with the argument selinux_setup,
    // i.e. run the command: /system/bin/init selinux_setup
    const char* path = "/system/bin/init";
    const char* args[] = {path, "selinux_setup", nullptr};
    execv(path, const_cast<char**>(args));
    // execv() only returns on failure.
    PLOG(FATAL) << "execv(\"" << path << "\") failed";
 
    return 1;
}
3.3 SetupSelinux

This stage mainly does the following: initializes SELinux, loads the SELinux rules, configures SELinux-related log output, and starts the second stage.
system/core/init/selinux.cpp

// Initializes SELinux and then re-executes /system/bin/init with the "second_stage" argument.
// The function only reaches "return 1" if the final execv() fails.
int SetupSelinux(char** argv) {
    // Initialize kernel logging.
    InitKernelLogging(argv);
 
    // Reboot to the bootloader if init crashes (only when REBOOT_BOOTLOADER_ON_PANIC is set).
    if (REBOOT_BOOTLOADER_ON_PANIC) {
        InstallRebootSignalHandlers();
    }
 
    // Register a callback so that SELinux log messages are written to kmsg.
    SelinuxSetupKernelLogging();
   
    // Load the SELinux rules.
    SelinuxInitialize();
 
    /*
     * We are in the kernel domain and want to switch to the init domain. File systems that
     * store selabels in xattrs (such as ext4) do not need an explicit restorecon, but other
     * file systems do. This step is necessary in particular for a ramdisk, such as the
     * recovery image of A/B devices.
     * In other words: after SELinux has been loaded in the kernel domain, init has to be
     * executed again to switch to the init domain.
     */
    if (selinux_android_restorecon("/system/bin/init", 0) == -1) {
        PLOG(FATAL) << "restorecon failed of /system/bin/init failed";
    }
 
    // Re-execute init with the argument second_stage,
    // i.e. run /system/bin/init second_stage to enter the second stage.
    const char* path = "/system/bin/init";
    const char* args[] = {path, "second_stage", nullptr};
    execv(path, const_cast<char**>(args));
 
    /*
     * execv() only returns on failure.
     */
    PLOG(FATAL) << "execv(\"" << path << "\") failed";
 
    return 1;
}
3.4 SecondStageMain
  1. Create a process session keyring and initialize the property system
  2. Perform the second phase of SELinux setup and restore some file security contexts
  3. Create a new epoll instance and initialize the handler for child-process termination signals
  4. Start the property service
  5. Parse init.rc and the other rc files, build the actions and services they define, and start the other processes

This stage is quite complicated, so I mainly studied how the rc files are parsed.
In the past, each process was started by passing parameters to exec. Starting every process that way would be very cumbersome, so the init.rc mechanism was introduced.

init.rc file parsing

init.rc mainly contains five types of statements: Action, Command, Service, Option and Import.

An action starts with "on", contains a trigger, and consists of a group of commands.

command common commands:

    class_start <service_class_name>: Start all services that belong to the specified class
    class_stop <service_class_name>: Stop all services of the specified class
    start <service_name>: Start the specified service; skipped if it is already running
    stop <service_name>: Stop a running service
    setprop <name> <value>: Set a property value
    mkdir <path>: Create the specified directory
    symlink <target> <sym_link>: Create a symbolic link <sym_link> that points to <target>
    write <path> <string>: Write a string to the file at <path>
    exec: Fork and execute a program; init is blocked until the program completes
    export <name> <value>: Set an environment variable
    loglevel <level>: Set the log level
    hostname <name>: Set the host name
    import <filename>: Import an additional init configuration file

options:

Options are modifiers of a service and are used together with a service declaration.

    disabled: The service is not started automatically with its class; it must be started explicitly by its service name
    oneshot: Do not restart the service after it exits
    user/group: Set the user / user group that the service runs as; both default to root
    class: Set the class name of the service. When the class is started/stopped, the service is started/stopped with it; the default class is "default"
    onrestart: Execute the given command when the service restarts
    socket: Create a socket named /dev/socket/<name>
    critical: If the service restarts repeatedly within a specified time, the system reboots into recovery mode

By default none of these flags is set, i.e. disabled=false, oneshot=false, critical=false.

Parsing init.rc
system/core/init/init.cpp

// Parses the boot scripts into action_manager and service_list.
//
// Selection order:
//   1. If the property ro.boot.init_rc is non-empty, only that script is parsed.
//   2. Otherwise, if ro.bootmode is "charger", only /vendor/etc/init/charge.rc is parsed.
//   3. Otherwise /init.rc is parsed, followed by the init directories of each partition;
//      a directory that fails to parse is appended to late_import_paths.
static void LoadBootScripts(ActionManager& action_manager, ServiceList& service_list) {
    Parser parser = CreateParser(action_manager, service_list);

    const std::string custom_script = GetProperty("ro.boot.init_rc", "");
    if (!custom_script.empty()) {
        parser.ParseConfig(custom_script);
        return;
    }

    if (GetProperty("ro.bootmode", "") == "charger") {
        parser.ParseConfig("/vendor/etc/init/charge.rc");
        return;
    }

    parser.ParseConfig("/init.rc");

    // Per-partition script directories, in the order in which they are parsed.
    static const char* const kInitDirs[] = {
        "/system/etc/init",
        "/product/etc/init",
        "/product_services/etc/init",
        "/odm/etc/init",
        "/vendor/etc/init",
    };
    for (const char* init_dir : kInitDirs) {
        const bool parsed = parser.ParseConfig(init_dir);
        if (!parsed) {
            late_import_paths.emplace_back(init_dir);
        }
    }
}

Create the parser object and register the section parsers for "service", "on" and "import":

// Builds the Parser used for the init rc files and registers one section parser per
// top-level keyword:
//   "service" -> ServiceParser, which fills service_list
//   "on"      -> ActionParser, which fills action_manager
//   "import"  -> ImportParser, which is given a pointer to the parser itself
// Returns the configured parser by value.
Parser CreateParser(ActionManager& action_manager, ServiceList& service_list) {
    Parser parser;

    parser.AddSectionParser("service", std::make_unique<ServiceParser>(&service_list, subcontexts));
    parser.AddSectionParser("on", std::make_unique<ActionParser>(&action_manager, subcontexts));
    // NOTE(review): ImportParser receives the address of this local object, which is then
    // returned by value - presumably this relies on the return not copying/moving the
    // parser to a different address; confirm.
    parser.AddSectionParser("import", std::make_unique<ImportParser>(&parser));

    return parser;
}

init.rc contains the line "import /init.${ro.zygote}.rc", so the value of ro.zygote determines which zygote rc file is loaded.
The init.zygoteXXX.rc files are located under /system/core/rootdir; this example uses init.zygote32.rc.

service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-system-server
    class main
    priority -20
    
     //Set user root
    user root  
    
    //Run with primary group root and the supplementary groups readproc and reserved_disk
    group root readproc reserved_disk
    
    //Create a stream-type Unix domain socket named zygote (mode 660, owner root, group system); it appears as /dev/socket/zygote
    socket zygote stream 660 root system
    socket usap_pool_primary stream 660 root system
    
    // onrestart means to execute the following commands when the process restarts
    onrestart write /sys/android_power/request_state wake
    onrestart write /sys/power/state on
    onrestart restart audioserver
    onrestart restart cameraserver
    onrestart restart media
    onrestart restart netd
    onrestart restart wificond
    
    // When a child process is created, write its pid to /dev/cpuset/foreground/tasks
    writepid /dev/cpuset/foreground/tasks

service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-system-server

This defines a service named zygote that executes the /system/bin/app_process binary and passes it four arguments:
-Xzygote ----> passed on as a parameter required for starting the virtual machine
/system/bin ----> the directory in which the virtual machine program is located
--zygote ----> use the main function of the ZygoteInit.java class as the execution entry point of the virtual machine
--start-system-server ----> start the system_server process

*The above is my study record of how the init process is started and of its main stages. The original author's analysis is very clear, and I like it! There are still many things I do not understand yet that need follow-up study.

Added by drewbie on Mon, 17 Jan 2022 08:20:46 +0200