一、前言

在init进程启动之前,Android系统的启动主要执行了如下三个部分操作。

1.启动电源,加载引导程序

接通电源并启动时,引导芯片代码从预定义的地方(固化在ROM)开始执行,加载引导程序 BootLoader 到RAM中。

2.执行引导程序BootLoader

Android系统运行前会先运行 BootLoader,它的作用是拉起并运行系统OS。

3.启动Linux内核

在内核的启动过程中,会设置缓存、加载驱动等。当内核完成系统设置后,它会在文件系统中寻找 init 可执行文件,并启动 init 进程(init.rc 文件则由 init 进程自身在后续阶段解析)。

下面讲解init进程启动过程。

二、init进程启动

文中所涉及的源码版本为 Android 14

1.init进程的入口函数

/system/core/init/main.cpp

int main(int argc, char** argv) {#if __has_feature(address_sanitizer)__asan_set_error_report_callback(AsanReportCallback);#elif __has_feature(hwaddress_sanitizer)__hwasan_set_error_report_callback(AsanReportCallback);#endif// Boost prio which will be restored latersetpriority(PRIO_PROCESS, 0, -20);if (!strcmp(basename(argv[0]), "ueventd")) {return ueventd_main(argc, argv);}if (argc > 1) {if (!strcmp(argv[1], "subcontext")) {android::base::InitLogging(argv, &android::base::KernelLogger);const BuiltinFunctionMap& function_map = GetBuiltinFunctionMap();return SubcontextMain(argc, argv, &function_map);}if (!strcmp(argv[1], "selinux_setup")) {// SELinux 初始化return SetupSelinux(argv);}if (!strcmp(argv[1], "second_stage")) {// 第二阶段return SecondStageMain(argc, argv);}}// 第一阶段return FirstStageMain(argc, argv);}

init进程的创建是以该main函数作为入口函数开始的,在main函数中,根据参数的不同,初始化的内容有所不同。在kernel启动进来的时候是不带其他 argv 参数的,就直接走到了FirstStageMain。

2.init启动第一阶段

/system/core/init/first_stage_init.cpp

int FirstStageMain(int argc, char** argv) {if (REBOOT_BOOTLOADER_ON_PANIC) {InstallRebootSignalHandlers();}boot_clock::time_point start_time = boot_clock::now();std::vector<std::pair> errors;#define CHECKCALL(x) \if ((x) != 0) errors.emplace_back(#x " failed", errno);// Clear the umask.// 清空文件权限,设置 0777umask(0);CHECKCALL(clearenv());CHECKCALL(setenv("PATH", _PATH_DEFPATH, 1));// Get the basic filesystem setup we need put together in the initramdisk// on / and then we'll let the rc file figure out the rest.// 挂载 dev 目录CHECKCALL(mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755"));// 远程登录后创建的控制台设备文件所在的目录CHECKCALL(mkdir("/dev/pts", 0755));// socket节点所在目录CHECKCALL(mkdir("/dev/socket", 0755));CHECKCALL(mkdir("/dev/dm-user", 0755));CHECKCALL(mount("devpts", "/dev/pts", "devpts", 0, NULL));#define MAKE_STR(x) __STRING(x)// 挂载 proc 目录CHECKCALL(mount("proc", "/proc", "proc", 0, "hidepid=2,gid=" MAKE_STR(AID_READPROC)));#undef MAKE_STR// Don't expose the raw commandline to unprivileged processes.CHECKCALL(chmod("/proc/cmdline", 0440));std::string cmdline;android::base::ReadFileToString("/proc/cmdline", &cmdline);// Don't expose the raw bootconfig to unprivileged processes.chmod("/proc/bootconfig", 0440);std::string bootconfig;android::base::ReadFileToString("/proc/bootconfig", &bootconfig);gid_t groups[] = {AID_READPROC};CHECKCALL(setgroups(arraysize(groups), groups));// 创建和挂载启动所需要的文件目录// 挂载 sys 目录,用来访问内核信息CHECKCALL(mount("sysfs", "/sys", "sysfs", 0, NULL));// 挂载 /sys/fs/selinux 目录,selinux相关节点所在目录CHECKCALL(mount("selinuxfs", "/sys/fs/selinux", "selinuxfs", 0, NULL));// 创建 kmsg 节点,用于保存 kenel logCHECKCALL(mknod("/dev/kmsg", S_IFCHR | 0600, makedev(1, 11)));if constexpr (WORLD_WRITABLE_KMSG) {CHECKCALL(mknod("/dev/kmsg_debug", S_IFCHR | 0622, makedev(1, 11)));}CHECKCALL(mknod("/dev/random", S_IFCHR | 0666, makedev(1, 8)));CHECKCALL(mknod("/dev/urandom", S_IFCHR | 0666, makedev(1, 9)));// This is needed for log wrapper, which gets called before ueventd 
runs.CHECKCALL(mknod("/dev/ptmx", S_IFCHR | 0666, makedev(5, 2)));CHECKCALL(mknod("/dev/null", S_IFCHR | 0666, makedev(1, 3)));// 挂载 mnt 目录,用于挂载光驱和 usb 设备CHECKCALL(mount("tmpfs", "/mnt", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,"mode=0755,uid=0,gid=1000"));CHECKCALL(mkdir("/mnt/vendor", 0755));CHECKCALL(mkdir("/mnt/product", 0755));CHECKCALL(mount("tmpfs", "/debug_ramdisk", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,"mode=0755,uid=0,gid=0"));// stage initCHECKCALL(mount("tmpfs", kSecondStageRes, "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,"mode=0755,uid=0,gid=0"))#undef CHECKCALLSetStdioToDevNull(argv);// 初始化 kernel 的log,输出定向到 /dev/kmsg,这样就可以从外界获取 kernel 的日志InitKernelLogging(argv);if (!errors.empty()) {for (const auto& [error_string, error_errno] : errors) {LOG(ERROR) << error_string << " " << strerror(error_errno);}LOG(FATAL) << "Init encountered errors starting first stage, aborting";}LOG(INFO) << "init first stage started!";auto old_root_dir = std::unique_ptr{opendir("/"), closedir};if (!old_root_dir) {PLOG(ERROR) << "Could not opendir(\"/\"), not freeing ramdisk";}struct stat old_root_info;if (stat("/", &old_root_info) != 0) {PLOG(ERROR) << "Could not stat(\"/\"), not freeing ramdisk";old_root_dir.reset();}auto want_console = ALLOW_FIRST_STAGE_CONSOLE ? 
FirstStageConsole(cmdline, bootconfig) : 0;auto want_parallel =bootconfig.find("androidboot.load_modules_parallel = \"true\"") != std::string::npos;boot_clock::time_point module_start_time = boot_clock::now();int module_count = 0;if (!LoadKernelModules(IsRecoveryMode() && !ForceNormalBoot(cmdline, bootconfig), want_console, want_parallel, module_count)) {if (want_console != FirstStageConsoleParam::DISABLED) {LOG(ERROR) << "Failed to load kernel modules, starting console";} else {LOG(FATAL) < 0) {auto module_elapse_time = std::chrono::duration_cast(boot_clock::now() - module_start_time);setenv(kEnvInitModuleDurationMs, std::to_string(module_elapse_time.count()).c_str(), 1);LOG(INFO) << "Loaded " << module_count << " kernel modules took "<< module_elapse_time.count() << " ms";}bool created_devices = false;if (want_console == FirstStageConsoleParam::CONSOLE_ON_FAILURE) {if (!IsRecoveryMode()) {created_devices = DoCreateDevices();if (!created_devices) {LOG(ERROR) << "Failed to create device nodes early";}}StartConsole(cmdline);}if (access(kBootImageRamdiskProp, F_OK) == 0) {std::string dest = GetRamdiskPropForSecondStage();std::string dir = android::base::Dirname(dest);std::error_code ec;if (!fs::create_directories(dir, ec) && !!ec) {LOG(FATAL) << "Can't mkdir " << dir << ": " << ec.message();}if (!fs::copy_file(kBootImageRamdiskProp, dest, ec)) {LOG(FATAL) << "Can't copy " << kBootImageRamdiskProp << " to " << dest << ": " << ec.message();}LOG(INFO) << "Copied ramdisk prop to " << dest;}// If "/force_debuggable" is present, the second-stage init will use a userdebug// sepolicy and load adb_debug.prop to allow adb root, if the device is unlocked.if (access("/force_debuggable", F_OK) == 0) {constexpr const char adb_debug_prop_src[] = "/adb_debug.prop";constexpr const char userdebug_plat_sepolicy_cil_src[] = "/userdebug_plat_sepolicy.cil";std::error_code ec;// to invoke the overloaded copy_file() that won't throw.if (access(adb_debug_prop_src, F_OK) == 0 
&&!fs::copy_file(adb_debug_prop_src, kDebugRamdiskProp, ec)) {LOG(WARNING) << "Can't copy " << adb_debug_prop_src << " to " << kDebugRamdiskProp << ": " << ec.message();}if (access(userdebug_plat_sepolicy_cil_src, F_OK) == 0 &&!fs::copy_file(userdebug_plat_sepolicy_cil_src, kDebugRamdiskSEPolicy, ec)) {LOG(WARNING) << "Can't copy " << userdebug_plat_sepolicy_cil_src << " to " << kDebugRamdiskSEPolicy << ": " << ec.message();}// setenv for second-stage init to read above kDebugRamdisk* files.setenv("INIT_FORCE_DEBUGGABLE", "true", 1);}if (ForceNormalBoot(cmdline, bootconfig)) {mkdir("/first_stage_ramdisk", 0755);PrepareSwitchRoot();// SwitchRoot() must be called with a mount point as the target, so we bind mount the// target directory to itself here.if (mount("/first_stage_ramdisk", "/first_stage_ramdisk", nullptr, MS_BIND, nullptr) != 0) {PLOG(FATAL) << "Could not bind mount /first_stage_ramdisk to itself";}SwitchRoot("/first_stage_ramdisk");}if (!DoFirstStageMount(!created_devices)) {LOG(FATAL) << "Failed to mount required partitions early ...";}struct stat new_root_info;if (stat("/", &new_root_info) != 0) {PLOG(ERROR) << "Could not stat(\"/\"), not freeing ramdisk";old_root_dir.reset();}if (old_root_dir && old_root_info.st_dev != new_root_info.st_dev) {FreeRamdisk(old_root_dir.get(), old_root_info.st_dev);}SetInitAvbVersionInRecovery();setenv(kEnvFirstStageStartedAt, std::to_string(start_time.time_since_epoch().count()).c_str(), 1);// 找到 system 分区下的 init 的二进制文件目录const char* path = "/system/bin/init";// 重启 init 进程,进行 selinux 初始化工作const char* args[] = {path, "selinux_setup", nullptr};auto fd = open("/dev/kmsg", O_WRONLY | O_CLOEXEC);dup2(fd, STDOUT_FILENO);dup2(fd, STDERR_FILENO);close(fd);// 通过 execv 来重新启动 init 进程execv(path, const_cast(args));// execv() only returns if an error happened, in which case we// panic and never fall through this conditional.PLOG(FATAL) << "execv(\"" << path << "\") failed";return 1;}

在第一阶段中,会创建并挂载一些基本的目录,然后初始化 kernel log 等。在第一阶段 init 完成后,会使用 selinux_setup 参数执行 /system/bin/init。流程就会回到 init 进程的入口 main 函数,参数中含有selinux_setup,此时会执行SetupSelinux 函数。

3.SELinux相关初始化

/system/core/init/selinux.cpp

int SetupSelinux(char** argv) { SetStdioToDevNull(argv); InitKernelLogging(argv); if (REBOOT_BOOTLOADER_ON_PANIC) { InstallRebootSignalHandlers(); } boot_clock::time_point start_time = boot_clock::now(); MountMissingSystemPartitions(); SelinuxSetupKernelLogging(); LOG(INFO) <StartTransition();}LoadSelinuxPolicy(policy);if (snapuserd_helper) {// Before enforcing, finish the pending snapuserd transition.snapuserd_helper->FinishTransition();snapuserd_helper = nullptr;}// This restorecon is intentionally done before SelinuxSetEnforcement because the permissions// needed to transition files from tmpfs to *_contexts_file context should not be granted to// any process after selinux is set into enforcing mode.if (selinux_android_restorecon("/dev/selinux/", SELINUX_ANDROID_RESTORECON_RECURSE) == -1) { PLOG(FATAL) << "restorecon failed of /dev/selinux failed";}SelinuxSetEnforcement();// We're in the kernel domain and want to transition to the init domain.File systems that// store SELabels in their xattrs, such as ext4 do not need an explicit restorecon here,// but other file systems do.In particular, this is needed for ramdisks such as the// recovery image for A/B devices.if (selinux_android_restorecon("/system/bin/init", 0) == -1) {PLOG(FATAL) << "restorecon failed of /system/bin/init failed";}setenv(kEnvSelinuxStartedAt, std::to_string(start_time.time_since_epoch().count()).c_str(), 1);// 确认 init 二进制程序路径const char* path = "/system/bin/init";// 重启 init 进入第二阶段const char* args[] = {path, "second_stage", nullptr};execv(path, const_cast(args));// execv() only returns if an error happened, in which case we// panic and never return from this function.PLOG(FATAL) << "execv(\"" << path << "\") failed";return 1;}

该阶段会进行 SELinux 的相关初始化,包括加载 SELinux 策略文件等。完成后会使用 second_stage 参数执行 /system/bin/init,流程就会回到 init 进程的入口 main 函数,参数中含有second_stage,此时会执行SecondStageMain 函数。

4.init启动第二阶段

/system/core/init/init.cpp

int SecondStageMain(int argc, char** argv) {if (REBOOT_BOOTLOADER_ON_PANIC) {InstallRebootSignalHandlers();}// No threads should be spin up until signalfd// is registered. If the threads are indeed required,// each of these threads _should_ make sure SIGCHLD signal// is blocked. See b/223076262boot_clock::time_point start_time = boot_clock::now();trigger_shutdown = [](const std::string& command) { shutdown_state.TriggerShutdown(command); };SetStdioToDevNull(argv);InitKernelLogging(argv);LOG(INFO) << "init second stage started!";SelinuxSetupKernelLogging();// Update $PATH in the case the second stage init is newer than first stage init, where it is// first set.if (setenv("PATH", _PATH_DEFPATH, 1) != 0) {PLOG(FATAL) << "Could not set $PATH to '" << _PATH_DEFPATH << "' in second stage";}// Init should not crash because of a dependence on any other process, therefore we ignore// SIGPIPE and handle EPIPE at the call site directly.Note that setting a signal to SIG_IGN// is inherited across exec, but custom signal handlers are not.Since we do not want to// ignore SIGPIPE for child processes, we set a no-op function for the signal handler instead.{struct sigaction action = {.sa_flags = SA_RESTART};action.sa_handler = [](int) {};sigaction(SIGPIPE, &action, nullptr);}// Set init and its forked children's oom_adj.if (auto result =WriteFile("/proc/1/oom_score_adj", StringPrintf("%d", DEFAULT_OOM_SCORE_ADJUST));!result.ok()) {LOG(ERROR) << "Unable to write " << DEFAULT_OOM_SCORE_ADJUST << " to /proc/1/oom_score_adj: " << result.error();}// Set up a session keyring that all processes will have access to. It// will hold things like FBE encryption keys. 
No process should override// its session keyring.keyctl_get_keyring_ID(KEY_SPEC_SESSION_KEYRING, 1);// Indicate that booting is in progress to background fw loaders, etc.close(open("/dev/.booting", O_WRONLY | O_CREAT | O_CLOEXEC, 0000));// See if need to load debug props to allow adb root, when the device is unlocked.const char* force_debuggable_env = getenv("INIT_FORCE_DEBUGGABLE");bool load_debug_prop = false;if (force_debuggable_env && AvbHandle::IsDeviceUnlocked()) {load_debug_prop = "true"s == force_debuggable_env;}unsetenv("INIT_FORCE_DEBUGGABLE");// Umount the debug ramdisk so property service doesn't read .prop files from there, when it// is not meant to.if (!load_debug_prop) {UmountDebugRamdisk();}// 初始化属性服务PropertyInit();// Umount second stage resources after property service has read the .prop files.UmountSecondStageRes();// Umount the debug ramdisk after property service has read the .prop files when it means to.if (load_debug_prop) {UmountDebugRamdisk();}// Mount extra filesystems required during second stage initMountExtraFilesystems();// Now set up SELinux for second stage.SelabelInitialize();SelinuxRestoreContext();// 创建 epoll 描述符结合注册 socket 监听,对挂掉的子进程重启处理Epoll epoll;if (auto result = epoll.Open(); !result.ok()) {PLOG(FATAL) << result.error();}// We always reap children before responding to the other pending functions. This is to // prevent a race where other daemons see that a service has exited and ask init to // start it again via ctl.start before init has reaped it. epoll.SetFirstCallback(ReapAnyOutstandingChildren);// 子进程信号处理函数,如果子进程(Zygote进程)异常退出,init进程会调用该函数设定的信号函数来处理 // 主要用于防止 init 进程的子进程成为僵尸进程。 InstallSignalFdHandler(&epoll); InstallInitNotifier(&epoll); // 启动属性服务 StartPropertyService(&property_fd);// Make the time that init stages started available for bootstat to log. // 记录init阶段的启动时间 RecordStageBoottimes(start_time);// Set libavb version for Framework-only OTA match in Treble build. 
if (const char* avb_version = getenv("INIT_AVB_VERSION"); avb_version != nullptr) { SetProperty("ro.boot.avb_version", avb_version); } unsetenv("INIT_AVB_VERSION");fs_mgr_vendor_overlay_mount_all(); export_oem_lock_status(); MountHandler mount_handler(&epoll); SetUsbController(); SetKernelVersion();const BuiltinFunctionMap& function_map = GetBuiltinFunctionMap(); Action::set_function_map(&function_map);if (!SetupMountNamespaces()) { PLOG(FATAL) < Result { for (const auto& svc : ServiceList::GetInstance()) { keychords.Register(svc->keycodes()); } keychords.Start(&epoll, HandleKeychord); return {}; }, "KeychordInit");// Trigger all the boot actions to get us started. am.QueueEventTrigger("init");// Don't mount filesystems or start core system services in charger mode. std::string bootmode = GetProperty("ro.bootmode", ""); if (bootmode == "charger") { am.QueueEventTrigger("charger"); } else { am.QueueEventTrigger("late-init"); }// Run all property triggers based on current state of the properties. am.QueueBuiltinAction(queue_property_triggers_action, "queue_property_triggers");// Restore prio before main loop setpriority(PRIO_PROCESS, 0, 0); while (true) { // By default, sleep until something happens. Do not convert far_future into // std::chrono::milliseconds because that would trigger an overflow. The unit of boot_clock // is 1ns. const boot_clock::time_point far_future = boot_clock::time_point::max(); boot_clock::time_point next_action_time = far_future;auto shutdown_command = shutdown_state.CheckShutdown(); if (shutdown_command) { LOG(INFO) << "Got shutdown_command '" << *shutdown_command << "' Calling HandlePowerctlMessage()"; HandlePowerctlMessage(*shutdown_command); }if (!(prop_waiter_state.MightBeWaiting() || Service::is_exec_service_running())) { // 依次执行每个 action 中携带的 command 对应的执行函数 am.ExecuteOneCommand(); // If there's more work to do, wake up again immediately. 
if (am.HasMoreCommands()) { next_action_time = boot_clock::now(); } } // Since the above code examined pending actions, no new actions must be // queued by the code between this line and the Epoll::Wait() call below // without calling WakeMainInitThread(). if (!IsShuttingDown()) { auto next_process_action_time = HandleProcessActions();// If there's a process that needs restarting, wake up in time for that. if (next_process_action_time) { next_action_time = std::min(next_action_time, *next_process_action_time); } }std::optional epoll_timeout; if (next_action_time != far_future) { epoll_timeout = std::chrono::ceil( std::max(next_action_time - boot_clock::now(), 0ns)); } auto epoll_result = epoll.Wait(epoll_timeout); if (!epoll_result.ok()) { LOG(ERROR) << epoll_result.error(); } if (!IsShuttingDown()) { HandleControlMessages(); SetUsbController(); } } return 0;}

在该阶段,有一些初始化动作,包括初始化并启动属性服务;还创建了 epoll,监听子进程的状态,对挂掉的子进程进行重启处理;然后解析 init.rc 和其他 init.*.rc 文件(如 init.car.rc),并执行其中的命令。