对于某些加固壳,加载解释器 elf 的方式不会是常规的
System.loadLibrary
,而是仿照 System.loadLibrary
在 AOSP 中的实现方式,在 JNI 中自己实现 so 的加载
本次所使用到的 AOSP 的源码的安卓版本为 android-12.0.0_r34
# so 的启动过程
# System.load
在 Android 中我们想要加载一个 so 时,可以有两种方式
- 静态加载
System.loadLibrary("native");
这种加载方式下,要加载的 so 一般已经内置在 apk 的
lib/[arch]/
中
- 动态加载
String soPath = "/data/data/com.example.myapp/libmynative.so";
System.load(soPath);
这一种加载方式可以加载任意路径下的 so
这里我们将动态加载,即 System.load
函数作为分析的入口,可以发现它实际上调用的是 Runtime.getRuntime().load0
对于静态加载
System.loadLibrary
来说,它调用的函数是 Runtime.getRuntime().loadLibrary0(Reflection.getCallerClass(), libname);
//android-platform\libcore\ojluni\src\main\java\java\lang\System.java | |
/** | |
* Loads the native library specified by the filename argument. The filename | |
* argument must be an absolute path name. | |
* | |
* If the filename argument, when stripped of any platform-specific library | |
* prefix, path, and file extension, indicates a library whose name is, | |
* for example, L, and a native library called L is statically linked | |
* with the VM, then the JNI_OnLoad_L function exported by the library | |
* is invoked rather than attempting to load a dynamic library. | |
* A filename matching the argument does not have to exist in the | |
* file system. | |
* See the JNI Specification for more details. | |
* | |
* Otherwise, the filename argument is mapped to a native library image in | |
* an implementation-dependent manner. | |
* | |
* <p> | |
* The call <code>System.load(name)</code> is effectively equivalent | |
* to the call: | |
* <blockquote><pre> | |
* Runtime.getRuntime().load(name) | |
* </pre></blockquote> | |
* | |
* @param filename the file to load. | |
* @exception SecurityException if a security manager exists and its | |
* <code>checkLink</code> method doesn't allow | |
* loading of the specified dynamic library | |
* @exception UnsatisfiedLinkError if either the filename is not an | |
* absolute path name, the native library is not statically | |
* linked with the VM, or the library cannot be mapped to | |
* a native library image by the host system. | |
* @exception NullPointerException if <code>filename</code> is | |
* <code>null</code> | |
* @see java.lang.Runtime#load(java.lang.String) | |
* @see java.lang.SecurityManager#checkLink(java.lang.String) | |
*/ | |
@CallerSensitive | |
public static void load(String filename) { | |
Runtime.getRuntime().load0(Reflection.getCallerClass(), filename); | |
} |
Reflection.getCallerClass
第一个传入的参数 Reflection.getCallerClass()
相关的实现及注释如下,读起来可能会有点绕,什么叫返回方法的方法的调用者的类?
//android-platform\libcore\ojluni\src\main\java\sun\reflect\Reflection.java | |
/** Returns the class of the caller of the method calling this method, | |
ignoring frames associated with java.lang.reflect.Method.invoke() | |
and its implementation. * | |
@CallerSensitive | |
public static native Class<?> getCallerClass(); | |
*/ | |
public static Class<?> getCallerClass() { | |
// This method (getCallerClass()) constitutes another stack frame, | |
// so we need to call getStackClass2() rather than getStackClass1(). | |
return VMStack.getStackClass2(); | |
} |
其实按照逆向的角度来看,就是先跳过反射 (java.lang.reflect.Method.invoke() 及其实现) 相关的栈帧,再返回"调用了 getCallerClass 的那个方法"的调用者所属的类,例如有一个 Class A
经过如下的调用过程: Class A->反射1->反射2->反射n->...->System.load->Reflection.getCallerClass()
, 最终 getCallerClass
方法将返回 Class A
# Runtime.getRuntime().load0
在 Runtime.getRuntime().load0
中进行了 filename
是否是 绝对路径
以及 是否为 null
的检查之后,便开始调用 nativeLoad
真正的去加载 so 了
//android-platform\libcore\ojluni\src\main\java\java\lang\Runtime.java | |
synchronized void load0(Class<?> fromClass, String filename) { | |
if (!(new File(filename).isAbsolute())) { | |
throw new UnsatisfiedLinkError( | |
"Expecting an absolute path of the library: " + filename); | |
} | |
if (filename == null) { | |
throw new NullPointerException("filename == null"); | |
} | |
String error = nativeLoad(filename, fromClass.getClassLoader()); | |
if (error != null) { | |
throw new UnsatisfiedLinkError(error); | |
} | |
} |
fromClass.getClassLoader()
向 nativeLoad
中传入的第二个参数是 fromClass.getClassLoader()
, 利用这个方法可以获取到 fromClass
类的 ClassLoader
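为什么要获取 ClassLoader 并将其作为参数传入 nativeLoad? 从后文 LoadNativeLibrary 与 OpenNativeLibrary 的代码可以看到,ClassLoader 至少有三个用途:判断同一个 so 是否已经被另一个 ClassLoader 加载过 (JNI 规范不允许同一个库被加载进多个 ClassLoader)、查找该 ClassLoader 对应的库搜索路径和 linker namespace、以及在调用 JNI_OnLoad 期间通过 SetClassLoaderOverride 临时替换当前线程的 ClassLoader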
# Runtime.java 中的 nativeLoad
两个参数的 nativeLoad
只是转调了三个参数的重载 nativeLoad(String filename, ClassLoader loader, Class<?> caller)
, 而该函数的声明为 native
, 这便意味着我们即将走出 java 来到 c++ 的世界了
//android-platform\libcore\ojluni\src\main\java\java\lang\Runtime.java | |
private static String nativeLoad(String filename, ClassLoader loader) { | |
return nativeLoad(filename, loader, null); | |
} | |
private static native String nativeLoad(String filename, ClassLoader loader, Class<?> caller); |
# Runtime.c 中的 nativeLoad
在常规的 JNI 函数的编写过程中,想要让 java 层调用 JNI 中定义的函数,要么使用静态注册,利用 JNIEXPORT
宏将这个函数导出,要么使用 RegisterNatives
方法去进行动态注册,而在 AOSP 定义 nativeLoad
时,这两种方法都使用到了,对于学习 JNI 的注册过程非常具有参考意义
查看 Runtime_nativeLoad
函数,可以发现它继续调用了 JVM_NativeLoad
//android-platform\libcore\ojluni\src\main\native\Runtime.c | |
JNIEXPORT jstring JNICALL | |
Runtime_nativeLoad(JNIEnv* env, jclass ignored, jstring javaFilename, | |
jobject javaLoader, jclass caller) | |
{ | |
return JVM_NativeLoad(env, javaFilename, javaLoader, caller); | |
} | |
//these macros below are defined at android-platform\libnativehelper\include_platform_header_only\nativehelper\jni_macros.h | |
#define MAKE_JNI_NATIVE_METHOD(name, signature, function) \ | |
_NATIVEHELPER_JNI_MAKE_METHOD(kNormalNative, name, signature, function) | |
#define FAST_NATIVE_METHOD(className, functionName, signature) \ | |
MAKE_JNI_FAST_NATIVE_METHOD(#functionName, signature, className ## _ ## functionName) | |
#define NATIVE_METHOD(className, functionName, signature) \ | |
MAKE_JNI_NATIVE_METHOD(#functionName, signature, className ## _ ## functionName) | |
static JNINativeMethod gMethods[] = { | |
FAST_NATIVE_METHOD(Runtime, freeMemory, "()J"), | |
FAST_NATIVE_METHOD(Runtime, totalMemory, "()J"), | |
FAST_NATIVE_METHOD(Runtime, maxMemory, "()J"), | |
NATIVE_METHOD(Runtime, nativeGc, "()V"), | |
NATIVE_METHOD(Runtime, nativeExit, "(I)V"), | |
NATIVE_METHOD(Runtime, nativeLoad, | |
"(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/Class;)" | |
"Ljava/lang/String;"), | |
}; | |
void register_java_lang_Runtime(JNIEnv* env) { | |
jniRegisterNativeMethods(env, "java/lang/Runtime", gMethods, NELEM(gMethods)); | |
} |
# JVM_NativeLoad
JVM_NativeLoad
获取当前进程的 javaVM
对象并调用 javaVM
的 LoadNativeLibrary
方法
//android-platform\art\openjdkjvm\OpenjdkJvm.cc | |
JNIEXPORT jstring JVM_NativeLoad(JNIEnv* env, | |
jstring javaFilename, | |
jobject javaLoader, | |
jclass caller) { | |
// 将 jstring 类型的 javaFilename 转换为 UTF-8 的 C 字符串 (转换失败时 c_str() 为 nullptr) | |
ScopedUtfChars filename(env, javaFilename); | |
if (filename.c_str() == nullptr) { | |
return nullptr; | |
} | |
std::string error_msg; | |
{ | |
// 获取当前进程的 javaVM 对象 | |
art::JavaVMExt* vm = art::Runtime::Current()->GetJavaVM(); | |
bool success = vm->LoadNativeLibrary(env, | |
filename.c_str(), | |
javaLoader, | |
caller, | |
&error_msg); | |
if (success) { | |
return nullptr; | |
} | |
} | |
// Don't let a pending exception from JNI_OnLoad cause a CheckJNI issue with NewStringUTF. | |
env->ExceptionClear(); | |
return env->NewStringUTF(error_msg.c_str()); | |
} |
# vm->LoadNativeLibrary
LoadNativeLibrary
的代码很长,细看之后可以分为三个部分
- so
加载前
这个部分主要判断即将被加载的 so 是否之前就已经被加载过,如果已经被加载过了,那么就直接退出这个函数并返回
- so
加载时
这个部分就是 AOSP 实现动态链接库加载的核心实现,之后会进行详细的分析
- so
加载后
在这最后一个部分中,当一个 so 被成功加载后,会立即调用 so
的导出函数中的 JNI_OnLoad
函数 (如果 JNI_OnLoad
存在的话)
//android-platform\art\runtime\jni\java_vm_ext.cc
// 阶段三:当 so 被加载之后,立即调用导出函数 JNI_OnLoad
bool was_successful = false;
void* sym = library->FindSymbol("JNI_OnLoad", nullptr);
if (sym == nullptr) {
VLOG(jni) << "[No JNI_OnLoad found in \"" << path << "\"]";
was_successful = true;
} else {
// Call JNI_OnLoad. We have to override the current class
// loader, which will always be "null" since the stuff at the
// top of the stack is around Runtime.loadLibrary(). (See
// the comments in the JNI FindClass function.)
ScopedLocalRef<jobject> old_class_loader(env, env->NewLocalRef(self->GetClassLoaderOverride()));
self->SetClassLoaderOverride(class_loader);
VLOG(jni) << "[Calling JNI_OnLoad in \"" << path << "\"]";
using JNI_OnLoadFn = int(*)(JavaVM*, void*);
JNI_OnLoadFn jni_on_load = reinterpret_cast<JNI_OnLoadFn>(sym);
int version = (*jni_on_load)(this, nullptr);
if (IsSdkVersionSetAndAtMost(runtime_->GetTargetSdkVersion(), SdkVersion::kL)) {
// Make sure that sigchain owns SIGSEGV.
EnsureFrontOfChain(SIGSEGV);
}
self->SetClassLoaderOverride(old_class_loader.get());
if (version == JNI_ERR) {
StringAppendF(error_msg, "JNI_ERR returned from JNI_OnLoad in \"%s\"", path.c_str());
} else if (JavaVMExt::IsBadJniVersion(version)) {
StringAppendF(error_msg, "Bad JNI version returned from JNI_OnLoad in \"%s\": %d",
path.c_str(), version);
// It's unwise to call dlclose() here, but we can mark it
// as bad and ensure that future load attempts will fail.
// We don't know how far JNI_OnLoad got, so there could
// be some partially-initialized stuff accessible through
// newly-registered native method calls. We could try to
// unregister them, but that doesn't seem worthwhile.
} else {
was_successful = true;
}
VLOG(jni) << "[Returned " << (was_successful ? "successfully" : "failure")
<< " from JNI_OnLoad in \"" << path << "\"]";
}
完整的 LoadNativeLibrary
代码
//android-platform\art\runtime\jni\java_vm_ext.cc | |
bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, | |
const std::string& path, | |
jobject class_loader, | |
jclass caller_class, | |
std::string* error_msg) { | |
error_msg->clear(); | |
// 阶段一:判断目标 so 是否已经被加载过 | |
// See if we've already loaded this library. If we have, and the class loader | |
// matches, return successfully without doing anything. | |
// TODO: for better results we should canonicalize (规范化) the pathname (or even compare | |
// inodes). This implementation is fine if everybody is using System.loadLibrary. | |
SharedLibrary* library; | |
Thread* self = Thread::Current(); | |
{ | |
// TODO: move the locking (and more of this logic) into Libraries. | |
MutexLock mu(self, *Locks::jni_libraries_lock_); | |
library = libraries_->Get(path); | |
} | |
void* class_loader_allocator = nullptr; | |
std::string caller_location; | |
{ | |
ScopedObjectAccess soa(env); | |
// As the incoming class loader is reachable/alive during the call of this function, | |
// it's okay to decode it without worrying about unexpectedly marking it alive. | |
ObjPtr<mirror::ClassLoader> loader = soa.Decode<mirror::ClassLoader>(class_loader); | |
ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); | |
if (class_linker->IsBootClassLoader(soa, loader.Ptr())) { | |
loader = nullptr; | |
class_loader = nullptr; | |
if (caller_class != nullptr) { | |
ObjPtr<mirror::Class> caller = soa.Decode<mirror::Class>(caller_class); | |
ObjPtr<mirror::DexCache> dex_cache = caller->GetDexCache(); | |
if (dex_cache != nullptr) { | |
caller_location = dex_cache->GetLocation()->ToModifiedUtf8(); | |
} | |
} | |
} | |
class_loader_allocator = class_linker->GetAllocatorForClassLoader(loader.Ptr()); | |
CHECK(class_loader_allocator != nullptr); | |
} | |
if (library != nullptr) { | |
// Use the allocator pointers for class loader equality to avoid unnecessary weak root decode. | |
if (library->GetClassLoaderAllocator() != class_loader_allocator) { | |
// The library will be associated with class_loader. The JNI | |
// spec says we can't load the same library into more than one | |
// class loader. | |
// | |
// This isn't very common. So spend some time to get a readable message. | |
auto call_to_string = [&](jobject obj) -> std::string { | |
if (obj == nullptr) { | |
return "null"; | |
} | |
// Handle jweaks. Ignore double local-ref. | |
ScopedLocalRef<jobject> local_ref(env, env->NewLocalRef(obj)); | |
if (local_ref != nullptr) { | |
ScopedLocalRef<jclass> local_class(env, env->GetObjectClass(local_ref.get())); | |
jmethodID to_string = env->GetMethodID(local_class.get(), | |
"toString", | |
"()Ljava/lang/String;"); | |
DCHECK(to_string != nullptr); | |
ScopedLocalRef<jobject> local_string(env, | |
env->CallObjectMethod(local_ref.get(), to_string)); | |
if (local_string != nullptr) { | |
ScopedUtfChars utf(env, reinterpret_cast<jstring>(local_string.get())); | |
if (utf.c_str() != nullptr) { | |
return utf.c_str(); | |
} | |
} | |
if (env->ExceptionCheck()) { | |
// We can't do much better logging, really. So leave it with a Describe. | |
env->ExceptionDescribe(); | |
env->ExceptionClear(); | |
} | |
return "(Error calling toString)"; | |
} | |
return "null"; | |
}; | |
std::string old_class_loader = call_to_string(library->GetClassLoader()); | |
std::string new_class_loader = call_to_string(class_loader); | |
StringAppendF(error_msg, "Shared library \"%s\" already opened by " | |
"ClassLoader %p(%s); can't open in ClassLoader %p(%s)", | |
path.c_str(), | |
library->GetClassLoader(), | |
old_class_loader.c_str(), | |
class_loader, | |
new_class_loader.c_str()); | |
LOG(WARNING) << *error_msg; | |
return false; | |
} | |
VLOG(jni) << "[Shared library \"" << path << "\" already loaded in " | |
<< " ClassLoader " << class_loader << "]"; | |
if (!library->CheckOnLoadResult()) { | |
StringAppendF(error_msg, "JNI_OnLoad failed on a previous attempt " | |
"to load \"%s\"", path.c_str()); | |
return false; | |
} | |
return true; | |
} | |
// 阶段二:加载 so | |
// Open the shared library. Because we're using a full path, the system | |
// doesn't have to search through LD_LIBRARY_PATH. (It may do so to | |
// resolve this library's dependencies though.) | |
// Failures here are expected when java.library.path has several entries | |
// and we have to hunt for the lib. | |
// Below we dlopen but there is no paired dlclose, this would be necessary if we supported | |
// class unloading. Libraries will only be unloaded when the reference count (incremented by | |
// dlopen) becomes zero from dlclose. | |
// Retrieve the library path from the classloader, if necessary. | |
ScopedLocalRef<jstring> library_path(env, GetLibrarySearchPath(env, class_loader)); | |
Locks::mutator_lock_->AssertNotHeld(self); | |
const char* path_str = path.empty() ? nullptr : path.c_str(); | |
bool needs_native_bridge = false; | |
char* nativeloader_error_msg = nullptr; | |
// 调用 dlopen 打开目标 so | |
void* handle = android::OpenNativeLibrary( | |
env, | |
runtime_->GetTargetSdkVersion(), | |
path_str, | |
class_loader, | |
(caller_location.empty() ? nullptr : caller_location.c_str()), | |
library_path.get(), | |
&needs_native_bridge, | |
&nativeloader_error_msg); | |
VLOG(jni) << "[Call to dlopen(\"" << path << "\", RTLD_NOW) returned " << handle << "]"; | |
if (handle == nullptr) { | |
*error_msg = nativeloader_error_msg; | |
android::NativeLoaderFreeErrorMessage(nativeloader_error_msg); | |
VLOG(jni) << "dlopen(\"" << path << "\", RTLD_NOW) failed: " << *error_msg; | |
return false; | |
} | |
if (env->ExceptionCheck() == JNI_TRUE) { | |
LOG(ERROR) << "Unexpected exception:"; | |
env->ExceptionDescribe(); | |
env->ExceptionClear(); | |
} | |
// Create a new entry. | |
// TODO: move the locking (and more of this logic) into Libraries. | |
bool created_library = false; | |
{ | |
// Create SharedLibrary ahead of taking the libraries lock to maintain lock ordering. | |
std::unique_ptr<SharedLibrary> new_library( | |
new SharedLibrary(env, | |
self, | |
path, | |
handle, | |
needs_native_bridge, | |
class_loader, | |
class_loader_allocator)); | |
MutexLock mu(self, *Locks::jni_libraries_lock_); | |
library = libraries_->Get(path); | |
if (library == nullptr) { // We won race to get libraries_lock. | |
library = new_library.release(); | |
libraries_->Put(path, library); | |
created_library = true; | |
} | |
} | |
if (!created_library) { | |
LOG(INFO) << "WOW: we lost a race to add shared library: " | |
<< "\"" << path << "\" ClassLoader=" << class_loader; | |
return library->CheckOnLoadResult(); | |
} | |
// 动态链接库装载完成 | |
VLOG(jni) << "[Added shared library \"" << path << "\" for ClassLoader " << class_loader << "]"; | |
// 阶段三:当 so 被加载之后,立即调用导出函数 JNI_OnLoad | |
bool was_successful = false; | |
void* sym = library->FindSymbol("JNI_OnLoad", nullptr); | |
if (sym == nullptr) { | |
VLOG(jni) << "[No JNI_OnLoad found in \"" << path << "\"]"; | |
was_successful = true; | |
} else { | |
// Call JNI_OnLoad. We have to override the current class | |
// loader, which will always be "null" since the stuff at the | |
// top of the stack is around Runtime.loadLibrary(). (See | |
// the comments in the JNI FindClass function.) | |
ScopedLocalRef<jobject> old_class_loader(env, env->NewLocalRef(self->GetClassLoaderOverride())); | |
self->SetClassLoaderOverride(class_loader); | |
VLOG(jni) << "[Calling JNI_OnLoad in \"" << path << "\"]"; | |
using JNI_OnLoadFn = int(*)(JavaVM*, void*); | |
JNI_OnLoadFn jni_on_load = reinterpret_cast<JNI_OnLoadFn>(sym); | |
int version = (*jni_on_load)(this, nullptr); | |
if (IsSdkVersionSetAndAtMost(runtime_->GetTargetSdkVersion(), SdkVersion::kL)) { | |
// Make sure that sigchain owns SIGSEGV. | |
EnsureFrontOfChain(SIGSEGV); | |
} | |
self->SetClassLoaderOverride(old_class_loader.get()); | |
if (version == JNI_ERR) { | |
StringAppendF(error_msg, "JNI_ERR returned from JNI_OnLoad in \"%s\"", path.c_str()); | |
} else if (JavaVMExt::IsBadJniVersion(version)) { | |
StringAppendF(error_msg, "Bad JNI version returned from JNI_OnLoad in \"%s\": %d", | |
path.c_str(), version); | |
// It's unwise to call dlclose() here, but we can mark it | |
// as bad and ensure that future load attempts will fail. | |
// We don't know how far JNI_OnLoad got, so there could | |
// be some partially-initialized stuff accessible through | |
// newly-registered native method calls. We could try to | |
// unregister them, but that doesn't seem worthwhile. | |
} else { | |
was_successful = true; | |
} | |
VLOG(jni) << "[Returned " << (was_successful ? "successfully" : "failure") | |
<< " from JNI_OnLoad in \"" << path << "\"]"; | |
} | |
library->SetResult(was_successful); | |
return was_successful; | |
} |
# so 的加载过程
在刚刚对于 vm->LoadNativeLibrary
函数 so加载时
的简要分析中,我们知道目标 so 是通过 dlopen
打开的,我们可以从 android::OpenNativeLibrary
函数开始分析
//android-platform\art\runtime\jni\java_vm_ext.cc | |
bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, | |
const std::string& path, | |
jobject class_loader, | |
jclass caller_class, | |
std::string* error_msg) { | |
error_msg->clear(); | |
// 阶段一:判断目标 so 是否已经被加载过 | |
... | |
// 阶段二:加载 so | |
// Open the shared library. Because we're using a full path, the system | |
// doesn't have to search through LD_LIBRARY_PATH. (It may do so to | |
// resolve this library's dependencies though.) | |
// Failures here are expected when java.library.path has several entries | |
// and we have to hunt for the lib. | |
// Below we dlopen but there is no paired dlclose, this would be necessary if we supported | |
// class unloading. Libraries will only be unloaded when the reference count (incremented by | |
// dlopen) becomes zero from dlclose. | |
// Retrieve the library path from the classloader, if necessary. | |
ScopedLocalRef<jstring> library_path(env, GetLibrarySearchPath(env, class_loader)); | |
Locks::mutator_lock_->AssertNotHeld(self); | |
const char* path_str = path.empty() ? nullptr : path.c_str(); | |
bool needs_native_bridge = false; | |
char* nativeloader_error_msg = nullptr; | |
// 调用 dlopen 打开目标 so | |
void* handle = android::OpenNativeLibrary( | |
env, | |
runtime_->GetTargetSdkVersion(), | |
path_str, | |
class_loader, | |
(caller_location.empty() ? nullptr : caller_location.c_str()), | |
library_path.get(), | |
&needs_native_bridge, | |
&nativeloader_error_msg); | |
VLOG(jni) << "[Call to dlopen(\"" << path << "\", RTLD_NOW) returned " << handle << "]"; | |
if (handle == nullptr) { | |
*error_msg = nativeloader_error_msg; | |
android::NativeLoaderFreeErrorMessage(nativeloader_error_msg); | |
VLOG(jni) << "dlopen(\"" << path << "\", RTLD_NOW) failed: " << *error_msg; | |
return false; | |
} | |
if (env->ExceptionCheck() == JNI_TRUE) { | |
LOG(ERROR) << "Unexpected exception:"; | |
env->ExceptionDescribe(); | |
env->ExceptionClear(); | |
} | |
// Create a new entry. | |
// TODO: move the locking (and more of this logic) into Libraries. | |
bool created_library = false; | |
{ | |
// Create SharedLibrary ahead of taking the libraries lock to maintain lock ordering. | |
std::unique_ptr<SharedLibrary> new_library( | |
new SharedLibrary(env, | |
self, | |
path, | |
handle, | |
needs_native_bridge, | |
class_loader, | |
class_loader_allocator)); | |
MutexLock mu(self, *Locks::jni_libraries_lock_); | |
library = libraries_->Get(path); | |
if (library == nullptr) { // We won race to get libraries_lock. | |
library = new_library.release(); | |
libraries_->Put(path, library); | |
created_library = true; | |
} | |
} | |
if (!created_library) { | |
LOG(INFO) << "WOW: we lost a race to add shared library: " | |
<< "\"" << path << "\" ClassLoader=" << class_loader; | |
return library->CheckOnLoadResult(); | |
} | |
// 动态链接库装载完成 | |
VLOG(jni) << "[Added shared library \"" << path << "\" for ClassLoader " << class_loader << "]"; | |
// 阶段三:当 so 被加载之后,立即调用导出函数 JNI_OnLoad | |
... | |
} |
# android::OpenNativeLibrary
这个函数中有条件编译,接下来我们分析的是 ART_TARGET_ANDROID
的编译条件分支
跟踪 path
参数,可以发现它被传入到了 android_dlopen_ext(const char* filename, int flag, const android_dlextinfo* extinfo)
函数中, flag
RTLD_NOW
的含义是立即解析所有符号,并在加载时报告任何解析错误
android_dlopen_ext
函数的常见 flag
含义
- RTLD_NOW
立即解析所有符号,并在加载时报告任何解析错误
- RTLD_LAZY
只在符号首次使用时解析
- RTLD_GLOBAL
将库及其符号添加到全局命名空间中,以便其他库可以使用这些符号
extinfo
的值为 {.flags = ANDROID_DLEXT_USE_NAMESPACE,.library_namespace = boot_namespace,};
//android-platform\art\libnativeloader\native_loader.cpp | |
void* OpenNativeLibrary(JNIEnv* env, int32_t target_sdk_version, const char* path, | |
jobject class_loader, const char* caller_location, jstring library_path, | |
bool* needs_native_bridge, char** error_msg) { | |
// 条件编译 | |
//ART_TARGET_ANDROID - Defined for target Android builds of ART. | |
//ref: https://android.googlesource.com/platform/art/+/32c8337/runtime/globals.h | |
#if defined(ART_TARGET_ANDROID) | |
UNUSED(target_sdk_version); | |
if (class_loader == nullptr) { | |
*needs_native_bridge = false; | |
if (caller_location != nullptr) { | |
android_namespace_t* boot_namespace = FindExportedNamespace(caller_location); | |
if (boot_namespace != nullptr) { | |
const android_dlextinfo dlextinfo = { | |
.flags = ANDROID_DLEXT_USE_NAMESPACE, | |
.library_namespace = boot_namespace, | |
}; | |
// 调用 android_dlopen_ext, 并传入 path | |
void* handle = android_dlopen_ext(path, RTLD_NOW, &dlextinfo); | |
if (handle == nullptr) { | |
*error_msg = strdup(dlerror()); | |
} | |
return handle; | |
} | |
} | |
// Check if the library is in NATIVELOADER_DEFAULT_NAMESPACE_LIBS and should | |
// be loaded from the kNativeloaderExtraLibs namespace. | |
{ | |
Result<void*> handle = TryLoadNativeloaderExtraLib(path); | |
if (!handle.ok()) { | |
*error_msg = strdup(handle.error().message().c_str()); | |
return nullptr; | |
} | |
if (handle.value() != nullptr) { | |
return handle.value(); | |
} | |
} | |
// Fall back to the system namespace. This happens for preloaded JNI | |
// libraries in the zygote. | |
// TODO(b/185833744): Investigate if this should fall back to the app main | |
// namespace (aka anonymous namespace) instead. | |
void* handle = OpenSystemLibrary(path, RTLD_NOW); | |
if (handle == nullptr) { | |
*error_msg = strdup(dlerror()); | |
} | |
return handle; | |
} | |
std::lock_guard<std::mutex> guard(g_namespaces_mutex); | |
NativeLoaderNamespace* ns; | |
if ((ns = g_namespaces->FindNamespaceByClassLoader(env, class_loader)) == nullptr) { | |
// This is the case where the classloader was not created by ApplicationLoaders | |
// In this case we create an isolated not-shared namespace for it. | |
Result<NativeLoaderNamespace*> isolated_ns = | |
CreateClassLoaderNamespaceLocked(env, | |
target_sdk_version, | |
class_loader, | |
/*is_shared=*/false, | |
/*dex_path=*/nullptr, | |
library_path, | |
/*permitted_path=*/nullptr, | |
/*uses_library_list=*/nullptr); | |
if (!isolated_ns.ok()) { | |
*error_msg = strdup(isolated_ns.error().message().c_str()); | |
return nullptr; | |
} else { | |
ns = *isolated_ns; | |
} | |
} | |
return OpenNativeLibraryInNamespace(ns, path, needs_native_bridge, error_msg); | |
#else | |
UNUSED(env, target_sdk_version, class_loader, caller_location); | |
// Do some best effort to emulate library-path support. It will not | |
// work for dependencies. | |
// | |
// Note: null has a special meaning and must be preserved. | |
std::string c_library_path; // Empty string by default. | |
if (library_path != nullptr && path != nullptr && path[0] != '/') { | |
ScopedUtfChars library_path_utf_chars(env, library_path); | |
c_library_path = library_path_utf_chars.c_str(); | |
} | |
std::vector<std::string> library_paths = base::Split(c_library_path, ":"); | |
for (const std::string& lib_path : library_paths) { | |
*needs_native_bridge = false; | |
const char* path_arg; | |
std::string complete_path; | |
if (path == nullptr) { | |
// Preserve null. | |
path_arg = nullptr; | |
} else { | |
complete_path = lib_path; | |
if (!complete_path.empty()) { | |
complete_path.append("/"); | |
} | |
complete_path.append(path); | |
path_arg = complete_path.c_str(); | |
} | |
void* handle = dlopen(path_arg, RTLD_NOW); | |
if (handle != nullptr) { | |
return handle; | |
} | |
if (NativeBridgeIsSupported(path_arg)) { | |
*needs_native_bridge = true; | |
handle = NativeBridgeLoadLibrary(path_arg, RTLD_NOW); | |
if (handle != nullptr) { | |
return handle; | |
} | |
*error_msg = strdup(NativeBridgeGetError()); | |
} else { | |
*error_msg = strdup(dlerror()); | |
} | |
} | |
return nullptr; | |
#endif | |
} |
# android_dlopen_ext
android_dlopen_ext
调用了 __loader_android_dlopen_ext
在代码的第四行出现了内建函数 __builtin_return_address(LEVEL)
, 这个函数用来返回当前函数或调用者的返回地址。函数的参数 LEVEL 表示函数调用链中的不同层次的函数,各个值代表的意义如下:
- 0:返回当前函数的返回地址;
- 1:返回当前函数调用者的返回地址;
- 2:返回当前函数调用者的调用者的返回地址;
//android-platform\bionic\libdl\libdl.cpp | |
__attribute__((__weak__)) | |
void* android_dlopen_ext(const char* filename, int flag, const android_dlextinfo* extinfo) { | |
const void* caller_addr = __builtin_return_address(0); | |
return __loader_android_dlopen_ext(filename, flag, extinfo, caller_addr); | |
} |
# __loader_android_dlopen_ext
__loader_android_dlopen_ext
调用了 dlopen_ext
//android-platform\bionic\linker\dlfcn.cpp | |
void* __loader_android_dlopen_ext(const char* filename, | |
int flags, | |
const android_dlextinfo* extinfo, | |
const void* caller_addr) { | |
return dlopen_ext(filename, flags, extinfo, caller_addr); | |
} |
# dlopen_ext
dlopen_ext
中调用了 do_dlopen
//android-platform\bionic\linker\dlfcn.cpp | |
static void* dlopen_ext(const char* filename, | |
int flags, | |
const android_dlextinfo* extinfo, | |
const void* caller_addr) { | |
ScopedPthreadMutexLocker locker(&g_dl_mutex); | |
g_linker_logger.ResetState(); | |
void* result = do_dlopen(filename, flags, extinfo, caller_addr); | |
if (result == nullptr) { | |
__bionic_format_dlerror("dlopen failed", linker_get_error_buffer()); | |
return nullptr; | |
} | |
return result; | |
} |
# do_dlopen
这个函数中最为关键的是调用 find_library
获取到了待加载 so
的 soinfo
//android-platform\bionic\linker\linker.cpp | |
void* do_dlopen(const char* name, int flags, | |
const android_dlextinfo* extinfo, | |
const void* caller_addr) { | |
std::string trace_prefix = std::string("dlopen: ") + (name == nullptr ? "(nullptr)" : name); | |
ScopedTrace trace(trace_prefix.c_str()); | |
ScopedTrace loading_trace((trace_prefix + " - loading and linking").c_str()); | |
soinfo* const caller = find_containing_library(caller_addr); | |
android_namespace_t* ns = get_caller_namespace(caller); | |
LD_LOG(kLogDlopen, | |
"dlopen(name=\"%s\", flags=0x%x, extinfo=%s, caller=\"%s\", caller_ns=%s@%p, targetSdkVersion=%i) ...", | |
name, | |
flags, | |
android_dlextinfo_to_string(extinfo).c_str(), | |
caller == nullptr ? "(null)" : caller->get_realpath(), | |
ns == nullptr ? "(null)" : ns->get_name(), | |
ns, | |
get_application_target_sdk_version()); | |
auto purge_guard = android::base::make_scope_guard([&]() { purge_unused_memory(); }); | |
auto failure_guard = android::base::make_scope_guard( | |
[&]() { LD_LOG(kLogDlopen, "... dlopen failed: %s", linker_get_error_buffer()); }); | |
// 对 flags 的合法性进行判断 | |
if ((flags & ~(RTLD_NOW|RTLD_LAZY|RTLD_LOCAL|RTLD_GLOBAL|RTLD_NODELETE|RTLD_NOLOAD)) != 0) { | |
DL_OPEN_ERR("invalid flags to dlopen: %x", flags); | |
return nullptr; | |
} | |
// 对 extinfo 的合法性进行判断 | |
if (extinfo != nullptr) { | |
if ((extinfo->flags & ~(ANDROID_DLEXT_VALID_FLAG_BITS)) != 0) { | |
DL_OPEN_ERR("invalid extended flags to android_dlopen_ext: 0x%" PRIx64, extinfo->flags); | |
return nullptr; | |
} | |
if ((extinfo->flags & ANDROID_DLEXT_USE_LIBRARY_FD) == 0 && | |
(extinfo->flags & ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET) != 0) { | |
DL_OPEN_ERR("invalid extended flag combination (ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET without " | |
"ANDROID_DLEXT_USE_LIBRARY_FD): 0x%" PRIx64, extinfo->flags); | |
return nullptr; | |
} | |
if ((extinfo->flags & ANDROID_DLEXT_USE_NAMESPACE) != 0) { | |
if (extinfo->library_namespace == nullptr) { | |
DL_OPEN_ERR("ANDROID_DLEXT_USE_NAMESPACE is set but extinfo->library_namespace is null"); | |
return nullptr; | |
} | |
ns = extinfo->library_namespace; | |
} | |
} | |
// Workaround for dlopen(/system/lib/<soname>) when .so is in /apex. http://b/121248172 | |
// The workaround works only when targetSdkVersion < Q. | |
// 当 apk 的 targetSdkVersion<Q 时才会将 /system 路径转换成 /apex 路径 (结合上方英文注释: so 实际位于 /apex 下,而调用方仍可能用 /system/lib/<soname> 这样的路径来 dlopen,这里是为兼容这种情况所做的变通处理) | |
std::string name_to_apex; | |
if (translateSystemPathToApexPath(name, &name_to_apex)) { | |
const char* new_name = name_to_apex.c_str(); | |
LD_LOG(kLogDlopen, "dlopen considering translation from %s to APEX path %s", | |
name, | |
new_name); | |
// Some APEXs could be optionally disabled. Only translate the path | |
// when the old file is absent and the new file exists. | |
// TODO(b/124218500): Re-enable it once app compat issue is resolved | |
/* | |
if (file_exists(name)) { | |
LD_LOG(kLogDlopen, "dlopen %s exists, not translating", name); | |
} else | |
*/ | |
if (!file_exists(new_name)) { | |
LD_LOG(kLogDlopen, "dlopen %s does not exist, not translating", | |
new_name); | |
} else { | |
LD_LOG(kLogDlopen, "dlopen translation accepted: using %s", new_name); | |
name = new_name; | |
} | |
} | |
// End Workaround for dlopen(/system/lib/<soname>) when .so is in /apex. | |
std::string asan_name_holder; | |
const char* translated_name = name; | |
if (g_is_asan && translated_name != nullptr && translated_name[0] == '/') { | |
char original_path[PATH_MAX]; | |
if (realpath(name, original_path) != nullptr) { | |
asan_name_holder = std::string(kAsanLibDirPrefix) + original_path; | |
if (file_exists(asan_name_holder.c_str())) { | |
soinfo* si = nullptr; | |
if (find_loaded_library_by_realpath(ns, original_path, true, &si)) { | |
PRINT("linker_asan dlopen NOT translating \"%s\" -> \"%s\": library already loaded", name, | |
asan_name_holder.c_str()); | |
} else { | |
PRINT("linker_asan dlopen translating \"%s\" -> \"%s\"", name, translated_name); | |
translated_name = asan_name_holder.c_str(); | |
} | |
} | |
} | |
} | |
ProtectedDataGuard guard; | |
// 重头戏,这里调用了 find_library 获取到了这个 so 的 soinfo | |
soinfo* si = find_library(ns, translated_name, flags, extinfo, caller); | |
loading_trace.End(); | |
if (si != nullptr) { | |
void* handle = si->to_handle(); | |
LD_LOG(kLogDlopen, | |
"... dlopen calling constructors: realpath=\"%s\", soname=\"%s\", handle=%p", | |
si->get_realpath(), si->get_soname(), handle); | |
si->call_constructors(); | |
failure_guard.Disable(); | |
LD_LOG(kLogDlopen, | |
"... dlopen successful: realpath=\"%s\", soname=\"%s\", handle=%p", | |
si->get_realpath(), si->get_soname(), handle); | |
return handle; | |
} | |
return nullptr; | |
} |
# soinfo
soinfo
结构体如下所示,可以用在 frida 或者 ida 中
//IMPORTANT: choose the target word size before using the definitions below. | |
//Enable this macro when the target is a 64-bit ELF. | |
#define __LP64__ 1 | |
//Enable this macro when the target is a 32-bit ELF. | |
//#define __work_around_b_24465209__ 1 | |
/* | |
//https://android.googlesource.com/platform/bionic/+/master/linker/Android.bp | |
The linker build defines the __work_around_b_24465209__ macro for 32-bit architectures: | |
arch: { | |
arm: {cflags: ["-D__work_around_b_24465209__"],}, | |
x86: {cflags: ["-D__work_around_b_24465209__"],}, | |
} | |
*/ | |
//android-platform\bionic\libc\include\link.h | |
#if defined(__LP64__) | |
#define ElfW(type) Elf64_ ## type | |
#else | |
#define ElfW(type) Elf32_ ## type | |
#endif | |
//android-platform\bionic\linker\linker_common_types.h | |
// Android uses RELA for LP64. | |
#if defined(__LP64__) | |
#define USE_RELA 1 | |
#endif | |
//android-platform\bionic\libc\kernel\uapi\asm-generic\int-ll64.h | |
//__signed__-->signed | |
typedef signed char __s8; | |
typedef unsigned char __u8; | |
typedef signed short __s16; | |
typedef unsigned short __u16; | |
typedef signed int __s32; | |
typedef unsigned int __u32; | |
typedef signed long long __s64; | |
typedef unsigned long long __u64; | |
//A12-src\msm-google\include\uapi\linux\elf.h | |
/* 32-bit ELF base types. */ | |
typedef __u32 Elf32_Addr; | |
typedef __u16 Elf32_Half; | |
typedef __u32 Elf32_Off; | |
typedef __s32 Elf32_Sword; | |
typedef __u32 Elf32_Word; | |
/* 64-bit ELF base types. */ | |
typedef __u64 Elf64_Addr; | |
typedef __u16 Elf64_Half; | |
typedef __s16 Elf64_SHalf; | |
typedef __u64 Elf64_Off; | |
typedef __s32 Elf64_Sword; | |
typedef __u32 Elf64_Word; | |
typedef __u64 Elf64_Xword; | |
typedef __s64 Elf64_Sxword; | |
typedef struct dynamic{ | |
Elf32_Sword d_tag; | |
union{ | |
Elf32_Sword d_val; | |
Elf32_Addr d_ptr; | |
} d_un; | |
} Elf32_Dyn; | |
typedef struct { | |
Elf64_Sxword d_tag; /* entry tag value */ | |
union { | |
Elf64_Xword d_val; | |
Elf64_Addr d_ptr; | |
} d_un; | |
} Elf64_Dyn; | |
typedef struct elf32_rel { | |
Elf32_Addr r_offset; | |
Elf32_Word r_info; | |
} Elf32_Rel; | |
typedef struct elf64_rel { | |
Elf64_Addr r_offset; /* Location at which to apply the action */ | |
Elf64_Xword r_info; /* index and type of relocation */ | |
} Elf64_Rel; | |
typedef struct elf32_rela{ | |
Elf32_Addr r_offset; | |
Elf32_Word r_info; | |
Elf32_Sword r_addend; | |
} Elf32_Rela; | |
typedef struct elf64_rela { | |
Elf64_Addr r_offset; /* Location at which to apply the action */ | |
Elf64_Xword r_info; /* index and type of relocation */ | |
Elf64_Sxword r_addend; /* Constant addend used to compute value */ | |
} Elf64_Rela; | |
typedef struct elf32_sym{ | |
Elf32_Word st_name; | |
Elf32_Addr st_value; | |
Elf32_Word st_size; | |
unsigned char st_info; | |
unsigned char st_other; | |
Elf32_Half st_shndx; | |
} Elf32_Sym; | |
typedef struct elf64_sym { | |
Elf64_Word st_name; /* Symbol name, index in string tbl */ | |
unsigned char st_info; /* Type and binding attributes */ | |
unsigned char st_other; /* No defined meaning, 0 */ | |
Elf64_Half st_shndx; /* Associated section index */ | |
Elf64_Addr st_value; /* Value of the symbol */ | |
Elf64_Xword st_size; /* Associated symbol size */ | |
} Elf64_Sym; | |
#define EI_NIDENT 16 | |
typedef struct elf32_hdr{ | |
unsigned char e_ident[EI_NIDENT]; | |
Elf32_Half e_type; | |
Elf32_Half e_machine; | |
Elf32_Word e_version; | |
Elf32_Addr e_entry; /* Entry point */ | |
Elf32_Off e_phoff; | |
Elf32_Off e_shoff; | |
Elf32_Word e_flags; | |
Elf32_Half e_ehsize; | |
Elf32_Half e_phentsize; | |
Elf32_Half e_phnum; | |
Elf32_Half e_shentsize; | |
Elf32_Half e_shnum; | |
Elf32_Half e_shstrndx; | |
} Elf32_Ehdr; | |
typedef struct elf64_hdr { | |
unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */ | |
Elf64_Half e_type; | |
Elf64_Half e_machine; | |
Elf64_Word e_version; | |
Elf64_Addr e_entry; /* Entry point virtual address */ | |
Elf64_Off e_phoff; /* Program header table file offset */ | |
Elf64_Off e_shoff; /* Section header table file offset */ | |
Elf64_Word e_flags; | |
Elf64_Half e_ehsize; | |
Elf64_Half e_phentsize; | |
Elf64_Half e_phnum; | |
Elf64_Half e_shentsize; | |
Elf64_Half e_shnum; | |
Elf64_Half e_shstrndx; | |
} Elf64_Ehdr; | |
/* These constants define the permissions on sections in the program | |
header, p_flags. */ | |
#define PF_R 0x4 | |
#define PF_W 0x2 | |
#define PF_X 0x1 | |
typedef struct elf32_phdr{ | |
Elf32_Word p_type; | |
Elf32_Off p_offset; | |
Elf32_Addr p_vaddr; | |
Elf32_Addr p_paddr; | |
Elf32_Word p_filesz; | |
Elf32_Word p_memsz; | |
Elf32_Word p_flags; | |
Elf32_Word p_align; | |
} Elf32_Phdr; | |
typedef struct elf64_phdr { | |
Elf64_Word p_type; | |
Elf64_Word p_flags; | |
Elf64_Off p_offset; /* Segment file offset */ | |
Elf64_Addr p_vaddr; /* Segment virtual address */ | |
Elf64_Addr p_paddr; /* Segment physical address */ | |
Elf64_Xword p_filesz; /* Segment size in file */ | |
Elf64_Xword p_memsz; /* Segment size in memory */ | |
Elf64_Xword p_align; /* Segment alignment, file & memory */ | |
} Elf64_Phdr; | |
typedef struct elf32_shdr { | |
Elf32_Word sh_name; | |
Elf32_Word sh_type; | |
Elf32_Word sh_flags; | |
Elf32_Addr sh_addr; | |
Elf32_Off sh_offset; | |
Elf32_Word sh_size; | |
Elf32_Word sh_link; | |
Elf32_Word sh_info; | |
Elf32_Word sh_addralign; | |
Elf32_Word sh_entsize; | |
} Elf32_Shdr; | |
typedef struct elf64_shdr { | |
Elf64_Word sh_name; /* Section name, index in string tbl */ | |
Elf64_Word sh_type; /* Type of section */ | |
Elf64_Xword sh_flags; /* Miscellaneous section attributes */ | |
Elf64_Addr sh_addr; /* Section virtual addr at execution */ | |
Elf64_Off sh_offset; /* Section file offset */ | |
Elf64_Xword sh_size; /* Size of section in bytes */ | |
Elf64_Word sh_link; /* Index of another section */ | |
Elf64_Word sh_info; /* Additional section information */ | |
Elf64_Xword sh_addralign; /* Section alignment */ | |
Elf64_Xword sh_entsize; /* Entry size if section holds table */ | |
} Elf64_Shdr; | |
typedef unsigned long uintptr_t; | |
struct link_map | |
{ | |
uintptr_t l_addr; | |
char * l_name; | |
uintptr_t l_ld; | |
struct link_map * l_next; | |
struct link_map * l_prev; | |
}; | |
//android-platform\bionic\linker\linker_soinfo.h | |
// Signatures of the destructor / constructor callbacks stored in the arrays below. | |
typedef void (*linker_dtor_function_t)(); | |
typedef void (*linker_ctor_function_t)(int, char**, char**); | |
#if defined(__work_around_b_24465209__) | |
#define SOINFO_NAME_LEN 128 | |
#endif | |
// Field layout of the linker's per-library record as reproduced in this listing. | |
// Which members exist depends on __work_around_b_24465209__, __LP64__, USE_RELA and __arm__, | |
// so the configuration macros must match the target being inspected. | |
struct soinfo { | |
#if defined(__work_around_b_24465209__) | |
char old_name_[SOINFO_NAME_LEN]; | |
#endif | |
const ElfW(Phdr)* phdr; | |
size_t phnum; | |
#if defined(__work_around_b_24465209__) | |
ElfW(Addr) unused0; // DO NOT USE, maintained for compatibility. | |
#endif | |
ElfW(Addr) base; | |
size_t size; | |
#if defined(__work_around_b_24465209__) | |
uint32_t unused1; // DO NOT USE, maintained for compatibility. | |
#endif | |
ElfW(Dyn)* dynamic; | |
#if defined(__work_around_b_24465209__) | |
uint32_t unused2; // DO NOT USE, maintained for compatibility | |
uint32_t unused3; // DO NOT USE, maintained for compatibility | |
#endif | |
soinfo* next; | |
uint32_t flags_; | |
const char* strtab_; | |
ElfW(Sym)* symtab_; | |
size_t nbucket_; | |
size_t nchain_; | |
uint32_t* bucket_; | |
uint32_t* chain_; | |
#if !defined(__LP64__) | |
ElfW(Addr)** unused4; // DO NOT USE, maintained for compatibility | |
#endif | |
// RELA-style relocation tables when USE_RELA is defined, REL-style otherwise. | |
#if defined(USE_RELA) | |
ElfW(Rela)* plt_rela_; | |
size_t plt_rela_count_; | |
ElfW(Rela)* rela_; | |
size_t rela_count_; | |
#else | |
ElfW(Rel)* plt_rel_; | |
size_t plt_rel_count_; | |
ElfW(Rel)* rel_; | |
size_t rel_count_; | |
#endif | |
linker_ctor_function_t* preinit_array_; | |
size_t preinit_array_count_; | |
linker_ctor_function_t* init_array_; | |
size_t init_array_count_; | |
linker_dtor_function_t* fini_array_; | |
size_t fini_array_count_; | |
linker_ctor_function_t init_func_; | |
linker_dtor_function_t fini_func_; | |
// NOTE(review): __arm__ is not set by the configuration macros at the top of this listing; | |
// presumably it must be defined by hand when the target is 32-bit ARM - confirm. | |
#if defined(__arm__) | |
// ARM EABI section used for stack unwinding. | |
uint32_t* ARM_exidx; | |
size_t ARM_exidx_count; | |
#endif | |
size_t ref_count_; | |
link_map link_map_head; | |
bool constructors_called; | |
// When you read a virtual address from the ELF file, add this | |
// value to get the corresponding address in the process' address space. | |
ElfW(Addr) load_bias; | |
#if !defined(__LP64__) | |
bool has_text_relocations; | |
#endif | |
bool has_DT_SYMBOLIC; | |
}; |
# find_library
接下来在 find_library
中调用了 find_libraries
//android-platform\bionic\linker\linker.cpp | |
// Resolves a single library name to its soinfo. | |
// Returns somain when name is null; otherwise delegates to find_libraries with a | |
// one-element name array. On failure any partially produced soinfo is unloaded and | |
// nullptr is returned; on success the soinfo's reference count is incremented. | |
static soinfo* find_library(android_namespace_t* ns, | |
const char* name, int rtld_flags, | |
const android_dlextinfo* extinfo, | |
soinfo* needed_by) { | |
soinfo* si = nullptr; | |
// If name is null, si is set to somain. | |
//somain: main process, always the one after libdl_info | |
if (name == nullptr) { | |
si = solist_get_somain();// this call returns somain | |
} else if (!find_libraries(ns, | |
needed_by, | |
&name, | |
1, | |
&si, | |
nullptr, | |
0, | |
rtld_flags, | |
extinfo, | |
false /* add_as_children */)) { | |
if (si != nullptr) { | |
soinfo_unload(si); | |
} | |
return nullptr; | |
} | |
// The load succeeded, so bump the library's reference count. This matches the | |
// following comment found in JavaVMExt::LoadNativeLibrary: | |
/* | |
Below we dlopen but there is no paired dlclose, this would be necessary if we supported | |
class unloading. Libraries will only be unloaded when the reference count (incremented by | |
dlopen) becomes zero from dlclose. | |
*/ | |
si->increment_ref_count(); | |
return si; | |
} |
# find_libraries
分析了这么久等的就是这个函数!
函数声明:
//android-platform\bionic\linker\linker.cpp | |
/* | |
ns                       namespace to load into (= the caller's namespace) | |
start_with               soinfo of the caller | |
library_names            names of all libraries to load | |
library_names_count      number of libraries to load | |
soinfos                  receives the soinfo of each loaded library | |
ld_preloads              receives the preloaded libraries; may be null when there are none | |
ld_preloads_count        number of preloaded libraries | |
extinfo                  Android-specific extra information supplied with the call | |
add_as_children          whether the libraries become children of start_with | |
search_linked_namespaces whether to search linked namespaces | |
                         (NOTE(review): listed in the original notes, but the signature below has no such parameter) | |
namespaces               linked namespaces | |
*/ | |
bool find_libraries(android_namespace_t* ns, | |
soinfo* start_with, | |
const char* const library_names[], | |
size_t library_names_count, | |
soinfo* soinfos[], | |
std::vector<soinfo*>* ld_preloads, | |
size_t ld_preloads_count, | |
int rtld_flags, | |
const android_dlextinfo* extinfo, | |
bool add_as_children, | |
std::vector<android_namespace_t*>* namespaces) |
这个函数可以分为七个部分进行分析
# 准备阶段
这一部分将待加载的 so 添加到 LoadTaskList
加载任务队列中
// Step 0: prepare. | |
std::unordered_map<const soinfo*, ElfReader> readers_map; | |
LoadTaskList load_tasks; | |
// Several libraries can be requested at once, but find_library passes | |
// library_names_count == 1, so only the single target library is queued here. | |
for (size_t i = 0; i < library_names_count; ++i) { | |
const char* name = library_names[i]; | |
// Queue a load task for this library. | |
load_tasks.push_back(LoadTask::create(name, start_with, ns, &readers_map)); | |
} | |
// If soinfos array is null allocate one on stack. | |
// The array is needed in case of failure; for example | |
// when library_names[] = {libone.so, libtwo.so} and libone.so | |
// is loaded correctly but libtwo.so failed for some reason. | |
// In this case libone.so should be unloaded on return. | |
// See also implementation of failure_guard below. | |
// Allocate (and zero) storage for soinfos when the caller did not supply any. | |
if (soinfos == nullptr) { | |
size_t soinfos_size = sizeof(soinfo*)*library_names_count; | |
soinfos = reinterpret_cast<soinfo**>(alloca(soinfos_size)); | |
memset(soinfos, 0, soinfos_size); | |
} | |
// list of libraries to link - see step 2. | |
size_t soinfos_count = 0; | |
// Releases every LoadTask in load_tasks when this guard runs. | |
auto scope_guard = android::base::make_scope_guard([&]() { | |
for (LoadTask* t : load_tasks) { | |
LoadTask::deleter(t); | |
} | |
}); | |
ZipArchiveCache zip_archive_cache; | |
soinfo_list_t new_global_group_members; |
# 寻找依赖库,添加到待加载队列
将待加载的 so 的依赖库添加到 load_tasks
队列中,此时并不会加载依赖库
// Step 1: expand the list of load_tasks to include | |
// all DT_NEEDED libraries (do not load them just yet) | |
// The loop is index-based and re-reads load_tasks.size() each iteration because | |
// find_library_internal receives &load_tasks and may append further tasks to it. | |
for (size_t i = 0; i<load_tasks.size(); ++i) { | |
LoadTask* task = load_tasks[i]; | |
soinfo* needed_by = task->get_needed_by(); | |
// A task counts as DT_NEEDED when it has a parent and that parent is not the original | |
// caller (or the caller explicitly asked for its libraries to be added as children). | |
bool is_dt_needed = needed_by != nullptr && (needed_by != start_with || add_as_children); | |
// extinfo is only passed on for tasks that are not DT_NEEDED dependencies. | |
task->set_extinfo(is_dt_needed ? nullptr : extinfo); | |
task->set_dt_needed(is_dt_needed); | |
LD_LOG(kLogDlopen, "find_libraries(ns=%s): task=%s, is_dt_needed=%d", ns->get_name(), | |
task->get_name(), is_dt_needed); | |
// Note: start from the namespace that is stored in the LoadTask. This namespace | |
// is different from the current namespace when the LoadTask is for a transitive | |
// dependency and the lib that created the LoadTask is not found in the | |
// current namespace but in one of the linked namespace. | |
if (!find_library_internal(const_cast<android_namespace_t*>(task->get_start_from()), | |
task, | |
&zip_archive_cache, | |
&load_tasks, | |
rtld_flags)) { | |
return false; | |
} | |
soinfo* si = task->get_soinfo(); | |
if (is_dt_needed) { | |
needed_by->add_child(si); | |
} | |
// When ld_preloads is not null, the first | |
// ld_preloads_count libs are in fact ld_preloads. | |
bool is_ld_preload = false; | |
if (ld_preloads != nullptr && soinfos_count < ld_preloads_count) { | |
ld_preloads->push_back(si); | |
is_ld_preload = true; | |
} | |
// Only the first library_names_count results are reported back through soinfos[]. | |
if (soinfos_count < library_names_count) { | |
soinfos[soinfos_count++] = si; | |
} | |
// Add the new global group members to all initial namespaces. Do this secondary namespace setup | |
// at the same time that libraries are added to their primary namespace so that the order of | |
// global group members is the same in the every namespace. Only add a library to a namespace | |
// once, even if it appears multiple times in the dependency graph. | |
if (is_ld_preload || (si->get_dt_flags_1() & DF_1_GLOBAL) != 0) { | |
if (!si->is_linked() && namespaces != nullptr && !new_global_group_members.contains(si)) { | |
new_global_group_members.push_back(si); | |
for (auto linked_ns : *namespaces) { | |
if (si->get_primary_namespace() != linked_ns) { | |
linked_ns->add_soinfo(si); | |
si->add_secondary_namespace(linked_ns); | |
} | |
} | |
} | |
} | |
} |
这一步要做的是调用 find_library_internal
获取到这个 so 的依赖库,那么什么是 so 的依赖库呢?我们拿 ida 随便反编译一个 so, Needed Library
开头的就是这一个 so 的所有依赖库
# find_library_internal
这里依次尝试四种方式来查找当前任务对应的库 (待加载的 so 本身及其依赖库都会走这个函数):
- 调用 find_loaded_library_by_soname: 这个库有没有被加载过了?加载过那么我找都不用找了,直接返回寻找结果
- 正常使用 load_library 在当前命名空间中查找
- 正常寻找找不到,那这个库是不是在豁免名单 (exempt-list) 里?如果是,就切换到默认命名空间 g_default_namespace 里面再找一次
- 前三种方式都失败了?启动终极解决方案,到链接命名空间 linked namespace 里面去找,还没找到那就是真的找不到了
//android-platform\bionic\linker\linker.cpp | |
// Resolves one LoadTask to a soinfo, trying in order: an already-loaded library with the | |
// same soname, load_library in ns, load_library in the default namespace for exempt-list | |
// libraries, and finally each namespace linked to ns. Returns true as soon as one succeeds. | |
static bool find_library_internal(android_namespace_t* ns, | |
LoadTask* task, | |
ZipArchiveCache* zip_archive_cache, | |
LoadTaskList* load_tasks, | |
int rtld_flags) { | |
soinfo* candidate; | |
// If the library has already been loaded, record its soinfo on the task and return. | |
if (find_loaded_library_by_soname(ns, task->get_name(), true /* search_linked_namespaces */, | |
&candidate)) { | |
LD_LOG(kLogDlopen, | |
"find_library_internal(ns=%s, task=%s): Already loaded (by soname): %s", | |
ns->get_name(), task->get_name(), candidate->get_realpath()); | |
task->set_soinfo(candidate); | |
return true; | |
} | |
// Library might still be loaded, the accurate detection | |
// of this fact is done by load_library. | |
TRACE("[ \"%s\" find_loaded_library_by_soname failed (*candidate=%s@%p). Trying harder... ]", | |
task->get_name(), candidate == nullptr ? "n/a" : candidate->get_realpath(), candidate); | |
// Key call: try to locate and open the library from this namespace. | |
if (load_library(ns, task, zip_archive_cache, load_tasks, rtld_flags, | |
true /* search_linked_namespaces */)) { | |
return true; | |
} | |
// TODO(dimitry): workaround for http://b/26394120 (the exempt-list) | |
// Exempt libs are, per the article author's note, libraries that already ship with the | |
// system (well-known ones such as libcrypto.so and libssl.so); when the task names one | |
// of them, the lookup is retried from the default namespace. | |
if (ns->is_exempt_list_enabled() && is_exempt_lib(ns, task->get_name(), task->get_needed_by())) { | |
// For the libs in the exempt-list, switch to the default namespace and then | |
// try the load again from there. The library could be loaded from the | |
// default namespace or from another namespace (e.g. runtime) that is linked | |
// from the default namespace. | |
LD_LOG(kLogDlopen, | |
"find_library_internal(ns=%s, task=%s): Exempt system library - trying namespace %s", | |
ns->get_name(), task->get_name(), g_default_namespace.get_name()); | |
ns = &g_default_namespace; | |
if (load_library(ns, task, zip_archive_cache, load_tasks, rtld_flags, | |
true /* search_linked_namespaces */)) { | |
return true; | |
} | |
} | |
// END OF WORKAROUND | |
// if a library was not found - look into linked namespaces | |
// preserve current dlerror in the case it fails. | |
// If the library could not be found in this namespace, fall back to searching | |
// the linked namespaces. | |
DlErrorRestorer dlerror_restorer; | |
LD_LOG(kLogDlopen, "find_library_internal(ns=%s, task=%s): Trying %zu linked namespaces", | |
ns->get_name(), task->get_name(), ns->linked_namespaces().size()); | |
for (auto& linked_namespace : ns->linked_namespaces()) { | |
if (find_library_in_linked_namespace(linked_namespace, task)) { | |
// Library is already loaded. | |
if (task->get_soinfo() != nullptr) { | |
// n.b. This code path runs when find_library_in_linked_namespace found an already-loaded | |
// library by soname. That should only be possible with a exempt-list lookup, where we | |
// switch the namespace, because otherwise, find_library_in_linked_namespace is duplicating | |
// the soname scan done in this function's first call to find_loaded_library_by_soname. | |
return true; | |
} | |
if (load_library(linked_namespace.linked_namespace(), task, zip_archive_cache, load_tasks, | |
rtld_flags, false /* search_linked_namespaces */)) { | |
LD_LOG(kLogDlopen, "find_library_internal(ns=%s, task=%s): Found in linked namespace %s", | |
ns->get_name(), task->get_name(), linked_namespace.linked_namespace()->get_name()); | |
return true; | |
} | |
} | |
} | |
return false; | |
} |
# load_library
函数声明:
//android-platform\bionic\linker\linker.cpp | |
// Overload taking a ZipArchiveCache: obtains a file descriptor for the task's library | |
// and then hands off to the overload that takes the resolved realpath. | |
static bool load_library(android_namespace_t* ns, | |
LoadTask* task, | |
ZipArchiveCache* zip_archive_cache, | |
LoadTaskList* load_tasks, | |
int rtld_flags, | |
bool search_linked_namespaces); |
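在这个函数中,首先判断 extinfo->flags 是否带有 ANDROID_DLEXT_USE_LIBRARY_FD 标志;如果同时还带有 ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET, 则把 extinfo->library_fd_offset 作为文件偏移。这两个标志在 Android 官网 (android/dlext.h 的文档) 中的解释大意如下:ANDROID_DLEXT_USE_LIBRARY_FD 表示不按名称打开文件,而是直接使用 library_fd; ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET 表示从 library_fd 的 library_fd_offset 偏移处开始读取,且只有在设置了 ANDROID_DLEXT_USE_LIBRARY_FD 时才有效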
那这样就方便了,假如已经有了这个 library 的 fd
文件描述符,那直接拿过来用就可以了
但是我们待加载的 so 的 extinfo->flags
已经在 android::OpenNativeLibrary
中被定义为 ANDROID_DLEXT_USE_NAMESPACE
了,这个标志的含义在上图中也有给出,所以很遗憾,这个 if
语句中的代码并不会被执行
那么为什么要特意在此处加入这个 if 语句呢?
我的理解是为了提高运行的效率,有一些底层的库已经打开过,加载过了,那么就完全没有必要再打开,搜索一次,直接把 library 的 fd
文件描述符拿过来用就可以了
const char* name = task->get_name(); | |
soinfo* needed_by = task->get_needed_by(); | |
const android_dlextinfo* extinfo = task->get_extinfo(); | |
// If extinfo->flags has ANDROID_DLEXT_USE_LIBRARY_FD set, the library is opened | |
// directly through the caller-supplied file descriptor. | |
if (extinfo != nullptr && (extinfo->flags & ANDROID_DLEXT_USE_LIBRARY_FD) != 0) { | |
off64_t file_offset = 0; | |
if ((extinfo->flags & ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET) != 0) { | |
file_offset = extinfo->library_fd_offset; | |
} | |
std::string realpath; | |
// Fall back to the given name when the descriptor's real path cannot be resolved. | |
if (!realpath_fd(extinfo->library_fd, &realpath)) { | |
if (!is_first_stage_init()) { | |
PRINT( | |
"warning: unable to get realpath for the library \"%s\" by extinfo->library_fd. " | |
"Will use given name.", | |
name); | |
} | |
realpath = name; | |
} | |
task->set_fd(extinfo->library_fd, false); | |
task->set_file_offset(file_offset); | |
return load_library(ns, task, load_tasks, rtld_flags, realpath, search_linked_namespaces); | |
} |
之后,我们千辛万苦终于看到了这对于待加载的 so 的第一个操作,调用 open_library
打开它
// Open the file. | |
off64_t file_offset; | |
std::string realpath; | |
int fd = open_library(ns, zip_archive_cache, name, needed_by, &file_offset, &realpath); | |
// Opening the library failed: report which lookup failed and give up. | |
if (fd == -1) { | |
if (task->is_dt_needed()) { | |
if (needed_by->is_main_executable()) { | |
DL_OPEN_ERR("library \"%s\" not found: needed by main executable", name); | |
} else { | |
DL_OPEN_ERR("library \"%s\" not found: needed by %s in namespace %s", name, | |
needed_by->get_realpath(), task->get_start_from()->get_name()); | |
} | |
} else { | |
DL_OPEN_ERR("library \"%s\" not found", name); | |
} | |
return false; | |
} | |
//set fd and file_offset | |
task->set_fd(fd, true); | |
task->set_file_offset(file_offset); |
我们知道 System.load
需要指定待加载的 so 的绝对路径,这在 open_library
中便符合第一个 if 语句,所以接下来将会调用 open_library_at_path
// Opens the library called name and returns its file descriptor, or -1 when it cannot be | |
// found. Names containing a slash are opened directly; other names are searched for on | |
// the namespace's LD_LIBRARY_PATH list, then on needed_by's DT_RUNPATH, then on the | |
// namespace's default library paths. | |
static int open_library(android_namespace_t* ns, | |
ZipArchiveCache* zip_archive_cache, | |
const char* name, soinfo *needed_by, | |
off64_t* file_offset, std::string* realpath) { | |
TRACE("[ opening %s from namespace %s ]", name, ns->get_name()); | |
// If the name contains a slash, we should attempt to open it directly and not search the paths. | |
// A slash means the name is a path; the article author notes this is the absolute-path case. | |
if (strchr(name, '/') != nullptr) { | |
return open_library_at_path(zip_archive_cache, name, file_offset, realpath); | |
} | |
// LD_LIBRARY_PATH has the highest priority. We don't have to check accessibility when searching | |
// the namespace's path lists, because anything found on a namespace path list should always be | |
// accessible. | |
int fd = open_library_on_paths(zip_archive_cache, name, file_offset, ns->get_ld_library_paths(), realpath); | |
// Try the DT_RUNPATH, and verify that the library is accessible. | |
if (fd == -1 && needed_by != nullptr) { | |
fd = open_library_on_paths(zip_archive_cache, name, file_offset, needed_by->get_dt_runpath(), realpath); | |
if (fd != -1 && !ns->is_accessible(*realpath)) { | |
close(fd); | |
fd = -1; | |
} | |
} | |
// Finally search the namespace's main search path list. | |
if (fd == -1) { | |
fd = open_library_on_paths(zip_archive_cache, name, file_offset, ns->get_default_library_paths(), realpath); | |
} | |
return fd; | |
} |
在 open_library_at_path
中才算是真正的使用 open
函数打开了这个 so, 并返回文件描述符 fd
, 传入的两个标志的含义分别为
O_RDONLY
表示以只读方式打开文件。O_CLOEXEC
表示在 exec 族函数 (execl, execlp, execle, execv, execvp, execvpe) 调用后,将自动关闭文件描述符
// Opens the library at an explicit path and returns its descriptor (-1 on failure). | |
// Paths containing kZipFileSeparator are first tried as an entry inside a zip file; if that | |
// does not yield a descriptor the path is opened as a plain file, in which case | |
// *file_offset is set to 0 and *realpath to the resolved (or, failing that, given) path. | |
static int open_library_at_path(ZipArchiveCache* zip_archive_cache, | |
const char* path, off64_t* file_offset, | |
std::string* realpath) { | |
int fd = -1; | |
// If the path contains kZipFileSeparator ("!/" according to the original note), | |
// open the library through the zip file. | |
if (strstr(path, kZipFileSeparator) != nullptr) { | |
fd = open_library_in_zipfile(zip_archive_cache, path, file_offset, realpath); | |
} | |
if (fd == -1) { | |
fd = TEMP_FAILURE_RETRY(open(path, O_RDONLY | O_CLOEXEC)); | |
if (fd != -1) { | |
*file_offset = 0; | |
if (!realpath_fd(fd, realpath)) { | |
if (!is_first_stage_init()) { | |
PRINT("warning: unable to get realpath for the library \"%s\". Will use given path.", | |
path); | |
} | |
*realpath = path; | |
} | |
} | |
} | |
return fd; | |
} |
成功的打开了这个 so 之后,那就要开始解析这个 so 咯,看看 load_library
最终 return
的啥?竟然还是 load_library
! 不过细看第三个参数,怎么感觉类型不是 ZipArchiveCache*
呢
return load_library(ns, task, load_tasks, rtld_flags, realpath, search_linked_namespaces); |
找找函数的声明,原来 load_library
还有一个重载函数,它的第三个参数的类型就是 LoadTaskList*
此 load_library
函数的完整代码如下
//android-platform\bionic\linker\linker.cpp | |
// Obtains a file descriptor for the task's library and forwards to the realpath-based | |
// load_library overload. The descriptor comes either from extinfo->library_fd (when | |
// ANDROID_DLEXT_USE_LIBRARY_FD is set) or from open_library; returns false when the | |
// library cannot be opened. | |
static bool load_library(android_namespace_t* ns, | |
LoadTask* task, | |
ZipArchiveCache* zip_archive_cache, | |
LoadTaskList* load_tasks, | |
int rtld_flags, | |
bool search_linked_namespaces) { | |
const char* name = task->get_name(); | |
soinfo* needed_by = task->get_needed_by(); | |
const android_dlextinfo* extinfo = task->get_extinfo(); | |
// If extinfo->flags has ANDROID_DLEXT_USE_LIBRARY_FD set, the library is opened | |
// directly through the caller-supplied file descriptor. | |
if (extinfo != nullptr && (extinfo->flags & ANDROID_DLEXT_USE_LIBRARY_FD) != 0) { | |
off64_t file_offset = 0; | |
if ((extinfo->flags & ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET) != 0) { | |
file_offset = extinfo->library_fd_offset; | |
} | |
std::string realpath; | |
if (!realpath_fd(extinfo->library_fd, &realpath)) { | |
if (!is_first_stage_init()) { | |
PRINT( | |
"warning: unable to get realpath for the library \"%s\" by extinfo->library_fd. " | |
"Will use given name.", | |
name); | |
} | |
realpath = name; | |
} | |
task->set_fd(extinfo->library_fd, false); | |
task->set_file_offset(file_offset); | |
return load_library(ns, task, load_tasks, rtld_flags, realpath, search_linked_namespaces); | |
} | |
LD_LOG(kLogDlopen, | |
"load_library(ns=%s, task=%s, flags=0x%x, search_linked_namespaces=%d): calling " | |
"open_library", | |
ns->get_name(), name, rtld_flags, search_linked_namespaces); | |
// Open the file. | |
off64_t file_offset; | |
std::string realpath; | |
int fd = open_library(ns, zip_archive_cache, name, needed_by, &file_offset, &realpath); | |
// Opening the library failed: report which lookup failed and give up. | |
if (fd == -1) { | |
if (task->is_dt_needed()) { | |
if (needed_by->is_main_executable()) { | |
DL_OPEN_ERR("library \"%s\" not found: needed by main executable", name); | |
} else { | |
DL_OPEN_ERR("library \"%s\" not found: needed by %s in namespace %s", name, | |
needed_by->get_realpath(), task->get_start_from()->get_name()); | |
} | |
} else { | |
DL_OPEN_ERR("library \"%s\" not found", name); | |
} | |
return false; | |
} | |
//set fd and file_offset | |
task->set_fd(fd, true); | |
task->set_file_offset(file_offset); | |
return load_library(ns, task, load_tasks, rtld_flags, realpath, search_linked_namespaces); | |
} |
# 重载的 load_library
函数声明:
//android-platform\bionic\linker\linker.cpp | |
// Overload taking the already-resolved realpath instead of a ZipArchiveCache; the other | |
// load_library overload calls it once the task has a file descriptor and offset. | |
static bool load_library(android_namespace_t* ns, | |
LoadTask* task, | |
LoadTaskList* load_tasks, | |
int rtld_flags, | |
const std::string& realpath, | |
bool search_linked_namespaces); |
这个函数的开头做了一大堆参数合法性的检查,随后终于开始解析 so 了,其关键函数为 task->read(realpath.c_str(), file_stat.st_size)
// Allocate the soinfo for this library and attach it to the task before reading the file. | |
soinfo* si = soinfo_alloc(ns, realpath.c_str(), &file_stat, file_offset, rtld_flags); | |
task->set_soinfo(si); | |
// Read the ELF header and some of the segments. | |
// On failure everything set up above is undone: cached reader, task soinfo, and si itself. | |
if (!task->read(realpath.c_str(), file_stat.st_size)) { | |
task->remove_cached_elf_reader(); | |
task->set_soinfo(nullptr); | |
soinfo_free(si); | |
return false; | |
} | |
// Find and set DT_RUNPATH, DT_SONAME, and DT_FLAGS_1. | |
// Note that these field values are temporary and are | |
// going to be overwritten on soinfo::prelink_image | |
// with values from PT_LOAD segments. | |
const ElfReader& elf_reader = task->get_elf_reader(); | |
for (const ElfW(Dyn)* d = elf_reader.dynamic(); d->d_tag != DT_NULL; ++d) { | |
if (d->d_tag == DT_RUNPATH) { | |
si->set_dt_runpath(elf_reader.get_string(d->d_un.d_val)); | |
} | |
if (d->d_tag == DT_SONAME) { | |
si->set_soname(elf_reader.get_string(d->d_un.d_val)); | |
} | |
// We need to identify a DF_1_GLOBAL library early so we can link it to namespaces. | |
if (d->d_tag == DT_FLAGS_1) { | |
si->set_dt_flags_1(d->d_un.d_val); | |
} | |
} | |
// Queue one new LoadTask per DT_NEEDED name, with si recorded as the library that needs it. | |
for_each_dt_needed(task->get_elf_reader(), [&](const char* name) { | |
LD_LOG(kLogDlopen, "load_library(ns=%s, task=%s): Adding DT_NEEDED task: %s", | |
ns->get_name(), task->get_name(), name); | |
load_tasks->push_back(LoadTask::create(name, si, ns, task->get_readers_map())); | |
}); |
LoadTask::read
初始化了一个 ElfReader
, 随后调用 elf_reader.Read
正式开始读取
//android-platform\bionic\linker\linker.cpp | |
// Reads the library through this task's ElfReader, passing the task's own fd_ and | |
// file_offset_ together with the given path and file size; returns ElfReader::Read's result. | |
bool read(const char* realpath, off64_t file_size) { | |
ElfReader& elf_reader = get_elf_reader(); | |
return elf_reader.Read(realpath, fd_, file_offset_, file_size); | |
} |
ElfReader::Read
兜兜转转了那么久,终于看到了这个亲切又熟悉的函数了!!
关于这个函数的更多分析请参考 ELF 结构分析及 ElfReader
//android-platform\bionic\linker\linker_phdr.cpp | |
// Records the file identity and runs the read/verify steps in order, stopping at the | |
// first one that fails. Returns true if this or an earlier call completed all steps. | |
bool ElfReader::Read(const char* name, int fd, off64_t file_offset, off64_t file_size) { | |
// A previous successful read is not repeated. | |
if (did_read_) { | |
return true; | |
} | |
name_ = name; | |
fd_ = fd; | |
file_offset_ = file_offset; | |
file_size_ = file_size; | |
// && short-circuits, so later steps only run when every earlier step succeeded. | |
if (ReadElfHeader() && | |
VerifyElfHeader() && | |
ReadProgramHeaders() && | |
ReadSectionHeaders() && | |
ReadDynamicSection()) { | |
did_read_ = true; | |
} | |
return did_read_; | |
} |
在获取到待加载的 so 的各个段的结构之后,接下来就是遍历 .dynamic
中的条目,从中提取 DT_RUNPATH, DT_SONAME 和 DT_FLAGS_1 这三项信息
//android-platform\bionic\linker\linker.cpp | |
// Find and set DT_RUNPATH, DT_SONAME, and DT_FLAGS_1. | |
// Note that these field values are temporary and are | |
// going to be overwritten on soinfo::prelink_image | |
// with values from PT_LOAD segments. | |
const ElfReader& elf_reader = task->get_elf_reader(); | |
// Walk the dynamic entries until the DT_NULL terminator. | |
for (const ElfW(Dyn)* d = elf_reader.dynamic(); d->d_tag != DT_NULL; ++d) { | |
if (d->d_tag == DT_RUNPATH) { | |
si->set_dt_runpath(elf_reader.get_string(d->d_un.d_val)); | |
} | |
if (d->d_tag == DT_SONAME) { | |
si->set_soname(elf_reader.get_string(d->d_un.d_val)); | |
} | |
// We need to identify a DF_1_GLOBAL library early so we can link it to namespaces. | |
if (d->d_tag == DT_FLAGS_1) { | |
si->set_dt_flags_1(d->d_un.d_val); | |
} | |
} |
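之后找到待加载的 so 的依赖库,这里有一个模板函数 for_each_dt_needed, 它会找到 .dynamic 中所有带有 DT_NEEDED 标志的条目,这些条目对应的字符串就是这个 so 所需要的依赖库的名称,然后将它们添加到 load_tasks 队列中。需要注意的是,下面的调用传入的是 ElfReader, 而随后贴出的 linker_soinfo.h 中的模板接收的是 soinfo*; 实际被调用的应当是接收 ElfReader 的重载版本 (定义在 linker.cpp 中), 两者的遍历逻辑类似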
//android-platform\bionic\linker\linker.cpp | |
// For each DT_NEEDED name reported for this task's ElfReader, log it and append a new | |
// LoadTask (needed by si, in namespace ns) to load_tasks. | |
for_each_dt_needed(task->get_elf_reader(), [&](const char* name) { | |
LD_LOG(kLogDlopen, "load_library(ns=%s, task=%s): Adding DT_NEEDED task: %s", | |
ns->get_name(), task->get_name(), name); | |
load_tasks->push_back(LoadTask::create(name, si, ns, task->get_readers_map())); | |
}); | |
//android-platform\bionic\linker\linker_soinfo.h | |
// Invokes action once for every DT_NEEDED entry in si's dynamic array, in array order. | |
// The argument is the entry's string passed through fix_dt_needed together with si's realpath. | |
template<typename F> | |
void for_each_dt_needed(const soinfo* si, F action) { | |
// The dynamic array is terminated by a DT_NULL entry. | |
for (const ElfW(Dyn)* d = si->dynamic; d->d_tag != DT_NULL; ++d) { | |
if (d->d_tag == DT_NEEDED) { | |
action(fix_dt_needed(si->get_string(d->d_un.d_val), si->get_realpath())); | |
} | |
} | |
} |
# 乱序加载库
乱序加载的原因如下,看上去是为了抵御攻击
ld.so(1) on ELF platforms now loads libraries in a random order for greater resistance to attacks
// Step 2: Load libraries in random order (see b/24047022) | |
// Build load_list from the tasks whose soinfo is not linked yet, keeping only the first | |
// task seen for each soinfo. | |
LoadTaskList load_list; | |
for (auto&& task : load_tasks) { | |
soinfo* si = task->get_soinfo(); | |
auto pred = [&](const LoadTask* t) { | |
return t->get_soinfo() == si; | |
}; | |
if (!si->is_linked() && | |
std::find_if(load_list.begin(), load_list.end(), pred) == load_list.end() ) { | |
load_list.push_back(task); | |
} | |
} | |
bool reserved_address_recursive = false; | |
if (extinfo) { | |
reserved_address_recursive = extinfo->flags & ANDROID_DLEXT_RESERVED_ADDRESS_RECURSIVE; | |
} | |
if (!reserved_address_recursive) { | |
// Shuffle the load order in the normal case, but not if we are loading all | |
// the libraries to a reserved address range. | |
shuffle(&load_list); | |
} | |
// Set up address space parameters. | |
address_space_params extinfo_params, default_params; | |
size_t relro_fd_offset = 0; | |
if (extinfo) { | |
if (extinfo->flags & ANDROID_DLEXT_RESERVED_ADDRESS) { | |
extinfo_params.start_addr = extinfo->reserved_addr; | |
extinfo_params.reserved_size = extinfo->reserved_size; | |
extinfo_params.must_use_address = true; | |
} else if (extinfo->flags & ANDROID_DLEXT_RESERVED_ADDRESS_HINT) { | |
extinfo_params.start_addr = extinfo->reserved_addr; | |
extinfo_params.reserved_size = extinfo->reserved_size; | |
} | |
} | |
for (auto&& task : load_list) { | |
// DT_NEEDED dependencies use default_params unless the reserved range is recursive; | |
// every other task uses the parameters derived from extinfo. | |
address_space_params* address_space = | |
(reserved_address_recursive || !task->is_dt_needed()) ? &extinfo_params : &default_params; | |
// Load every queued library, including the one that was originally requested. | |
if (!task->load(address_space)) { | |
return false; | |
} | |
} |
load 函数先调用 elf_reader.Load(address_space) 完成实际的加载 (失败则直接返回 false), 成功后再把 ElfReader 中的结果赋值给 si_, 其中比较重要的有 phdr_count() (赋给 si_->phnum) 和 loaded_phdr() (赋给 si_->phdr)
// Loads the library through this task's ElfReader and, on success, copies the resulting | |
// values (base, size, load bias, program headers, gap) into the task's soinfo. | |
// Returns false, leaving si_ untouched, when ElfReader::Load fails. | |
bool load(address_space_params* address_space) { | |
ElfReader& elf_reader = get_elf_reader(); | |
if (!elf_reader.Load(address_space)) { | |
return false; | |
} | |
si_->base = elf_reader.load_start(); | |
si_->size = elf_reader.load_size(); | |
si_->set_mapped_by_caller(elf_reader.is_mapped_by_caller()); | |
si_->load_bias = elf_reader.load_bias(); | |
si_->phnum = elf_reader.phdr_count(); | |
si_->phdr = elf_reader.loaded_phdr(); | |
si_->set_gap_start(elf_reader.gap_start()); | |
si_->set_gap_size(elf_reader.gap_size()); | |
return true; | |
} |
# 预链接解析所有依赖库
// Step 3: pre-link all DT_NEEDED libraries in breadth first order. | |
// Libraries that are already linked skip prelink_image; a prelink failure aborts the | |
// whole operation. register_soinfo_tls is called for every task's soinfo either way. | |
for (auto&& task : load_tasks) { | |
soinfo* si = task->get_soinfo(); | |
if (!si->is_linked() && !si->prelink_image()) { | |
return false; | |
} | |
register_soinfo_tls(si); | |
} |
这里我们看到调用了 prelink_image
来预链接依赖库,主要是遍历 .dynamic
节,来提取必要的信息例如 strtab_
, symtab_
, plt_rela_
, init_array_
等等各种必要的信息
bool soinfo::prelink_image() { | |
if (flags_ & FLAG_PRELINKED) return true; | |
/* Extract dynamic section */ | |
ElfW(Word) dynamic_flags = 0; | |
// 提取动态节(dynamic section) | |
phdr_table_get_dynamic_section(phdr, phnum, load_bias, &dynamic, &dynamic_flags); | |
/* We can't log anything until the linker is relocated */ | |
bool relocating_linker = (flags_ & FLAG_LINKER) != 0; | |
if (!relocating_linker) { | |
INFO("[ Linking \"%s\" ]", get_realpath()); | |
DEBUG("si->base = %p si->flags = 0x%08x", reinterpret_cast<void*>(base), flags_); | |
} | |
if (dynamic == nullptr) { | |
if (!relocating_linker) { | |
DL_ERR("missing PT_DYNAMIC in \"%s\"", get_realpath()); | |
} | |
return false; | |
} else { | |
if (!relocating_linker) { | |
DEBUG("dynamic = %p", dynamic); | |
} | |
} | |
#if defined(__arm__) | |
(void) phdr_table_get_arm_exidx(phdr, phnum, load_bias, | |
&ARM_exidx, &ARM_exidx_count); | |
#endif | |
TlsSegment tls_segment; | |
if (__bionic_get_tls_segment(phdr, phnum, load_bias, &tls_segment)) { | |
if (!__bionic_check_tls_alignment(&tls_segment.alignment)) { | |
if (!relocating_linker) { | |
DL_ERR("TLS segment alignment in \"%s\" is not a power of 2: %zu", | |
get_realpath(), tls_segment.alignment); | |
} | |
return false; | |
} | |
tls_ = std::make_unique<soinfo_tls>(); | |
tls_->segment = tls_segment; | |
} | |
// Extract useful information from dynamic section. | |
// Note that: "Except for the DT_NULL element at the end of the array, | |
// and the relative order of DT_NEEDED elements, entries may appear in any order." | |
// | |
// source: http://www.sco.com/developers/gabi/1998-04-29/ch5.dynamic.html | |
uint32_t needed_count = 0; | |
// 循环遍历 .dynamic 节中的每一项(ElfW(Dyn)),并根据 d_tag 做相应的处理 | |
for (ElfW(Dyn)* d = dynamic; d->d_tag != DT_NULL; ++d) { | |
DEBUG("d = %p, d[0](tag) = %p d[1](val) = %p", | |
d, reinterpret_cast<void*>(d->d_tag), reinterpret_cast<void*>(d->d_un.d_val)); | |
switch (d->d_tag) { | |
case DT_SONAME: | |
// this is parsed after we have strtab initialized (see below). | |
break; | |
case DT_HASH: | |
nbucket_ = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr)[0]; | |
nchain_ = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr)[1]; | |
bucket_ = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr + 8); | |
chain_ = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr + 8 + nbucket_ * 4); | |
break; | |
case DT_GNU_HASH: | |
gnu_nbucket_ = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr)[0]; | |
// skip symndx | |
gnu_maskwords_ = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr)[2]; | |
gnu_shift2_ = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr)[3]; | |
gnu_bloom_filter_ = reinterpret_cast<ElfW(Addr)*>(load_bias + d->d_un.d_ptr + 16); | |
gnu_bucket_ = reinterpret_cast<uint32_t*>(gnu_bloom_filter_ + gnu_maskwords_); | |
// amend chain for symndx = header[1] | |
gnu_chain_ = gnu_bucket_ + gnu_nbucket_ - | |
reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr)[1]; | |
if (!powerof2(gnu_maskwords_)) { | |
DL_ERR("invalid maskwords for gnu_hash = 0x%x, in \"%s\" expecting power to two", | |
gnu_maskwords_, get_realpath()); | |
return false; | |
} | |
--gnu_maskwords_; | |
flags_ |= FLAG_GNU_HASH; | |
break; | |
case DT_STRTAB: | |
strtab_ = reinterpret_cast<const char*>(load_bias + d->d_un.d_ptr); | |
break; | |
case DT_STRSZ: | |
strtab_size_ = d->d_un.d_val; | |
break; | |
case DT_SYMTAB: | |
symtab_ = reinterpret_cast<ElfW(Sym)*>(load_bias + d->d_un.d_ptr); | |
break; | |
case DT_SYMENT: | |
if (d->d_un.d_val != sizeof(ElfW(Sym))) { | |
DL_ERR("invalid DT_SYMENT: %zd in \"%s\"", | |
static_cast<size_t>(d->d_un.d_val), get_realpath()); | |
return false; | |
} | |
break; | |
case DT_PLTREL: | |
#if defined(USE_RELA) | |
if (d->d_un.d_val != DT_RELA) { | |
DL_ERR("unsupported DT_PLTREL in \"%s\"; expected DT_RELA", get_realpath()); | |
return false; | |
} | |
#else | |
if (d->d_un.d_val != DT_REL) { | |
DL_ERR("unsupported DT_PLTREL in \"%s\"; expected DT_REL", get_realpath()); | |
return false; | |
} | |
#endif | |
break; | |
case DT_JMPREL: | |
#if defined(USE_RELA) | |
plt_rela_ = reinterpret_cast<ElfW(Rela)*>(load_bias + d->d_un.d_ptr); | |
#else | |
plt_rel_ = reinterpret_cast<ElfW(Rel)*>(load_bias + d->d_un.d_ptr); | |
#endif | |
break; | |
case DT_PLTRELSZ: | |
#if defined(USE_RELA) | |
plt_rela_count_ = d->d_un.d_val / sizeof(ElfW(Rela)); | |
#else | |
plt_rel_count_ = d->d_un.d_val / sizeof(ElfW(Rel)); | |
#endif | |
break; | |
case DT_PLTGOT: | |
// Ignored (because RTLD_LAZY is not supported). | |
break; | |
case DT_DEBUG: | |
// Set the DT_DEBUG entry to the address of _r_debug for GDB | |
// if the dynamic table is writable | |
if ((dynamic_flags & PF_W) != 0) { | |
d->d_un.d_val = reinterpret_cast<uintptr_t>(&_r_debug); | |
} | |
break; | |
#if defined(USE_RELA) | |
case DT_RELA: | |
rela_ = reinterpret_cast<ElfW(Rela)*>(load_bias + d->d_un.d_ptr); | |
break; | |
case DT_RELASZ: | |
rela_count_ = d->d_un.d_val / sizeof(ElfW(Rela)); | |
break; | |
case DT_ANDROID_RELA: | |
android_relocs_ = reinterpret_cast<uint8_t*>(load_bias + d->d_un.d_ptr); | |
break; | |
case DT_ANDROID_RELASZ: | |
android_relocs_size_ = d->d_un.d_val; | |
break; | |
case DT_ANDROID_REL: | |
DL_ERR("unsupported DT_ANDROID_REL in \"%s\"", get_realpath()); | |
return false; | |
case DT_ANDROID_RELSZ: | |
DL_ERR("unsupported DT_ANDROID_RELSZ in \"%s\"", get_realpath()); | |
return false; | |
case DT_RELAENT: | |
if (d->d_un.d_val != sizeof(ElfW(Rela))) { | |
DL_ERR("invalid DT_RELAENT: %zd", static_cast<size_t>(d->d_un.d_val)); | |
return false; | |
} | |
break; | |
// Ignored (see DT_RELCOUNT comments for details). | |
case DT_RELACOUNT: | |
break; | |
case DT_REL: | |
DL_ERR("unsupported DT_REL in \"%s\"", get_realpath()); | |
return false; | |
case DT_RELSZ: | |
DL_ERR("unsupported DT_RELSZ in \"%s\"", get_realpath()); | |
return false; | |
#else | |
case DT_REL: | |
rel_ = reinterpret_cast<ElfW(Rel)*>(load_bias + d->d_un.d_ptr); | |
break; | |
case DT_RELSZ: | |
rel_count_ = d->d_un.d_val / sizeof(ElfW(Rel)); | |
break; | |
case DT_RELENT: | |
if (d->d_un.d_val != sizeof(ElfW(Rel))) { | |
DL_ERR("invalid DT_RELENT: %zd", static_cast<size_t>(d->d_un.d_val)); | |
return false; | |
} | |
break; | |
case DT_ANDROID_REL: | |
android_relocs_ = reinterpret_cast<uint8_t*>(load_bias + d->d_un.d_ptr); | |
break; | |
case DT_ANDROID_RELSZ: | |
android_relocs_size_ = d->d_un.d_val; | |
break; | |
case DT_ANDROID_RELA: | |
DL_ERR("unsupported DT_ANDROID_RELA in \"%s\"", get_realpath()); | |
return false; | |
case DT_ANDROID_RELASZ: | |
DL_ERR("unsupported DT_ANDROID_RELASZ in \"%s\"", get_realpath()); | |
return false; | |
// "Indicates that all RELATIVE relocations have been concatenated together, | |
// and specifies the RELATIVE relocation count." | |
// | |
// TODO: Spec also mentions that this can be used to optimize relocation process; | |
// Not currently used by bionic linker - ignored. | |
case DT_RELCOUNT: | |
break; | |
case DT_RELA: | |
DL_ERR("unsupported DT_RELA in \"%s\"", get_realpath()); | |
return false; | |
case DT_RELASZ: | |
DL_ERR("unsupported DT_RELASZ in \"%s\"", get_realpath()); | |
return false; | |
#endif | |
case DT_RELR: | |
case DT_ANDROID_RELR: | |
relr_ = reinterpret_cast<ElfW(Relr)*>(load_bias + d->d_un.d_ptr); | |
break; | |
case DT_RELRSZ: | |
case DT_ANDROID_RELRSZ: | |
relr_count_ = d->d_un.d_val / sizeof(ElfW(Relr)); | |
break; | |
case DT_RELRENT: | |
case DT_ANDROID_RELRENT: | |
if (d->d_un.d_val != sizeof(ElfW(Relr))) { | |
DL_ERR("invalid DT_RELRENT: %zd", static_cast<size_t>(d->d_un.d_val)); | |
return false; | |
} | |
break; | |
// Ignored (see DT_RELCOUNT comments for details). | |
// There is no DT_RELRCOUNT specifically because it would only be ignored. | |
case DT_ANDROID_RELRCOUNT: | |
break; | |
case DT_INIT: | |
init_func_ = reinterpret_cast<linker_ctor_function_t>(load_bias + d->d_un.d_ptr); | |
DEBUG("%s constructors (DT_INIT) found at %p", get_realpath(), init_func_); | |
break; | |
case DT_FINI: | |
fini_func_ = reinterpret_cast<linker_dtor_function_t>(load_bias + d->d_un.d_ptr); | |
DEBUG("%s destructors (DT_FINI) found at %p", get_realpath(), fini_func_); | |
break; | |
case DT_INIT_ARRAY: | |
init_array_ = reinterpret_cast<linker_ctor_function_t*>(load_bias + d->d_un.d_ptr); | |
DEBUG("%s constructors (DT_INIT_ARRAY) found at %p", get_realpath(), init_array_); | |
break; | |
case DT_INIT_ARRAYSZ: | |
init_array_count_ = static_cast<uint32_t>(d->d_un.d_val) / sizeof(ElfW(Addr)); | |
break; | |
case DT_FINI_ARRAY: | |
fini_array_ = reinterpret_cast<linker_dtor_function_t*>(load_bias + d->d_un.d_ptr); | |
DEBUG("%s destructors (DT_FINI_ARRAY) found at %p", get_realpath(), fini_array_); | |
break; | |
case DT_FINI_ARRAYSZ: | |
fini_array_count_ = static_cast<uint32_t>(d->d_un.d_val) / sizeof(ElfW(Addr)); | |
break; | |
case DT_PREINIT_ARRAY: | |
preinit_array_ = reinterpret_cast<linker_ctor_function_t*>(load_bias + d->d_un.d_ptr); | |
DEBUG("%s constructors (DT_PREINIT_ARRAY) found at %p", get_realpath(), preinit_array_); | |
break; | |
case DT_PREINIT_ARRAYSZ: | |
preinit_array_count_ = static_cast<uint32_t>(d->d_un.d_val) / sizeof(ElfW(Addr)); | |
break; | |
case DT_TEXTREL: | |
#if defined(__LP64__) | |
DL_ERR("\"%s\" has text relocations", get_realpath()); | |
return false; | |
#else | |
has_text_relocations = true; | |
break; | |
#endif | |
case DT_SYMBOLIC: | |
has_DT_SYMBOLIC = true; | |
break; | |
case DT_NEEDED: | |
++needed_count; | |
break; | |
case DT_FLAGS: | |
if (d->d_un.d_val & DF_TEXTREL) { | |
#if defined(__LP64__) | |
DL_ERR("\"%s\" has text relocations", get_realpath()); | |
return false; | |
#else | |
has_text_relocations = true; | |
#endif | |
} | |
if (d->d_un.d_val & DF_SYMBOLIC) { | |
has_DT_SYMBOLIC = true; | |
} | |
break; | |
case DT_FLAGS_1: | |
set_dt_flags_1(d->d_un.d_val); | |
if ((d->d_un.d_val & ~SUPPORTED_DT_FLAGS_1) != 0) { | |
DL_WARN("Warning: \"%s\" has unsupported flags DT_FLAGS_1=%p " | |
"(ignoring unsupported flags)", | |
get_realpath(), reinterpret_cast<void*>(d->d_un.d_val)); | |
} | |
break; | |
// Ignored: "Its use has been superseded by the DF_BIND_NOW flag" | |
case DT_BIND_NOW: | |
break; | |
case DT_VERSYM: | |
versym_ = reinterpret_cast<ElfW(Versym)*>(load_bias + d->d_un.d_ptr); | |
break; | |
case DT_VERDEF: | |
verdef_ptr_ = load_bias + d->d_un.d_ptr; | |
break; | |
case DT_VERDEFNUM: | |
verdef_cnt_ = d->d_un.d_val; | |
break; | |
case DT_VERNEED: | |
verneed_ptr_ = load_bias + d->d_un.d_ptr; | |
break; | |
case DT_VERNEEDNUM: | |
verneed_cnt_ = d->d_un.d_val; | |
break; | |
case DT_RUNPATH: | |
// this is parsed after we have strtab initialized (see below). | |
break; | |
case DT_TLSDESC_GOT: | |
case DT_TLSDESC_PLT: | |
// These DT entries are used for lazy TLSDESC relocations. Bionic | |
// resolves everything eagerly, so these can be ignored. | |
break; | |
#if defined(__aarch64__) | |
case DT_AARCH64_BTI_PLT: | |
case DT_AARCH64_PAC_PLT: | |
case DT_AARCH64_VARIANT_PCS: | |
// Ignored: AArch64 processor-specific dynamic array tags. | |
break; | |
#endif | |
default: | |
if (!relocating_linker) { | |
const char* tag_name; | |
if (d->d_tag == DT_RPATH) { | |
tag_name = "DT_RPATH"; | |
} else if (d->d_tag == DT_ENCODING) { | |
tag_name = "DT_ENCODING"; | |
} else if (d->d_tag >= DT_LOOS && d->d_tag <= DT_HIOS) { | |
tag_name = "unknown OS-specific"; | |
} else if (d->d_tag >= DT_LOPROC && d->d_tag <= DT_HIPROC) { | |
tag_name = "unknown processor-specific"; | |
} else { | |
tag_name = "unknown"; | |
} | |
DL_WARN("Warning: \"%s\" unused DT entry: %s (type %p arg %p) (ignoring)", | |
get_realpath(), | |
tag_name, | |
reinterpret_cast<void*>(d->d_tag), | |
reinterpret_cast<void*>(d->d_un.d_val)); | |
} | |
break; | |
} | |
} | |
DEBUG("si->base = %p, si->strtab = %p, si->symtab = %p", | |
reinterpret_cast<void*>(base), strtab_, symtab_); | |
// Validity checks. | |
if (relocating_linker && needed_count != 0) { | |
DL_ERR("linker cannot have DT_NEEDED dependencies on other libraries"); | |
return false; | |
} | |
if (nbucket_ == 0 && gnu_nbucket_ == 0) { | |
DL_ERR("empty/missing DT_HASH/DT_GNU_HASH in \"%s\" " | |
"(new hash type from the future?)", get_realpath()); | |
return false; | |
} | |
if (strtab_ == nullptr) { | |
DL_ERR("empty/missing DT_STRTAB in \"%s\"", get_realpath()); | |
return false; | |
} | |
if (symtab_ == nullptr) { | |
DL_ERR("empty/missing DT_SYMTAB in \"%s\"", get_realpath()); | |
return false; | |
} | |
// Second pass - parse entries relying on strtab. Skip this while relocating the linker so as to | |
// avoid doing heap allocations until later in the linker's initialization. | |
if (!relocating_linker) { | |
for (ElfW(Dyn)* d = dynamic; d->d_tag != DT_NULL; ++d) { | |
switch (d->d_tag) { | |
case DT_SONAME: | |
set_soname(get_string(d->d_un.d_val)); | |
break; | |
case DT_RUNPATH: | |
set_dt_runpath(get_string(d->d_un.d_val)); | |
break; | |
} | |
} | |
} | |
// Before M release, linker was using basename in place of soname. In the case when DT_SONAME is | |
// absent some apps stop working because they can't find DT_NEEDED library by soname. This | |
// workaround should keep them working. (Applies only for apps targeting sdk version < M.) Make | |
// an exception for the main executable, which does not need to have DT_SONAME. The linker has an | |
// DT_SONAME but the soname_ field is initialized later on. | |
if (soname_.empty() && this != solist_get_somain() && !relocating_linker && | |
get_application_target_sdk_version() < 23) { | |
soname_ = basename(realpath_.c_str()); | |
DL_WARN_documented_change(23, "missing-soname-enforced-for-api-level-23", | |
"\"%s\" has no DT_SONAME (will use %s instead)", get_realpath(), | |
soname_.c_str()); | |
// Don't call add_dlwarning because a missing DT_SONAME isn't important enough to show in the UI | |
} | |
// Validate each library's verdef section once, so we don't have to validate | |
// it each time we look up a symbol with a version. | |
if (!validate_verdef_section(this)) return false; | |
flags_ |= FLAG_PRELINKED; | |
return true; | |
} |
# 构造全局组
这一步为通过 LD_PRELOAD 预加载的库(`ld_preloads`)强制设置 `DF_1_GLOBAL` 标志,把它们加入全局组;普通库的 `DF_1_GLOBAL` 标志一般在上一步 `prelink_image` 解析 `DT_FLAGS_1` 时就已经设置好了
// Step 4: Construct the global group. DF_1_GLOBAL bit is force set for LD_PRELOADed libs because | |
// they must be added to the global group. Note: The DF_1_GLOBAL bit for a library is normally set | |
// in step 3. | |
if (ld_preloads != nullptr) { | |
for (auto&& si : *ld_preloads) { | |
si->set_dt_flags_1(si->get_dt_flags_1() | DF_1_GLOBAL); | |
} | |
} |
# 收集 local_groups 的根节点
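注释比较抽象,大意是:当依赖关系(needed_by -> si)跨越了 namespace 的边界时,被依赖的库 si 会形成自己的 local_group。这里把这些库收集为新的根节点(需要去重,并保持 BFS 顺序),以便在后面分别进行链接,从而保证可以链接到别的 namespace 里面的依赖库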
// Step 5: Collect roots of local_groups. | |
// Whenever needed_by->si link crosses a namespace boundary it forms its own local_group. | |
// Here we collect new roots to link them separately later on. Note that we need to avoid | |
// collecting duplicates. Also the order is important. They need to be linked in the same | |
// BFS order we link individual libraries. | |
std::vector<soinfo*> local_group_roots; | |
if (start_with != nullptr && add_as_children) { | |
local_group_roots.push_back(start_with); | |
} else { | |
CHECK(soinfos_count == 1); | |
local_group_roots.push_back(soinfos[0]); | |
} | |
for (auto&& task : load_tasks) { | |
soinfo* si = task->get_soinfo(); | |
soinfo* needed_by = task->get_needed_by(); | |
bool is_dt_needed = needed_by != nullptr && (needed_by != start_with || add_as_children); | |
android_namespace_t* needed_by_ns = | |
is_dt_needed ? needed_by->get_primary_namespace() : ns; | |
if (!si->is_linked() && si->get_primary_namespace() != needed_by_ns) { | |
auto it = std::find(local_group_roots.begin(), local_group_roots.end(), si); | |
LD_LOG(kLogDlopen, | |
"Crossing namespace boundary (si=%s@%p, si_ns=%s@%p, needed_by=%s@%p, ns=%s@%p, needed_by_ns=%s@%p) adding to local_group_roots: %s", | |
si->get_realpath(), | |
si, | |
si->get_primary_namespace()->get_name(), | |
si->get_primary_namespace(), | |
needed_by == nullptr ? "(nullptr)" : needed_by->get_realpath(), | |
needed_by, | |
ns->get_name(), | |
ns, | |
needed_by_ns->get_name(), | |
needed_by_ns, | |
it == local_group_roots.end() ? "yes" : "no"); | |
if (it == local_group_roots.end()) { | |
local_group_roots.push_back(si); | |
} | |
} | |
} |
# 链接所有的 local groups
// Step 6: Link all local groups | |
for (auto root : local_group_roots) { | |
soinfo_list_t local_group; | |
android_namespace_t* local_group_ns = root->get_primary_namespace(); | |
walk_dependencies_tree(root, | |
[&] (soinfo* si) { | |
if (local_group_ns->is_accessible(si)) { | |
local_group.push_back(si); | |
return kWalkContinue; | |
} else { | |
return kWalkSkip; | |
} | |
}); | |
// 获取全局组包含的 soinfo,因为预加载库是一起加载的,跟 local_group_ns 是同一个命名空间, | |
// 所以这里的全局组已经包含了预加载库 | |
soinfo_list_t global_group = local_group_ns->get_global_group(); | |
// 将本地组和全局组都添加到 lookup_list 中 | |
SymbolLookupList lookup_list(global_group, local_group); | |
soinfo* local_group_root = local_group.front(); | |
bool linked = local_group.visit([&](soinfo* si) { | |
// Even though local group may contain accessible soinfos from other namespaces | |
// we should avoid linking them (because if they are not linked -> they | |
// are in the local_group_roots and will be linked later). | |
if (!si->is_linked() && si->get_primary_namespace() == local_group_ns) { | |
const android_dlextinfo* link_extinfo = nullptr; | |
if (si == soinfos[0] || reserved_address_recursive) { | |
// Only forward extinfo for the first library unless the recursive | |
// flag is set. | |
link_extinfo = extinfo; | |
} | |
if (__libc_shared_globals()->load_hook) { | |
__libc_shared_globals()->load_hook(si->load_bias, si->phdr, si->phnum); | |
} | |
lookup_list.set_dt_symbolic_lib(si->has_DT_SYMBOLIC ? si : nullptr); | |
// 调用 link_image 开始进行依赖库的动态链接,重定位等工作 | |
if (!si->link_image(lookup_list, local_group_root, link_extinfo, &relro_fd_offset) || | |
!get_cfi_shadow()->AfterLoad(si, solist_get_head())) { | |
return false; | |
} | |
} | |
return true; | |
}); | |
if (!linked) { | |
return false; | |
} | |
} |
当把本地组和全局组都加入到 `lookup_list` 中后,就开始对本地组中尚未链接的库逐个调用 `si->link_image` 来进行链接的操作
bool soinfo::link_image(const SymbolLookupList& lookup_list, soinfo* local_group_root, | |
const android_dlextinfo* extinfo, size_t* relro_fd_offset) { | |
if (is_image_linked()) { | |
// already linked. | |
return true; | |
} | |
if (g_is_ldd && !is_main_executable()) { | |
async_safe_format_fd(STDOUT_FILENO, "\t%s => %s (%p)\n", get_soname(), | |
get_realpath(), reinterpret_cast<void*>(base)); | |
} | |
local_group_root_ = local_group_root; | |
if (local_group_root_ == nullptr) { | |
local_group_root_ = this; | |
} | |
if ((flags_ & FLAG_LINKER) == 0 && local_group_root_ == this) { | |
target_sdk_version_ = get_application_target_sdk_version(); | |
} | |
... | |
// 进行符号的重定位 | |
if (!relocate(lookup_list)) { | |
return false; | |
} | |
DEBUG("[ finished linking %s ]", get_realpath()); | |
... | |
} |
在 `soinfo::link_image` 中调用了 `relocate` 去进行符号的重定位
//android-platform\bionic\linker\linker_relocate.cpp | |
bool soinfo::relocate(const SymbolLookupList& lookup_list) { | |
VersionTracker version_tracker; | |
if (!version_tracker.init(this)) { | |
return false; | |
} | |
Relocator relocator(version_tracker, lookup_list); | |
relocator.si = this; | |
// strtab_ 来自 DT_STRTAB,指向动态字符串表(`.dynstr` 节),表中的内容会被 `.dynsym` 的 `ElfN_Sym` 结构中的 `st_name` 引用 | |
relocator.si_strtab = strtab_; | |
relocator.si_strtab_size = has_min_version(1) ? strtab_size_ : SIZE_MAX; | |
// symtab_ 来自 DT_SYMTAB,指向动态符号表(`.dynsym` 节),它是一个 `ElfN_Sym` 的数组,保存了符号信息 | |
relocator.si_symtab = symtab_; | |
relocator.tlsdesc_args = &tlsdesc_args_; | |
relocator.tls_tp_base = __libc_shared_globals()->static_tls_layout.offset_thread_pointer(); | |
// android_relocs_ 在 prelink_image() 中设置,只有动态节中存在 DT_ANDROID_REL(定义了 USE_RELA 时为 DT_ANDROID_RELA)才会设置 | |
if (android_relocs_ != nullptr) { | |
// check signature | |
if (android_relocs_size_ > 3 && | |
android_relocs_[0] == 'A' && | |
android_relocs_[1] == 'P' && | |
android_relocs_[2] == 'S' && | |
android_relocs_[3] == '2') { | |
DEBUG("[ android relocating %s ]", get_realpath()); | |
const uint8_t* packed_relocs = android_relocs_ + 4; | |
const size_t packed_relocs_size = android_relocs_size_ - 4; | |
if (!packed_relocate<RelocMode::Typical>(relocator, sleb128_decoder(packed_relocs, packed_relocs_size))) { | |
return false; | |
} | |
} else { | |
DL_ERR("bad android relocation header."); | |
return false; | |
} | |
} | |
if (relr_ != nullptr) { | |
DEBUG("[ relocating %s relr ]", get_realpath()); | |
if (!relocate_relr()) { | |
return false; | |
} | |
} | |
#if defined(USE_RELA) | |
if (rela_ != nullptr) { | |
DEBUG("[ relocating %s rela ]", get_realpath()); | |
if (!plain_relocate<RelocMode::Typical>(relocator, rela_, rela_count_)) { | |
return false; | |
} | |
} | |
if (plt_rela_ != nullptr) { | |
DEBUG("[ relocating %s plt rela ]", get_realpath()); | |
if (!plain_relocate<RelocMode::JumpTable>(relocator, plt_rela_, plt_rela_count_)) { | |
return false; | |
} | |
} | |
#else | |
if (rel_ != nullptr) { | |
DEBUG("[ relocating %s rel ]", get_realpath()); | |
if (!plain_relocate<RelocMode::Typical>(relocator, rel_, rel_count_)) { | |
return false; | |
} | |
} | |
if (plt_rel_ != nullptr) { | |
DEBUG("[ relocating %s plt rel ]", get_realpath()); | |
if (!plain_relocate<RelocMode::JumpTable>(relocator, plt_rel_, plt_rel_count_)) { | |
return false; | |
} | |
} | |
#endif | |
// Once the tlsdesc_args_ vector's size is finalized, we can write the addresses of its elements | |
// into the TLSDESC relocations. | |
#if defined(__aarch64__) | |
// Bionic currently only implements TLSDESC for arm64. | |
for (const std::pair<TlsDescriptor*, size_t>& pair : relocator.deferred_tlsdesc_relocs) { | |
TlsDescriptor* desc = pair.first; | |
desc->func = tlsdesc_resolver_dynamic; | |
desc->arg = reinterpret_cast<size_t>(&tlsdesc_args_[pair.second]); | |
} | |
#endif | |
return true; | |
} |
随后依次调用了 plain_relocate->plain_relocate_impl->process_relocation
template <RelocMode OptMode, typename ...Args> | |
static bool plain_relocate(Relocator& relocator, Args ...args) { | |
return needs_slow_relocate_loop(relocator) ? | |
plain_relocate_impl<RelocMode::General>(relocator, args...) : | |
plain_relocate_impl<OptMode>(relocator, args...); | |
} | |
template <RelocMode Mode> | |
__attribute__((noinline)) | |
static bool plain_relocate_impl(Relocator& relocator, rel_t* rels, size_t rel_count) { | |
for (size_t i = 0; i < rel_count; ++i) { | |
if (!process_relocation<Mode>(relocator, rels[i])) { | |
return false; | |
} | |
} | |
return true; | |
} | |
template <RelocMode Mode> | |
__attribute__((always_inline)) | |
static inline bool process_relocation(Relocator& relocator, const rel_t& reloc) { | |
return Mode == RelocMode::General ? | |
process_relocation_general(relocator, reloc) : | |
process_relocation_impl<Mode>(relocator, reloc); | |
} |
最终在 `process_relocation_impl` 中先调用 `lookup_symbol` 来查找符号,再根据重定位类型完成符号的重定位
template <RelocMode Mode> | |
__attribute__((always_inline)) | |
static bool process_relocation_impl(Relocator& relocator, const rel_t& reloc) { | |
constexpr bool IsGeneral = Mode == RelocMode::General; | |
void* const rel_target = reinterpret_cast<void*>(reloc.r_offset + relocator.si->load_bias); | |
const uint32_t r_type = ELFW(R_TYPE)(reloc.r_info); | |
const uint32_t r_sym = ELFW(R_SYM)(reloc.r_info); | |
soinfo* found_in = nullptr; | |
const ElfW(Sym)* sym = nullptr; | |
const char* sym_name = nullptr; | |
ElfW(Addr) sym_addr = 0; | |
if (r_sym != 0) { | |
// 获取待重定位的符号名 | |
sym_name = relocator.get_string(relocator.si_symtab[r_sym].st_name); | |
} | |
... | |
#if defined(USE_RELA) | |
auto get_addend_rel = [&]() -> ElfW(Addr) { return reloc.r_addend; }; | |
auto get_addend_norel = [&]() -> ElfW(Addr) { return reloc.r_addend; }; | |
#else | |
auto get_addend_rel = [&]() -> ElfW(Addr) { return *static_cast<ElfW(Addr)*>(rel_target); }; | |
auto get_addend_norel = [&]() -> ElfW(Addr) { return 0; }; | |
#endif | |
if (IsGeneral && is_tls_reloc(r_type)) { | |
... | |
} else { | |
if (r_sym == 0) { | |
// Do nothing. | |
} else { | |
// 利用 lookup_symbol 来查找符号 | |
if (!lookup_symbol<IsGeneral>(relocator, r_sym, sym_name, &found_in, &sym)) return false; | |
if (sym != nullptr) { | |
const bool should_protect_segments = handle_text_relocs && | |
found_in == relocator.si && | |
ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC; | |
if (should_protect_segments && !protect_segments()) return false; | |
sym_addr = found_in->resolve_symbol_address(sym); | |
if (should_protect_segments && !unprotect_segments()) return false; | |
} else if constexpr (IsGeneral) { | |
// A weak reference to an undefined symbol. We typically use a zero symbol address, but | |
// use the relocation base for PC-relative relocations, so that the value written is zero. | |
switch (r_type) { | |
#if defined(__x86_64__) | |
case R_X86_64_PC32: | |
sym_addr = reinterpret_cast<ElfW(Addr)>(rel_target); | |
break; | |
#elif defined(__i386__) | |
case R_386_PC32: | |
sym_addr = reinterpret_cast<ElfW(Addr)>(rel_target); | |
break; | |
#endif | |
} | |
} | |
} | |
} | |
// PLT 重定位项(JumpTable 模式)的类型大部分都是 R_GENERIC_JUMP_SLOT | |
if constexpr (IsGeneral || Mode == RelocMode::JumpTable) { | |
if (r_type == R_GENERIC_JUMP_SLOT) { | |
count_relocation_if<IsGeneral>(kRelocAbsolute); | |
const ElfW(Addr) result = sym_addr + get_addend_norel(); | |
trace_reloc("RELO JMP_SLOT %16p <- %16p %s", | |
rel_target, reinterpret_cast<void*>(result), sym_name); | |
*static_cast<ElfW(Addr)*>(rel_target) = result; | |
return true; | |
} | |
} | |
// 其他类型或架构的相关重定位计算省略 | |
... | |
} |
在 `lookup_symbol` 中,如果本次的符号索引与上一次缓存的相同,就直接复用缓存的结果,否则调用 `soinfo_do_lookup` 来查找符号
template <bool DoLogging> | |
__attribute__((always_inline)) | |
static inline bool lookup_symbol(Relocator& relocator, uint32_t r_sym, const char* sym_name, | |
soinfo** found_in, const ElfW(Sym)** sym) { | |
// relocator.lookup_list 是前面传进来的包含全局组和本地组的 soinfo 列表,全局组排在最前面 | |
// 如果上一次已经查找过这个符号,那么就没有必要再查找一次 | |
if (r_sym == relocator.cache_sym_val) { | |
*found_in = relocator.cache_si; | |
*sym = relocator.cache_sym; | |
count_relocation_if<DoLogging>(kRelocSymbolCached); | |
} else { | |
const version_info* vi = nullptr; | |
if (!relocator.si->lookup_version_info(relocator.version_tracker, r_sym, sym_name, &vi)) { | |
return false; | |
} | |
soinfo* local_found_in = nullptr; | |
// 最终调用 soinfo_do_lookup 来查找符号 | |
const ElfW(Sym)* local_sym = soinfo_do_lookup(sym_name, vi, &local_found_in, relocator.lookup_list); | |
relocator.cache_sym_val = r_sym; | |
relocator.cache_si = local_found_in; | |
relocator.cache_sym = local_sym; | |
*found_in = local_found_in; | |
*sym = local_sym; | |
} | |
if (*sym == nullptr) { | |
if (ELF_ST_BIND(relocator.si_symtab[r_sym].st_info) != STB_WEAK) { | |
DL_ERR("cannot locate symbol \"%s\" referenced by \"%s\"...", sym_name, relocator.si->get_realpath()); | |
return false; | |
} | |
} | |
count_relocation_if<DoLogging>(kRelocSymbol); | |
return true; | |
} |
在 `soinfo_do_lookup` 中根据 `lookup_list.needs_slow_path()` 选择模板参数,最终调用模板函数 `soinfo_do_lookup_impl` 进行符号查找
//android-platform\bionic\linker\linker_soinfo.cpp | |
const ElfW(Sym)* soinfo_do_lookup(const char* name, const version_info* vi, | |
soinfo** si_found_in, const SymbolLookupList& lookup_list) { | |
return lookup_list.needs_slow_path() ? | |
soinfo_do_lookup_impl<true>(name, vi, si_found_in, lookup_list) : | |
soinfo_do_lookup_impl<false>(name, vi, si_found_in, lookup_list); | |
} |
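依次在 lookup_list 的每个库中查找:需要走 SysV 查找的库(`needs_sysv_lookup()`)调用 `find_symbol_by_name` 按名称查找;其余的库先用 GNU hash 的 Bloom filter 快速过滤,再沿 hash 桶对应的链逐项比较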
template <bool IsGeneral> | |
__attribute__((noinline)) static const ElfW(Sym)* | |
soinfo_do_lookup_impl(const char* name, const version_info* vi, | |
soinfo** si_found_in, const SymbolLookupList& lookup_list) { | |
const auto [ hash, name_len ] = calculate_gnu_hash(name); | |
constexpr uint32_t kBloomMaskBits = sizeof(ElfW(Addr)) * 8; | |
SymbolName elf_symbol_name(name); | |
const SymbolLookupLib* end = lookup_list.end(); | |
const SymbolLookupLib* it = lookup_list.begin(); | |
while (true) { | |
const SymbolLookupLib* lib; | |
uint32_t sym_idx; | |
// Iterate over libraries until we find one whose Bloom filter matches the symbol we're | |
// searching for. | |
// 在每一个库中都去寻找有没有指定的符号 | |
while (true) { | |
if (it == end) return nullptr; | |
lib = it++; | |
// 要是该库需要走 SysV 查找(needs_sysv_lookup),就调用 find_symbol_by_name 按名称来进行查找 | |
if (IsGeneral && lib->needs_sysv_lookup()) { | |
if (const ElfW(Sym)* sym = lib->si_->find_symbol_by_name(elf_symbol_name, vi)) { | |
*si_found_in = lib->si_; | |
return sym; | |
} | |
continue; | |
} | |
if (IsGeneral) { | |
TRACE_TYPE(LOOKUP, "SEARCH %s in %s@%p (gnu)", | |
name, lib->si_->get_realpath(), reinterpret_cast<void*>(lib->si_->base)); | |
} | |
// 先用 Bloom filter 快速判断该库是否可能包含此符号,通过后再从 hash 桶中取出起始符号索引 | |
const uint32_t word_num = (hash / kBloomMaskBits) & lib->gnu_maskwords_; | |
const ElfW(Addr) bloom_word = lib->gnu_bloom_filter_[word_num]; | |
const uint32_t h1 = hash % kBloomMaskBits; | |
const uint32_t h2 = (hash >> lib->gnu_shift2_) % kBloomMaskBits; | |
if ((1 & (bloom_word >> h1) & (bloom_word >> h2)) == 1) { | |
sym_idx = lib->gnu_bucket_[hash % lib->gnu_nbucket_]; | |
if (sym_idx != 0) { | |
break; | |
} | |
} | |
if (IsGeneral) { | |
TRACE_TYPE(LOOKUP, "NOT FOUND %s in %s@%p", | |
name, lib->si_->get_realpath(), reinterpret_cast<void*>(lib->si_->base)); | |
} | |
} | |
// Search the library's hash table chain. | |
ElfW(Versym) verneed = kVersymNotNeeded; | |
bool calculated_verneed = false; | |
uint32_t chain_value = 0; | |
const ElfW(Sym)* sym = nullptr; | |
// 沿 hash 链逐项比较 hash 值、版本和符号名,快速查找符号 | |
do { | |
sym = lib->symtab_ + sym_idx; | |
chain_value = lib->gnu_chain_[sym_idx]; | |
if ((chain_value >> 1) == (hash >> 1)) { | |
if (vi != nullptr && !calculated_verneed) { | |
calculated_verneed = true; | |
verneed = find_verdef_version_index(lib->si_, vi); | |
} | |
if (check_symbol_version(lib->versym_, sym_idx, verneed) && | |
static_cast<size_t>(sym->st_name) + name_len + 1 <= lib->strtab_size_ && | |
memcmp(lib->strtab_ + sym->st_name, name, name_len + 1) == 0 && | |
is_symbol_global_and_defined(lib->si_, sym)) { | |
*si_found_in = lib->si_; | |
if (IsGeneral) { | |
TRACE_TYPE(LOOKUP, "FOUND %s in %s (%p) %zd", | |
name, lib->si_->get_realpath(), reinterpret_cast<void*>(sym->st_value), | |
static_cast<size_t>(sym->st_size)); | |
} | |
return sym; | |
} | |
} | |
++sym_idx; | |
} while ((chain_value & 1) == 0); | |
if (IsGeneral) { | |
TRACE_TYPE(LOOKUP, "NOT FOUND %s in %s@%p", | |
name, lib->si_->get_realpath(), reinterpret_cast<void*>(lib->si_->base)); | |
} | |
} | |
} |
# 收尾工作
// Step 7: Mark all load_tasks as linked and increment refcounts | |
// for references between load_groups (at this point it does not matter if | |
// referenced load_groups were loaded by previous dlopen or as part of this | |
// one on step 6) | |
if (start_with != nullptr && add_as_children) { | |
start_with->set_linked(); | |
} | |
for (auto&& task : load_tasks) { | |
soinfo* si = task->get_soinfo(); | |
si->set_linked(); | |
} | |
for (auto&& task : load_tasks) { | |
soinfo* si = task->get_soinfo(); | |
soinfo* needed_by = task->get_needed_by(); | |
if (needed_by != nullptr && | |
needed_by != start_with && | |
needed_by->get_local_group_root() != si->get_local_group_root()) { | |
si->increment_ref_count(); | |
} | |
} |
至此,一个 so 就被成功地加载进来了~
# 参考资料
- Android Linker 学习笔记
- Android 系统加载 so 的源码分析
- Android 动态修改 Linker 实现 LD_PRELOAD 全局库 PLT Hook