IDApython 在日常逆向的过程中是十分重要的存在，过去零零散散的接触过一些 idapython, 但是我觉得还是有必要去系统的整理一下相关的函数，以便未来倘若忘记了可以快速通过这篇笔记回忆起来

# 官方文档

IDA Help: Alphabetical list of IDC functions
IDAPython documentation

# IDApython 编写环境

PyCharm 2021.2.3 社区版
IDA7.7
python3.8

由于 IDA 自带的 IDE 没有代码补全十分的难用，所以我准备在 PyCharm 中编写 IDApython 代码

在 pycharm 中进入到 文件-->设置-->项目: xxx.py-->Python解释器 ，然后点击此处的 全部显示 如图

然后点击该位置来添加自定义路径

将 IDA安装路径\python\3 添加至解释器路径中如图

点击确定之后，在 Pycharm 中即可实现 IDApython 代码补全

当然啦，我们也可以直接在 windows 中添加 PYTHONPATH 的环境变量，同样可以做到 idapython 的代码补全

# 地址 address

idc.get_screen_ea()#获取当前光标所在地址

	idc.get_inf_attr(INF_MAX_EA)#获取本文件的最大地址

	idc.get_inf_attr(INF_MIN_EA)#获取本文件的最小地址

	idaapi.get_imagebase()#获取文件的基址

	idc.next_head(ea)/idc.prev_head(ea)#获取下一条 / 上一条指令的地址

	# Treat ea as usable only when it is not the BADADDR sentinel.
	if ea != idaapi.BADADDR:
	    print("valid address")

# 反汇编 disasm

	idc.GetDisasm(ea)#获取某一个地址处的反汇编文本（汇编指令）

	idc.print_insn_mnem(ea)#返回助记符，get mnemonic

	idc.print_operand(ea,n)#返回第 n + 1 个参数

	idc.get_operand_value(ea,n)#返回第 n + 1 个参数的数值形式

	idc.get_item_size(ea)#获取某行汇编的长度

举个例子

	# Inspect the instruction at the address returned by here(); the trailing
	# comments show sample output for `mov eax, [rbp+var_4]`.
	ea = here()
	disasm_line = idc.GetDisasm(ea)
	mnemonic = idc.print_insn_mnem(ea)
	print(disasm_line)  # mov eax, [rbp+var_4]
	print(mnemonic)  # mov
	for operand_index in range(2):
	    print(idc.print_operand(ea, operand_index))  # eax, then [rbp+var_4]

# 段 segment

	idc.get_segm_name(ea)#获取地址所在段的段名

	idc.get_segm_start(ea)/idc.get_segm_end(ea)#获取地址所在段起始 / 结束地址

	idautils.Segments()#获取所有的段首地址

	idc.get_first_seg()/idc.get_next_seg() # 获取第一个段 / 下一个段的地址

	idc.get_segm_attr(ea,attr)/idc.set_segm_attr(ea,attr,value) # 获取 / 设置段的属性

	ida_segment.get_segm_by_name('.text')#通过 name 来获取段对象

举个例子

	# Walk every segment and print its name, start and end address.
	for seg_ea in idautils.Segments():
	    seg_name = idc.get_segm_name(seg_ea)
	    seg_start = idc.get_segm_start(seg_ea)
	    seg_end = idc.get_segm_end(seg_ea)
	    print(f"{seg_name}:\t0x{seg_start:x}\t0x{seg_end:x}")

	# Look a segment object up by name; the lookup yields None when no
	# segment carries that name, so guard before touching its attributes.
	sg = ida_segment.get_segm_by_name('.text')
	if sg is not None:
	    print(sg.start_ea,sg.end_ea,sg.size())

# 函数 function

	idc.get_func_name(ea)#通过地址获取函数名

	idaapi.get_func(ea)#通过地址获取地址所在的函数

	idc.get_next_func(ea)/idc.get_prev_func(ea) # 获取当前函数的后一个 / 前一个函数的地址

	idautils.Functions(start, end)#获取所有函数的首地址，若没有参数，则默认从头到尾

	idc.get_func_attr(func, FUNCATTR_FLAGS)#检索函数的信息

举个例子

	# Walk every function and print "address:name".
	for func_ea in idautils.Functions():
	    print(hex(func_ea), idc.get_func_name(func_ea), sep=':')

	# Start / end address of the function that contains ea.
	func = idaapi.get_func(ea)
	if func is not None:  # ea may not belong to any function
	    print(func.start_ea, func.end_ea)

	# Report which attribute flags are set on each known function (library
	# code, non-returning, thunk, ...).  Each flag is tested against its own
	# constant; the names are printed in the order listed here.
	flag_names = (
	    "FUNC_NORET",
	    "FUNC_FAR",
	    "FUNC_LIB",
	    "FUNC_STATIC",
	    "FUNC_FRAME",
	    "FUNC_USERFAR",
	    "FUNC_HIDDEN",
	    "FUNC_THUNK",
	    "FUNC_BOTTOMBP",
	)
	for func_ea in idautils.Functions():
	    flags = idc.get_func_attr(func_ea, idc.FUNCATTR_FLAGS)
	    for flag_name in flag_names:
	        if flags & getattr(idc, flag_name):
	            print(hex(func_ea), idc.get_func_name(func_ea), flag_name)

各个函数标志的含义如下

函数标志	含义
FUNC_NORET	这个标志表示某个函数不会返回到调用者（noreturn），而不是指函数没有返回值，它本身的值是 1，下面是一个不会返回的函数，注意该函数的最后并不是 `ret` 或者 `leave` 指令。
FUNC_FAR	这个标志非常少的出现，标志程序是否使用分段内存，它的值为 2。
FUNC_USERFAR	少见
FUNC_LIB	这个表示用于寻找库函数的代码。识别库函数代码是非常有必要的，因为我们在分析的时候一般将其跳过，它的值是 4。
FUNC_STATIC	静态函数
FUNC_FRAME	这个标志表示函数是否使用了 ebp 寄存器 (帧指针)，使用 ebp 寄存器的函数通常有如下的语法设定，目的是为了保存栈帧。
FUNC_BOTTOMBP	和 FUNC_FRAME 一样，该标志用于跟踪帧指针 (ebp)。它作用是识别函数中帧指针是否等于堆栈指针 (esp)。
FUNC_HIDDEN	带有 FUNC_HIDDEN 标志的函数意味着它们是隐藏的，这个函数需要展开才能查看。如果我们跳转到一个标记为 HIDDEN 的地址的话，它会自动的展开。
FUNC_THUNK	表示这个函数是否是一个 thunk 函数，thunk 函数表示的是一个简单的跳转函数。

# 块 block

遍历一个函数中的块

	import idaapi
	import idc
	f_blocks = idaapi.FlowChart(idaapi.get_func(0x401E80), flags=idaapi.FC_PREDS)
	for block in f_blocks:
	    # NOTE: block.end_ea is the start address of whatever follows the block,
	    # not the address of the block's last instruction.  Use
	    # idc.prev_head(block.end_ea) to get the last instruction of the block.
	    print(hex(block.start_ea), hex(block.end_ea), hex(idc.prev_head(block.end_ea)), block.id)

获取 block 的前驱块

	import idaapi
	f_blocks = idaapi.FlowChart(idaapi.get_func(0x401E80), flags=idaapi.FC_PREDS)
	block = f_blocks[1]
	# Print the start address and id of every predecessor of the chosen block.
	for pred in block.preds():
	    print("前驱块",hex(pred.start_ea),pred.id)

获取 block 的后继块

	import idaapi
	f_blocks = idaapi.FlowChart(idaapi.get_func(0x401E80), flags=idaapi.FC_PREDS)
	block = f_blocks[1]
	# Print the start address and id of every successor of the chosen block.
	for succ in block.succs():
	    print("后继块",hex(succ.start_ea),succ.id)

获取块的开始地址和结束地址

	import idaapi
	import idc
	f_blocks = idaapi.FlowChart(idaapi.get_func(0x401E80), flags=idaapi.FC_PREDS)
	block = f_blocks[1]
	# First instruction of the block and last instruction of the block
	# (prev_head of end_ea, because end_ea itself lies just past the block).
	print(hex(block.start_ea), hex(idc.prev_head(block.end_ea)))

# 操作数

可以通过 idc.get_operand_type(curr_addr, n) 获取第 n+1 个操作数的类型，返回值有八种情况，值和含义如下

操作数	值	含义
o_void	0	指令没有任何操作数，如 `retn`
o_reg	1	操作数是寄存器
o_mem	2	操作数是直接寻址的内存，这种类型对寻找 DATA 的引用非常有帮助。如 `cmp ds:dword_A152B8, 0`
o_phrase	3	操作数是利用基址寄存器和变址寄存器的寻址操作的话，如 `mov [edi+ecx], eax`
o_displ	4	操作数是利用寄存器和位移的寻址操作的话，如 `mov eax, [edi+18h]`
o_imm	5	操作数是一个确定的数值，如 `add esp, 0Ch`
o_far	6	这种返回类型在 x86 和 x86_64 的逆向中不常见。它用来判断直接访问远端地址的操作数
o_near	7	这种返回类型在 x86 和 x86_64 的逆向中不常见。它用来判断直接访问近端地址的操作数

	# Check whether the first operand of the instruction at curr_addr uses
	# register + displacement addressing (o_displ).
	insn = ida_ua.insn_t()
	# decode_insn returns the decoded length, or 0 when nothing could be
	# decoded; only look at the operand after a successful decode.
	if idaapi.decode_insn(insn, curr_addr) and insn.Op1.type == idaapi.o_displ:
	    print("第一个操作数的类型是利用寄存器和位移的寻址操作!")

# insn 的各个属性

Python>dir(insn)

['Op1', 'Op2', 'Op3', 'Op4', 'Op5', 'Op6', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__get_auxpref__', '__get_operand__', '__get_ops__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__set_auxpref__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__swig_destroy__', '__weakref__', 'add_cref', 'add_dref', 'add_off_drefs', 'assign', 'auxpref', 'create_op_data', 'create_stkvar', 'cs', 'ea', 'flags', 'get_canon_feature', 'get_canon_mnem', 'get_next_byte', 'get_next_dword', 'get_next_qword', 'get_next_word', 'insnpref', 'ip', 'is_64bit', 'is_canon_insn', 'is_macro', 'itype', 'ops', 'segpref', 'size', 'this', 'thisown']

# 搜索

idc.find_binary(ea,flag, searchstr, radix=16)#字节或者二进制的搜索（旧版本 IDA 中的名称为 idc.FindBinary）

idc.find_binary(ea,flag, searchstr, radix=16) 中 flag 的各个类型的含义

类型	值	含义
SEARCH_UP	0	向上搜索
SEARCH_DOWN	1	向下搜索
SEARCH_NEXT	2	获取下一个已经找到的对象
SEARCH_CASE	4	指明是否区分大小写
SEARCH_REGEX	8	正则搜索
SEARCH_NOBRK	16
SEARCH_NOSHOW	32	指明是否显示搜索的进度
SEARCH_UNICODE	64	将所有搜索字符串视为 Unicode
SEARCH_IDENT	128
SEARCH_BRK	256

举个例子

	pattern = '55 48 89 E5'
	addr = ida_ida.inf_get_min_ea()
	# Report at most five matches, searching downwards from the lowest address.
	for _ in range(5):
	    # SEARCH_NEXT makes each search start after the previous hit.
	    addr = idc.find_binary(addr, idc.SEARCH_DOWN | idc.SEARCH_NEXT, pattern)
	    if addr == idc.BADADDR:
	        break  # no further match; searching on from BADADDR is pointless
	    print(hex(addr), idc.GetDisasm(addr))

# 数据

	ida_bytes.get_bytes(ea,size)
	ida_bytes.get_byte(ea)
	ida_bytes.get_word(ea)
	ida_bytes.get_dword(ea)
	ida_bytes.get_qword(ea)

# 动态调试

	idc.add_bpt(long Address)#在指定的地址设置断点

	idc.get_reg_value(string Register)#获取一个寄存器的值

	idc.set_reg_value(long Value, string Register)#设置寄存器的值

	idc.run_to(long Address)#运行到指定的地址，然后停下。

	idc.wait_for_next_event(wfne,timeout)#等待下一个事件，此函数会继续执行进程，并等待调试器事件直到超时

在 idc.wait_for_next_event(wfne,timeout) 中参数及返回值的标志如下

wfne 标志

	// wfne flag is combination of the following:
	#define WFNE_ANY 0x0001 // return the first event (even if it doesn't suspend the process)
	// if the process is still running, the database
	// does not reflect the memory state. you might want
	// to call refresh_debugger_memory() in this case
	#define WFNE_SUSP 0x0002 // wait until the process gets suspended
	#define WFNE_SILENT 0x0004 // 1: be silent, 0:display modal boxes if necessary
	#define WFNE_CONT 0x0008 // continue from the suspended state
	#define WFNE_NOWAIT 0x0010 // do not wait for any event, immediately return DEC_TIMEOUT
	// (to be used with WFNE_CONT)
	#define WFNE_USEC 0x0020 // timeout is specified in microseconds
	// (minimum non-zero timeout is 40000us)

timeout ，等待的秒数，-1 为无限大

返回值 debugger event codes

	// debugger event codes
	#define NOTASK -2 // process does not exist
	#define DBG_ERROR -1 // error (e.g. network problems)
	#define DBG_TIMEOUT 0 // timeout
	#define PROCESS_STARTED 0x00000001 // New process started
	#define PROCESS_EXITED 0x00000002 // Process stopped
	#define THREAD_STARTED 0x00000004 // New thread started
	#define THREAD_EXITED 0x00000008 // Thread stopped
	#define BREAKPOINT 0x00000010 // Breakpoint reached
	#define STEP 0x00000020 // One instruction executed
	#define EXCEPTION 0x00000040 // Exception
	#define LIB_LOADED 0x00000080 // New library loaded
	#define LIB_UNLOADED 0x00000100 // Library unloaded
	#define INFORMATION 0x00000200 // User-defined information
	#define PROCESS_ATTACHED 0x00000400 // Attached to running process
	#define PROCESS_DETACHED 0x00000800 // Detached from process
	#define PROCESS_SUSPENDED 0x00001000 // Process has been suspended

# patch

	ida_bytes.patch_bytes(ea, bytes)
	ida_bytes.patch_byte(ea, byte)
	ida_bytes.patch_word(ea, word)
	ida_bytes.patch_dword(ea, dword)
	ida_bytes.patch_qword(ea, qword)

# IDApython 常用脚本

# 打印 IDA 函数列表

	import idautils
	import idc

	# Collect (start address, name) for every function known to IDA.
	functions = [(func_ea, idc.get_func_name(func_ea)) for func_ea in idautils.Functions()]

	# First line: all addresses as "0x..., "; second line: all names as
	# "\"name\", " (no trailing newline) -- ready to paste into a list literal.
	print("".join(f"{hex(func_ea)}, " for func_ea, _ in functions))
	print("".join(f"\"{name}\", " for _, name in functions), end='')

# 批量去除花指令

	import idc
	import ida_bytes
	import ida_segment

	def my_nop(addr, endaddr):
	    """Overwrite every byte in [addr, endaddr) with 0x90 (x86 NOP)."""
	    while addr < endaddr:
	        ida_bytes.patch_byte(addr, 0x90)
	        addr += 1


	# Byte signatures of the junk-instruction sequences to wipe out.
	pattern = ["74 15 75 13 8D 44 24 FC 83 F0 22 3B 04 24 74 0A E8 1F 00 00 00 74 04",
	           "74 0A 75 08 E8 10 00 00 00 EB 04 E8",
	           "48 81 EC 08 03 00 00"]
	text_seg = ida_segment.get_segm_by_name(".text")
	start, end = text_seg.start_ea, text_seg.end_ea
	for junk in pattern:
	    junk_len = len(junk.split(' '))  # one space-separated token per byte
	    cur_addr = start

	    while cur_addr < end:
	        cur_addr = idc.find_binary(cur_addr, idc.SEARCH_DOWN, junk)
	        # Stop on "not found", and also when the hit lies past .text:
	        # find_binary is not limited to the segment being cleaned.
	        if cur_addr == idc.BADADDR or cur_addr >= end:
	            break
	        print("patch address: " + hex(cur_addr))  # report only real hits
	        my_nop(cur_addr, cur_addr + junk_len)
	        cur_addr = idc.next_head(cur_addr)

# 去除 BCF 虚假控制流

更多细节请参考 ollvm 三种混淆模式的反混淆思路

	# IDAPython script that removes bogus control flow (BCF).
	import ida_xref
	import ida_idaapi
	import ida_segment
	from ida_bytes import get_bytes, patch_bytes

	def do_patch(ea):
	    """Rewrite `mov reg, <opaque predicate>` at ea into `mov reg, 0`.

	    Returns True when the site was patched, False (after printing 'error')
	    when the byte at ea is not the expected 0x8B mov opcode.
	    """
	    if get_bytes(ea, 1) == b"\x8B":  # mov r32, r/m32 (eax..edi)
	        # Bits 5..3 of the ModRM byte select the destination register.
	        reg = (ord(get_bytes(ea + 1, 1)) & 0b00111000) >> 3
	        # 0xB8+reg is `mov r32, imm32`; write imm32 = 0 followed by two NOPs.
	        # NOTE(review): 7 bytes are written, which assumes the original mov
	        # is 7 bytes long -- confirm against the target binary.
	        patch_bytes(ea, (0xB8 + reg).to_bytes(1,'little') + b'\x00\x00\x00\x00\x90\x90')
	        return True
	    print('error')
	    return False

	# The opaque-predicate variables are expected to live in the .bss segment.
	seg = ida_segment.get_segm_by_name('.bss')
	start = seg.start_ea
	end = seg.end_ea

	for addr in range(start,end,4):
	    ref = ida_xref.get_first_dref_to(addr)
	    print(hex(addr).center(20,'-'))
	    # Visit every data cross-reference to this variable.
	    while ref != ida_idaapi.BADADDR:
	        if do_patch(ref):
	            print('patch at ' + hex(ref))
	        ref = ida_xref.get_next_dref_to(addr, ref)
	    print('-' * 20)

# 去除寄存器跳转混淆

仅供参考，需要根据实际情况进行调整！

arm64 架构上的 csel-br 及 cset-br 类型寄存器跳转
这个脚本是我在做腾讯游戏安全 2023 的安卓题时写的，更多细节可以参阅细品 sec2023 安卓赛题

	import ida_segment
	import idautils
	import idc
	import ida_bytes
	from keystone import *


	def patch_nop(begin, end):
	    """Fill [begin, end) with ARM64 NOP instructions, 4 bytes at a time."""
	    while end > begin:
	        # 1F 20 03 D5 is the little-endian encoding of the A64 NOP.
	        ida_bytes.patch_bytes(begin, b'\x1F\x20\x03\xD5')
	        begin = begin + 4


	# Inverse of each condition code, used when the fall-through target is the
	# "condition true" branch and the emitted B.cond must test the opposite:
	#   GE<->LT signed >=  vs signed <      EQ<->NE equal       vs not equal
	#   CC<->CS unsigned < vs unsigned >=   HI<->LS unsigned >  vs unsigned <=
	#   GT<->LE signed >   vs signed <=     MI<->PL negative    vs non-negative
	#   VS<->VC overflow   vs no overflow   HS/LO are aliases of CS/CC
	logic_rev = {"GE": "LT", "LT": "GE", "EQ": "NE", "NE": "EQ", "CC": "CS", "CS": "CC", "HI": "LS", "LS": "HI",
	             "GT": "LE", "LE": "GT", "MI": "PL", "PL": "MI", "VS": "VC", "VC": "VS", "HS": "LO", "LO": "HS"}
	# One assembler instance is enough for the whole run.
	ks = Ks(KS_ARCH_ARM64, KS_MODE_LITTLE_ENDIAN)

	# Scan the whole .text segment.  While experimenting, start/end can be
	# narrowed to a single obfuscated sequence instead.
	text_seg = ida_segment.get_segm_by_name(".text")
	start, end = text_seg.start_ea, text_seg.end_ea
	current_addr = start
	# Helper instructions to NOP out once every CSEL/CSET-BR site has been fixed.
	nop_addr_array_after_finish = []
	while current_addr < end:
	    # ---- CSEL ... BR ----
	    if idc.print_insn_mnem(current_addr) == "CSEL":
	        CSEL_addr = current_addr
	        nop_addr_array_temp = [CSEL_addr]
	        BR_addr = 0
	        BR_reg = ""
	        temp_addr = idc.next_head(current_addr)
	        for _ in range(9):  # look for a BR within the next 9 instructions
	            if idc.print_insn_mnem(temp_addr) == "BR":
	                BR_addr = temp_addr
	                BR_reg = idc.print_operand(temp_addr, 0)
	                break
	            if idc.print_insn_mnem(temp_addr) == "CSEL":
	                break
	            temp_addr = idc.next_head(temp_addr)
	        if BR_addr != 0:  # found a CSEL-BR sequence that has to be rewritten
	            # For e.g. `CSEL X11, X12, X11, GE`: the two source registers
	            # and the condition code.
	            CSEL_op2 = idc.print_operand(CSEL_addr, 1)
	            CSEL_op2_val = -1
	            CSEL_op3 = idc.print_operand(CSEL_addr, 2)
	            CSEL_op3_val = -1
	            CSEL_cond = idc.print_operand(CSEL_addr, 3)

	            # Walk backwards to find the values loaded into the two source
	            # registers of the CSEL.
	            temp_addr = idc.prev_head(CSEL_addr)
	            while (CSEL_op2_val == -1 or CSEL_op3_val == -1) and temp_addr > text_seg.start_ea:
	                if CSEL_op2 == "XZR":  # zero register: the value is 0
	                    CSEL_op2_val = 0
	                if CSEL_op3 == "XZR":
	                    CSEL_op3_val = 0
	                if idc.print_insn_mnem(temp_addr) == "MOV":
	                    # Compare without the first character: X11 and W11 name
	                    # the same register.
	                    mov_dst = idc.print_operand(temp_addr, 0)[1::]
	                    if mov_dst == CSEL_op2[1::] and CSEL_op2_val == -1:
	                        CSEL_op2_val = idc.get_operand_value(temp_addr, 1)
	                        nop_addr_array_temp.append(temp_addr)
	                    elif mov_dst == CSEL_op3[1::] and CSEL_op3_val == -1:
	                        CSEL_op3_val = idc.get_operand_value(temp_addr, 1)
	                        nop_addr_array_temp.append(temp_addr)
	                temp_addr = idc.prev_head(temp_addr)
	            if CSEL_op2_val == -1 or CSEL_op3_val == -1:
	                # Skip this site instead of aborting the whole run, so the
	                # sites already fixed still get their NOP pass at the end.
	                print(f"error! unable to resolve CSEL operands at {hex(CSEL_addr)}")
	                current_addr = idc.next_head(current_addr)
	                continue

	            temp_addr = BR_addr
	            jump_array_reg = ""  # register holding the jump table address
	            jump_array_addr = -1  # address of the jump table
	            add_reg = []  # registers involved in the ADD that feeds BR
	            add_val = -1  # constant added to the table entry
	            # Walk backwards from BR to CSEL collecting register names/values.
	            while temp_addr > CSEL_addr:
	                if idc.print_insn_mnem(temp_addr) == "ADD" and idc.print_operand(temp_addr, 0) == BR_reg:
	                    add_reg.append(idc.print_operand(temp_addr, 1)[1::])
	                    add_reg.append(idc.print_operand(temp_addr, 2)[1::])
	                    nop_addr_array_temp.append(temp_addr)
	                elif idc.print_insn_mnem(temp_addr) == "MOV":
	                    if idc.print_operand(temp_addr, 0)[1::] in add_reg:
	                        add_val = idc.get_operand_value(temp_addr, 1)
	                        nop_addr_array_temp.append(temp_addr)
	                elif idc.print_insn_mnem(temp_addr) == "LDR":
	                    # Base register of the memory operand, e.g. "[X8,X9]" -> "X8".
	                    jump_array_reg = idc.print_operand(temp_addr, 1)[1:-1].split(',')[0]
	                    nop_addr_array_temp.append(temp_addr)
	                elif idc.print_insn_mnem(temp_addr) == "ADRL":
	                    jump_array_reg = idc.print_operand(temp_addr, 0)
	                    jump_array_addr = idc.get_operand_value(temp_addr, 1)
	                    nop_addr_array_temp.append(temp_addr)
	                temp_addr = idc.prev_head(temp_addr)

	            # Jump table address not found between CSEL and BR: search upwards.
	            if jump_array_addr == -1:
	                temp_addr = CSEL_addr
	                while temp_addr > text_seg.start_ea:
	                    if idc.print_insn_mnem(temp_addr) == "ADRL":
	                        if idc.print_operand(temp_addr, 0) == jump_array_reg:
	                            jump_array_addr = idc.get_operand_value(temp_addr, 1)
	                            nop_addr_array_temp.append(temp_addr)
	                            break
	                    elif idc.print_insn_mnem(temp_addr) == "ADRP":
	                        # ADRP supplies only part of the address; the ADD on
	                        # the same register that follows supplies the rest.
	                        if idc.print_operand(temp_addr, 0) == jump_array_reg:
	                            jump_array_addr = idc.get_operand_value(temp_addr, 1)
	                            nop_addr_array_temp.append(temp_addr)
	                            while temp_addr < text_seg.end_ea:
	                                if idc.print_insn_mnem(temp_addr) == "ADD":
	                                    if idc.print_operand(temp_addr, 0) == jump_array_reg:
	                                        jump_array_addr += idc.get_operand_value(temp_addr, 2)
	                                        nop_addr_array_temp.append(temp_addr)
	                                        break
	                                temp_addr = idc.next_head(temp_addr)
	                            break
	                    temp_addr = idc.prev_head(temp_addr)

	            # Added constant not found between CSEL and BR: search upwards
	            # for a MOV into the X form of one of the ADD registers.
	            if add_val == -1:
	                temp_addr = CSEL_addr
	                while temp_addr > text_seg.start_ea:
	                    if idc.print_insn_mnem(temp_addr) == "MOV":
	                        if idc.print_operand(temp_addr, 0)[1::] in add_reg and idc.print_operand(temp_addr, 0)[0] == 'X':
	                            add_val = idc.get_operand_value(temp_addr, 1)
	                            nop_addr_array_temp.append(temp_addr)
	                            break
	                    temp_addr = idc.prev_head(temp_addr)

	            # The two possible branch targets (64-bit wrap-around).
	            branch_a = (ida_bytes.get_qword(jump_array_addr + CSEL_op2_val) + add_val) & 0xffffffffffffffff
	            branch_b = (ida_bytes.get_qword(jump_array_addr + CSEL_op3_val) + add_val) & 0xffffffffffffffff

	            code = ""
	            fall_through = idc.next_head(BR_addr)
	            if branch_b == fall_through:  # keep the condition as is
	                code = f"B.{CSEL_cond} #{hex(branch_a)}"
	            elif branch_a == fall_through and CSEL_cond in logic_rev:  # invert it
	                code = f"B.{logic_rev[CSEL_cond]} #{hex(branch_b)}"

	            # Replace the BR with the conditional branch.
	            if code != "":
	                patch_br_byte, _ = ks.asm(code, addr=BR_addr)
	                ida_bytes.patch_bytes(BR_addr, bytes(patch_br_byte))
	                print(f"fix CSEL-BR at {hex(BR_addr)}")
	                nop_addr_array_after_finish.extend(nop_addr_array_temp)
	                current_addr = idc.next_head(BR_addr)
	                continue
	            else:
	                print(f"error! unable to fix CSEL-BR at {hex(current_addr)},branch:{hex(branch_a)}, {hex(branch_b)}")

	    # ---- CSET ... BR ----
	    elif idc.print_insn_mnem(current_addr) == "CSET":
	        CSET_addr = current_addr
	        nop_addr_array_temp = [CSET_addr]
	        BR_addr = 0
	        BR_reg = ""
	        temp_addr = idc.next_head(current_addr)
	        for _ in range(15):  # look for a BR within the next 15 instructions
	            if idc.print_insn_mnem(temp_addr) == "BR":
	                BR_addr = temp_addr
	                BR_reg = idc.print_operand(temp_addr, 0)
	                break
	            elif idc.print_insn_mnem(temp_addr) == "CSEL":
	                break
	            elif idc.print_insn_mnem(temp_addr) == "RET":
	                break
	            temp_addr = idc.next_head(temp_addr)
	        if BR_addr != 0:  # found a CSET-BR sequence that has to be rewritten
	            # For e.g. `CSET W23, NE`: destination register and condition.
	            CSET_op1 = idc.print_operand(CSET_addr, 0)
	            CSET_cond = idc.print_operand(CSET_addr, 1)

	            temp_addr = BR_addr
	            jump_array_reg = ""  # register holding the jump table address
	            jump_array_addr = 0  # address of the jump table
	            add_reg = []  # registers involved in the ADD that feeds BR
	            add_val = 0  # constant added to the table entry
	            Lshift_val = -1  # left shift applied to the CSET result (table index)
	            # Walk backwards from BR to CSET collecting register names/values.
	            while temp_addr > CSET_addr:
	                if idc.print_insn_mnem(temp_addr) == "ADD" and idc.print_operand(temp_addr, 0) == BR_reg:
	                    add_reg.append(idc.print_operand(temp_addr, 1)[1::])
	                    add_reg.append(idc.print_operand(temp_addr, 2)[1::])
	                    nop_addr_array_temp.append(temp_addr)
	                elif idc.print_insn_mnem(temp_addr) == "MOVK":
	                    if idc.print_operand(temp_addr, 0)[1::] in add_reg:
	                        add_val += (idc.get_operand_value(temp_addr, 1) << 16)
	                elif idc.print_insn_mnem(temp_addr) == "MOV":
	                    if idc.print_operand(temp_addr, 0)[1::] in add_reg:
	                        add_val += idc.get_operand_value(temp_addr, 1)
	                        nop_addr_array_temp.append(temp_addr)
	                elif idc.print_insn_mnem(temp_addr) == "LDR":
	                    LDR_temp = idc.print_operand(temp_addr, 1)[1:-1].split(',')
	                    jump_array_reg = LDR_temp[0]  # base register of the memory operand
	                    if len(LDR_temp) == 3:
	                        # Third component carries the shift; its last
	                        # character is taken as the shift amount.
	                        Lshift_val = int(LDR_temp[2][-1:])
	                    nop_addr_array_temp.append(temp_addr)
	                elif idc.print_insn_mnem(temp_addr) == "ADRL":
	                    jump_array_reg = idc.print_operand(temp_addr, 0)
	                    jump_array_addr = idc.get_operand_value(temp_addr, 1)
	                    nop_addr_array_temp.append(temp_addr)
	                elif idc.print_insn_mnem(temp_addr) == "LSL":
	                    if idc.print_operand(temp_addr, 0)[1::] == CSET_op1[1::]:
	                        Lshift_val = idc.get_operand_value(temp_addr, 2)

	                temp_addr = idc.prev_head(temp_addr)

	            # Jump table address not found between CSET and BR: search upwards.
	            if jump_array_addr == 0:
	                temp_addr = CSET_addr
	                while temp_addr > text_seg.start_ea:
	                    if idc.print_insn_mnem(temp_addr) == "ADRL":
	                        if idc.print_operand(temp_addr, 0) == jump_array_reg:
	                            jump_array_addr = idc.get_operand_value(temp_addr, 1)
	                            nop_addr_array_temp.append(temp_addr)
	                            break
	                    elif idc.print_insn_mnem(temp_addr) == "ADRP":
	                        # ADRP supplies only part of the address; the ADD on
	                        # the same register that follows supplies the rest.
	                        if idc.print_operand(temp_addr, 0) == jump_array_reg:
	                            jump_array_addr = idc.get_operand_value(temp_addr, 1)
	                            nop_addr_array_temp.append(temp_addr)
	                            while temp_addr < text_seg.end_ea:
	                                if idc.print_insn_mnem(temp_addr) == "ADD":
	                                    if idc.print_operand(temp_addr, 0) == jump_array_reg:
	                                        jump_array_addr += idc.get_operand_value(temp_addr, 2)
	                                        nop_addr_array_temp.append(temp_addr)
	                                        break
	                                temp_addr = idc.next_head(temp_addr)
	                            break
	                    temp_addr = idc.prev_head(temp_addr)

	            # Added constant not found between CSET and BR: search upwards.
	            if add_val == 0:
	                temp_addr = CSET_addr
	                while temp_addr > text_seg.start_ea:
	                    if idc.print_insn_mnem(temp_addr) == "MOV":
	                        if idc.print_operand(temp_addr, 0)[1::] in add_reg:
	                            add_val = idc.get_operand_value(temp_addr, 1)
	                            nop_addr_array_temp.append(temp_addr)
	                            break
	                    elif idc.print_insn_mnem(temp_addr) == "MOVK":
	                        # Pair such as `MOV W9, #0x76BC; MOVK W9, #0x4C48,LSL#16`:
	                        # MOVK gives the high half, the earlier MOV the low half.
	                        if idc.print_operand(temp_addr, 0)[1::] in add_reg:
	                            add_val = (idc.get_operand_value(temp_addr, 1) << 16)
	                            while temp_addr > text_seg.start_ea:
	                                if idc.print_insn_mnem(temp_addr) == "MOV":
	                                    if idc.print_operand(temp_addr, 0)[1::] in add_reg:
	                                        add_val += idc.get_operand_value(temp_addr, 1)
	                                        break
	                                temp_addr = idc.prev_head(temp_addr)

	                            break

	                    temp_addr = idc.prev_head(temp_addr)

	            if Lshift_val < 0:
	                # No shift was recovered; `1 << -1` would raise, so skip the site.
	                print(f"error! unable to determine the index shift for CSET-BR at {hex(CSET_addr)}")
	                current_addr = idc.next_head(current_addr)
	                continue

	            # The two possible branch targets: table entry for CSET == 1
	            # and for CSET == 0 (64-bit wrap-around).
	            branch_a = (ida_bytes.get_qword(jump_array_addr + (1 << Lshift_val)) + add_val) & 0xffffffffffffffff
	            branch_b = (ida_bytes.get_qword(jump_array_addr + (0 << Lshift_val)) + add_val) & 0xffffffffffffffff

	            code = ""
	            fall_through = idc.next_head(BR_addr)
	            if branch_b == fall_through:  # keep the condition as is
	                code = f"B.{CSET_cond} #{hex(branch_a)}"
	            elif branch_a == fall_through and CSET_cond in logic_rev:  # invert it
	                code = f"B.{logic_rev[CSET_cond]} #{hex(branch_b)}"

	            # Replace the BR with the conditional branch.
	            if code != "":
	                patch_br_byte, _ = ks.asm(code, addr=BR_addr)
	                ida_bytes.patch_bytes(BR_addr, bytes(patch_br_byte))
	                print(f"fix CSET-BR at {hex(BR_addr)}")
	                nop_addr_array_after_finish.extend(nop_addr_array_temp)
	                current_addr = idc.next_head(BR_addr)
	                continue
	            else:
	                print(f"error! unable to fix CSET-BR at {hex(current_addr)},branch:{hex(branch_a)}, {hex(branch_b)}")

	    current_addr = idc.next_head(current_addr)

	# NOP out every helper instruction recorded for the sites that were fixed.
	for addr in nop_addr_array_after_finish:
	    patch_nop(addr, addr + idc.get_item_size(addr))

x86_64 架构的 jmp rax 类型寄存器跳转
例题点这里下载 ACTF-obfuse

	import ida_segment
	import idautils
	import idc
	import ida_bytes
	import binascii
	import re
	from keystone import *


	def patch_nop(addr, endaddr):
	    """Overwrite every byte in [addr, endaddr) with 0x90 (x86 NOP)."""
	    while addr < endaddr:
	        ida_bytes.patch_byte(addr, 0x90)
	        addr += 1


	def parse_ida_hex(text):
	    """Parse operand text such as '5C65CCC7h' or '0ADAE163Ch' as hexadecimal.

	    Returns the integer value, or None when the text (after dropping the
	    'h' suffix) is not made of hex digits only.
	    """
	    digits = text.strip('h')
	    if not re.fullmatch(r'[0-9A-Fa-f]+', digits):
	        return None
	    return int(digits, 16)


	# Scan the whole .text segment.  While experimenting, start/end can be
	# narrowed to a single obfuscated sequence instead.
	text_seg = ida_segment.get_segm_by_name(".text")
	start, end = text_seg.start_ea, text_seg.end_ea
	current_addr = start
	call_table = 0x67F1A0  # address of the `call rax` jump table (binary specific)
	# One assembler instance is enough for the whole run.
	ks = Ks(KS_ARCH_X86, KS_MODE_64)

	# A `call rax` sequence looks like this; `mov rax, [rax+14E8h]` and
	# `call rax` have to go:
	#     mov   rax, [rax+14E8h]
	#     movzx edi, byte ptr [rbp+var_50+6]
	#     mov   edx, offset dword_674040
	#     mov   esi, 1
	#     lea   rcx, [rbp+var_120]
	#     mov   r8d, 2AE8944Ah
	#     call  rax
	# After the rewrite it should read:
	#     movzx edi, byte ptr [rbp+var_50+6]
	#     mov   edx, offset dword_674040
	#     mov   esi, 1
	#     lea   rcx, [rbp+var_120]
	#     mov   r8d, 2AE8944Ah
	#     call  sub_xxxxxx
	while current_addr < end:  # end is the first address past the segment
	    # ---- call rax ----
	    if idc.print_insn_mnem(current_addr) == "call" and idc.print_operand(current_addr, 0) == "rax":
	        call_rax_addr = current_addr
	        mov_rax_xxxh_addr = -1
	        call_func_addr = -1
	        # Walk back (at most 29 instructions) to the `mov rax, [rax+XXXXh]`
	        # that loads the call target from the table.
	        temp_addr = call_rax_addr
	        count = 1
	        while temp_addr >= start and count < 30:
	            if (idc.print_insn_mnem(temp_addr) == "mov"
	                    and idc.print_operand(temp_addr, 0) == "rax"
	                    and "rax" in idc.print_operand(temp_addr, 1)):
	                mov_rax_xxxh_addr = temp_addr
	                # Pull the hex offset (14E8) out of `[rax+14E8h]`.
	                offset_match = re.findall(r'\[\w+\+([\da-fA-F]+)', idc.print_operand(temp_addr, 1))
	                if offset_match:
	                    call_table_offset = int(offset_match[0], 16)
	                    call_func_addr = ida_bytes.get_dword(call_table + call_table_offset)
	                break
	            temp_addr = idc.prev_head(temp_addr)
	            count = count + 1

	        if mov_rax_xxxh_addr == -1 or call_func_addr == -1:
	            current_addr = idc.next_head(current_addr)
	            continue

	        # Collect the bytes of the instructions between the mov and the
	        # call; they are moved up to where the mov used to be.
	        # NOTE(review): the bytes are copied verbatim to a lower address --
	        # confirm none of them is position dependent.
	        movRAX_callRAX_patch = b''
	        ea = idc.next_head(mov_rax_xxxh_addr)
	        while ea < call_rax_addr:
	            size = idc.next_head(ea) - ea
	            movRAX_callRAX_patch += ida_bytes.get_bytes(ea, size)
	            ea = idc.next_head(ea)

	        # Assemble the direct call at the address it will end up at.
	        code = f"call {call_func_addr}"
	        patch_call_rax_byte, _ = ks.asm(code, addr=(mov_rax_xxxh_addr + len(movRAX_callRAX_patch)))
	        movRAX_callRAX_patch += bytes(patch_call_rax_byte)
	        # NOP the whole original range first, then lay the new bytes over it.
	        ida_bytes.patch_bytes(mov_rax_xxxh_addr, b'\x90' * (idc.next_head(call_rax_addr) - mov_rax_xxxh_addr))
	        ida_bytes.patch_bytes(mov_rax_xxxh_addr, movRAX_callRAX_patch)
	        print(f"fix call rax at {hex(call_rax_addr)}")

	    # ---- jmp rax ----
	    # Two shapes are handled.
	    # 1) the addend goes through a register (rcx here):
	    #     mov rax, cs:qword_67CA28
	    #     mov ecx, 0ADAE163Ch
	    #     add rax, rcx
	    #     jmp rax
	    # 2) the addend is an immediate:
	    #     mov rax, cs:qword_67CA30
	    #     add rax, 5C65CCC7h
	    #     jmp rax
	    if idc.print_insn_mnem(current_addr) == "jmp" and idc.print_operand(current_addr, 0) == "rax":
	        mov_rax_qword_xxx_addr = -1
	        mov_reg_xxx_addr = -1
	        add_rax_xxx_addr = -1
	        jmp_rax_addr = current_addr

	        add_num1 = -1
	        add_num2 = -1
	        # First addend: the qword loaded by `mov rax, cs:qword_XXXX`.
	        temp_addr = jmp_rax_addr
	        count = 1
	        while temp_addr >= start and count < 30:
	            if idc.print_insn_mnem(temp_addr) == "mov" and idc.print_operand(temp_addr, 0) == "rax":
	                mov_rax_qword_xxx_addr = temp_addr
	                name_match = re.findall(r'cs:qword_([0-9A-Fa-f]+)', idc.print_operand(temp_addr, 1))
	                if name_match:
	                    # The name suffix is the address in hex; int() also
	                    # copes with an odd number of digits.
	                    add_num1_addr = int(name_match[0], 16)
	                    add_num1 = ida_bytes.get_qword(add_num1_addr)
	                break
	            temp_addr = idc.prev_head(temp_addr)
	            count = count + 1

	        # Second addend: the operand of `add rax, ...`.
	        temp_addr = jmp_rax_addr
	        count = 1
	        while temp_addr >= start and count < 30:
	            if idc.print_insn_mnem(temp_addr) == "add" and idc.print_operand(temp_addr, 0) == "rax":
	                add_rax_xxx_addr = temp_addr
	                add_operand = idc.print_operand(temp_addr, 1)
	                add_num2_text = None
	                if not add_operand.endswith('x'):
	                    # Immediate addend.
	                    add_num2_text = add_operand
	                else:
	                    # Addend passed through a register such as rcx: find
	                    # the mov that loads it.  Names are compared without
	                    # the first character so ecx matches rcx.
	                    temp_addr_2 = temp_addr
	                    count2 = 1
	                    while temp_addr_2 >= start and count2 < 30:
	                        if (idc.print_insn_mnem(temp_addr_2) == "mov"
	                                and idc.print_operand(temp_addr_2, 0)[1::] == add_operand[1::]):
	                            add_num2_text = idc.print_operand(temp_addr_2, 1)
	                            mov_reg_xxx_addr = temp_addr_2
	                            break
	                        temp_addr_2 = idc.prev_head(temp_addr_2)
	                        count2 = count2 + 1
	                if add_num2_text is not None:
	                    parsed = parse_ida_hex(add_num2_text)
	                    # Leave the -1 sentinel in place when the operand is
	                    # not a plain hex number, so the site is skipped below.
	                    if parsed is not None:
	                        add_num2 = parsed
	                break

	            temp_addr = idc.prev_head(temp_addr)
	            count = count + 1

	        if add_num1 == -1 or add_num2 == -1 or mov_rax_qword_xxx_addr == -1 or add_rax_xxx_addr == -1:
	            current_addr = idc.next_head(current_addr)
	            continue

	        # Collect the bytes of every instruction in the range that is not
	        # part of the obfuscation itself.
	        movRAX_jmpRAX_patch = b''
	        should_pass_addr = [mov_rax_qword_xxx_addr, mov_reg_xxx_addr, add_rax_xxx_addr, jmp_rax_addr]
	        ea = mov_rax_qword_xxx_addr
	        while ea < jmp_rax_addr:
	            if ea not in should_pass_addr:
	                size = idc.next_head(ea) - ea
	                movRAX_jmpRAX_patch += ida_bytes.get_bytes(ea, size)
	            ea = idc.next_head(ea)

	        # Jump target: sum of both addends truncated to 32 bits.
	        jmp_addr = (add_num1 + add_num2) & 0xffffffff
	        code = f"jmp {jmp_addr}"
	        patch_jmp_rax_byte, _ = ks.asm(code, addr=(mov_rax_qword_xxx_addr + len(movRAX_jmpRAX_patch)))

	        movRAX_jmpRAX_patch += bytes(patch_jmp_rax_byte)
	        # NOP the whole original range first, then lay the new bytes over it.
	        ida_bytes.patch_bytes(mov_rax_qword_xxx_addr, b'\x90' * (idc.next_head(jmp_rax_addr) - mov_rax_qword_xxx_addr))
	        ida_bytes.patch_bytes(mov_rax_qword_xxx_addr, movRAX_jmpRAX_patch)
	        print(f"fix jmp rax at {hex(jmp_rax_addr)}")

	    current_addr = idc.next_head(current_addr)

	# To NOP a range by hand, call e.g. patch_nop(0x410FB3, 0x41142C).

# 参考资料

ida python 使用
idapython 笔记学习
在 PyCharm 中写 IDAPython 脚本

idapython

# 官方文档

# IDApython 编写环境

# 地址 address

# 反汇编 disasm

# 段 segment

# 函数 function

# 块 block

# 操作数

# 搜索

# 数据

# 动态调试

# patch

# IDApython 常用脚本

# 打印 IDA 函数列表

# 批量去除花指令

# 去除 BCF 虚假控制流

# 去除寄存器跳转混淆

# 参考资料

将rwProcMem33编译进安卓内核

细品sec2023安卓赛题