According to the official documentation for find_text,
the first argument is start_ea, but when I try to use it, the first hit is before my start_ea. My code looks something like this (I have simplified it slightly):
import ida_search
import ida_idaapi
import ida_ida
from typing import List
def search_text(arg_search_for: str,
                arg_search_direction_down: bool = True,
                arg_search_is_regex: bool = False,
                arg_start_ea: int = 0,
                arg_max_hits: int = 1,
                arg_debug: bool = False) -> List[int]:
    """Return the addresses reported by repeated ``ida_search.find_text`` calls.

    Args:
        arg_search_for: Text (or regex) handed to ``find_text``.
        arg_search_direction_down: Selects ``SEARCH_DOWN`` when true,
            ``SEARCH_UP`` otherwise.
        arg_search_is_regex: Adds ``SEARCH_REGEX`` to the flags when true.
        arg_start_ea: Address of the first search; a falsy value means
            ``ida_ida.inf_get_min_ea()``.
        arg_max_hits: The loop stops once this many hits were collected.
            The countdown only stops at exactly zero, so a value of 0 or
            less keeps searching until ``find_text`` returns ``BADADDR``.
        arg_debug: Accepted but not used by this version; the trace lines
            are always printed.

    Returns:
        The hit addresses in the order ``find_text`` returned them.
    """
    direction = (ida_search.SEARCH_DOWN if arg_search_direction_down
                 else ida_search.SEARCH_UP)
    # SEARCH_BRK: return BADADDR if the search was cancelled.
    flags = direction | ida_search.SEARCH_NEXT | ida_search.SEARCH_BRK
    if arg_search_is_regex:
        flags |= ida_search.SEARCH_REGEX

    # find_text() also takes a y and an x argument; both are passed as 0.
    y = 0
    x = 0

    hits = []
    remaining = arg_max_hits
    cursor = arg_start_ea or ida_ida.inf_get_min_ea()
    while True:
        print(f"Calling ida_search.find_text(0x{cursor:x}, {y}, {x}, '{arg_search_for}', {flags})")
        found = ida_search.find_text(cursor, y, x, arg_search_for, flags)
        print(f"result from ida_search.find_text(): 0x{found:x}")
        if found == ida_idaapi.BADADDR:
            return hits
        hits.append(found)
        remaining -= 1
        if remaining == 0:
            return hits
        # The next search starts one byte past the hit (in both directions).
        cursor = found + 1
When I search for something that I know exists multiple times:
In[42]: search_text("xor eax, eax", arg_max_hits=2)
Calling ida_search.find_text(0x180000000, 0, 0, 'xor eax, eax', 3)
result from ida_search.find_text(): 0x1800014f7
Calling ida_search.find_text(0x1800014f8, 0, 0, 'xor eax, eax', 3)
result from ida_search.find_text(): 0x1800014f7
Out[42]: [0x1800014f7 (UrlHashA + 0x37), 0x1800014f7 (UrlHashA + 0x37)]
After some investigation, it seems that find_text() starts from get_item_head(start_ea), meaning I have to jump to the end of the item after each search. This code works as expected:
import ida_search
import ida_idaapi
import ida_ida
import ida_bytes
from typing import List
def search_text(arg_search_for: str,
                arg_search_direction_down: bool = True,
                arg_search_is_regex: bool = False,
                arg_start_ea: int = 0,
                arg_max_hits: int = 1,
                arg_debug: bool = False) -> List[int]:
    """Return the addresses where ``arg_search_for`` matches the disassembly text.

    ``ida_search.find_text`` is called repeatedly. After each hit the next
    search starts outside the item that was just found, so the same item
    is not reported twice.

    Args:
        arg_search_for: Text (or regex) handed to ``find_text``.
        arg_search_direction_down: Search towards higher addresses when
            true, towards lower addresses otherwise.
        arg_search_is_regex: Treat ``arg_search_for`` as a regular
            expression (adds ``SEARCH_REGEX``).
        arg_start_ea: Address of the first search. A falsy value selects
            the start of the database for a downward search and the end
            of the database for an upward search.
        arg_max_hits: Stop after this many hits. The countdown only stops
            at exactly zero, so a value of 0 or less keeps searching until
            ``find_text`` returns ``BADADDR``.
        arg_debug: Print a trace line before and after every
            ``find_text`` call.

    Returns:
        The hit addresses in the order ``find_text`` returned them.
    """
    if arg_search_direction_down:
        l_search_flags = ida_search.SEARCH_DOWN
    else:
        l_search_flags = ida_search.SEARCH_UP
    l_search_flags |= ida_search.SEARCH_NEXT
    l_search_flags |= ida_search.SEARCH_BRK  # return BADADDR if the search was cancelled
    if arg_search_is_regex:
        l_search_flags |= ida_search.SEARCH_REGEX

    if arg_start_ea:
        l_search_from = arg_start_ea
    elif arg_search_direction_down:
        l_search_from = ida_ida.inf_get_min_ea()
    else:
        # Searching upwards from the lowest address can never hit anything.
        # NOTE(review): assumes inf_get_max_ea() is a valid start address
        # for an upward find_text() - confirm in IDA.
        l_search_from = ida_ida.inf_get_max_ea()

    # find_text() also takes a y and an x argument; both are passed as 0.
    l_useless_y: int = 0
    l_useless_x: int = 0

    res: List[int] = []
    l_hits_left = arg_max_hits
    while True:
        if arg_debug:
            print(f"Calling ida_search.find_text(0x{l_search_from:x}, {l_useless_y}, {l_useless_x}, '{arg_search_for}', {l_search_flags})")
        l_hit = ida_search.find_text(l_search_from, l_useless_y, l_useless_x, arg_search_for, l_search_flags)
        if arg_debug:
            print(f"result from ida_search.find_text(): 0x{l_hit:x}")
        if l_hit == ida_idaapi.BADADDR:
            break
        res.append(l_hit)

        l_hits_left -= 1
        if l_hits_left == 0:
            break

        # find_text() appears to start at the head of the item containing
        # the start address, so stepping a single byte would land in the
        # same item and report it again. Leave the item entirely instead.
        if arg_search_direction_down:
            l_search_from = ida_bytes.get_item_end(l_hit)
        else:
            l_item_head = ida_bytes.get_item_head(l_hit)
            if l_item_head == 0:
                # Nothing lies below address 0; avoid a negative address.
                break
            l_search_from = l_item_head - 1
    return res
Oops. My bad, I only remembered that the up/down naming is tricky.
Btw, I think that if you do yield l_start_next_search_at instead of res.append(l_start_next_search_at), you won't have to deal with the l_max_hits variable.
I should discourage you from using find_text(). The disassembly is not stored anywhere and has to be generated on the fly. This is slow.
There are usually better ways to achieve the same goal.
I agree that find_text() should not be used very often. I’m writing a wrapper around some of the IDA Python APIs and a friend of mine wanted find_text() so I added it.