I created a self-contained script to trigger the issue.
Steps:
- The script will create the Swift::String type.
- Manually change the signature of sub_1004B96F8 to __int64 __fastcall sub_1004B96F8(Swift::String);
- Trigger decompilation of the function (F5).
- It should crash with the code
It happens on my Mac, and I think it worked on Windows, so make sure to test it on a Mac.
from dataclasses import dataclass
from typing import Literal
import ida_hexrays
import ida_typeinf
import idaapi
import idc
from ida_hexrays import (
cexpr_t,
ctree_parentee_t,
)
from ida_typeinf import tinfo_t
@dataclass
class MemrefConstInfo:
    """Holds the parts of a `<var>.<mem at off> op <const number>` expression where op is either `=` or `==`."""

    # Expression whose member is accessed (the `x` operand of the member reference).
    var: cexpr_t
    # Member offset taken from the member reference (its `m` field).
    mem_off: int
    # Numeric value of the constant on the right-hand side (`numval()` of the number node).
    value: int
    # Operator of the whole expression: `cot_asg` or `cot_eq`.
    # noinspection PyTypeHints
    op: "Literal[ida_hexrays.cot_asg] | Literal[ida_hexrays.cot_eq]"
def _unpack_memref_const(e: cexpr_t) -> MemrefConstInfo | None:
    """
    Split `<var>.<mem at off> op <const number>` into its parts.

    `op` must be `cot_asg` or `cot_eq`, and the number may be wrapped in a single cast.
    Returns None when `e` does not have that shape.
    """
    # Only assignments and equality comparisons are candidates. This is tested first so
    # that the operands are inspected only for these two binary operators.
    if e.op != ida_hexrays.cot_asg and e.op != ida_hexrays.cot_eq:
        return None

    target, const = e.x, e.y

    # The left operand has to be a member reference
    if target.op != ida_hexrays.cot_memref:
        return None

    # Look through one cast wrapped directly around the number
    if const.op == ida_hexrays.cot_cast and const.x.op == ida_hexrays.cot_num:
        const = const.x

    if const.op == ida_hexrays.cot_num:
        return MemrefConstInfo(var=target.x, mem_off=target.m, value=const.numval(), op=e.op)
    return None
def _is_memref_const_specific(e: cexpr_t, var_x: cexpr_t, wanted_off: int, wanted_op: int) -> bool:
    """Return True when `e` is `<var_x>.<mem at wanted_off> wanted_op <const number>`."""
    unpacked = _unpack_memref_const(e)
    # An expression of the wrong shape can never match
    return unpacked is not None and _is_info_specific(unpacked, var_x, wanted_off, wanted_op)
def _is_info_specific(info: MemrefConstInfo, var_x: cexpr_t, wanted_off: int, wanted_op: int) -> bool:
    """Check that the already-unpacked `info` refers to `var_x`, at `wanted_off`, with operator `wanted_op`."""
    same_var = info.var == var_x
    if not same_var:
        return False
    return info.mem_off == wanted_off and info.op == wanted_op
class SwiftStringVisitor(ctree_parentee_t):
    """
    Collapses `<var>.<mem at 0> == <num> && <var>.<mem at 8> == <num>` (members in either
    order) into a single comparison of `<var>` against a `__SwiftStr("...")` helper call.

    Offsets 0 and 8 are the `_countAndFlagsBits` and `_object` members of `Swift::String`.
    Only equality comparisons are handled; the match is driven from the right operand of
    the `&&` expression.
    """

    def __init__(self, swift_str_type: tinfo_t):
        """Remember `swift_str_type`, which is used as the return type of the helper call."""
        super().__init__()
        self.swift_str_type = swift_str_type
        # Nodes that were swapped out of the tree during this traversal. They are kept
        # referenced here so they are not released while the traversal may still be
        # positioned inside them (see `visit_eq_expr`).
        self._detached: list[cexpr_t] = []

    def visit_expr(self, expr: cexpr_t) -> int:
        """Visitor callback: attempt the rewrite on `==` nodes. Always returns 0."""
        if expr.op == ida_hexrays.cot_eq:
            self.visit_eq_expr(expr)
        return 0

    def visit_eq_expr(self, expr: cexpr_t):
        """
        Rewrite the parent `x && y` of `expr` when both sides compare the two
        `Swift::String` members of the same variable against constants.

        `expr` must be the right operand (`y`) of the `&&`; otherwise nothing happens.
        """
        # Support equality comparisons, for cases like `if (str._countAndFlagsBits == 0 && str._object == 0)`
        # If we are an expression, we cannot be the root, so there is always a parent
        parent = self.parents[len(self.parents) - 1].to_specific_type

        # Support only being the right side of an `x && y` expression
        if parent.op != ida_hexrays.cot_land or parent.y != expr:
            return

        if (eq_info := _unpack_memref_const(expr)) is None:
            return
        var_x, cur_off, value = eq_info.var, eq_info.mem_off, eq_info.value

        # Only offsets 0 (countAndFlagsBits) & 8 (_object)
        if cur_off not in (0, 8):
            return

        # The left operand of the `&&` must compare the complementary member of the same variable
        need_off = 0 if cur_off == 8 else 8
        prior_info = _unpack_memref_const(parent.x)
        if prior_info is None or not _is_info_specific(prior_info, var_x, need_off, ida_hexrays.cot_eq):
            return

        # Extract values (bits @ off 0, object @ off 8)
        if cur_off == 8:
            bits_val, obj_val = prior_info.value, value
        else:  # cur_off == 0
            bits_val, obj_val = value, prior_info.value

        # Decode the string (hard-coded to a single known pair of constants).
        # NOTE(review): 1702125924 == 0x65746164, i.e. the bytes b"date" in little-endian
        # order, so "test" looks like a placeholder rather than the decoded text - confirm.
        s = "test" if bits_val == 1702125924 and obj_val == 0xE400000000000000 else None
        if not s:
            return

        # Without a result type the comparison node cannot be built; leave the tree untouched.
        bool_type = from_c_type("bool")
        if bool_type is None:
            return

        # Build a helper-call that returns Swift::String from a C string
        call = call_helper_from_sig(
            "__SwiftStr",
            self.swift_str_type,
            [from_string(s)],
        )
        new_comparison = from_binary_op(cexpr_t(expr.x.x), call, ida_hexrays.cot_eq, bool_type, parent.ea)

        # `swap` exchanges contents: afterwards `parent` holds the new comparison and
        # `new_comparison` holds the old `x && y` subtree, which still contains `expr`,
        # the node this traversal is currently positioned on.
        # NOTE(review): if `new_comparison` were released when this method returns, the old
        # subtree would presumably be freed while the traversal still points into it. That
        # would explain the internal error seen on macOS with IDA 9.2 (and not on Windows)
        # - confirm. The node is therefore kept alive until the visitor itself goes away.
        parent.swap(new_comparison)
        self._detached.append(new_comparison)

        # `expr` is no longer part of the tree, so do not descend into its children.
        self.prune_now()
def from_c_type(c_type: str) -> tinfo_t | None:
    """
    Given a C type string, return the matching `tinfo_t`.

    `"void"` is special-cased and built directly from `BT_VOID`. Any other string is
    parsed as a type declaration (with `;` appended). Returns None when `parse_decl`
    reports a failure by returning None.
    """
    if c_type == "void":
        # Built directly; no scratch tinfo_t is needed for this case.
        return tinfo_t(ida_typeinf.BT_VOID)

    tif = tinfo_t()
    # noinspection PyTypeChecker
    parsed = ida_typeinf.parse_decl(
        tif,
        None,
        c_type + ";",
        ida_typeinf.PT_SIL | ida_typeinf.PT_NDC | ida_typeinf.PT_TYP,
    )
    return tif if parsed is not None else None
def from_string(s: str, ea: int = idaapi.BADADDR) -> cexpr_t:
    """Build a `cot_str` expression node holding the constant string `s`, typed as `char*`."""
    str_expr = cexpr_t()
    # The op is assigned before the string payload, as in the original ordering
    str_expr.op = ida_hexrays.cot_str
    str_expr.type = from_c_type("char*")
    str_expr.string = s
    str_expr.ea = ea
    return str_expr
def call_helper_from_sig(name: str, ret_type: tinfo_t, args: list[cexpr_t | ida_hexrays.carg_t]) -> cexpr_t:
    """Build a helper-call expression named `name` that takes `args` and returns `ret_type`."""
    arglist = arglist_from_expr_arr(*args)
    return ida_hexrays.call_helper(ret_type, arglist, name)
def arglist_from_expr_arr(*args: cexpr_t | ida_hexrays.carg_t) -> ida_hexrays.carglist_t:
    """Pack the given expressions into a `carglist_t`, skipping None entries with a warning."""
    packed = ida_hexrays.carglist_t()
    for item in args:
        if item is not None:
            packed.push_back(carg_from_expr(item))
        else:
            print("[Warning]: argument is None")
    return packed
def carg_from_expr(expr: cexpr_t) -> ida_hexrays.carg_t:
    """Return `expr` unchanged if it already is a `carg_t`; otherwise a new `carg_t` assigned from it."""
    if not isinstance(expr, ida_hexrays.carg_t):
        wrapped = ida_hexrays.carg_t()
        wrapped.assign(expr)
        return wrapped
    return expr
def from_binary_op(lhs: cexpr_t, rhs: cexpr_t, op: int, typ: tinfo_t, ea: int = idaapi.BADADDR) -> cexpr_t:
    """Build the expression node `lhs op rhs` with result type `typ`, located at address `ea`."""
    node = cexpr_t()
    node.ea = ea
    node.type = typ
    # The operator is set before the operands, as in the original ordering
    node.op = op
    node.x, node.y = lhs, rhs
    return node
# C declarations for the types this script relies on; `Swift::String` is the type
# looked up later via `from_c_type("Swift::String")`.
DECLS = """
typedef long long s64;
typedef unsigned long long u64;
typedef s64 Int;
typedef u64 Bool;
struct Swift::String
{
u64 _countAndFlagsBits;
void *_object;
};
union Swift_ElementAny {
Swift::String stringElement;
};
struct Swift_Any {
Swift_ElementAny element;
u64 unknown;
s64 type;
};
struct Swift_ArrayAny {
s64 length;
Swift_Any *items;
};
"""
# Parse the declarations above at script load time; the return value is not checked.
ida_typeinf.idc_parse_types(DECLS, 0)
class SwiftStringsHook(ida_hexrays.Hexrays_Hooks):
    """Decompiler hook that runs `SwiftStringVisitor` over the function body."""

    def maturity(self, func: ida_hexrays.cfuncptr_t, new_maturity: int) -> int:
        """Apply the visitor for every maturity level at or above `CMAT_CPA`. Always returns 0."""
        if new_maturity >= ida_hexrays.CMAT_CPA:
            swift_str_type = from_c_type("Swift::String")
            # Without the Swift::String type there is nothing to rewrite to
            if swift_str_type is not None:
                visitor = SwiftStringVisitor(swift_str_type)
                # noinspection PyTypeChecker,PyPropertyAccess
                visitor.apply_to(func.body, None)  # pyright: ignore[reportArgumentType]
        return 0
# If a `my_hooks` object already exists in this namespace (presumably left by a previous
# run of this script), unhook it before installing a fresh one.
if "my_hooks" in locals():
    my_hooks.unhook()

my_hooks = SwiftStringsHook()
my_hooks.hook()