Internal error 50065 when trying to replace an ast expression

I’m trying to optimize the pattern:

a1._countAndFlagsBits == 1702125924 && a1._object == (void *)0xE400000000000000LL

Where a1 is a Swift::String, to

a1 == __SwiftStr("date")

I have the code here, but the important part is:

# In def visit_expr(self, expr: cexpr_t) -> int:
# expr is the `a1._object == (void *)0xE400000000000000LL`
# parent is `a1._countAndFlagsBits == 1702125924 && a1._object == (void *)0xE400000000000000LL`

call = cexpr.call_helper_from_sig(
    "__SwiftStr",
    self.swift_str_type,
    [cexpr.from_string(s)],
)

new_comparison = cexpr.from_binary_op(
    cexpr_t(expr.x.x), call, ida_hexrays.cot_eq, tif.from_c_type("bool"), parent.ea
)

parent.swap(new_comparison)

When running it on sharingd, in the function sub_1004B96F8, it throws internal error 50065. The error is not available on the verifier so I have no idea what went wrong.
I’ve uploaded the DB with my email so I hope it helps.

Could you maybe check what this error mean so I could try to fix it?

Thanks!

Utils are:


# Utils:

def call_helper_from_sig(name: str, ret_type: tinfo_t, args: list[cexpr_t | carg_t]) -> cexpr_t:
    """Create a call expression from a name, arguments and return type"""
    return ida_hexrays.call_helper(ret_type, arglist_from_expr_arr(*args), name)

def arglist_from_expr_arr(*args: cexpr_t | carg_t) -> carglist_t:
    """Convert a list of cexpr_t to a carglist_t."""
    arglist = carglist_t()
    for arg in args:
        if arg is None:
            print("[Warning]: argument is None")
            continue

        arglist.push_back(carg_from_expr(arg))
    return arglist

def carg_from_expr(expr: cexpr_t) -> carg_t:
    """Convert a cexpr_t to a carg_t."""
    if isinstance(expr, carg_t):
        return expr
    else:
        carg = carg_t()
        carg.assign(expr)
    return carg

def from_binary_op(lhs: cexpr_t, rhs: cexpr_t, op: int, typ: tinfo_t, ea: int = idaapi.BADADDR) -> cexpr_t:
    """Create the binary operation expression `lhs op rhs`."""
    bin_expr = cexpr_t()
    bin_expr.ea = ea
    bin_expr.type = typ
    bin_expr.op = op
    bin_expr.x = lhs
    bin_expr.y = rhs
    return bin_expr

def from_string(s: str, ea: int = idaapi.BADADDR) -> cexpr_t:
    """Create a string expression from a const string."""
    e = cexpr_t()
    e.ea = ea
    e.op = ida_hexrays.cot_str
    e.type = tif.from_c_type("char*")
    e.string = s
    return e

def from_c_type(c_type: str) -> tinfo_t | None:
    """Given a C type string, return matching `tinfo_t`"""
    tif = tinfo_t()
    if c_type == "void":
        tif.create_simple_type(ida_typeinf.BT_VOID)
        return VOID
    else:
        # noinspection PyTypeChecker
        if (
            ida_typeinf.parse_decl(
                tif,
                None,
                c_type + ";",
                ida_typeinf.PT_SIL | ida_typeinf.PT_NDC | ida_typeinf.PT_TYP,
            )
            is not None
        ):
            return tif
    return None
1 Like

Hi! Thanks for the detailed report. We’ve received your DB files and opened a ticket to investigate this further, so you may expect to get the latest updates via our support channel.

Hi @yoavst ,

do you still observe the issue? We tried your IDB with the current ida-ios-helper, and there is no interr.

FYI, this interr happens when ctree_visitor_t encounters a citem_t(can be a cexpr_t or cinsn_t) with an unexpected op value, which normally should not happen.

If you can repro, please try to produce the minimal code necessary to trigger the issue (i.e. inline all dependencies and remove all code not related to the problematic code path).

I created a self contained script to trigger the issues.

Step:

  1. The script will create the Swift::String type.
  2. Manually change the signature of sub_1004B96F8 to __int64 __fastcall sub_1004B96F8(Swift::String);
  3. Trigger decompile of the function (F5)
  4. It should crash with the code

It happens on my MAC, and I think it did work on a windows, so make sure to test it on mac.


from dataclasses import dataclass
from typing import Literal

import ida_hexrays
import ida_typeinf
import idaapi
import idc
from ida_hexrays import (
    cexpr_t,
    ctree_parentee_t,
)
from ida_typeinf import tinfo_t


@dataclass
class MemrefConstInfo:
    """Holds the parts of a `<var>.<mem at off> op <const number>` expression where op is either `=` or `==`."""

    var: cexpr_t
    mem_off: int
    value: int
    # noinspection PyTypeHints
    op: "Literal[ida_hexrays.cot_asg] | Literal[ida_hexrays.cot_eq]"


def _unpack_memref_const(e: cexpr_t) -> MemrefConstInfo | None:
    """If `e` is `<var>.<mem at off> op <const number>`, return the parts. Otherwise, return None."""
    # Check assign
    if e.op not in (ida_hexrays.cot_asg, ida_hexrays.cot_eq):
        return None

    lhs, rhs = e.x, e.y
    # Check LHS is a memref
    if lhs.op != ida_hexrays.cot_memref:
        return None

    # Support a cast around the number
    if rhs.op == ida_hexrays.cot_cast and rhs.x.op == ida_hexrays.cot_num:
        rhs = rhs.x
    if rhs.op != ida_hexrays.cot_num:
        return None

    return MemrefConstInfo(var=lhs.x, mem_off=lhs.m, value=rhs.numval(), op=e.op)


def _is_memref_const_specific(e: cexpr_t, var_x: cexpr_t, wanted_off: int, wanted_op: int) -> bool:
    """Check if 'e' is '<var_x>.<mem at wanted_off> op <const number>'."""
    if (info := _unpack_memref_const(e)) is None:
        return False
    return _is_info_specific(info, var_x, wanted_off, wanted_op)


def _is_info_specific(info: MemrefConstInfo, var_x: cexpr_t, wanted_off: int, wanted_op: int) -> bool:
    """Check if 'e' is '<var_x>.<mem at wanted_off> op <const number>'."""
    return info.var == var_x and info.mem_off == wanted_off and info.op == wanted_op

class SwiftStringVisitor(ctree_parentee_t):
    """
    Finds pairs of assignments to Swift::String.{_countAndFlagsBits (off 0), _object (off 8)}
    in either order, possibly separated by other statements, decodes the string,
    and rewrites the second assignment to construct the Swift::String directly.
    """

    def __init__(self, swift_str_type: tinfo_t):
        super().__init__()
        self.swift_str_type = swift_str_type

    def visit_expr(self, expr: cexpr_t) -> int:
        if expr.op == ida_hexrays.cot_eq:
            self.visit_eq_expr(expr)
        return 0

    def visit_eq_expr(self, expr: cexpr_t):
        # Support equality comparisons, for cases like `if (str._countAndFlagsBits == 0 && str._object == 0)`

        # If we are an expression, we cannot be the root, so there is always a parent
        parent = self.parents[len(self.parents) - 1].to_specific_type
        # Support only being the right side of an `x && y` expression
        if parent.op != ida_hexrays.cot_land or parent.y != expr:
            return

        if (eq_info := _unpack_memref_const(expr)) is None:
            return
        var_x, cur_off, value = eq_info.var, eq_info.mem_off, eq_info.value

        # Only offsets 0 (countAndFlagsBits) & 8 (_object)
        if cur_off not in (0, 8):
            return

        # Find the complementary assignment earlier in the same block/comma
        need_off = 0 if cur_off == 8 else 8
        prior_expr = parent.x
        if (prior_info := _unpack_memref_const(prior_expr)) is None or not _is_info_specific(
            prior_info, var_x, need_off, ida_hexrays.cot_eq
        ):
            return

        # Extract values (bits @ off 0, object @ off 8)
        if cur_off == 8:
            bits_val = prior_info.value
            obj_val = value
        else:  # cur_off == 0
            bits_val = value
            obj_val = prior_info.value

        # Decode the string
        s = "test" if bits_val == 1702125924 and obj_val == 0xE400000000000000 else None
        if not s:
            return

        # Build a helper-call that returns Swift::String from a C string
        call = call_helper_from_sig(
            "__SwiftStr",
            self.swift_str_type,
            [from_string(s)],
        )

        new_comparison = from_binary_op(
            cexpr_t(expr.x.x), call, ida_hexrays.cot_eq, from_c_type("bool"), parent.ea
        )
        # FIXME why swap cause an internal error on mac ida 9.2
        parent.swap(new_comparison)


def from_c_type(c_type: str) -> tinfo_t | None:
    """Given a C type string, return matching `tinfo_t`"""
    tif = tinfo_t()
    if c_type == "void":
        tif.create_simple_type(ida_typeinf.BT_VOID)
        return  tinfo_t(ida_typeinf.BT_VOID)
    else:
        # noinspection PyTypeChecker
        if (
            ida_typeinf.parse_decl(
                tif,
                None,
                c_type + ";",
                ida_typeinf.PT_SIL | ida_typeinf.PT_NDC | ida_typeinf.PT_TYP,
            )
            is not None
        ):
            return tif
    return None

def from_string(s: str, ea: int = idaapi.BADADDR) -> cexpr_t:
    """Create a string expression from a const string."""
    e = cexpr_t()
    e.ea = ea
    e.op = ida_hexrays.cot_str
    e.type = from_c_type("char*")
    e.string = s
    return e


def call_helper_from_sig(name: str, ret_type: tinfo_t, args: list[cexpr_t | ida_hexrays.carg_t]) -> cexpr_t:
    """Create a call expression from a name, arguments and return type"""
    return ida_hexrays.call_helper(ret_type, arglist_from_expr_arr(*args), name)

def arglist_from_expr_arr(*args: cexpr_t | ida_hexrays.carg_t) -> ida_hexrays.carglist_t:
    """Convert a list of cexpr_t to a carglist_t."""
    arglist = ida_hexrays.carglist_t()
    for arg in args:
        if arg is None:
            print("[Warning]: argument is None")
            continue

        arglist.push_back(carg_from_expr(arg))
    return arglist


def carg_from_expr(expr: cexpr_t) -> ida_hexrays.carg_t:
    """Convert a cexpr_t to a carg_t."""
    if isinstance(expr, ida_hexrays.carg_t):
        return expr
    else:
        carg = ida_hexrays.carg_t()
        carg.assign(expr)
    return carg

def from_binary_op(lhs: cexpr_t, rhs: cexpr_t, op: int, typ: tinfo_t, ea: int = idaapi.BADADDR) -> cexpr_t:
    """Create the binary operation expression `lhs op rhs`."""
    bin_expr = cexpr_t()
    bin_expr.ea = ea
    bin_expr.type = typ
    bin_expr.op = op
    bin_expr.x = lhs
    bin_expr.y = rhs
    return bin_expr


DECLS = """
typedef long long s64;
typedef unsigned long long u64;

typedef s64 Int;
typedef u64 Bool;

struct Swift::String
{
  u64 _countAndFlagsBits;
  void *_object;
};

union Swift_ElementAny {
    Swift::String stringElement;
};

struct Swift_Any {
    Swift_ElementAny element;
    u64 unknown;
    s64 type;
};

struct Swift_ArrayAny {
    s64 length;
    Swift_Any *items;
};
"""
ida_typeinf.idc_parse_types(DECLS, 0)


class SwiftStringsHook(ida_hexrays.Hexrays_Hooks):
    def maturity(self, func: ida_hexrays.cfuncptr_t, new_maturity: int) -> int:
        # Run once the function has a reasonably stable AST
        if new_maturity < ida_hexrays.CMAT_CPA:
            return 0

        swift_str_type = from_c_type("Swift::String")
        if swift_str_type is None:
            return 0

        # noinspection PyTypeChecker,PyPropertyAccess
        SwiftStringVisitor(swift_str_type).apply_to(func.body, None)  # pyright: ignore[reportArgumentType]
        return 0

if 'my_hooks' in locals():
    locals()['my_hooks'].unhook()
my_hooks = SwiftStringsHook()
my_hooks.hook()

Thanks, we could reproduce the problem! Apparently after your callback we end up with a deleted item in ctree. We’ll see if we can detect this situation better.

Okay so if I understood correctly, I modified the parent of the current item mid traverse and it lead to working on deleted node. Therefore the solution should be to work on parent directly and swap it there.