64-bit __OFADD__ and __CFADD__ are not properly recognized by x86 decompiler

Consider these two 32-bit x86 functions, which return the overflow flag and the carry flag, respectively, of a subtraction of two 64-bit values (64-bit arithmetic is emulated by the compiler in 32-bit mode):

100015E0 ofsub64(__int64, __int64) proc near
100015E0
100015E0 arg_0           = qword ptr  8
100015E0 arg_8           = qword ptr  10h
100015E0
100015E0                 push    ebp
100015E1                 mov     ebp, esp
100015E3                 mov     eax, dword ptr [ebp+arg_0]
100015E6                 cmp     eax, dword ptr [ebp+arg_8]
100015E9                 mov     eax, dword ptr [ebp+arg_0+4]
100015EC                 sbb     eax, dword ptr [ebp+arg_8+4]
100015EF                 seto    al
100015F2                 pop     ebp
100015F3                 retn
10001620 cfsub64(unsigned __int64, unsigned __int64) proc near
10001620
10001620 arg_0           = qword ptr  8
10001620 arg_8           = qword ptr  10h
10001620
10001620                 push    ebp
10001621                 mov     ebp, esp
10001623                 mov     eax, dword ptr [ebp+arg_0]
10001626                 cmp     eax, dword ptr [ebp+arg_8]
10001629                 mov     eax, dword ptr [ebp+arg_0+4]
1000162C                 sbb     eax, dword ptr [ebp+arg_8+4]
1000162F                 setb    al
10001632                 pop     ebp
10001633                 retn

The x86 Hex-Rays decompiler correctly recognizes the underlying 64-bit subtraction in both cases, and each function fits in a single return statement: just one Hex-Rays intrinsic for the overflow flag, and a plain unsigned comparison for the carry flag:

bool ofsub64(__int64 a1, __int64 a2)
{
    return __OFSUB__(a1, a2); // hovering a mouse over this intrinsic shows `bool __stdcall(_QWORD, _QWORD)`
}

bool cfsub64(unsigned __int64 a1, unsigned __int64 a2)
{
    return a1 < a2;
}

However, for the addition operation, the Hex-Rays decompiler struggles to recognize the appropriate intrinsic:

10001600 ofadd64(__int64, __int64) proc near
10001600
10001600 arg_0           = qword ptr  8
10001600 arg_8           = qword ptr  10h
10001600
10001600                 push    ebp
10001601                 mov     ebp, esp
10001603                 mov     eax, dword ptr [ebp+arg_0]
10001606                 add     eax, dword ptr [ebp+arg_8]
10001609                 mov     eax, dword ptr [ebp+arg_0+4]
1000160C                 adc     eax, dword ptr [ebp+arg_8+4]
1000160F                 seto    al
10001612                 pop     ebp
10001613                 retn
10001613
10001640 cfadd64(unsigned __int64, unsigned __int64) proc near
10001640
10001640 arg_0           = qword ptr  8
10001640 arg_8           = qword ptr  10h
10001640
10001640                 push    ebp
10001641                 mov     ebp, esp
10001643                 mov     eax, dword ptr [ebp+arg_0]
10001646                 add     eax, dword ptr [ebp+arg_8]
10001649                 mov     eax, dword ptr [ebp+arg_0+4]
1000164C                 adc     eax, dword ptr [ebp+arg_8+4]
1000164F                 setb    al
10001652                 pop     ebp
10001653                 retn

The output is the following:

int ofadd64(__int64 a1, __int64 a2)
{
    int result; // eax

    result = (unsigned __int64)(a2 + a1) >> 32;
    LOBYTE(result) = __OFADD__(__CFADD__((_DWORD)a2, (_DWORD)a1), HIDWORD(a1)) | __OFADD__(HIDWORD(a2), __CFADD__((_DWORD)a2, (_DWORD)a1) + HIDWORD(a1));
    return result;
}

int cfadd64(unsigned __int64 a1, unsigned __int64 a2)
{
    int result; // eax

    result = (a2 + a1) >> 32;
    LOBYTE(result) = __CFADD__(__CFADD__((_DWORD)a2, (_DWORD)a1), HIDWORD(a1)) | __CFADD__(HIDWORD(a2), __CFADD__((_DWORD)a2, (_DWORD)a1) + HIDWORD(a1));
    return result;
}

For both functions, the expected output should be as simple as it is for subtraction, i.e. a single __OFADD__ or __CFADD__ intrinsic, respectively. The examples above are just a simplification demonstrated in separate functions, but for larger programs that make heavy use of 64-bit integers on 32-bit x86, this is a clear problem, because the failure to recognize these intrinsics leads to more convoluted code. Thanks!

Update: looks like this also applies to ROR and ROL:

10001350 ror64(unsigned __int64, unsigned __int64) proc near
10001350
10001350 arg_0           = dword ptr  8
10001350 arg_4           = dword ptr  0Ch
10001350 arg_8           = dword ptr  10h
10001350
10001350                 push    ebp
10001351                 mov     ebp, esp
10001353                 push    esi
10001354                 mov     eax, [ebp+arg_0]
10001357                 mov     esi, [ebp+arg_4]
1000135A                 mov     ecx, [ebp+arg_8]
1000135D                 test    cl, 20h
10001360                 mov     edx, eax
10001362                 cmovz   edx, esi
10001365                 cmovz   esi, eax
10001368                 mov     eax, esi
1000136A                 shrd    eax, edx, cl
1000136D                 shrd    edx, esi, cl
10001370                 pop     esi
10001371                 pop     ebp
10001372                 retn
10001320 rol64(unsigned __int64, unsigned __int64) proc near
10001320
10001320 arg_0           = qword ptr  8
10001320 arg_8           = dword ptr  10h
10001320
10001320                 push    ebp
10001321                 mov     ebp, esp
10001323                 push    esi
10001324                 mov     esi, dword ptr [ebp+arg_0]
10001327                 mov     eax, dword ptr [ebp+arg_0+4]
1000132A                 mov     ecx, [ebp+arg_8]
1000132D                 test    cl, 20h
10001330                 mov     edx, eax
10001332                 cmovnz  edx, esi
10001335                 cmovnz  esi, eax
10001338                 mov     eax, esi
1000133A                 shld    eax, edx, cl
1000133D                 shld    edx, esi, cl
10001340                 pop     esi
10001341                 pop     ebp
10001342                 retn
// it should simply return __ROR8__(a1, a2)
int ror64(unsigned __int64 a1, unsigned __int64 a2)
{
    unsigned int v2; // esi
    unsigned int v3; // edx

    v2 = HIDWORD(a1);
    v3 = a1;
    if ( (a2 & 0x20) == 0 )
    {
        v3 = HIDWORD(a1);
        v2 = a1;
    }

    return __PAIR64__(v3, v2) >> a2;
}

// it should simply return __ROL8__(a1, a2)
int rol64(unsigned __int64 a1, unsigned __int64 a2)
{
    unsigned __int64 v2; // kr08_8

    v2 = __PAIR64__(a1, HIDWORD(a1));
    if ( (a2 & 0x20) != 0 )
        v2 = a1;

    return v2 << a2 >> 32;
}

This probably also applies to some other intrinsics, but I haven’t discovered them yet.

Hi! Thank you for your detailed explanation and reasoning. We’ve noted your feedback and moved this topic to the Feature requests category. While it’s not something we currently plan to address in the near future, we’ll keep an eye on how it resonates with the rest of the community.