Consider these two x86 (32-bit) functions, which return the overflow flag and the carry flag, respectively, of a subtraction of two 64-bit values (64-bit arithmetic is emulated by the compiler in 32-bit mode):
100015E0 ofsub64(__int64, __int64) proc near
100015E0
100015E0 arg_0 = qword ptr 8
100015E0 arg_8 = qword ptr 10h
100015E0
100015E0 push ebp
100015E1 mov ebp, esp
100015E3 mov eax, dword ptr [ebp+arg_0]
100015E6 cmp eax, dword ptr [ebp+arg_8]
100015E9 mov eax, dword ptr [ebp+arg_0+4]
100015EC sbb eax, dword ptr [ebp+arg_8+4]
100015EF seto al
100015F2 pop ebp
100015F3 retn
10001620 cfsub64(unsigned __int64, unsigned __int64) proc near
10001620
10001620 arg_0 = qword ptr 8
10001620 arg_8 = qword ptr 10h
10001620
10001620 push ebp
10001621 mov ebp, esp
10001623 mov eax, dword ptr [ebp+arg_0]
10001626 cmp eax, dword ptr [ebp+arg_8]
10001629 mov eax, dword ptr [ebp+arg_0+4]
1000162C sbb eax, dword ptr [ebp+arg_8+4]
1000162F setb al
10001632 pop ebp
10001633 retn
The x86 Hex-Rays decompiler correctly recognizes the 64-bit subtraction in both cases, and each function collapses into a single return statement: one Hex-Rays intrinsic for the overflow flag, and a plain unsigned comparison for the carry flag:
bool ofsub64(__int64 a1, __int64 a2)
{
return __OFSUB__(a1, a2); // hovering a mouse over this intrinsic shows `bool __stdcall(_QWORD, _QWORD)`
}
bool cfsub64(unsigned __int64 a1, unsigned __int64 a2)
{
return a1 < a2;
}
However, for the addition operation, the Hex-Rays decompiler struggles to recognize the appropriate intrinsic:
10001600 ofadd64(__int64, __int64) proc near
10001600
10001600 arg_0 = qword ptr 8
10001600 arg_8 = qword ptr 10h
10001600
10001600 push ebp
10001601 mov ebp, esp
10001603 mov eax, dword ptr [ebp+arg_0]
10001606 add eax, dword ptr [ebp+arg_8]
10001609 mov eax, dword ptr [ebp+arg_0+4]
1000160C adc eax, dword ptr [ebp+arg_8+4]
1000160F seto al
10001612 pop ebp
10001613 retn
10001613
10001640 cfadd64(unsigned __int64, unsigned __int64) proc near
10001640
10001640 arg_0 = qword ptr 8
10001640 arg_8 = qword ptr 10h
10001640
10001640 push ebp
10001641 mov ebp, esp
10001643 mov eax, dword ptr [ebp+arg_0]
10001646 add eax, dword ptr [ebp+arg_8]
10001649 mov eax, dword ptr [ebp+arg_0+4]
1000164C adc eax, dword ptr [ebp+arg_8+4]
1000164F setb al
10001652 pop ebp
10001653 retn
The output is the following:
int ofadd64(__int64 a1, __int64 a2)
{
int result; // eax
result = (unsigned __int64)(a2 + a1) >> 32;
LOBYTE(result) = __OFADD__(__CFADD__((_DWORD)a2, (_DWORD)a1), HIDWORD(a1)) | __OFADD__(HIDWORD(a2), __CFADD__((_DWORD)a2, (_DWORD)a1) + HIDWORD(a1));
return result;
}
int cfadd64(unsigned __int64 a1, unsigned __int64 a2)
{
int result; // eax
result = (a2 + a1) >> 32;
LOBYTE(result) = __CFADD__(__CFADD__((_DWORD)a2, (_DWORD)a1), HIDWORD(a1)) | __CFADD__(HIDWORD(a2), __CFADD__((_DWORD)a2, (_DWORD)a1) + HIDWORD(a1));
return result;
}
For both functions, the expected output should be as simple as for the subtraction, i.e. a single __OFADD__ or __CFADD__ intrinsic, respectively. The example above is just a simplification demonstrated in separate functions, but for larger programs that make heavy use of 64-bit integers on 32-bit x86, this is a clear problem, because the failure to recognize these intrinsics leads to more convoluted code. Thanks!