C++ – Why does this loop produce “warning: iteration 3u invokes undefined behavior” and output more than 4 lines

cgccundefined-behavior

Compiling this:

#include <iostream>

int main()
{
    for (int i = 0; i < 4; ++i)
        std::cout << i*1000000000 << std::endl;
}

and gcc produces the following warning:

warning: iteration 3u invokes undefined behavior [-Waggressive-loop-optimizations]
   std::cout << i*1000000000 << std::endl;
                  ^

I understand there is a signed integer overflow.

What I cannot get is why i value is broken by that overflow operation?

I've read the answers to Why does integer overflow on x86 with GCC cause an infinite loop?, but I'm still not clear on why this happens – I get that "undefined" means "anything can happen", but what's the underlying cause of this specific behavior?

Online: http://ideone.com/dMrRKR

Compiler: gcc (4.8)

Best Answer

Signed integer overflow (as strictly speaking, there is no such thing as "unsigned integer overflow") means undefined behaviour. And this means anything can happen, and discussing why does it happen under the rules of C++ doesn't make sense.

C++11 draft N3337: §5.4:¹

If during the evaluation of an expression, the result is not mathematically deﬁned or not in the range of representable values for its type, the behavior is undeﬁned. [ Note: most existing implementations of C++ ignore integer overﬂows. Treatment of division by zero, forming a remainder using a zero divisor, and all ﬂoating point exceptions vary among machines, and is usually adjustable by a library function. —end note ]

Your code compiled with g++ -O3 emits warning (even without -Wall)

a.cpp: In function 'int main()':
a.cpp:11:18: warning: iteration 3u invokes undefined behavior [-Waggressive-loop-optimizations]
   std::cout << i*1000000000 << std::endl;
                  ^
a.cpp:9:2: note: containing loop
  for (int i = 0; i < 4; ++i)
  ^

The only way we can analyze what the program is doing, is by reading the generated assembly code.

Here is the full assembly listing:

    .file   "a.cpp"
    .section    .text$_ZNKSt5ctypeIcE8do_widenEc,"x"
    .linkonce discard
    .align 2
LCOLDB0:
LHOTB0:
    .align 2
    .p2align 4,,15
    .globl  __ZNKSt5ctypeIcE8do_widenEc
    .def    __ZNKSt5ctypeIcE8do_widenEc;    .scl    2;  .type   32; .endef
__ZNKSt5ctypeIcE8do_widenEc:
LFB860:
    .cfi_startproc
    movzbl  4(%esp), %eax
    ret $4
    .cfi_endproc
LFE860:
LCOLDE0:
LHOTE0:
    .section    .text.unlikely,"x"
LCOLDB1:
    .text
LHOTB1:
    .p2align 4,,15
    .def    ___tcf_0;   .scl    3;  .type   32; .endef
___tcf_0:
LFB1091:
    .cfi_startproc
    movl    $__ZStL8__ioinit, %ecx
    jmp __ZNSt8ios_base4InitD1Ev
    .cfi_endproc
LFE1091:
    .section    .text.unlikely,"x"
LCOLDE1:
    .text
LHOTE1:
    .def    ___main;    .scl    2;  .type   32; .endef
    .section    .text.unlikely,"x"
LCOLDB2:
    .section    .text.startup,"x"
LHOTB2:
    .p2align 4,,15
    .globl  _main
    .def    _main;  .scl    2;  .type   32; .endef
_main:
LFB1084:
    .cfi_startproc
    leal    4(%esp), %ecx
    .cfi_def_cfa 1, 0
    andl    $-16, %esp
    pushl   -4(%ecx)
    pushl   %ebp
    .cfi_escape 0x10,0x5,0x2,0x75,0
    movl    %esp, %ebp
    pushl   %edi
    pushl   %esi
    pushl   %ebx
    pushl   %ecx
    .cfi_escape 0xf,0x3,0x75,0x70,0x6
    .cfi_escape 0x10,0x7,0x2,0x75,0x7c
    .cfi_escape 0x10,0x6,0x2,0x75,0x78
    .cfi_escape 0x10,0x3,0x2,0x75,0x74
    xorl    %edi, %edi
    subl    $24, %esp
    call    ___main
L4:
    movl    %edi, (%esp)
    movl    $__ZSt4cout, %ecx
    call    __ZNSolsEi
    movl    %eax, %esi
    movl    (%eax), %eax
    subl    $4, %esp
    movl    -12(%eax), %eax
    movl    124(%esi,%eax), %ebx
    testl   %ebx, %ebx
    je  L15
    cmpb    $0, 28(%ebx)
    je  L5
    movsbl  39(%ebx), %eax
L6:
    movl    %esi, %ecx
    movl    %eax, (%esp)
    addl    $1000000000, %edi
    call    __ZNSo3putEc
    subl    $4, %esp
    movl    %eax, %ecx
    call    __ZNSo5flushEv
    jmp L4
    .p2align 4,,10
L5:
    movl    %ebx, %ecx
    call    __ZNKSt5ctypeIcE13_M_widen_initEv
    movl    (%ebx), %eax
    movl    24(%eax), %edx
    movl    $10, %eax
    cmpl    $__ZNKSt5ctypeIcE8do_widenEc, %edx
    je  L6
    movl    $10, (%esp)
    movl    %ebx, %ecx
    call    *%edx
    movsbl  %al, %eax
    pushl   %edx
    jmp L6
L15:
    call    __ZSt16__throw_bad_castv
    .cfi_endproc
LFE1084:
    .section    .text.unlikely,"x"
LCOLDE2:
    .section    .text.startup,"x"
LHOTE2:
    .section    .text.unlikely,"x"
LCOLDB3:
    .section    .text.startup,"x"
LHOTB3:
    .p2align 4,,15
    .def    __GLOBAL__sub_I_main;   .scl    3;  .type   32; .endef
__GLOBAL__sub_I_main:
LFB1092:
    .cfi_startproc
    subl    $28, %esp
    .cfi_def_cfa_offset 32
    movl    $__ZStL8__ioinit, %ecx
    call    __ZNSt8ios_base4InitC1Ev
    movl    $___tcf_0, (%esp)
    call    _atexit
    addl    $28, %esp
    .cfi_def_cfa_offset 4
    ret
    .cfi_endproc
LFE1092:
    .section    .text.unlikely,"x"
LCOLDE3:
    .section    .text.startup,"x"
LHOTE3:
    .section    .ctors,"w"
    .align 4
    .long   __GLOBAL__sub_I_main
.lcomm __ZStL8__ioinit,1,1
    .ident  "GCC: (i686-posix-dwarf-rev1, Built by MinGW-W64 project) 4.9.0"
    .def    __ZNSt8ios_base4InitD1Ev;   .scl    2;  .type   32; .endef
    .def    __ZNSolsEi; .scl    2;  .type   32; .endef
    .def    __ZNSo3putEc;   .scl    2;  .type   32; .endef
    .def    __ZNSo5flushEv; .scl    2;  .type   32; .endef
    .def    __ZNKSt5ctypeIcE13_M_widen_initEv;  .scl    2;  .type   32; .endef
    .def    __ZSt16__throw_bad_castv;   .scl    2;  .type   32; .endef
    .def    __ZNSt8ios_base4InitC1Ev;   .scl    2;  .type   32; .endef
    .def    _atexit;    .scl    2;  .type   32; .endef

I can barely even read assembly, but even I can see the addl $1000000000, %edi line. The resulting code looks more like

for(int i = 0; /* nothing, that is - infinite loop */; i += 1000000000)
    std::cout << i << std::endl;

This comment of @T.C.:

I suspect that it's something like: (1) because every iteration with i of any value larger than 2 has undefined behavior -> (2) we can assume that i <= 2 for optimization purposes -> (3) the loop condition is always true -> (4) it's optimized away into an infinite loop.

gave me idea to compare the assembly code of the OP's code to the assembly code of the following code, with no undefined behaviour.

#include <iostream>

int main()
{
    // changed the termination condition
    for (int i = 0; i < 3; ++i)
        std::cout << i*1000000000 << std::endl;
}

And, in fact, the correct code has termination condition.

    ; ...snip...
L6:
    mov ecx, edi
    mov DWORD PTR [esp], eax
    add esi, 1000000000
    call    __ZNSo3putEc
    sub esp, 4
    mov ecx, eax
    call    __ZNSo5flushEv
    cmp esi, -1294967296 // here it is
    jne L7
    lea esp, [ebp-16]
    xor eax, eax
    pop ecx
    ; ...snip...

Unfortunately this is the consequences of writing buggy code.

Fortunately you can make use of better diagnostics and better debugging tools - that's what they are for:

enable all warnings
-Wall is the gcc option that enables all useful warnings with no false positives. This is a bare minimum that you should always use.
gcc has many other warning options, however, they are not enabled with -Wall as they may warn on false positives
Visual C++ unfortunately is lagging behind with the ability to give useful warnings. At least the IDE enables some by default.
use debug flags for debugging
- for integer overflow -ftrapv traps the program on overflow,
- Clang compiler is excellent for this: -fcatch-undefined-behavior catches a lot of instances of undefined behaviour (note: "a lot of" != "all of them")

I have a spaghetti mess of a program not written by me that needs to be shipped tomorrow! HELP!!!!!!111oneone

Use gcc's -fwrapv

This option instructs the compiler to assume that signed arithmetic overflow of addition, subtraction and multiplication wraps around using twos-complement representation.

¹ - this rule does not apply to "unsigned integer overflow", as §3.9.1.4 says that

Unsigned integers, declared unsigned, shall obey the laws of arithmetic modulo 2ⁿ where n is the number of bits in the value representation of that particular size of integer.

and e.g. result of UINT_MAX + 1 is mathematically defined - by the rules of arithmetic modulo 2ⁿ

Related Solutions

Why are these constructs using pre and post-increment undefined behavior

C has the concept of undefined behavior, i.e. some language constructs are syntactically valid but you can't predict the behavior when the code is run.

As far as I know, the standard doesn't explicitly say why the concept of undefined behavior exists. In my mind, it's simply because the language designers wanted there to be some leeway in the semantics, instead of i.e. requiring that all implementations handle integer overflow in the exact same way, which would very likely impose serious performance costs, they just left the behavior undefined so that if you write code that causes integer overflow, anything can happen.

So, with that in mind, why are these "issues"? The language clearly says that certain things lead to undefined behavior. There is no problem, there is no "should" involved. If the undefined behavior changes when one of the involved variables is declared volatile, that doesn't prove or change anything. It is undefined; you cannot reason about the behavior.

Your most interesting-looking example, the one with

u = (u++);

is a text-book example of undefined behavior (see Wikipedia's entry on sequence points).

C++ – Detecting signed overflow in C/C++

No, your 2nd code isn't correct, but you are close: if you set

int half = INT_MAX/2;
int half1 = half + 1;

the result of an addition is INT_MAX. (INT_MAX is always an odd number). So this is valid input. But in your routine you will have INT_MAX - half == half1 and you would abort. A false positive.

This error can be repaired by putting < instead of <= in both checks.

But then also your code isn't optimal. The following would do:

int add(int lhs, int rhs)
{
 if (lhs >= 0) {
  if (INT_MAX - lhs < rhs) {
   /* would overflow */
   abort();
  }
 }
 else {
  if (rhs < INT_MIN - lhs) {
   /* would overflow */
   abort();
  }
 }
 return lhs + rhs;
}

To see that this is valid, you have to symbolically add lhs on both sides of the inequalities, and this gives you exactly the arithmetical conditions that your result is out of bounds.

Best Answer

Related Solutions

Why are these constructs using pre and post-increment undefined behavior

C++ – Detecting signed overflow in C/C++

Related Topic