Wednesday 27 November 2013

Misunderstanding of c++ 02 -- new and delete must be slower than malloc and free

    Many C programmers believe that new and delete must be slower than malloc and free, because they do too many jobs under the hood: new and delete have to deal with exceptions, new and delete must call constructors and destructors, etc.

What kinds of mistakes do they make

1 :  We can disable exceptions; today most major compilers (g++, clang++, Visual C++) can do that for us.

2 : If the constructor and destructor are trivial, new and delete don't need to call the constructor and destructor.

3 : The comparison is unfair: by default, new = malloc + constructor and delete = destructor + free.

    For more details, you could check stack overflow.

  In theory, a C++ compiler should be able to generate code for new and delete that performs the same as malloc and free, but can the compiler actually do that?


#include <stdio.h>
#include <stdlib.h> // pulls in declaration of malloc, free

/*
 * C version of the experiment: allocate a 1024-byte buffer with malloc,
 * print every byte with printf("%c"), then release the buffer with free.
 *
 * The program is kept deliberately minimal so that the generated assembly
 * can be compared with the C++ (new/delete) version:
 *   - the buffer is never initialized, so the bytes printed are
 *     indeterminate;
 *   - the result of malloc is not checked against NULL.
 */
int main()
{
    char *a = (char*)malloc(1024);

    for(int i = 0; i != 1024; ++i){
        printf("%c", a[i]);
    }

    /* Release the buffer before dropping the pointer; without this call
       the allocation would leak. */
    free(a);
    a = NULL;

    return 0;
}

#include <cstdio>

// C++ version of the experiment: allocate a 1024-byte buffer with new[],
// print every byte with printf("%c"), then release it with delete[].
//
// The program is kept deliberately minimal so that the generated assembly
// can be compared with the C (malloc/free) version; the buffer is never
// initialized, so the bytes printed are indeterminate.
int main()
{
    char *a = new char[1024];

    for(int i = 0; i != 1024; ++i){
        printf("%c", a[i]);
    }

    delete []a;

    return 0;
}

c assembly : clang -S -O3 -mllvm --x86-asm-syntax=intel mallocAndNew00.c

.section    __TEXT,__text,regular,pure_instructions
    .globl  _main
    .align  4, 0x90
## int main() -- C version (malloc / free).
## Syntax: Intel operand order (dst, src), as requested by
##         --x86-asm-syntax=intel on the clang command line.
## Register roles:
##   R14 = pointer returned by malloc; kept here so it survives the calls
##   RBX = index of the next buffer byte to load (starts at 1)
##   EDI = first argument of each call (size, character, or pointer in RDI)
##   EAX = return value of main (0)
_main:                                  ## @main
## BB#0:                                ## %entry
    push    RBP
    .cfi_def_cfa_offset 16
    .cfi_offset rbp, -16
    mov RBP, RSP
    .cfi_def_cfa_register rbp
    push    R14                         ## callee-saved, used for the buffer pointer
    push    RBX                         ## callee-saved, used for the loop index
    .cfi_offset rbx, -32
    .cfi_offset r14, -24
    mov EDI, 1024                       ## malloc(1024)
    call    _malloc
    mov R14, RAX                        ## R14 = a
    mov EBX, 1                          ## next index to load is 1
    xor EDI, EDI                        ## first putchar gets 0; a[0] is not loaded
                                        ## NOTE(review): presumably allowed because the
                                        ## buffer is never initialized -- confirm
    jmp LBB0_1
    .align  4, 0x90
LBB0_2:                                 ## %for.body.for.body_crit_edge
                                        ##   in Loop: Header=BB0_1 Depth=1
    movsx   EDI, BYTE PTR [R14 + RBX]   ## EDI = (int)a[RBX], sign-extended
    inc RBX
LBB0_1:                                 ## %for.body
                                        ## =>This Inner Loop Header: Depth=1
    call    _putchar                    ## printf("%c", x) in the source shows up as putchar(x)
    cmp EBX, 1024                       ## stop once all 1024 characters were printed
    jne LBB0_2
## BB#3:                                ## %for.end
    mov RDI, R14                        ## free(a) -- no null check before the call
    call    _free
    xor EAX, EAX                        ## return 0
    pop RBX
    pop R14
    pop RBP
    ret                                 ## return to the caller; without it execution
                                        ## would run past the end of main


c++ assembly : clang++ -S -O3 -std=c++11 -mllvm --x86-asm-syntax=intel mallocAndNew00.cpp
 .section __TEXT,__text,regular,pure_instructions
 .globl _main
 .align 4, 0x90
## int main() -- C++ version (new[] / delete[]).
## Syntax: Intel operand order (dst, src), as requested by
##         --x86-asm-syntax=intel on the clang++ command line.
## Register roles:
##   R14 = pointer returned by operator new[]; kept here so it survives the calls
##   RBX = index of the next buffer byte to load (starts at 1)
##   EDI = first argument of each call (size, character, or pointer in RDI)
##   EAX = return value of main (0)
## Mangled names: __Znam  = operator new[](unsigned long)
##                __ZdaPv = operator delete[](void*)
_main:                                  ## @main
## BB#0:                                ## %entry
 push RBP
 .cfi_def_cfa_offset 16
 .cfi_offset rbp, -16
 mov RBP, RSP
 .cfi_def_cfa_register rbp
 push R14                               ## callee-saved, used for the buffer pointer
 push RBX                               ## callee-saved, used for the loop index
 .cfi_offset rbx, -32
 .cfi_offset r14, -24
 mov EDI, 1024                          ## new char[1024]
 call __Znam
 mov R14, RAX                           ## R14 = a
 mov EBX, 1                             ## next index to load is 1
 xor EDI, EDI                           ## first putchar gets 0; a[0] is not loaded
                                        ## NOTE(review): presumably allowed because the
                                        ## buffer is never initialized -- confirm
 jmp LBB0_1
 .align 4, 0x90
LBB0_2:                                 ## %for.body.for.body_crit_edge
                                        ##   in Loop: Header=BB0_1 Depth=1
 movsx EDI, BYTE PTR [R14 + RBX]        ## EDI = (int)a[RBX], sign-extended
 inc RBX
LBB0_1:                                 ## %for.body
                                        ## =>This Inner Loop Header: Depth=1
 call _putchar                          ## printf("%c", x) in the source shows up as putchar(x)
 cmp EBX, 1024                          ## stop once all 1024 characters were printed
 jne LBB0_2
## BB#3:                                ## %for.end
 test R14, R14                          ## delete[] is skipped when the pointer is null
 je LBB0_5
## BB#4:                                ## %delete.notnull
 mov RDI, R14                           ## operator delete[](a)
 call __ZdaPv
LBB0_5:                                 ## %delete.end
 xor EAX, EAX                           ## return 0
 pop RBX
 pop R14
 pop RBP
 ret                                    ## return to the caller; without it execution
                                        ## would run past the end of main


  This time the C++ and C code differ slightly: the C++ version calls operator new[] and operator delete[] (the symbols __Znam and __ZdaPv in the listing), while the C version calls malloc and free. The C++ version checks whether the pointer is nullptr before calling delete, but the C version does not check before calling free. If you want the same behavior, call malloc and free in C++, but make sure you know what you are doing before you call malloc and free instead of new and delete.

The code can be downloaded from GitHub.

