Testing the speed of binaries produced by VC++ 2008, Intel C++ 10.1, and GCC 3.2 20020927 (prerelease)

The following code snippet is used to compare the performance of the binaries each compiler produces. The code exists purely as a performance test; the correctness of its results has not been verified.

// fastinit.cpp : Defines the entry point for the console application.
// g++ -I/cygdrive/d/opensource/boost/boost_1_35_0 -O3 fastinit.cpp -o f.exe
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <windows.h>
#include <boost/scoped_array.hpp>
class ProfileTimer
{
public:
    ProfileTimer(const std::string& name)
    {
        m_description = name;
        QueryPerformanceCounter( &m_start_time);
        m_start_tick = GetTickCount();
    }
    ~ProfileTimer()
    {
        DWORD used_tick = GetTickCount()-m_start_tick;
        LARGE_INTEGER  current_time;
        QueryPerformanceCounter( &current_time);
        LARGE_INTEGER  frequency;
        QueryPerformanceFrequency(&frequency);
        double seconds = (current_time.QuadPart-m_start_time.QuadPart)*1000.0/frequency.QuadPart;
        std::cout << m_description << " use " << seconds << " milliseconds, "
            << used_tick << " tick count."<< std::endl;
    }
    LARGE_INTEGER  m_start_time;
    DWORD m_start_tick;
    std::string m_description;

};
// Fills array_op[0 .. count-1] with copies of init_value using bulk memory
// operations instead of an element-by-element loop.
//
// Strategy: write the first element, then repeatedly memcpy the already
// initialised prefix onto the region that directly follows it, so the
// initialised prefix doubles on every pass until the whole array is covered.
// One-byte element types are handed straight to memset.
//
// Parameters:
//   array_op   - destination array with room for at least count elements.
//   count      - number of elements to initialise; 0 is a no-op.
//   init_value - value every element receives.
//
// Requirement: T must be safe to duplicate with memcpy (plain data such as
// built-in types or simple structs); elements after the first are produced by
// raw byte copies, not by T's assignment operator.
template<typename T>
void fastinit(T* array_op, std::size_t count, const T& init_value)
{
    if (count < 1)
        return;

    if (sizeof(T) == 1)
    {
        // Read exactly one byte of the value; reading it through an int*
        // would touch memory past the end of a one-byte object.
        unsigned char byte_value;
        std::memcpy(&byte_value, &init_value, 1);
        std::memset(array_op, byte_value, count);
        return;
    }

    array_op[0] = init_value;

    unsigned char* const bytes = reinterpret_cast<unsigned char*>(array_op);
    const std::size_t total_bytes = sizeof(T) * count;
    std::size_t filled_bytes = sizeof(T);

    while (filled_bytes < total_bytes)
    {
        // Never copy more than what is already initialised (keeps source and
        // destination disjoint) nor more than what is still missing.
        const std::size_t remaining = total_bytes - filled_bytes;
        const std::size_t chunk = remaining < filled_bytes ? remaining : filled_bytes;

        // The destination starts right after the initialised prefix.
        std::memcpy(bytes + filled_bytes, bytes, chunk);
        filled_bytes += chunk;
    }
}

// Baseline initialiser: assigns init_value to each of the first count
// elements of array_op, one element at a time, in ascending order.
template<typename T>
void forinit(T* array_op, std::size_t count, const T& init_value)
{
    T* const last = array_op + count;
    for (T* cursor = array_op; cursor != last; ++cursor)
    {
        *cursor = init_value;
    }
}

// Sample aggregate with members of several built-in types; main() uses it as
// the element type of the struct-array benchmark. main() initialises it
// positionally with {1,2,3,4,5,6}, so the member order must stay as declared.
struct s1
{
    int a;
    long b;
    float c;
    double d;
    short e;
    char f;
};

int main(int argc, char* argv[])
{

    int times = 20000;
    if (argc>1)
        times = atoi(argv[1]);
    while (times<20000)
        times += times;
    const int element_count = 10240;
    std::cout << std::endl;

#if defined _MSC_VER && !defined (__INTEL_COMPILER)
    std::cout << " VC++ " << _MSC_VER;
#endif

#ifdef __GNUC__
    std::cout << " GCC " << __GNUC__;
#endif

#ifdef __INTEL_COMPILER
    std::cout << " Intel C++ " << __INTEL_COMPILER
        << " VC++ compability level: " << __INTEL_MS_COMPAT_LEVEL;
#endif

    std::cout << " element count:" <<element_count
        <<", loop " << times << " times." << std::endl;
    int data[element_count];
    {

        ProfileTimer a("initial int arrray with fast method");
        {
            for(int i=0;i<times; i++)
                fastinit(data, element_count, 0x12345678);
        }
    }

    {
        ProfileTimer  b( "initial int arrray with for method");
        {
            for(int i=0;i<times; i++)
                forinit(data, element_count, 0x12345678);
        }
    }
    boost::scoped_array<s1> s_array(new s1[element_count]);
    s1 s_one = {1,2,3,4,5,6};
    {

        ProfileTimer a("initial struct arrray with fast method");
        {
            for(int i=0;i<times; i++)
                fastinit(s_array.get(), element_count, s_one);
        }
    }

    {
        ProfileTimer  b( "initial struct arrray with for method");
        {
            for(int i=0;i<times; i++)
                forinit(s_array.get(), element_count, s_one);
        }
    }

    return 0;
}

 

VC++ 2008 Optimize level: /O2

G++ Optimize level: -O3

Result:

GCC 3 element count:10240, loop 80000 times.
initial int arrray with fast method use 4629.66 milliseconds, 4640 tick count.
initial int arrray with for method use 822.303 milliseconds, 813 tick count.
initial struct arrray with fast method use 7409.42 milliseconds, 7422 tick count.
initial struct arrray with for method use 4208.53 milliseconds, 4203 tick count.

 

Intel C++ 1010 VC++ compability level: 1 element count:10240, loop 80000 times.
initial int arrray with fast method use 582.76 milliseconds, 578 tick count.
initial int arrray with for method use 590.338 milliseconds, 594 tick count.
initial struct arrray with fast method use 3487.38 milliseconds, 3484 tick count.
initial struct arrray with for method use 4362.1 milliseconds, 4375 tick count.

VC++ 1500 element count:10240, loop 80000 times.

initial int arrray with fast method use 959.708 milliseconds, 969 tick count.
initial int arrray with for method use 0.000596903 milliseconds, 0 tick count.
initial struct arrray with fast method use 6287.94 milliseconds, 6281 tick count.
initial struct arrray with for method use 8070.71 milliseconds, 8079 tick count.

However, the following code shows no measurable difference between the compilers:

// compilertest.cpp : Defines the entry point for the console application.
//  g++ compilertest.cpp -O3 -o b.exe
#ifdef _MSC_VER
#include "stdafx.h"
#endif
#include <windows.h>
#include <iostream>
int test1(int a, int b)  // Arbitrary arithmetic workload for the timing loop; comments are kept on existing lines so __LINE__ below keeps its value.
{
    const unsigned int ua = a, ub = b;  // work in unsigned: the products overflow int, which is undefined for signed types but wraps for unsigned
    unsigned int ret = ua*ua + ub*ub + ua*ub + __LINE__;  // symmetric mix of both inputs plus this source line's number
    ret = ret * (ret+1) * (ret+2)*(ret+3);  // product of four consecutive values, modulo 2^N
    return static_cast<int>(ret);  // hand the wrapped bit pattern back as int for the caller's feedback loop
}
int main(int argc, char* argv[])
{
    DWORD start_tick = GetTickCount();
    int run_count = 1000000;
    if (argc>1)
        run_count = atoi(argv[1]);
    while (run_count < 1000000)
        run_count += run_count;
    int ret = 0;
    for (int i=0;i<run_count; i++)
    {
        ret = test1(__LINE__, ret);
    }
    DWORD used_tick = GetTickCount() – start_tick;

#ifdef _MSC_VER
    std::cout << "VC++ " << _MSC_VER;
#endif

#ifdef __GNUC__
    std::cout << " GCC " << __GNUC__;
#endif

    std::cout << " Run " << run_count << " loops return " << ret
        <<" takes " << used_tick << " ms." << std::endl;
    return 0;
}

This entry was posted in IDE. Bookmark the permalink.

Leave a comment