コンパイラ速さ比べ Windows 用 Fortran 三世代

年末なので暇つぶしに Windows 用 Fortran 三世代で速さ比べをしてみます。用いたコンパイラは、Intel Fortran Ver.16.0 と Compaq Visual Fortran Ver.6.6C と Microsoft Fortran Powerstation Ver.4.0 の三つです。

簡単な CG 法のプログラムで 2000x2000 の密行列の線型方程式を解いて、計算時間を比較してみます。それぞれ実行速度優先の optimization をかけています。

実行画面出力　CPU Intel Core i7-4770@3.4GHz

Intel Fortran v16.0 / Windows10

2015-12-30 02:56:51.7 cpu_time = 0.00 (sec) ::make matrix
2015-12-30 02:56:51.7 cpu_time = 0.312E-01(sec) ::solve Ax = b
difference (rms) = 5.372769248802961E-007
2015-12-30 02:56:54.8 cpu_time = 3.11 (sec) ::normal end
続行するには何かキーを押してください . . .

CVF6.6C / Windows2000 (VMware)

2015-12-30 02:20:34.7 cpu_time = 0.00 (sec) ::make matrix
2015-12-30 02:20:34.7 cpu_time = 0.313E-01(sec) ::solve Ax = b
difference (rms) = 6.402642212039837E-007
2015-12-30 02:20:45.4 cpu_time = 10.8 (sec) ::normal end
Press any key to continue

MSFPS4.0 / Windows98 (VMware)

015-12-30 02:20:49.2 cpu_time = .000 (sec) ::make matrix
015-12-30 02:20:49.4 cpu_time = .220 (sec) ::solve Ax = b
difference (rms) = 6.670762952641053E-007
015-12-30 02:21:37.0 cpu_time = 47.9 (sec) ::normal end
Press any key to continue

実行結果

実行時間は IF16.0, CVF6.6C, MSFPS4.0 それぞれで 3.11(sec) : 10.8(sec) : 47.9(sec) となっています。計算時間は、乱数で与えた初期値にも依存するので、三桁も有効桁は無く 10% くらいの幅があります。

最新版のコンパイラと20年前の物との間で、オーダー１桁のスピード差が出ています。なお、それぞれのコンパイラの対応している CPU は、世代的に AVX2/Xeon Phi, Pentium4/AMD Athlon, 無印Pentium となっています。（ちなみに MSFPS1.0 では 486 まで、MS-FORTRAN5.1 では 286 までの対応となっています。）

コンパイラも新しいものが欲しいですね。

実行プログラム

Intel と Compaq は、Fortran95 対応ですが、MS は発売時期的に Fortran90 のみ対応で、 subroutine cpu_time が無いので、ベンダー固有の function timef を使用しました。

module m_stamp
  ! Time-stamp helper: prints the wall-clock date/time together with the
  ! CPU time consumed since the first call of stamp.
  !
  ! use portlib ! required for timef() when building with MS-FPS4.0
  implicit none
  logical :: first = .true.     ! .true. until stamp has been called once
  real(kind(0.0d0)) :: t0, t1   ! cpu_time at the first call / at the latest call
contains
  ! Print one line "yyyy-mm-dd  hh:mm:ss.s   cpu_time = <elapsed>(sec) ::<text>".
  !   text : label appended to the end of the line
  subroutine stamp(text)
    character(len = *), intent(in) :: text
    character(len = *), parameter :: layout = '(6a, 6a, a, g15.3, 2a)'
    character(len =  8) :: ymd   ! ccyymmdd   as returned by date_and_time
    character(len = 10) :: hms   ! hhmmss.sss as returned by date_and_time
    ! the reference time is taken only once, on the very first call
    if (first) call cpu_time(t0) ! MS-FPS4.0: t0 = timef()
    first = .false.
    call cpu_time(t1)            ! MS-FPS4.0: t1 = timef()
    call date_and_time(date = ymd, time = hms)
    print layout, &
        ymd(1:4), '-', ymd(5:6), '-', ymd(7:8), '  ', &
        hms(1:2), ':', hms(3:4), ':', hms(5:8), '  ', &
        ' cpu_time =', t1 - t0, '(sec) ::', text
  end subroutine stamp
end module m_stamp
!===================================================
module m_cg
  ! Conjugate gradient solver for dense linear systems.
  implicit none
  integer, parameter :: kd = kind(0.0d0) ! working precision: double precision
contains
  ! Solve a x = b by the (unpreconditioned) conjugate gradient method.
  !
  !   a : coefficient matrix. The CG method assumes a symmetric positive
  !       definite matrix; this is not checked here.
  !   b : right-hand side vector
  !   x : on entry the starting vector, on return the approximate solution
  !
  ! The iteration stops as soon as |r|**2 <= epsilon * |b|**2, where r is the
  ! recursively updated residual, or after size(b) steps at the latest.
  subroutine cg(a, b, x)
    real(kd), intent(in) :: a(:, :), b(:)
    real(kd), intent(in out) :: x(:)
    real(kd) :: alpha, beta, r2n, r2d, eb2, p(size(b)), q(size(b)), r(size(b))
    integer :: i
    r = b - matmul(a, x)                   ! initial residual
    p = r                                  ! first search direction
    r2n = dot_product(r, r)
    eb2 = epsilon(b) * dot_product(b, b)   ! squared convergence threshold
    do i = 1, size(b)
      ! "<=" rather than "<": with b = 0 the threshold is exactly zero, and an
      ! exact starting vector (r = 0) must terminate here instead of going on
      ! to evaluate alpha = 0 / 0, which would fill x with NaN.
      if ( r2n <= eb2 ) exit
      r2d = r2n
      q = matmul(a, p)
      alpha = dot_product(p, r) / dot_product(p, q)   ! step length along p
      x = x + alpha * p
      r = r - alpha * q                    ! residual updated by recurrence
      r2n = dot_product(r, r)
      beta = r2n / r2d
      p = r + beta * p                     ! next search direction
    end do
  end subroutine cg
end module m_cg
!===================================================
program cg_main
  ! Benchmark driver: builds a dense ns x ns test system, solves it with cg
  ! and prints time stamps around each phase.
  use m_cg
  use m_stamp
  implicit none
  integer, parameter :: ns = 2000   ! order of the linear system
  real(kd) :: a(ns, ns), b(ns), x(ns)
  real(kd) :: resid_norm
  integer :: row, col

  call stamp('make matrix')
  ! Symmetric test matrix a(i, j) = 2 * min(i, j) - 1 (called a "givens matrix"
  ! by the author). Its first column is all ones, i.e. equal to b, so the
  ! exact solution is x = (1, 0, 0, ... , 0)t.
  b = 1.0_kd
  do col = 1, ns
    do row = 1, ns
      a(row, col) = real( 2 * min(row, col) - 1, kd )
    end do
  end do

  call stamp('solve Ax = b')
  call random_seed()
  call random_number(x)   ! random starting vector
  call cg(a, b, x)

  ! reuse x to hold the residual a x - b of the computed solution
  x = matmul(a, x) - b
  resid_norm = sqrt( dot_product(x, x) )
  print *, 'difference (rms) =', resid_norm
  call stamp('normal end')
end program cg_main