【Intel Fortran v.19.1】 F2018 対応 do concurrent での変数の局所性指定
追記:R3.3.30
文法チェックには引っかかりませんが、さりとてオプションとしては機能していない感じです。default(none) のチェックも効かず、local も効いてない。
do concurrent ではスレッド並列の並列実行がなされるので、do loop 内の変数のスレッド独立性の問題が出ます。F2008 では block..end block を利用してスレッド独立な変数を局所的に宣言する方法が用意されていましたが、F2018 では OpenMP の影響でしょうか、do concurrent 文の後ろに新たに修飾子が加わって、局所性や値の引継ぎなどが制御できるようになりました。Intel Fortran v.19.1 で実行できるようなので試してみます。
software.intel.com
Locality of variables in DO CONCURRENT constructs can now be declared on the DO CONCURRENT statement
help file より
You can specify LOCAL, LOCAL_INIT, SHARED, and DEFAULT (NONE) in the same DO CONCURRENT statement.
実行結果
コンパイル時のオプションで並列化を on/off することで do concurrent の効果をみてみます。計算量が少ないのであまり大きな差は出ていませんが、約二倍のスピードが出ています。(600 vs 300; 単位はシステムクロックのティック数の差だった気がします。)
並列なし
do concurrent time = 4.687500000000000E-002 600
並列あり
do concurrent time = 0.187500000000000 300
ソース・プログラム
昔作ったプログラムを修正してみます。
ほとんど覚えていないのですが、メモリーリークしているかもしれません。マルチスレッドにすることで win32 で絵をかきつつプログラムの実行を続けていました。その辺よく分かっていないので非推奨 API を使っています。いまは coarray や OpenMP で行けるかもしれません。
64bit windows で動くようにしました。block...end block によりお絵かき部分を分離しました。
fortran66.hatenablog.com
計算の胆部分。ループ変数は do concurrent 内で局所宣言します。
call system_clock(it0)
call cpu_time(t0)
do concurrent (integer::ix = 0:imax, iy = 0:jmax) local(x, y)
x = xmin + ix * dx
y = ymax - iy * dy
ic(ix, iy) = imandel(x, y)
end do
call cpu_time(t1)
call system_clock(it1)
print *, ' do concurrent time =', t1 - t0, it1 - it0
コード全体
module m_oop
implicit none
type :: t_rgb
integer :: ir, ig, ib
end type t_rgb
type, abstract :: t_device
character(len = 80) :: title = 'Plotter'
integer :: nsize_x = 640, nsize_y = 480
integer :: line_width = 1
type (t_rgb) :: rgb = t_rgb(0, 0, 0)
contains
procedure (device_on), deferred, pass :: on
procedure (device_off), deferred, pass :: off
procedure (device_show), deferred, pass :: show
procedure (device_pen), deferred, pass :: pen
procedure (device_lineTo), deferred, pass :: lineTo
procedure (device_moveTo), deferred, pass :: moveTo
procedure (device_dot), deferred, pass :: dot
end type t_device
abstract interface
subroutine device_on(self)
import :: t_device
class(t_device), intent(in out) :: self
end subroutine device_on
subroutine device_off(self)
import :: t_device
class(t_device), intent(in) :: self
end subroutine device_off
subroutine device_show(self)
import :: t_device
class(t_device), intent(in) :: self
end subroutine device_show
subroutine device_pen(self, line_width, rgb)
import :: t_device, t_rgb
class(t_device), intent(in out) :: self
integer, intent(in), optional :: line_width
type (t_rgb), intent(in), optional :: rgb
end subroutine device_pen
subroutine device_lineTo(self, ix, iy)
import :: t_device
class(t_device), intent(in) :: self
integer, intent(in) :: ix, iy
end subroutine device_lineTo
subroutine device_moveTo(self, ix, iy)
import :: t_device
class(t_device), intent(in) :: self
integer, intent(in) :: ix, iy
end subroutine device_moveTo
subroutine device_dot(self, ix, iy, icol)
import :: t_device, t_rgb
class(t_device), intent(in) :: self
integer, intent(in) :: ix, iy
integer, intent(in) :: icol
end subroutine device_dot
end interface
end module m_oop
module m_win32
use ifwina
use ifwinty
use ifmt, only : RTL_CRITICAL_SECTION
use m_oop
implicit none
type, extends(t_device) :: t_win32
contains
procedure, pass :: on => gr_on
procedure, pass :: off => gr_off
procedure, pass :: show => gr_show
procedure, pass :: pen => gr_pen
procedure, pass :: lineTo => gr_lineTo
procedure, pass :: moveTo => gr_moveTo
procedure, pass :: dot => gr_dot
end type t_win32
type :: t_wnd
integer (HANDLE) :: hWnd
integer (HANDLE) :: hDC
integer (LPINT) :: hThread
integer (LPDWORD):: id
integer (HANDLE) :: hPen
type (RTL_CRITICAL_SECTION) :: lpCriticalSection
end type t_wnd
type (t_wnd) :: wnd
contains
integer(HANDLE) function WinMain( hInstance, nCmdShow, win32 )
implicit none
integer (HANDLE), intent(in) :: hInstance
integer (SINT) , intent(in) :: nCmdShow
type (t_win32), intent(in) :: win32
type (T_WNDCLASS) :: wc
type (T_MSG) :: mesg
integer (HANDLE) :: hWndMain
integer (BOOL) :: iretb
character (LEN = 256) :: ClassName = 'Fortran'//char(0)
integer :: iwindow_frame_x, iwindow_frame_y
logical, save :: first = .true.
iwindow_frame_x = 2 * GetSystemMetrics(SM_CXFIXEDFRAME)
iwindow_frame_y = 2 * GetSystemMetrics(SM_CYFIXEDFRAME) + GetSystemMetrics(SM_CYCAPTION)
if (first) then
WinMain = -1
wc%lpszClassName = loc(ClassName)
wc%lpfnWndProc = loc(MainWndProc)
wc%style = ior(CS_VREDRAW , CS_HREDRAW)
wc%hInstance = hInstance
wc%hIcon = NULL
wc%hCursor = LoadCursor( NULL, IDC_ARROW )
wc%hbrBackground = ( COLOR_WINDOW + 1 )
if ( RegisterClass(wc) == 0 ) return
first = .false.
end if
WinMain = -2
hWndMain = CreateWindow( ClassName, &
trim(win32%title)//char(0), &
int(ior(WS_OVERLAPPED, WS_SYSMENU)), &
CW_USEDEFAULT, CW_USEDEFAULT, &
win32%nsize_x + iwindow_frame_x, &
win32%nsize_y + iwindow_frame_y, &
0, 0, &
hInstance, &
NULL )
if (hWndMain == 0) return
iretb = ShowWindow( hWndMain, nCmdShow )
iretb = UpdateWindow( hWndMain )
do while ( GetMessage (mesg, NULL, 0, 0) )
iretb = TranslateMessage( mesg )
iretb = DispatchMessage( mesg )
end do
WinMain = mesg%wParam
end function WinMain
integer (LRESULT) function MainWndProc( hWnd, mesg, wParam, lParam )
integer (HANDLE) , intent(in) :: hWnd
integer (UINT) , intent(in) :: mesg
integer (fwParam), intent(in) :: wParam
integer (flParam), intent(in) :: lParam
integer (HANDLE) :: hDC, hBmp
integer (BOOL) :: iretb
type (T_PAINTSTRUCT) :: ps
type (T_RECT) :: rc
MainWndProc = 0
select case ( mesg )
case (WM_CREATE)
wnd%hWnd = hWnd
hDC = GetDC(hWnd)
wnd%hDC = CreateCompatibleDC(hDC)
iretb = GetClientRect(hWnd, rc)
hBmp = CreateCompatibleBitmap(hDC, rc%right - rc%left, rc%bottom - rc%top)
iretb = SelectObject(wnd%hDC, hBmp)
iretb = PatBlt(wnd%hDC, 0, 0, rc%right - rc%left, rc%bottom - rc%top, WHITENESS)
iretb = ReleaseDC(hWnd, hDC)
iretb = DeleteObject(hBmp)
case (WM_DESTROY)
call EnterCriticalSection( loc(wnd%lpCriticalSection) )
iretb = DeleteObject( wnd%hDC )
call PostQuitMessage( 0 )
call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
case (WM_PAINT)
call EnterCriticalSection( loc(wnd%lpCriticalSection) )
hDC = BeginPaint( wnd%hWnd, ps )
iretb = GetClientRect( wnd%hWnd, rc )
iretb = BitBlt(hDC, 0, 0, rc%right - rc%left, rc%bottom - rc%top, wnd%hDC, 0, 0, SRCCOPY)
iretb = endPaint( wnd%hWnd, ps )
call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
case (WM_RBUTTONUP)
call EnterCriticalSection( loc(wnd%lpCriticalSection) )
iretb = DeleteObject( wnd%hDC )
call PostQuitMessage( 0 )
call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
case default
MainWndProc = DefWindowProc( hWnd, mesg, wParam, lParam )
end select
end function MainWndProc
subroutine gr_on(self)
use IFMT, only : CreateThread
class(t_win32), intent(in out) :: self
integer (BOOL) :: iretb
integer (HANDLE) :: hBmp
type (T_RECT) :: rc
call InitializeCriticalSection( loc(wnd%lpCriticalSection) )
wnd%hThread = CreateThread(NULL, 0_LPINT, Thread_Proc, NULL, CREATE_SUSPENDED, wnd%id)
iretb = SetThreadPriority(wnd%hThread, THREAD_PRIORITY_BELOW_NORMAL)
iretb = ResumeThread(wnd%hThread)
call sleep(100)
iretb = GetClientRect(wnd%hWnd, rc)
hBmp = CreateCompatibleBitmap(wnd%hDC, rc%right - rc%left, rc%bottom - rc%top)
iretb = SelectObject(wnd%hDC, hBmp)
iretb = DeleteObject(hBmp)
iretb = PatBlt(wnd%hDC, 0, 0, rc%right - rc%left, rc%bottom - rc%top, WHITENESS)
wnd%hPen = CreatePen(PS_SOLID, 1, 0)
contains
integer (LONG) function Thread_Proc(lp_ThreadParameter)
integer (LPINT), intent(in) :: lp_ThreadParameter
integer (LPINT):: hInst
hInst = GetModuleHandle(NULL)
Thread_Proc = WinMain(hInst, SW_SHOWNORMAL, self)
end function Thread_Proc
end subroutine gr_on
subroutine gr_off(self)
class(t_win32), intent(in) :: self
integer (BOOL) :: iretb
integer (DWORD) :: iwait
iwait = INFINITE
call gr_show(self)
iretb = DeleteObject(wnd%hPen)
iretb = WaitForSingleObject(wnd%hThread, iwait)
iretb = CloseHandle(wnd%hThread)
iretb = PostMessage(wnd%hWnd, WM_DESTROY, NULL, NULL)
wnd%hThread = NULL
call DeleteCriticalSection( loc(wnd%lpCriticalSection) )
end subroutine gr_off
subroutine gr_show(self)
class(t_win32), intent(in) :: self
integer (BOOL):: iretb
call EnterCriticalSection( loc(wnd%lpCriticalSection) )
iretb = InvalidateRect(wnd%hWnd, NULL, FALSE)
call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
end subroutine gr_show
subroutine gr_pen(self, line_width, rgb)
class(t_win32), intent(in out) :: self
integer, intent(in), optional :: line_width
type (t_rgb), intent(in), optional :: rgb
integer (BOOL) :: iretb
associate( rgb_ => self%rgb, line_width_ => self%line_width )
if ( present(rgb) ) rgb_ = rgb
if ( present(line_width) ) line_width_ = line_width
call EnterCriticalSection( loc(wnd%lpCriticalSection) )
iretb = DeleteObject(wnd%hPen)
wnd%hPen = CreatePen(PS_SOLID, line_width_, irgb(rgb_))
iretb = SelectObject(wnd%hDC, wnd%hPen)
iretb = MoveToEx(wnd%hDC, 0, 0, NULL)
call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
end associate
contains
integer function irgb(rgb)
type(t_rgb), intent(in) :: rgb
irgb = rgb%ir + (rgb%ig + (rgb%ib * 256)) * 256
end function irgb
end subroutine gr_pen
subroutine gr_moveTo(self, ix, iy)
class(t_win32), intent(in) :: self
integer, intent(in) :: ix, iy
integer (BOOL):: iretb
call EnterCriticalSection( loc(wnd%lpCriticalSection) )
iretb = MoveToEx(wnd%hDC, ix, iy, NULL)
call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
end subroutine gr_moveTo
subroutine gr_lineTo(self, ix, iy)
class(t_win32), intent(in) :: self
integer, intent(in) :: ix, iy
integer (BOOL):: iretb
call EnterCriticalSection( loc(wnd%lpCriticalSection) )
iretb = LineTo(wnd%hDC, ix, iy)
call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
end subroutine gr_lineTo
subroutine gr_dot(self, ix, iy, icol)
class(t_win32), intent(in) :: self
integer, intent(in) :: ix, iy, icol
integer (BOOL):: iretb
call EnterCriticalSection( loc(wnd%lpCriticalSection) )
iretb = SetPixel(wnd%hDC, ix, iy, icol)
call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
end subroutine gr_dot
end module m_win32
module m_plot
use m_oop
use m_win32
implicit none
private
public :: t_rgb, t_device, t_win32
end module m_plot
module m_mandel
implicit none
integer, parameter :: kd = kind(0.0d0)
contains
pure elemental integer function imandel(x, y)
real(kd), intent(in) :: x, y
real(kd) :: a, b, a2, b2
integer :: icount
a = x
b = y
a2 = a * a
b2 = b * b
icount = 150
do while (a2 + b2 <= 4.0_kd .AND. icount > 0)
b = 2.0_kd * a * b - y
a = a2 - b2 - x
a2 = a * a
b2 = b * b
icount = icount - 1
end do
imandel = icount
end function imandel
end module m_mandel
program Mandel
use m_mandel
implicit none
integer, parameter :: m = 1000
integer :: nwinx = 1024, nwiny = 1024
integer :: i, j, imax, jmax, maxiter, icount
real (kd) :: xmin, xmax, ymin, ymax
real (kd) :: xmin1, xmax1, ymin1, ymax1
real (kd) :: x, y, a, b, a2, b2, dx, dy
real (kd) :: t0, t1
integer, allocatable :: ic(:, :)
integer :: icol(0:m), it0, it1
xmin = -2.0d0
xmax = 2.0d0
ymin = -2.0d0
ymax = 2.0d0
maxiter = 252
dx = xmax - xmin
dy = ymax - ymin
if (dx <= 0.0_kd .OR. dy <= 0.0_kd .OR. maxiter <= 0 .OR. maxiter > M) stop 'input error'
if (dx * nwinx > dy * nwiny) then
imax = nwinx
jmax = nint(nwinx * dy / dx)
else
imax = nint(nwiny * dx / dy)
jmax = int(nwiny)
end if
dx = dx / real(imax, kd)
dy = dy / real(jmax, kd)
icol(0) = 0
j = irgb(255, 255, 255)
do i = maxiter, 1, -1
icol(i) = j
if (j > 1) j = j - irgb(255, 255, 255) / maxiter
end do
allocate( ic(0:imax, 0:jmax) )
call system_clock(it0)
call cpu_time(t0)
do concurrent (integer::ix = 0:imax, iy = 0:jmax) local(x, y)
x = xmin + ix * dx
y = ymax - iy * dy
ic(ix, iy) = imandel(x, y)
end do
call cpu_time(t1)
call system_clock(it1)
print *, ' do concurrent time =', t1 - t0, it1 - it0
block
use m_plot
class(t_device), allocatable :: fig
type(t_rgb), parameter :: rgb_black = t_rgb(0, 0, 0)
fig = t_win32('Mandelbrot 1', imax, jmax, 1, rgb_black)
call fig%on()
do i = 0, imax
do j = 0, jmax
call fig%dot(i, j, icol(ic(i, j)))
end do
call fig%show()
end do
call fig%off()
end block
contains
integer function irgb(ir, ig, ib)
integer, intent(in) :: ir, ig, ib
irgb = ir + (ig + (ib * 256)) * 256
end function irgb
end program Mandel