fortran66のブログ

fortran について書きます。

【メモ帳】 do concurrent での locality 制御

Intel Fortran v.19.1】 F2018 対応 do concurrent での変数の局所性指定

追記:R3.3.30 文法チェックには引っかかりませんが、さりとてオプションとしては機能していない感じです。default(none) のチェックも効かず、local も効いてない。

do concurrent ではスレッド並列の並列実行がなされるので、do loop 内の変数のスレッド独立性の問題が出ます。F2008 では block..end block を利用してスレッド独立な変数を局所的に宣言する方法が用意されていましたが、F2018 では OpenMP の影響でしょうか、do concurrent 文の後ろに新たに修飾子が加わって、局所性や値の引継ぎなどが制御できるようになりました。Intel Fortran v.19.1 で実行できるようなので試してみます。

software.intel.com

Locality of variables in DO CONCURRENT constructs can now be declared on the DO CONCURRENT statement

help file より

You can specify LOCAL, LOCAL_INIT, SHARED, and DEFAULT (NONE) in the same DO CONCURRENT statement.

実行結果

コンパイル時のオプションで並列化を on/off することで do concurrent の効果をみてみます。計算量が少ないのであまり大きな差は出ていませんが、約二倍のスピードが出ています。(600 vs 300; 単位はシステムクロックのティック数の差だった気がします。)

並列なし

  do concurrent time =  4.687500000000000E-002         600

並列あり

  do concurrent time =  0.187500000000000              300

f:id:fortran66:20200603205050j:plain

ソース・プログラム

昔作ったプログラムを修正してみます。 ほとんど覚えていないのですが、メモリーリークしているかもしれません。マルチスレッドにすることで win32 で絵をかきつつプログラムの実行を続けていました。その辺よく分かっていないので非推奨 API を使っています。いまは coarray や OpenMP で行けるかもしれません。

64bit windows で動くようにしました。block...end block によりお絵かき部分を分離しました。

fortran66.hatenablog.com

計算の胆部分。ループ変数は do concurrent 内で局所宣言します。

    call system_clock(it0)
    call cpu_time(t0)
    do concurrent (integer::ix = 0:imax, iy = 0:jmax) local(x, y)
        x = xmin + ix * dx
        y = ymax - iy * dy
        ic(ix, iy) = imandel(x, y)
    end do
    call cpu_time(t1)
    call system_clock(it1)
    print *, ' do concurrent time =', t1 - t0, it1 - it0

コード全体

module m_oop
    implicit none

    type :: t_rgb
        integer :: ir, ig, ib
    end type t_rgb
  
    type, abstract :: t_device
        character(len = 80) :: title = 'Plotter'
        integer :: nsize_x = 640, nsize_y = 480
        integer :: line_width = 1
        type (t_rgb) :: rgb = t_rgb(0, 0, 0)
    contains
        procedure (device_on), deferred, pass :: on
        procedure (device_off), deferred, pass :: off
        procedure (device_show), deferred, pass :: show
        procedure (device_pen), deferred, pass :: pen
        procedure (device_lineTo), deferred, pass :: lineTo
        procedure (device_moveTo), deferred, pass :: moveTo
        procedure (device_dot), deferred, pass :: dot
    end type t_device 

    abstract interface 
        subroutine device_on(self)
            import :: t_device
            class(t_device), intent(in out) :: self
        end subroutine device_on
  
        subroutine device_off(self)
            import :: t_device
            class(t_device), intent(in) :: self
        end subroutine device_off

        subroutine device_show(self)
            import :: t_device
            class(t_device), intent(in) :: self
        end subroutine device_show
    
        subroutine device_pen(self, line_width, rgb)
            import :: t_device, t_rgb
            class(t_device), intent(in out) :: self
            integer, intent(in), optional :: line_width
            type (t_rgb), intent(in), optional :: rgb
        end subroutine device_pen

        subroutine device_lineTo(self, ix, iy)
            import :: t_device
            class(t_device), intent(in) :: self
            integer, intent(in) :: ix, iy
        end subroutine device_lineTo
  
        subroutine device_moveTo(self, ix, iy)
            import :: t_device
            class(t_device), intent(in) :: self
            integer, intent(in) :: ix, iy
        end subroutine device_moveTo

        subroutine device_dot(self, ix, iy, icol)
            import :: t_device, t_rgb
            class(t_device), intent(in) :: self
            integer, intent(in) :: ix, iy
            integer, intent(in) :: icol
        !      type (t_rgb), intent(in) :: rgb
        end subroutine device_dot
    end interface
end module m_oop


module m_win32
    use ifwina
    use ifwinty
    use ifmt, only : RTL_CRITICAL_SECTION
    use m_oop
    implicit none

    type, extends(t_device) :: t_win32
    contains 
        procedure, pass :: on     => gr_on
        procedure, pass :: off    => gr_off
        procedure, pass :: show   => gr_show
        procedure, pass :: pen    => gr_pen
        procedure, pass :: lineTo => gr_lineTo
        procedure, pass :: moveTo => gr_moveTo
        procedure, pass :: dot    => gr_dot
    end type t_win32

    type :: t_wnd
        integer (HANDLE) :: hWnd    
        integer (HANDLE) :: hDC       
        integer (LPINT)  :: hThread  
        integer (LPDWORD):: id      
        integer (HANDLE) :: hPen     
        type (RTL_CRITICAL_SECTION) :: lpCriticalSection
    end type t_wnd      

    type (t_wnd) :: wnd
  
contains
    integer(HANDLE) function WinMain( hInstance, nCmdShow, win32 )
        implicit none
        integer (HANDLE), intent(in) :: hInstance 
        integer (SINT)  , intent(in) :: nCmdShow
        type (t_win32), intent(in) :: win32
        type (T_WNDCLASS) :: wc
        type (T_MSG)      :: mesg
        integer (HANDLE)  :: hWndMain
        integer (BOOL)    :: iretb
        character (LEN = 256) :: ClassName = 'Fortran'//char(0)
        integer :: iwindow_frame_x, iwindow_frame_y
        logical, save :: first = .true. 
        ! Init Main window
        iwindow_frame_x = 2 * GetSystemMetrics(SM_CXFIXEDFRAME) !side line = 6, title bar = 25
        iwindow_frame_y = 2 * GetSystemMetrics(SM_CYFIXEDFRAME) + GetSystemMetrics(SM_CYCAPTION)
        !
        if (first) then
            WinMain = -1 ! Error code 
            wc%lpszClassName =  loc(ClassName)     ! non-standard Fortran :: LOC(xxx) = TRANSFER(C_LOC(xxx), iii)
            wc%lpfnWndProc   =  loc(MainWndProc)   ! CALLBACK procedure name
            wc%style        = ior(CS_VREDRAW , CS_HREDRAW)
            wc%hInstance     = hInstance
            wc%hIcon        = NULL   
            wc%hCursor      = LoadCursor( NULL, IDC_ARROW )
            wc%hbrBackground = ( COLOR_WINDOW + 1 )
            if ( RegisterClass(wc) == 0 ) return    ! initialize window
            first = .false.
        end if
        ! Init instance
        WinMain = -2 ! Error code 
        hWndMain = CreateWindow(    ClassName,                         &
                                    trim(win32%title)//char(0),           &
                                    int(ior(WS_OVERLAPPED, WS_SYSMENU)),  &
                                    CW_USEDEFAULT, CW_USEDEFAULT,         &
                                    win32%nsize_x + iwindow_frame_x,      &
                                    win32%nsize_y + iwindow_frame_y,      &
                                    0, 0,                                 &
                                    hInstance,                            &
                                    NULL                           ) 
        if (hWndMain == 0) return
        iretb = ShowWindow( hWndMain, nCmdShow )
        iretb = UpdateWindow( hWndMain )
        ! Message Loop
        do while ( GetMessage (mesg, NULL, 0, 0) ) 
            iretb = TranslateMessage( mesg ) 
            iretb = DispatchMessage(  mesg )
        end do
        WinMain = mesg%wParam
    end function WinMain
   
    integer (LRESULT) function MainWndProc( hWnd, mesg, wParam, lParam ) 
    !DEC$ ATTRIBUTES STDcall, DECORATE, ALIAS : 'MainWndProc' :: MainWndProc
        integer (HANDLE) , intent(in) :: hWnd
        integer (UINT)   , intent(in) :: mesg
        integer (fwParam), intent(in) :: wParam
        integer (flParam), intent(in) :: lParam
        !
        integer (HANDLE) :: hDC, hBmp
        integer (BOOL)   :: iretb
        type (T_PAINTSTRUCT) :: ps
        type (T_RECT)       :: rc
        !
        MainWndProc = 0
        select case ( mesg )
        case (WM_CREATE)
            wnd%hWnd = hWnd
            hDC      = GetDC(hWnd)
            wnd%hDC  = CreateCompatibleDC(hDC)
            iretb    = GetClientRect(hWnd, rc)
            hBmp     = CreateCompatibleBitmap(hDC, rc%right - rc%left, rc%bottom - rc%top)
            iretb    = SelectObject(wnd%hDC, hBmp)
            iretb    = PatBlt(wnd%hDC, 0, 0, rc%right - rc%left, rc%bottom - rc%top, WHITENESS)
            iretb    = ReleaseDC(hWnd, hDC)
            iretb    = DeleteObject(hBmp)
        case (WM_DESTROY)
            call EnterCriticalSection( loc(wnd%lpCriticalSection) )
            iretb = DeleteObject( wnd%hDC )
            call PostQuitMessage( 0 )
            call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
        case (WM_PAINT)
            call EnterCriticalSection( loc(wnd%lpCriticalSection) )
            hDC    = BeginPaint(    wnd%hWnd, ps )
            iretb  = GetClientRect( wnd%hWnd, rc )
            iretb  = BitBlt(hDC, 0, 0, rc%right - rc%left, rc%bottom - rc%top, wnd%hDC, 0, 0, SRCCOPY)
            iretb  = endPaint( wnd%hWnd, ps )
            call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
        case (WM_RBUTTONUP)
            call EnterCriticalSection( loc(wnd%lpCriticalSection) )
            iretb = DeleteObject( wnd%hDC )
            call PostQuitMessage( 0 )
            call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
        case default
            MainWndProc = DefWindowProc( hWnd, mesg, wParam, lParam )
        end select 
    end function MainWndProc

    subroutine gr_on(self)
        use IFMT, only : CreateThread ! multithread module
        class(t_win32), intent(in out) :: self
        integer (BOOL)    :: iretb
        integer (HANDLE)  :: hBmp
        type (T_RECT)    :: rc
        call InitializeCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        wnd%hThread = CreateThread(NULL, 0_LPINT, Thread_Proc, NULL, CREATE_SUSPENDED, wnd%id) 
        iretb      = SetThreadPriority(wnd%hThread, THREAD_PRIORITY_BELOW_NORMAL)
        iretb      = ResumeThread(wnd%hThread)
        call sleep(100) ! wait for Window initialization 
        iretb = GetClientRect(wnd%hWnd, rc)
        hBmp  = CreateCompatibleBitmap(wnd%hDC, rc%right - rc%left, rc%bottom - rc%top)
        iretb = SelectObject(wnd%hDC, hBmp)
        iretb = DeleteObject(hBmp)
        iretb = PatBlt(wnd%hDC, 0, 0, rc%right - rc%left, rc%bottom - rc%top, WHITENESS)
        wnd%hPen = CreatePen(PS_SOLID, 1, 0)
    contains 

        integer (LONG) function Thread_Proc(lp_ThreadParameter)
    !    !DEC$ ATTRIBUTES STDcall, ALIAS:"_thread_proc" :: Thread_Proc
            integer (LPINT), intent(in) :: lp_ThreadParameter
            integer (LPINT):: hInst
            hInst       = GetModuleHandle(NULL)
            Thread_Proc = WinMain(hInst, SW_SHOWNORMAL, self)
        end function Thread_Proc
    end subroutine gr_on

    subroutine gr_off(self)
        class(t_win32), intent(in) :: self
        integer (BOOL)  :: iretb
        integer (DWORD) :: iwait
        iwait = INFINITE
        call gr_show(self) 
        iretb = DeleteObject(wnd%hPen) 
        iretb = WaitForSingleObject(wnd%hThread, iwait)
        iretb = CloseHandle(wnd%hThread)
        iretb = PostMessage(wnd%hWnd, WM_DESTROY, NULL, NULL)
        wnd%hThread = NULL
        call DeleteCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_off

    subroutine gr_show(self)
        class(t_win32), intent(in) :: self
        integer (BOOL):: iretb
        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        iretb = InvalidateRect(wnd%hWnd, NULL, FALSE)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_show

    subroutine gr_pen(self, line_width, rgb)
        class(t_win32), intent(in out) :: self
        integer, intent(in), optional :: line_width
        type (t_rgb), intent(in), optional :: rgb
        integer (BOOL) :: iretb
        associate( rgb_ => self%rgb, line_width_ => self%line_width )
            if ( present(rgb) ) rgb_ = rgb
            if ( present(line_width) ) line_width_ = line_width
            call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC  
            iretb    = DeleteObject(wnd%hPen) 
            wnd%hPen = CreatePen(PS_SOLID, line_width_, irgb(rgb_))
            iretb    = SelectObject(wnd%hDC, wnd%hPen)
            iretb    = MoveToEx(wnd%hDC, 0, 0, NULL)
            call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        end associate
    contains 
        integer function irgb(rgb)
            type(t_rgb), intent(in) :: rgb
            irgb = rgb%ir + (rgb%ig + (rgb%ib * 256)) * 256
        end function irgb
    end subroutine gr_pen

    subroutine gr_moveTo(self, ix, iy)
        class(t_win32), intent(in) :: self
        integer, intent(in) :: ix, iy
        integer (BOOL):: iretb
        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        iretb = MoveToEx(wnd%hDC, ix, iy, NULL)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_moveTo

    subroutine gr_lineTo(self, ix, iy)
        class(t_win32), intent(in) :: self
        integer, intent(in) :: ix, iy
        integer (BOOL):: iretb
        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        iretb = LineTo(wnd%hDC, ix, iy)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_lineTo
    
    subroutine gr_dot(self, ix, iy, icol)
        class(t_win32), intent(in) :: self
        integer, intent(in) :: ix, iy, icol
        integer (BOOL):: iretb
        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        iretb = SetPixel(wnd%hDC, ix, iy, icol)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_dot
end module m_win32

    
module m_plot
    use m_oop
    use m_win32
    implicit none
    private
    public :: t_rgb, t_device, t_win32
end module m_plot


module m_mandel
    implicit none 
    integer, parameter :: kd = kind(0.0d0)
contains
    pure elemental integer function imandel(x, y)
        real(kd), intent(in) :: x, y
        real(kd) :: a, b, a2, b2
        integer :: icount
        a = x
        b = y
        a2 = a * a
        b2 = b * b
        icount = 150 !maxiter
        do while (a2 + b2 <= 4.0_kd .AND. icount > 0) 
            b = 2.0_kd * a * b - y
            a = a2 - b2 - x
            a2 = a * a
            b2 = b * b
            icount = icount - 1
        end do
        imandel = icount
    end function imandel
end module m_mandel
 

program Mandel
    use m_mandel
    implicit none
    !integer, parameter :: kd = SELECTED_REAL_KIND(15)
    integer, parameter :: m = 1000
    integer :: nwinx = 1024, nwiny = 1024
    integer :: i, j, imax, jmax, maxiter, icount
    real (kd) :: xmin, xmax, ymin, ymax 
    real (kd) :: xmin1, xmax1, ymin1, ymax1 
    real (kd) :: x, y, a, b, a2, b2, dx, dy
    real (kd) :: t0, t1 
    integer, allocatable :: ic(:, :)
    integer :: icol(0:m), it0, it1

    !
    xmin = -2.0d0 !1.10950d0
    xmax =  2.0d0 !1.10951d0
    ymin = -2.0d0 !0.24758d0 
    ymax =  2.0d0 !0.24759d0 
    maxiter = 252
    !
    dx = xmax - xmin
    dy = ymax - ymin
    if (dx <= 0.0_kd .OR. dy <= 0.0_kd .OR. maxiter <= 0 .OR. maxiter > M) stop 'input error'
    if (dx * nwinx > dy * nwiny) then
        imax = nwinx
        jmax = nint(nwinx * dy / dx)
    else
        imax = nint(nwiny * dx / dy)
        jmax = int(nwiny)
    end if
    !
    dx = dx / real(imax, kd)
    dy = dy / real(jmax, kd)
    icol(0) = 0 ! black
    j = irgb(255, 255, 255)
    do i = maxiter, 1, -1
       icol(i) = j 
       if (j > 1) j = j - irgb(255, 255, 255) / maxiter
    end do
    !
    allocate( ic(0:imax, 0:jmax) )  
    !
    call system_clock(it0)
    call cpu_time(t0)
    do concurrent (integer::ix = 0:imax, iy = 0:jmax) local(x, y)
        x = xmin + ix * dx
        y = ymax - iy * dy
        ic(ix, iy) = imandel(x, y)
    end do
    call cpu_time(t1)
    call system_clock(it1)
    print *, ' do concurrent time =', t1 - t0, it1 - it0
    !
    ! plotter
    !
    block
        use m_plot
        class(t_device), allocatable :: fig
        type(t_rgb), parameter :: rgb_black = t_rgb(0, 0, 0)
        fig = t_win32('Mandelbrot 1', imax, jmax, 1, rgb_black)
        call fig%on()
        do i = 0, imax
           do j = 0, jmax
              call fig%dot(i, j, icol(ic(i, j)))  
           end do
           call fig%show()
        end do
        call fig%off()
    end block
contains 
    integer function irgb(ir, ig, ib)
        integer, intent(in) :: ir, ig, ib
        irgb = ir + (ig + (ib * 256)) * 256
    end function irgb
end program Mandel

数値計算のためのFortran90/95プログラミング入門(第2版)

数値計算のためのFortran90/95プログラミング入門(第2版)

  • 作者:牛島 省
  • 発売日: 2020/01/28
  • メディア: 単行本(ソフトカバー)

Fortran ハンドブック

Fortran ハンドブック