fortran66のブログ

fortran について書きます。

【メモ帳】ifx で do concurrent の GPU offload を試す

最新 ifx は do concurrent の GPU offload 可能

最新の intel fortran llvm 版 compiler ifx は do concurrent の GPU offload 可能になったというので試してみます。intel 第 12 世代ノート用 CPU の内蔵 GPU で試してみます。なお単精度実数にしか対応していません。

コンパイル時のオプションとして、OpenMP の指示を与える必要があります。OpenMP 稼働オプション、GPU 用バイナリ吐くオプション、do concurrent を暗黙に OpenMP 適用するオプションのようです。明示的に OpenMP 命令を書く必要はなく、標準 Fortran 命令で書けます。

 /Qopenmp-targets:spir64 /Qiopenmp  /Qopenmp-target-do-concurrent

あと、コンパイル時の最適化レポートを出させると、実行結果に影響が出たりします。まだ判然としないのですが…

/Qopt-report:2

実行時のオプション

実行時には、コマンドライン上で環境変数を与えることにより、GPU への offload を強制したり、禁止したりできます。

GPU offload 強制

set OMP_TARGET_OFFLOAD=MANDATORY

GPU offload 禁止

set OMP_TARGET_OFFLOAD=disabled

また実行時に環境変数をセットすることで offload されたかなどの情報を書き出させることも可能です。

set LIBOMPTARGET_DEBUG=1  

0 は非表示、1,2,4 で書き出しがなされます。

Mandelbrot 図形

Mandelbrot 図形を描いてみました。計算は GPU を使用しないほうが早く終わっています。メモリーの転送などにかかる時間のほうが計算している時間より長いのかもしれません。

Mandelbrot

!> Device-independent plotting layer.
!>
!> Declares a colour triple and an abstract drawing device whose operations
!> (open, close, refresh, pen selection, line drawing, pixel plotting) are all
!> deferred to concrete extensions.
module m_oop
    implicit none

    !> Colour expressed as three integer channels.
    !> NOTE(review): channels are presumably 0..255 -- confirm against callers.
    type :: t_rgb
        integer :: ir   ! red channel
        integer :: ig   ! green channel
        integer :: ib   ! blue channel
    end type t_rgb

    !> Common state of every drawing device.  The component order matters:
    !> extensions are built with positional structure constructors.
    type, abstract :: t_device
        character(len=80) :: title      = 'Plotter'
        integer           :: nsize_x    = 640
        integer           :: nsize_y    = 480
        integer           :: line_width = 1
        type(t_rgb)       :: rgb        = t_rgb(0, 0, 0)
    contains
        procedure(device_on),     deferred :: on
        procedure(device_off),    deferred :: off
        procedure(device_show),   deferred :: show
        procedure(device_pen),    deferred :: pen
        procedure(device_lineTo), deferred :: lineTo
        procedure(device_moveTo), deferred :: moveTo
        procedure(device_dot),    deferred :: dot
    end type t_device

    abstract interface

        !> Bring the device up; the device object may be modified.
        subroutine device_on(self)
            import :: t_device
            class(t_device), intent(inout) :: self
        end subroutine device_on

        !> Shut the device down.
        subroutine device_off(self)
            import :: t_device
            class(t_device), intent(in) :: self
        end subroutine device_off

        !> Make what has been drawn so far visible.
        subroutine device_show(self)
            import :: t_device
            class(t_device), intent(in) :: self
        end subroutine device_show

        !> Change the pen; either attribute may be omitted.
        subroutine device_pen(self, line_width, rgb)
            import :: t_device, t_rgb
            class(t_device), intent(inout)        :: self
            integer,         intent(in), optional :: line_width
            type(t_rgb),     intent(in), optional :: rgb
        end subroutine device_pen

        !> Draw a line to the point (ix, iy).
        subroutine device_lineTo(self, ix, iy)
            import :: t_device
            class(t_device), intent(in) :: self
            integer,         intent(in) :: ix
            integer,         intent(in) :: iy
        end subroutine device_lineTo

        !> Move the current position to (ix, iy).
        subroutine device_moveTo(self, ix, iy)
            import :: t_device
            class(t_device), intent(in) :: self
            integer,         intent(in) :: ix
            integer,         intent(in) :: iy
        end subroutine device_moveTo

        !> Plot one point at (ix, iy) using the integer colour value icol.
        subroutine device_dot(self, ix, iy, icol)
            import :: t_device
            class(t_device), intent(in) :: self
            integer,         intent(in) :: ix
            integer,         intent(in) :: iy
            integer,         intent(in) :: icol
        end subroutine device_dot

    end interface

end module m_oop


!> Win32 implementation of the abstract plotting device declared in m_oop.
!>
!> A window is run on its own thread (WinMain / MainWndProc).  All drawing is
!> done into a memory device context kept in the module variable `wnd`, and
!> WM_PAINT copies that context onto the window.  Access to the shared GDI
!> handles is serialised with the critical section stored in `wnd`.
module m_win32
    use ifwina
    use ifwinty
    use ifmt, only : RTL_CRITICAL_SECTION
    use m_oop
    implicit none

    !> Concrete device: binds every deferred operation of t_device to the
    !> gr_* procedures below.
    type, extends(t_device) :: t_win32
    contains 
        procedure, pass :: on     => gr_on
        procedure, pass :: off    => gr_off
        procedure, pass :: show   => gr_show
        procedure, pass :: pen    => gr_pen
        procedure, pass :: lineTo => gr_lineTo
        procedure, pass :: moveTo => gr_moveTo
        procedure, pass :: dot    => gr_dot
    end type t_win32

    !> Handles shared between the drawing procedures and the window thread.
    type :: t_wnd
        integer (HANDLE) :: hWnd      ! window handle, stored on WM_CREATE
        integer (HANDLE) :: hDC       ! memory device context that receives all drawing
        integer (LPINT)  :: hThread   ! handle returned by CreateThread in gr_on
        integer (LPDWORD):: id        ! thread id returned by CreateThread in gr_on
        integer (HANDLE) :: hPen      ! pen currently owned by this module
        type (RTL_CRITICAL_SECTION) :: lpCriticalSection ! guards the handles above
    end type t_wnd      

    ! Single module-wide window state: only one window is tracked.
    type (t_wnd) :: wnd
  
contains
    !> Registers the window class (first call only), creates the window and
    !> runs its message loop until the loop ends.
    !>
    !> Returns -1 if RegisterClass fails, -2 if CreateWindow fails, otherwise
    !> the wParam of the last message retrieved.
    integer(HANDLE) function WinMain( hInstance, nCmdShow, win32 )
        implicit none
        integer (HANDLE), intent(in) :: hInstance 
        integer (SINT)  , intent(in) :: nCmdShow
        type (t_win32), intent(in) :: win32
        type (T_WNDCLASS) :: wc
        type (T_MSG)      :: mesg
        integer (HANDLE)  :: hWndMain
        integer (BOOL)    :: iretb
        character (LEN = 256) :: ClassName = 'Fortran'//char(0)
        integer :: iwindow_frame_x, iwindow_frame_y
        logical, save :: first = .true. 
        ! Init Main window
        iwindow_frame_x = 2 * GetSystemMetrics(SM_CXFIXEDFRAME) !side line = 6, title bar = 25
        iwindow_frame_y = 2 * GetSystemMetrics(SM_CYFIXEDFRAME) + GetSystemMetrics(SM_CYCAPTION)
        !
        if (first) then
            WinMain = -1 ! Error code 
            wc%lpszClassName =  loc(ClassName)     ! non-standard Fortran :: LOC(xxx) = TRANSFER(C_LOC(xxx), iii)
            wc%lpfnWndProc   =  loc(MainWndProc)   ! CALLBACK procedure name
            wc%style        = ior(CS_VREDRAW , CS_HREDRAW)
            ! wc is an uninitialised local, so every field RegisterClass reads
            ! must be assigned, including the ones this window does not use.
            wc%cbClsExtra    = 0
            wc%cbWndExtra    = 0
            wc%lpszMenuName  = NULL
            wc%hInstance     = hInstance
            wc%hIcon        = NULL   
            wc%hCursor      = LoadCursor( NULL, IDC_ARROW )
            wc%hbrBackground = ( COLOR_WINDOW + 1 )
            if ( RegisterClass(wc) == 0 ) return    ! initialize window
            first = .false.
        end if
        ! Init instance
        WinMain = -2 ! Error code 
        ! The window is enlarged by the frame/caption metrics so that the
        ! requested nsize_x x nsize_y is the outer size minus the frame.
        hWndMain = CreateWindow(    ClassName,                            &
                                    trim(win32%title)//char(0),           &
                                    int(ior(WS_OVERLAPPED, WS_SYSMENU)),  &
                                    CW_USEDEFAULT, CW_USEDEFAULT,         &
                                    win32%nsize_x + iwindow_frame_x,      &
                                    win32%nsize_y + iwindow_frame_y,      &
                                    0, 0,                                 &
                                    hInstance,                            &
                                    NULL                           ) 
        if (hWndMain == 0) return
        iretb = ShowWindow( hWndMain, nCmdShow )
        iretb = UpdateWindow( hWndMain )
        ! Message Loop
        ! GetMessage returns 0 for WM_QUIT and -1 on error; testing "> 0"
        ! ends the loop in both cases instead of treating -1 as "true".
        do while ( GetMessage (mesg, NULL, 0, 0) > 0 ) 
            iretb = TranslateMessage( mesg ) 
            iretb = DispatchMessage(  mesg )
        end do
        WinMain = mesg%wParam
    end function WinMain
   
    !> Window procedure.
    !>
    !> WM_CREATE builds the off-screen memory DC, WM_PAINT copies it to the
    !> window, WM_DESTROY and a right-button release tear the DC down and end
    !> the message loop; everything else goes to DefWindowProc.
    integer (LRESULT) function MainWndProc( hWnd, mesg, wParam, lParam ) 
    !DEC$ ATTRIBUTES STDcall, DECORATE, ALIAS : 'MainWndProc' :: MainWndProc
        integer (HANDLE) , intent(in) :: hWnd
        integer (UINT)   , intent(in) :: mesg
        integer (fwParam), intent(in) :: wParam
        integer (flParam), intent(in) :: lParam
        !
        integer (HANDLE) :: hDC, hBmp
        integer (BOOL)   :: iretb
        type (T_PAINTSTRUCT) :: ps
        type (T_RECT)       :: rc
        !
        MainWndProc = 0
        select case ( mesg )
        case (WM_CREATE)
            ! Publish the window handle and create a white, client-sized
            ! off-screen surface compatible with the window's DC.
            wnd%hWnd = hWnd
            hDC      = GetDC(hWnd)
            wnd%hDC  = CreateCompatibleDC(hDC)
            iretb    = GetClientRect(hWnd, rc)
            hBmp     = CreateCompatibleBitmap(hDC, rc%right - rc%left, rc%bottom - rc%top)
            iretb    = SelectObject(wnd%hDC, hBmp)
            iretb    = PatBlt(wnd%hDC, 0, 0, rc%right - rc%left, rc%bottom - rc%top, WHITENESS)
            iretb    = ReleaseDC(hWnd, hDC)
            ! NOTE(review): the bitmap is deleted while it is still selected
            ! into wnd%hDC; presumably this relies on GDI postponing the
            ! deletion until the bitmap is deselected -- confirm.
            iretb    = DeleteObject(hBmp)
        case (WM_DESTROY, WM_RBUTTONUP)
            ! Closing the window and releasing the right mouse button both
            ! end the session.  A DC obtained from CreateCompatibleDC is
            ! released with DeleteDC.
            call EnterCriticalSection( loc(wnd%lpCriticalSection) )
            iretb = DeleteDC( wnd%hDC )
            call PostQuitMessage( 0 )
            call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
        case (WM_PAINT)
            ! Copy the off-screen surface onto the window.
            call EnterCriticalSection( loc(wnd%lpCriticalSection) )
            hDC    = BeginPaint(    wnd%hWnd, ps )
            iretb  = GetClientRect( wnd%hWnd, rc )
            iretb  = BitBlt(hDC, 0, 0, rc%right - rc%left, rc%bottom - rc%top, wnd%hDC, 0, 0, SRCCOPY)
            iretb  = endPaint( wnd%hWnd, ps )
            call LeaveCriticalSection( loc(wnd%lpCriticalSection) )
        case default
            MainWndProc = DefWindowProc( hWnd, mesg, wParam, lParam )
        end select 
    end function MainWndProc

    !> Opens the device: initialises the critical section, starts the window
    !> thread, then prepares a white drawing surface and a default pen.
    subroutine gr_on(self)
        use IFMT, only : CreateThread ! multithread module
        class(t_win32), intent(in out) :: self
        integer (BOOL)    :: iretb
        integer (HANDLE)  :: hBmp
        type (T_RECT)    :: rc
        call InitializeCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        ! The thread is created suspended so its priority can be lowered
        ! before it starts running.
        wnd%hThread = CreateThread(NULL, 0_LPINT, Thread_Proc, NULL, CREATE_SUSPENDED, wnd%id) 
        iretb      = SetThreadPriority(wnd%hThread, THREAD_PRIORITY_BELOW_NORMAL)
        iretb      = ResumeThread(wnd%hThread)
        ! NOTE(review): wnd%hWnd and wnd%hDC are filled in by WM_CREATE on the
        ! window thread; this fixed delay is the only synchronisation with it.
        call sleep(100) ! wait for Window initialization 
        iretb = GetClientRect(wnd%hWnd, rc)
        hBmp  = CreateCompatibleBitmap(wnd%hDC, rc%right - rc%left, rc%bottom - rc%top)
        iretb = SelectObject(wnd%hDC, hBmp)
        ! NOTE(review): same select-then-delete pattern as in WM_CREATE.
        iretb = DeleteObject(hBmp)
        iretb = PatBlt(wnd%hDC, 0, 0, rc%right - rc%left, rc%bottom - rc%top, WHITENESS)
        wnd%hPen = CreatePen(PS_SOLID, 1, 0)
    contains 

        !> Thread entry point: runs WinMain for the host procedure's device.
        integer (LONG) function Thread_Proc(lp_ThreadParameter)
    !    !DEC$ ATTRIBUTES STDcall, ALIAS:"_thread_proc" :: Thread_Proc
            integer (LPINT), intent(in) :: lp_ThreadParameter
            integer (LPINT):: hInst
            hInst       = GetModuleHandle(NULL)
            Thread_Proc = WinMain(hInst, SW_SHOWNORMAL, self)
        end function Thread_Proc
    end subroutine gr_on

    !> Closes the device: refreshes the window one last time, waits without
    !> timeout on the window thread handle, then releases the thread handle
    !> and the critical section.
    subroutine gr_off(self)
        class(t_win32), intent(in) :: self
        integer (BOOL)  :: iretb
        integer (DWORD) :: iwait
        iwait = INFINITE
        call gr_show(self) 
        iretb = DeleteObject(wnd%hPen) 
        iretb = WaitForSingleObject(wnd%hThread, iwait)
        iretb = CloseHandle(wnd%hThread)
        iretb = PostMessage(wnd%hWnd, WM_DESTROY, NULL, NULL)
        wnd%hThread = NULL
        call DeleteCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_off

    !> Requests a repaint of the whole window (no background erase).
    subroutine gr_show(self)
        class(t_win32), intent(in) :: self
        integer (BOOL):: iretb
        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        iretb = InvalidateRect(wnd%hWnd, NULL, FALSE)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_show

    !> Updates the device's pen attributes and installs a matching GDI pen.
    !>
    !> line_width, rgb : optional new values; an omitted one keeps the value
    !>                   stored in self.
    !> The current position of the drawing surface is reset to (0, 0).
    subroutine gr_pen(self, line_width, rgb)
        class(t_win32), intent(in out) :: self
        integer, intent(in), optional :: line_width
        type (t_rgb), intent(in), optional :: rgb
        integer (BOOL)   :: iretb
        integer (HANDLE) :: hNewPen, hPrev
        if ( present(rgb) ) self%rgb = rgb
        if ( present(line_width) ) self%line_width = line_width
        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC  
        ! Create and select the replacement first, and only then delete the
        ! previous pen, so no pen is deleted while selected into the DC.
        hNewPen = CreatePen(PS_SOLID, self%line_width, irgb(self%rgb))
        if (hNewPen /= 0) then
            hPrev    = SelectObject(wnd%hDC, hNewPen)
            iretb    = DeleteObject(wnd%hPen) 
            wnd%hPen = hNewPen
        end if
        iretb = MoveToEx(wnd%hDC, 0, 0, NULL)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    contains 
        !> Packs a colour as red + 256 * (green + 256 * blue).
        integer function irgb(rgb)
            type(t_rgb), intent(in) :: rgb
            irgb = rgb%ir + (rgb%ig + (rgb%ib * 256)) * 256
        end function irgb
    end subroutine gr_pen

    !> Moves the current position of the drawing surface to (ix, iy).
    subroutine gr_moveTo(self, ix, iy)
        class(t_win32), intent(in) :: self
        integer, intent(in) :: ix, iy
        integer (BOOL):: iretb
        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        iretb = MoveToEx(wnd%hDC, ix, iy, NULL)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_moveTo

    !> Draws a line from the current position to (ix, iy).
    subroutine gr_lineTo(self, ix, iy)
        class(t_win32), intent(in) :: self
        integer, intent(in) :: ix, iy
        integer (BOOL):: iretb
        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        iretb = LineTo(wnd%hDC, ix, iy)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_lineTo
    
    !> Sets the pixel at (ix, iy) of the drawing surface to colour icol.
    subroutine gr_dot(self, ix, iy, icol)
        class(t_win32), intent(in) :: self
        integer, intent(in) :: ix, iy, icol
        integer (BOOL):: iretb
        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        iretb = SetPixel(wnd%hDC, ix, iy, icol)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_dot
end module m_win32

    
!> Facade for client code: re-exports the colour type, the abstract device
!> and its Win32 implementation, and nothing else.
module m_plot
    use m_oop,   only : t_rgb, t_device
    use m_win32, only : t_win32
    implicit none
    private
    public :: t_device
    public :: t_rgb
    public :: t_win32
end module m_plot
!> Computes escape-time iteration counts of the Mandelbrot set on a grid with
!> a do concurrent loop, prints the elapsed time, and plots the result in a
!> window through the m_plot device layer.
program Mandel
    implicit none
    integer, parameter :: kd = kind(0.0e0)
    integer, parameter :: m = 256           ! upper bound of the colour table
    integer :: nwinx = 1024, nwiny = 1024   ! largest plot size in pixels
    integer :: i, j, imax, jmax, maxiter
    real (kd) :: xmin, xmax, ymin, ymax, dx, dy
    real (kd) :: t0, t1 
    complex (kd) :: c, z
    integer, allocatable :: ic(:, :)        ! iteration count per grid point
    integer :: icol(0:m), it0, it1
    !
    xmin = -2.0_kd !1.10950d0
    xmax =  2.0_kd !1.10951d0
    ymin = -2.0_kd !0.24758d0 
    ymax =  2.0_kd !0.24759d0 
    maxiter = 253
    !
    dx = xmax - xmin
    dy = ymax - ymin
    ! The iteration loop below can leave a count of maxiter + 1, and that
    ! count indexes icol(0:m), so maxiter itself must stay below m.
    if (dx <= 0.0_kd .or. dy <= 0.0_kd .or. maxiter <= 0 .or. maxiter >= m) error stop 'input error'
    ! Fit the requested region into the window while keeping its aspect ratio.
    if (dx * nwinx > dy * nwiny) then
        imax = nwinx
        jmax = nint(nwinx * dy / dx)
    else
        imax = nint(nwiny * dx / dy)
        jmax = nwiny
    end if
    !
    ! From here on dx, dy are the grid spacings.
    dx = dx / real(imax, kd)
    dy = dy / real(jmax, kd)
    ! Colour table: start from all black so that every index the iteration
    ! count can take (including maxiter + 1 for points that never escape)
    ! has a defined colour, then fill 1..maxiter with a ramp ending in white.
    icol = 0 ! black
    j = irgb(255, 255, 255)
    do i = maxiter, 1, -1
       icol(i) = j 
       if (j > 1) j = j - irgb(255, 255, 255) / maxiter
    end do
    !
    allocate( ic(0:imax, 0:jmax), source = 0 )  
    !
    print *, 'before do concurrent'
    call system_clock(it0)
    call cpu_time(t0)

    ! Every grid point is independent; i, c and z are private to an iteration.
GPU_offload:do concurrent (integer::ix = 0:imax, iy = 0:jmax) local(i, c, z) !shared(ic)
        c = cmplx(xmin + ix * dx, ymax - iy * dy)
        z = c
        do i = 0, maxiter
            if (abs(z) > 2.0_kd) exit
            z = z * z + c
        end do
        ic(ix, iy) = i 
    end do GPU_offload    

    call cpu_time(t1)
    call system_clock(it1)
    print *, ' do concurrent time =', t1 - t0, it1 - it0
    !
    ! plotter 
    !
plot: block
        use m_plot
        class(t_device), allocatable :: fig
        type(t_rgb), parameter :: rgb_black = t_rgb(0, 0, 0)
        fig = t_win32('Mandelbrot 1', imax, jmax, 1, rgb_black)
        call fig%on()
        ! Plot column by column and refresh the window after each column.
        do i = 0, imax
           do j = 0, jmax
              call fig%dot(i, j, icol(ic(i, j)))  
           end do
           call fig%show()
        end do
        call fig%off()
    end block plot
contains 
    !> Packs three colour channels as ir + 256 * (ig + 256 * ib).
    integer function irgb(ir, ig, ib)
        integer, intent(in) :: ir, ig, ib
        irgb = ir + (ig + (ib * 256)) * 256
    end function irgb

end program Mandel

出力

C:>ifx mandel.f90 /O2 /Qxalderlake /Qopenmp-targets:spir64 /Qiopenmp /Qopt-report:2 /Qopenmp-target-do-concurrent
Intel(R) Fortran Compiler for applications running on Intel(R) 64, Version 2022.2.0 Build 20220730
Copyright (C) 1985-2022 Intel Corporation. All rights reserved.

Microsoft (R) Incremental Linker Version 14.33.31630.0
Copyright (C) Microsoft Corporation.  All rights reserved.

-out:mandel.exe
-debug
-pdb:mandel.pdb
-subsystem:console
-defaultlib:libiomp5md.lib
-nodefaultlib:vcomp.lib
-nodefaultlib:vcompd.lib
C:\Temp\754833.obj
C:\Temp\7548345.o
-defaultlib:omptarget.lib

C:>set OMP_TARGET_OFFLOAD=disabled

C:>mandel
Libomptarget --> Init target library!
Libomptarget --> No RTL found for image 0x00007ff7b9567000!
Libomptarget --> Done registering entries!
 start
 before do concurrent
Libomptarget --> Entering target region with entry point 0x00007ff7b94fe8c6 and device Id 0
Libomptarget --> Offload is disabled
Libomptarget --> Not offloading to device 0
  do concurrent time =  9.3750000E-02        2470
Libomptarget --> Unloading target library!
Libomptarget --> No RTLs in use support the image 0x00007ff7b9567000!
Libomptarget --> Done unregistering images!
Libomptarget --> Translation table for descriptor 0x00007ff7b9566000 cannot be found, probably it has been already removed.
Libomptarget --> Done unregistering library!
Libomptarget --> Deinit target library!
C:>set OMP_TARGET_OFFLOAD=MANDATORY

C:>mandel
Libomptarget --> Init target library!
Libomptarget --> Initialized OMPT
Libomptarget --> Loading RTLs...
Libomptarget --> Loading library 'omptarget.rtl.level0.dll'...
Target LEVEL0 RTL --> Init Level0 plugin!
Target LEVEL0 RTL --> omp_get_thread_limit() returned 2147483647
Target LEVEL0 RTL --> omp_get_max_teams() returned 0
Libomptarget --> Successfully loaded library 'omptarget.rtl.level0.dll'!
Target LEVEL0 RTL --> Looking for Level0 devices...
Target LEVEL0 RTL --> Found a GPU device, Name = Intel(R) Iris(R) Xe Graphics
Target LEVEL0 RTL --> Found 1 root devices, 1 total devices.
Target LEVEL0 RTL --> List of devices (DeviceID[.SubID[.CCSID]])
Target LEVEL0 RTL --> -- 0
Target LEVEL0 RTL --> Root Device Information
Target LEVEL0 RTL --> Device 0
Target LEVEL0 RTL --> -- Name                         : Intel(R) Iris(R) Xe Graphics
Target LEVEL0 RTL --> -- PCI ID                       : 0x46a6
Target LEVEL0 RTL --> -- Number of total EUs          : 96
Target LEVEL0 RTL --> -- Number of threads per EU     : 7
Target LEVEL0 RTL --> -- EU SIMD width                : 8
Target LEVEL0 RTL --> -- Number of EUs per subslice   : 8
Target LEVEL0 RTL --> -- Number of subslices per slice: 12
Target LEVEL0 RTL --> -- Number of slices             : 1
Target LEVEL0 RTL --> -- Local memory size (bytes)    : 65536
Target LEVEL0 RTL --> -- Global memory size (bytes)   : 6722351104
Target LEVEL0 RTL --> -- Cache size (bytes)           : 1048576
Target LEVEL0 RTL --> -- Max clock frequency (MHz)    : 1400
Target LEVEL0 RTL --> Driver API version is 10003
Target LEVEL0 RTL --> Interop property IDs, Names, Descriptions
Target LEVEL0 RTL --> -- 0, device_num_eus, intptr_t, total number of EUs
Target LEVEL0 RTL --> -- 1, device_num_threads_per_eu, intptr_t, number of threads per EU
Target LEVEL0 RTL --> -- 2, device_eu_simd_width, intptr_t, physical EU simd width
Target LEVEL0 RTL --> -- 3, device_num_eus_per_subslice, intptr_t, number of EUs per sub-slice
Target LEVEL0 RTL --> -- 4, device_num_subslices_per_slice, intptr_t, number of sub-slices per slice
Target LEVEL0 RTL --> -- 5, device_num_slices, intptr_t, number of slices
Target LEVEL0 RTL --> -- 6, device_local_mem_size, intptr_t, local memory size in bytes
Target LEVEL0 RTL --> -- 7, device_global_mem_size, intptr_t, global memory size in bytes
Target LEVEL0 RTL --> -- 8, device_global_mem_cache_size, intptr_t, global memory cache size in bytes
Target LEVEL0 RTL --> -- 9, device_max_clock_frequency, intptr_t, max clock frequency in MHz
Target LEVEL0 RTL --> Found driver extensions:
Target LEVEL0 RTL --> -- ZE_extension_float_atomics
Target LEVEL0 RTL --> -- ZE_experimental_relaxed_allocation_limits
Target LEVEL0 RTL --> -- ZE_experimental_module_program
Target LEVEL0 RTL --> -- ZE_experimental_scheduling_hints
Target LEVEL0 RTL --> -- ZE_experimental_global_offset
Target LEVEL0 RTL --> -- ZE_extension_pci_properties
Target LEVEL0 RTL --> -- ZE_extension_memory_compression_hints
Target LEVEL0 RTL --> -- ZE_extension_memory_free_policies
Target LEVEL0 RTL --> -- ZE_extension_device_memory_properties
Target LEVEL0 RTL --> Returning 1 top-level devices
Libomptarget --> Registering RTL omptarget.rtl.level0.dll supporting 1 devices!
Libomptarget --> Optional interface: __tgt_rtl_data_alloc_base
Libomptarget --> Optional interface: __tgt_rtl_data_alloc_managed
Libomptarget --> Optional interface: __tgt_rtl_data_realloc
Libomptarget --> Optional interface: __tgt_rtl_data_aligned_alloc
Libomptarget --> Optional interface: __tgt_rtl_register_host_pointer
Libomptarget --> Optional interface: __tgt_rtl_unregister_host_pointer
Libomptarget --> Optional interface: __tgt_rtl_get_context_handle
Libomptarget --> Optional interface: __tgt_rtl_init_ompt
Libomptarget --> Optional interface: __tgt_rtl_requires_mapping
Libomptarget --> Optional interface: __tgt_rtl_push_subdevice
Libomptarget --> Optional interface: __tgt_rtl_pop_subdevice
Libomptarget --> Optional interface: __tgt_rtl_add_build_options
Libomptarget --> Optional interface: __tgt_rtl_is_supported_device
Libomptarget --> Optional interface: __tgt_rtl_deinit
Libomptarget --> Optional interface: __tgt_rtl_create_interop
Libomptarget --> Optional interface: __tgt_rtl_release_interop
Libomptarget --> Optional interface: __tgt_rtl_use_interop
Libomptarget --> Optional interface: __tgt_rtl_get_num_interop_properties
Libomptarget --> Optional interface: __tgt_rtl_get_interop_property_value
Libomptarget --> Optional interface: __tgt_rtl_get_interop_property_info
Libomptarget --> Optional interface: __tgt_rtl_get_interop_rc_desc
Libomptarget --> Optional interface: __tgt_rtl_get_num_sub_devices
Libomptarget --> Optional interface: __tgt_rtl_is_accessible_addr_range
Libomptarget --> Optional interface: __tgt_rtl_notify_indirect_access
Libomptarget --> Optional interface: __tgt_rtl_is_private_arg_on_host
Libomptarget --> Optional interface: __tgt_rtl_command_batch_begin
Libomptarget --> Optional interface: __tgt_rtl_command_batch_end
Libomptarget --> Optional interface: __tgt_rtl_kernel_batch_begin
Libomptarget --> Optional interface: __tgt_rtl_kernel_batch_end
Libomptarget --> Optional interface: __tgt_rtl_alloc_per_hw_thread_scratch
Libomptarget --> Optional interface: __tgt_rtl_free_per_hw_thread_scratch
Libomptarget --> Optional interface: __tgt_rtl_run_target_team_nd_region
Libomptarget --> Optional interface: __tgt_rtl_get_device_info
Target LEVEL0 RTL --> Initialized OMPT
Libomptarget --> Loading library 'omptarget.rtl.opencl.dll'...
Target OPENCL RTL --> Init OpenCL plugin!
Target OPENCL RTL --> omp_get_thread_limit() returned 2147483647
Target OPENCL RTL --> omp_get_max_teams() returned 0
Target OPENCL RTL --> Target device type is set to GPU
Libomptarget --> Successfully loaded library 'omptarget.rtl.opencl.dll'!
Target OPENCL RTL --> Start initializing OpenCL
Target OPENCL RTL --> Platform OpenCL 3.0  has 1 Devices
Target OPENCL RTL --> Extension clGetMemAllocInfoINTEL is found.
Target OPENCL RTL --> Extension clHostMemAllocINTEL is found.
Target OPENCL RTL --> Extension clDeviceMemAllocINTEL is found.
Target OPENCL RTL --> Extension clSharedMemAllocINTEL is found.
Target OPENCL RTL --> Extension clMemFreeINTEL is found.
Target OPENCL RTL --> Extension clSetKernelArgMemPointerINTEL is found.
Target OPENCL RTL --> Extension clEnqueueMemcpyINTEL is found.
Target OPENCL RTL --> Extension clSetProgramSpecializationConstant is found.
Target OPENCL RTL --> Extension clGetDeviceGlobalVariablePointerINTEL is found.
Target OPENCL RTL --> Extension clGetKernelSuggestedLocalWorkSizeINTEL is found.
Target OPENCL RTL --> Warning: Extension clGitsIndirectAllocationOffsets is not found.
Target OPENCL RTL --> Device 0: Intel(R) Iris(R) Xe Graphics
Target OPENCL RTL --> Number of execution units on the device is 96
Target OPENCL RTL --> Maximum work group size for the device is 256
Target OPENCL RTL --> Maximum memory allocation size is 3361175552
Target OPENCL RTL --> Device local mem size: 65536
Libomptarget --> Registering RTL omptarget.rtl.opencl.dll supporting 1 devices!
Libomptarget --> Optional interface: __tgt_rtl_data_alloc_base
Libomptarget --> Optional interface: __tgt_rtl_data_alloc_managed
Libomptarget --> Optional interface: __tgt_rtl_data_realloc
Libomptarget --> Optional interface: __tgt_rtl_data_aligned_alloc
Libomptarget --> Optional interface: __tgt_rtl_get_device_name
Libomptarget --> Optional interface: __tgt_rtl_get_context_handle
Libomptarget --> Optional interface: __tgt_rtl_get_data_alloc_info
Libomptarget --> Optional interface: __tgt_rtl_init_ompt
Libomptarget --> Optional interface: __tgt_rtl_requires_mapping
Libomptarget --> Optional interface: __tgt_rtl_manifest_data_for_region
Libomptarget --> Optional interface: __tgt_rtl_add_build_options
Libomptarget --> Optional interface: __tgt_rtl_is_supported_device
Libomptarget --> Optional interface: __tgt_rtl_deinit
Libomptarget --> Optional interface: __tgt_rtl_create_interop
Libomptarget --> Optional interface: __tgt_rtl_release_interop
Libomptarget --> Optional interface: __tgt_rtl_use_interop
Libomptarget --> Optional interface: __tgt_rtl_get_num_interop_properties
Libomptarget --> Optional interface: __tgt_rtl_get_interop_property_value
Libomptarget --> Optional interface: __tgt_rtl_get_interop_property_info
Libomptarget --> Optional interface: __tgt_rtl_get_interop_rc_desc
Libomptarget --> Optional interface: __tgt_rtl_is_accessible_addr_range
Libomptarget --> Optional interface: __tgt_rtl_notify_indirect_access
Libomptarget --> Optional interface: __tgt_rtl_is_private_arg_on_host
Libomptarget --> Optional interface: __tgt_rtl_alloc_per_hw_thread_scratch
Libomptarget --> Optional interface: __tgt_rtl_free_per_hw_thread_scratch
Libomptarget --> Optional interface: __tgt_rtl_run_target_team_nd_region
Target OPENCL RTL --> Initialized OMPT
Libomptarget --> Loading library 'libomptarget.rtl.ppc64.so'...
Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e
Libomptarget --> Unable to load library 'libomptarget.rtl.ppc64.so': c喙U0・_0・ク0・・・L!
Libomptarget --> Loading library 'omptarget.rtl.x86_64.dll'...
Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e
Libomptarget --> Unable to load library 'omptarget.rtl.x86_64.dll': c喙U0・_0・ク0・・・L!
Libomptarget --> Loading library 'libomptarget.rtl.cuda.so'...
Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e
Libomptarget --> Unable to load library 'libomptarget.rtl.cuda.so': c喙U0・_0・ク0・・・L!
Libomptarget --> Loading library 'libomptarget.rtl.aarch64.so'...
Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e
Libomptarget --> Unable to load library 'libomptarget.rtl.aarch64.so': c喙U0・_0・ク0・・・L!
Libomptarget --> Loading library 'libomptarget.rtl.ve.so'...
Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e
Libomptarget --> Unable to load library 'libomptarget.rtl.ve.so': c喙U0・_0・ク0・・・L!
Libomptarget --> Loading library 'libomptarget.rtl.amdgpu.so'...
Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e
Libomptarget --> Unable to load library 'libomptarget.rtl.amdgpu.so': c喙U0・_0・ク0・・・L!
Libomptarget --> Loading library 'libomptarget.rtl.rpc.so'...
Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e
Libomptarget --> Unable to load library 'libomptarget.rtl.rpc.so': c喙U0・_0・ク0・・・L!
Libomptarget --> RTLs loaded!
Target LEVEL0 RTL --> Target binary is a valid oneAPI OpenMP image.
Libomptarget --> Image 0x00007ff7b9567000 is compatible with RTL omptarget.rtl.level0.dll!
Libomptarget --> RTL 0x00007ffa1fb70000 has index 0!
Libomptarget --> Registering image 0x00007ff7b9567000 with RTL omptarget.rtl.level0.dll!
Libomptarget --> Done registering entries!
 start
 before do concurrent
Libomptarget --> Entering target region with entry point 0x00007ff7b94fe8c6 and device Id 0
Libomptarget --> Call to omp_get_num_devices returning 1
Libomptarget --> Call to omp_get_num_devices returning 1
Libomptarget --> Call to omp_get_initial_device returning 1
Libomptarget --> Checking whether device 0 is ready.
Libomptarget --> Is the device 0 (local ID 0) initialized? 0
Target LEVEL0 RTL --> Initialize requires flags to 0
Target LEVEL0 RTL --> Allocated a device memory 0xffffb80200010000
Target LEVEL0 RTL --> Initialized device memory pool for device 0x00000147c4e45418: AllocUnit = 65536, AllocMax = 1048576, Capacity = 4, PoolSizeMax = 268435456
Target LEVEL0 RTL --> Allocated a shared memory object 0x00000147c6160000
Target LEVEL0 RTL --> Initialized shared memory pool for device 0x00000147c4e45418: AllocUnit = 65536, AllocMax = 8388608, Capacity = 4, PoolSizeMax = 268435456
Target LEVEL0 RTL --> Allocated a host memory 0x00000147c6160000
Target LEVEL0 RTL --> Initialized host memory pool for device 0x00000147c4e45418: AllocUnit = 65536, AllocMax = 1048576, Capacity = 4, PoolSizeMax = 268435456
Target LEVEL0 RTL --> Created a command queue 0x00000147c4e81d68 (Ordinal: 0, Index: 0) for device 0.
Target LEVEL0 RTL --> Initialized Level0 device 0
Libomptarget --> Device 0 is ready to use.
Target LEVEL0 RTL --> Device 0: Loading binary from 0x00007ff7b9567000
Target LEVEL0 RTL --> Expecting to have 10 entries defined
Target LEVEL0 RTL --> Base L0 module compilation options: -cl-std=CL2.0
Target LEVEL0 RTL --> Found a single section in the image
Target LEVEL0 RTL --> Created module from image #0.
Target LEVEL0 RTL --> Module link is not required
Target LEVEL0 RTL --> Looking up device global variable '__omp_offloading_entries_table_size' of size 8 bytes on device 0.
Target LEVEL0 RTL --> Global variable lookup succeeded (size: 8 bytes).
Target LEVEL0 RTL --> Created a command list 0x00000147c5c1bac8 (Ordinal: 0) for device 0.
Target LEVEL0 RTL --> Warning: number of entries in host and device offload tables mismatch (10 != 2).
Target LEVEL0 RTL --> Looking up device global variable '__omp_offloading_entries_table' of size 80 bytes on device 0.
Target LEVEL0 RTL --> Global variable lookup succeeded (size: 80 bytes).
Target LEVEL0 RTL --> Device offload table loaded:
Target LEVEL0 RTL -->   0:      _ZL7pone_ld_3d4ae508d8dbf78737978824de0e0216
Target LEVEL0 RTL -->   1:      __omp_offloading_56af2408_2649_MAIN___l379
Target LEVEL0 RTL --> Looking up device global variable '__omp_offloading_56af2408_2649_MAIN___l379_kernel_info' of unknown size on device 0.
Target LEVEL0 RTL --> Global variable lookup succeeded (size: 176 bytes).
Target LEVEL0 RTL --> Kernel 0: Entry = 0x00007ff7b94fe8c6, Name = __omp_offloading_56af2408_2649_MAIN___l379, NumArgs = 18, Handle = 0x00000147c4f32760
Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!!
Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!!
Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!!
Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!!
Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!!
Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!!
Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!!
Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!!
Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!!
Target LEVEL0 RTL --> Looking up device global variable '__omp_spirv_program_data' of size 56 bytes on device 0.
Target LEVEL0 RTL --> Global variable lookup succeeded (size: 56 bytes).
Libomptarget --> Entry  0: Base=0x0000000dbd18f940, Begin=0x0000000dbd18f940, Size=96, Type=0x20, Name=MANDEL$IC
Libomptarget --> Entry  1: Base=0x0000000dbd18f940, Begin=0x00000147c5d501c0, Size=4202500, Type=0x1000000000017, Name=MANDEL$IC_addr_a0
Libomptarget --> Entry  2: Base=0x0000000dbd18f940, Begin=0x0000000dbd18f948, Size=88, Type=0x1000000000005, Name=MANDEL$IC_dv_len
Libomptarget --> Entry  3: Base=0x00000000000000fd, Begin=0x00000000000000fd, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry  4: Base=0x000000003b800000, Begin=0x000000003b800000, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry  5: Base=0x0000000040000000, Begin=0x0000000040000000, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry  6: Base=0x000000003b800000, Begin=0x000000003b800000, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry  7: Base=0x00000000c0000000, Begin=0x00000000c0000000, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry  8: Base=0x0000000000000400, Begin=0x0000000000000400, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry  9: Base=0x0000000000000000, Begin=0x0000000000000000, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry 10: Base=0x0000000000000001, Begin=0x0000000000000001, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry 11: Base=0x0000000000000400, Begin=0x0000000000000400, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry 12: Base=0x0000000000000000, Begin=0x0000000000000000, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry 13: Base=0x0000000000000400, Begin=0x0000000000000400, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry 14: Base=0x0000000000000000, Begin=0x0000000000000000, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry 15: Base=0x0000000000000001, Begin=0x0000000000000001, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry 16: Base=0x0000000000000400, Begin=0x0000000000000400, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry 17: Base=0x0000000000000000, Begin=0x0000000000000000, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry 18: Base=0x0000000000000000, Begin=0x0000000000000000, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry 19: Base=0x0000000000100800, Begin=0x0000000000100800, Size=0, Type=0x120, Name=unknown
Libomptarget --> Entry 20: Base=0x0000000dbd18f1d0, Begin=0x0000000dbd18f1d0, Size=32, Type=0x800, Name=unknown
Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f940, Size=96)...
Target LEVEL0 RTL --> Ptr 0x0000000dbd18f940 requires mapping
Target LEVEL0 RTL --> Allocated a shared memory object 0x00000147c6170000
Target LEVEL0 RTL --> New block allocation for shared memory pool: base = 0x00000147c6170000, size = 65536, pool size = 65536
Libomptarget --> Creating new map entry with HstPtrBegin=0x0000000dbd18f940, TgtPtrBegin=0x00000147c6170000, Size=96, DynRefCount=1, HoldRefCount=0, Name=MANDEL$IC
Libomptarget --> There are 96 bytes allocated at target address 0x00000147c6170000 - is new
Libomptarget --> Has a pointer entry:
Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f940, Size=8)...
Libomptarget --> Mapping exists with HstPtrBegin=0x0000000dbd18f940, TgtPtrBegin=0x00000147c6170000, Size=8, DynRefCount=1 (update suppressed), HoldRefCount=0, Name=unknown
Libomptarget --> There are 8 bytes allocated at target address 0x00000147c6170000 - is new
Libomptarget --> Looking up mapping(HstPtrBegin=0x00000147c5d501c0, Size=4202500)...
Target LEVEL0 RTL --> Ptr 0x00000147c5d501c0 requires mapping
Target LEVEL0 RTL --> Allocated a shared memory object 0x00000147d9970000
Target LEVEL0 RTL --> New block allocation for shared memory pool: base = 0x00000147d9970000, size = 33554432, pool size = 33619968
Libomptarget --> Creating new map entry with HstPtrBegin=0x00000147c5d501c0, TgtPtrBegin=0x00000147d9970000, Size=4202500, DynRefCount=1, HoldRefCount=0, Name=MANDEL$IC_addr_a0
Libomptarget --> Moving 4202500 bytes (hst:0x00000147c5d501c0) -> (tgt:0x00000147d9970000)
Target LEVEL0 RTL --> Copied 4202500 bytes (hst:0x00000147c5d501c0) -> (tgt:0x00000147d9970000)
Libomptarget --> There are 4202500 bytes allocated at target address 0x00000147d9970000 - is new
Libomptarget --> Update pointer (0x00000147c6170000) -> [0x00000147d9970000]
Target LEVEL0 RTL --> Copied 8 bytes (hst:0x00000147c8435f60) -> (tgt:0x00000147c6170000)
Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f940, Size=8)...
Target LEVEL0 RTL --> Notifying indirect access: 0x00000147c6170000 + 0
Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f948, Size=88)...
Libomptarget --> Mapping exists with HstPtrBegin=0x0000000dbd18f948, TgtPtrBegin=0x00000147c6170008, Size=88, DynRefCount=1 (update suppressed), HoldRefCount=0, Name=MANDEL$IC_dv_len
Libomptarget --> Moving 88 bytes (hst:0x0000000dbd18f948) -> (tgt:0x00000147c6170008)
Target LEVEL0 RTL --> Copied 88 bytes (hst:0x0000000dbd18f948) -> (tgt:0x00000147c6170008)
Libomptarget --> There are 88 bytes allocated at target address 0x00000147c6170008 - is new
Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f940, Size=96)...
Libomptarget --> Mapping exists with HstPtrBegin=0x0000000dbd18f940, TgtPtrBegin=0x00000147c6170000, Size=96, DynRefCount=1 (update suppressed), HoldRefCount=0
Libomptarget --> Obtained target argument (Begin: 0x00000147c6170000, Offset: 0) from host pointer 0x0000000dbd18f940
Libomptarget --> Forwarding first-private value 0x00000000000000fd to the target construct
Libomptarget --> Forwarding first-private value 0x000000003b800000 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000040000000 to the target construct
Libomptarget --> Forwarding first-private value 0x000000003b800000 to the target construct
Libomptarget --> Forwarding first-private value 0x00000000c0000000 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000000000400 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000000000000 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000000000001 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000000000400 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000000000000 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000000000400 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000000000000 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000000000001 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000000000400 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000000000000 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000000000000 to the target construct
Libomptarget --> Forwarding first-private value 0x0000000000100800 to the target construct
Libomptarget --> Launching target execution __omp_offloading_56af2408_2649_MAIN___l379 with pointer 0x00000147c8a42d10 (index=0).
Target LEVEL0 RTL --> Executing a kernel 0x00000147c8a42d10...
Target LEVEL0 RTL --> Assumed kernel SIMD width is 16
Target LEVEL0 RTL --> Preferred group size is multiple of 32
Target LEVEL0 RTL --> Loop 0: lower bound = 0, upper bound = 1050624, Stride = 1
Target LEVEL0 RTL --> Team sizes = {32, 1, 1}
Target LEVEL0 RTL --> Number of teams = {32833, 1, 1}
Target LEVEL0 RTL --> Kernel Pointer argument 0 (value: 0x00000147c6170000) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 1 (value: 0x00000000000000fd) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 2 (value: 0x000000003b800000) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 3 (value: 0x0000000040000000) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 4 (value: 0x000000003b800000) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 5 (value: 0x00000000c0000000) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 6 (value: 0x0000000000000400) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 7 (value: 0x0000000000000000) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 8 (value: 0x0000000000000001) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 9 (value: 0x0000000000000400) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 10 (value: 0x0000000000000000) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 11 (value: 0x0000000000000400) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 12 (value: 0x0000000000000000) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 13 (value: 0x0000000000000001) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 14 (value: 0x0000000000000400) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 15 (value: 0x0000000000000000) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 16 (value: 0x0000000000000000) was set successfully for device 0.
Target LEVEL0 RTL --> Kernel Scalar argument 17 (value: 0x0000000000100800) was set successfully for device 0.
Target LEVEL0 RTL --> Setting indirect access flags 0x0000000000000004
Target LEVEL0 RTL --> Submitted kernel 0x00000147c4f32760 to device 0
Target LEVEL0 RTL --> Executed kernel entry 0x00000147c8a42d10 on device 0
Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f948, Size=88)...
Libomptarget --> Mapping exists with HstPtrBegin=0x0000000dbd18f948, TgtPtrBegin=0x00000147c6170008, Size=88, DynRefCount=1 (update suppressed), HoldRefCount=0
Libomptarget --> There are 88 bytes allocated at target address 0x00000147c6170008 - is last
Libomptarget --> Looking up mapping(HstPtrBegin=0x00000147c5d501c0, Size=4202500)...
Libomptarget --> Mapping exists with HstPtrBegin=0x00000147c5d501c0, TgtPtrBegin=0x00000147d9970000, Size=4202500, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0
Libomptarget --> There are 4202500 bytes allocated at target address 0x00000147d9970000 - is last
Libomptarget --> Moving 4202500 bytes (tgt:0x00000147d9970000) -> (hst:0x00000147c5d501c0)
Target LEVEL0 RTL --> Copied 4202500 bytes (tgt:0x00000147d9970000) -> (hst:0x00000147c5d501c0)
Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f940, Size=96)...
Libomptarget --> Mapping exists with HstPtrBegin=0x0000000dbd18f940, TgtPtrBegin=0x00000147c6170000, Size=96, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0
Libomptarget --> There are 96 bytes allocated at target address 0x00000147c6170000 - is last
Libomptarget --> Looking up mapping(HstPtrBegin=0x00000147c5d501c0, Size=4202500)...
Libomptarget --> Deleting tgt data 0x00000147d9970000 of size 4202500
Libomptarget --> Removing map entry with HstPtrBegin=0x00000147c5d501c0, TgtPtrBegin=0x00000147d9970000, Size=4202500, Name=MANDEL$IC_addr_a0
Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f940, Size=96)...
Libomptarget --> Removing shadow pointer 0x0000000dbd18f940
Libomptarget --> Deleting tgt data 0x00000147c6170000 of size 96
Libomptarget --> Removing map entry with HstPtrBegin=0x0000000dbd18f940, TgtPtrBegin=0x00000147c6170000, Size=96, Name=MANDEL$IC
  do concurrent time =  0.6250000           18170
Libomptarget --> Unloading target library!
Target LEVEL0 RTL --> Target binary is a valid oneAPI OpenMP image.
Libomptarget --> Image 0x00007ff7b9567000 is compatible with RTL 0x00007ffa1fb70000!
Libomptarget --> Unregistered image 0x00007ff7b9567000 from RTL 0x00007ffa1fb70000!
Libomptarget --> Done unregistering images!
Libomptarget --> Removing translation table for descriptor 0x00007ff7b9566000
Target LEVEL0 RTL --> MemPool usage for shared memory, device 0x00000147c4e45418
Target LEVEL0 RTL --> -- AllocMax=8(MB), Capacity=4, PoolSizeMax=256(MB)
Target LEVEL0 RTL --> --                   :   NewAlloc      Reuse     Hit(%)
Target LEVEL0 RTL --> -- Bucket[       128]:          1          0       0.00
Target LEVEL0 RTL --> -- Bucket[   8388608]:          1          0       0.00
Target LEVEL0 RTL --> MemPool usage for device memory, device 0x00000147c4e45418
Target LEVEL0 RTL --> -- Not used
Target LEVEL0 RTL --> Memory usage for device memory, device 0x00000147c4e45418
Target LEVEL0 RTL --> -- Not used
Target LEVEL0 RTL --> Memory usage for shared memory, device 0x00000147c4e45418
Target LEVEL0 RTL --> -- Allocator:       Native,         Pool
Target LEVEL0 RTL --> -- Requested:     33619968,      4202596
Target LEVEL0 RTL --> -- Allocated:     33619968,      8388736
Target LEVEL0 RTL --> -- Freed    :     33619968,      8388736
Target LEVEL0 RTL --> -- InUse    :            0,            0
Target LEVEL0 RTL --> -- PeakUse  :     33619968,      8388736
Target LEVEL0 RTL --> -- NumAllocs:            2,            2
Target LEVEL0 RTL --> MemPool usage for host memory, device 0x00000147c4e45418
Target LEVEL0 RTL --> -- Not used
Target LEVEL0 RTL --> Memory usage for host memory, device 0x00000147c4e45418
Target LEVEL0 RTL --> -- Not used
Target LEVEL0 RTL --> Closed RTL successfully
Target LEVEL0 RTL --> Deinit Level0 plugin!
Libomptarget --> Done unregistering library!
Libomptarget --> Deinit target library!