最新 ifx は do concurrent の GPU offload 可能
最新の intel fortran llvm 版 compiler ifx は do concurrent の GPU offload 可能になったというので試してみます。intel 第 12 世代ノート用 CPU の内蔵 GPU で試してみます。なお単精度実数にしか対応していません。
コンパイル時のオプションとして、OpenMP の指示を与える必要があります。OpenMP 稼働オプション、GPU 用バイナリ吐くオプション、do concurrent を暗黙に OpenMP 適用するオプションのようです。明示的に OpenMP 命令を書く必要はなく、標準 Fortran 命令で書けます。
- windows 用
/Qopenmp-targets:spir64 /Qiopenmp /Qopenmp-target-do-concurrent
あと、コンパイル時の最適化レポートを出させると、実行結果に影響が出たりします。まだ判然としないのですが…
/Qopt-report:2
実行時のオプション
実行時には、コマンドライン上で環境変数を与えることにより、GPU への offload を強制したり、禁止したりできます。
GPU offload 強制
set OMP_TARGET_OFFLOAD=MANDATORY
GPU offload 禁止
set OMP_TARGET_OFFLOAD=disabled
また実行時に環境変数をセットすることで offload されたかなどの情報を書き出させることも可能です。
set LIBOMPTARGET_DEBUG=1
0 は非表示、1,2,4 で書き出しがなされます。
Mandelbrot 図形
Mandelbrot 図形を描いてみました。計算は GPU を使用しないほうが早く終わっています。メモリーの転送などにかかる時間のほうが計算している時間より長いのかもしれません。
!> Abstract plotting-device interface.
!! Defines the colour triple t_rgb and the abstract type t_device whose
!! deferred bindings (on/off/show/pen/lineTo/moveTo/dot) are implemented
!! by concrete back ends such as t_win32 in module m_win32.
module m_oop
    implicit none

    !> Colour given as three integer components (red, green, blue).
    type :: t_rgb
        integer :: ir, ig, ib
    end type t_rgb

    !> Abstract drawing device with default title, size, pen width and colour.
    type, abstract :: t_device
        character(len = 80) :: title = 'Plotter'
        integer :: nsize_x = 640, nsize_y = 480
        integer :: line_width = 1
        type (t_rgb) :: rgb = t_rgb(0, 0, 0)
    contains
        procedure (device_on),     deferred, pass :: on
        procedure (device_off),    deferred, pass :: off
        procedure (device_show),   deferred, pass :: show
        procedure (device_pen),    deferred, pass :: pen
        procedure (device_lineTo), deferred, pass :: lineTo
        procedure (device_moveTo), deferred, pass :: moveTo
        procedure (device_dot),    deferred, pass :: dot
    end type t_device

    abstract interface
        subroutine device_on(self)
            import :: t_device
            class(t_device), intent(in out) :: self
        end subroutine device_on

        subroutine device_off(self)
            import :: t_device
            class(t_device), intent(in) :: self
        end subroutine device_off

        subroutine device_show(self)
            import :: t_device
            class(t_device), intent(in) :: self
        end subroutine device_show

        subroutine device_pen(self, line_width, rgb)
            import :: t_device, t_rgb
            class(t_device), intent(in out) :: self
            integer, intent(in), optional :: line_width
            type (t_rgb), intent(in), optional :: rgb
        end subroutine device_pen

        subroutine device_lineTo(self, ix, iy)
            import :: t_device
            class(t_device), intent(in) :: self
            integer, intent(in) :: ix, iy
        end subroutine device_lineTo

        subroutine device_moveTo(self, ix, iy)
            import :: t_device
            class(t_device), intent(in) :: self
            integer, intent(in) :: ix, iy
        end subroutine device_moveTo

        subroutine device_dot(self, ix, iy, icol)
            import :: t_device, t_rgb
            class(t_device), intent(in) :: self
            integer, intent(in) :: ix, iy
            integer, intent(in) :: icol
            ! type (t_rgb), intent(in) :: rgb
        end subroutine device_dot
    end interface
end module m_oop


!> Win32 implementation of t_device.
!! A window is created and serviced by a separate thread (see gr_on); drawing
!! calls write into a memory device context (wnd%hDC) that WM_PAINT copies to
!! the window. All state lives in the single module variable wnd, and accesses
!! to the memory DC are bracketed by the critical section stored in it.
module m_win32
    use ifwina
    use ifwinty
    use ifmt, only : RTL_CRITICAL_SECTION
    use m_oop
    implicit none

    type, extends(t_device) :: t_win32
    contains
        procedure, pass :: on     => gr_on
        procedure, pass :: off    => gr_off
        procedure, pass :: show   => gr_show
        procedure, pass :: pen    => gr_pen
        procedure, pass :: lineTo => gr_lineTo
        procedure, pass :: moveTo => gr_moveTo
        procedure, pass :: dot    => gr_dot
    end type t_win32

    !> Handles shared between the drawing thread and the window thread.
    type :: t_wnd
        integer (HANDLE) :: hWnd        ! window handle, set in WM_CREATE
        integer (HANDLE) :: hDC         ! memory DC that receives all drawing
        integer (LPINT)  :: hThread     ! handle of the window thread
        integer (LPDWORD):: id          ! thread id returned by CreateThread
        integer (HANDLE) :: hPen        ! pen currently owned by this module
        type (RTL_CRITICAL_SECTION) :: lpCriticalSection
    end type t_wnd

    type (t_wnd) :: wnd

contains

    !> Registers the window class (first call only), creates the window and
    !! runs the message loop until WM_QUIT or a GetMessage error.
    !! Returns -1 if class registration fails, -2 if window creation fails,
    !! otherwise the wParam of the last message retrieved.
    integer(HANDLE) function WinMain( hInstance, nCmdShow, win32 )
        implicit none
        integer (HANDLE), intent(in) :: hInstance
        integer (SINT)  , intent(in) :: nCmdShow
        type (t_win32)  , intent(in) :: win32
        type (T_WNDCLASS) :: wc
        type (T_MSG)      :: mesg
        integer (HANDLE)  :: hWndMain
        integer (BOOL)    :: iretb
        character (LEN = 256) :: ClassName = 'Fortran'//char(0)
        integer :: iwindow_frame_x, iwindow_frame_y
        logical, save :: first = .true.

        ! Init Main window
        ! The frame sizes are added to the requested client size below.
        iwindow_frame_x = 2 * GetSystemMetrics(SM_CXFIXEDFRAME) !side line = 6, title bar = 25
        iwindow_frame_y = 2 * GetSystemMetrics(SM_CYFIXEDFRAME) + GetSystemMetrics(SM_CYCAPTION)
        !
        if (first) then
            WinMain = -1 ! Error code
            wc%lpszClassName = loc(ClassName)   ! non-standard Fortran :: LOC(xxx) = TRANSFER(C_LOC(xxx), iii)
            wc%lpfnWndProc   = loc(MainWndProc) ! CALLBACK procedure name
            wc%style         = ior(CS_VREDRAW , CS_HREDRAW)
            ! These three fields were previously left undefined; RegisterClass
            ! reads the whole structure, so give them explicit neutral values.
            wc%cbClsExtra    = 0
            wc%cbWndExtra    = 0
            wc%lpszMenuName  = NULL
            wc%hInstance     = hInstance
            wc%hIcon         = NULL
            wc%hCursor       = LoadCursor( NULL, IDC_ARROW )
            wc%hbrBackground = ( COLOR_WINDOW + 1 )
            if ( RegisterClass(wc) == 0 ) return ! initialize window
            first = .false.
        end if

        ! Init instance
        WinMain = -2 ! Error code
        hWndMain = CreateWindow( ClassName,                           &
                                 trim(win32%title)//char(0),          &
                                 int(ior(WS_OVERLAPPED, WS_SYSMENU)), &
                                 CW_USEDEFAULT, CW_USEDEFAULT,        &
                                 win32%nsize_x + iwindow_frame_x,     &
                                 win32%nsize_y + iwindow_frame_y,     &
                                 0, 0,                                &
                                 hInstance,                           &
                                 NULL )
        if (hWndMain == 0) return
        iretb = ShowWindow( hWndMain, nCmdShow )
        iretb = UpdateWindow( hWndMain )

        ! Message Loop
        ! GetMessage returns 0 for WM_QUIT and -1 on error; testing "> 0"
        ! ends the loop in both cases instead of spinning on an error.
        do while ( GetMessage (mesg, NULL, 0, 0) > 0 )
            iretb = TranslateMessage( mesg )
            iretb = DispatchMessage( mesg )
        end do
        WinMain = mesg%wParam
    end function WinMain


    !> Window procedure: builds the memory DC on creation, blits it on paint,
    !! and tears it down on WM_DESTROY or on a right-button release.
    integer (LRESULT) function MainWndProc( hWnd, mesg, wParam, lParam )
    !DEC$ ATTRIBUTES STDcall, DECORATE, ALIAS : 'MainWndProc' :: MainWndProc
        integer (HANDLE) , intent(in) :: hWnd
        integer (UINT)   , intent(in) :: mesg
        integer (fwParam), intent(in) :: wParam
        integer (flParam), intent(in) :: lParam
        !
        integer (HANDLE) :: hDC, hBmp
        integer (BOOL)   :: iretb
        type (T_PAINTSTRUCT) :: ps
        type (T_RECT)        :: rc
        !
        MainWndProc = 0
        select case ( mesg )
        case (WM_CREATE)
            ! Create a memory DC with a client-sized bitmap and clear it to white.
            wnd%hWnd = hWnd
            hDC      = GetDC(hWnd)
            wnd%hDC  = CreateCompatibleDC(hDC)
            iretb    = GetClientRect(hWnd, rc)
            hBmp     = CreateCompatibleBitmap(hDC, rc%right - rc%left, rc%bottom - rc%top)
            iretb    = SelectObject(wnd%hDC, hBmp)
            iretb    = PatBlt(wnd%hDC, 0, 0, rc%right - rc%left, rc%bottom - rc%top, WHITENESS)
            iretb    = ReleaseDC(hWnd, hDC)
            ! NOTE(review): the bitmap is still selected into wnd%hDC here;
            ! kept as in the original - confirm the intended lifetime.
            iretb    = DeleteObject(hBmp)

        case (WM_DESTROY)
            call EnterCriticalSection( loc(wnd%lpCriticalSection) )
            ! A DC obtained from CreateCompatibleDC is released with DeleteDC.
            iretb = DeleteDC( wnd%hDC )
            call PostQuitMessage( 0 )
            call LeaveCriticalSection( loc(wnd%lpCriticalSection) )

        case (WM_PAINT)
            ! Copy the off-screen image to the window.
            call EnterCriticalSection( loc(wnd%lpCriticalSection) )
            hDC   = BeginPaint( wnd%hWnd, ps )
            iretb = GetClientRect( wnd%hWnd, rc )
            iretb = BitBlt(hDC, 0, 0, rc%right - rc%left, rc%bottom - rc%top, wnd%hDC, 0, 0, SRCCOPY)
            iretb = endPaint( wnd%hWnd, ps )
            call LeaveCriticalSection( loc(wnd%lpCriticalSection) )

        case (WM_RBUTTONUP)
            ! Right click ends the message loop, same teardown as WM_DESTROY.
            call EnterCriticalSection( loc(wnd%lpCriticalSection) )
            iretb = DeleteDC( wnd%hDC )
            call PostQuitMessage( 0 )
            call LeaveCriticalSection( loc(wnd%lpCriticalSection) )

        case default
            MainWndProc = DefWindowProc( hWnd, mesg, wParam, lParam )
        end select
    end function MainWndProc


    !> Starts the window thread and prepares the off-screen drawing surface.
    subroutine gr_on(self)
        use IFMT, only : CreateThread ! multithread module
        class(t_win32), intent(in out) :: self
        integer (BOOL)   :: iretb
        integer (HANDLE) :: hBmp
        type (T_RECT)    :: rc

        call InitializeCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        ! Create the thread suspended so its priority can be lowered before it runs.
        wnd%hThread = CreateThread(NULL, 0_LPINT, Thread_Proc, NULL, CREATE_SUSPENDED, wnd%id)
        iretb = SetThreadPriority(wnd%hThread, THREAD_PRIORITY_BELOW_NORMAL)
        iretb = ResumeThread(wnd%hThread)
        ! NOTE(review): fixed delay; wnd%hWnd / wnd%hDC are assumed to have
        ! been set by WM_CREATE once it elapses - there is no handshake.
        call sleep(100) ! wait for Window initialization
        iretb = GetClientRect(wnd%hWnd, rc)
        hBmp  = CreateCompatibleBitmap(wnd%hDC, rc%right - rc%left, rc%bottom - rc%top)
        iretb = SelectObject(wnd%hDC, hBmp)
        iretb = DeleteObject(hBmp)
        iretb = PatBlt(wnd%hDC, 0, 0, rc%right - rc%left, rc%bottom - rc%top, WHITENESS)
        wnd%hPen = CreatePen(PS_SOLID, 1, 0)
    contains
        !> Thread entry point: runs WinMain for the host's device object.
        integer (LONG) function Thread_Proc(lp_ThreadParameter)
        ! !DEC$ ATTRIBUTES STDcall, ALIAS:"_thread_proc" :: Thread_Proc
            integer (LPINT), intent(in) :: lp_ThreadParameter
            integer (LPINT):: hInst
            hInst = GetModuleHandle(NULL)
            Thread_Proc = WinMain(hInst, SW_SHOWNORMAL, self)
        end function Thread_Proc
    end subroutine gr_on


    !> Requests a final repaint, blocks until the window thread has ended,
    !! then releases the thread handle, the pen and the critical section.
    subroutine gr_off(self)
        class(t_win32), intent(in) :: self
        integer (BOOL)  :: iretb
        integer (DWORD) :: iwait

        iwait = INFINITE
        call gr_show(self)
        iretb = WaitForSingleObject(wnd%hThread, iwait)
        iretb = CloseHandle(wnd%hThread)
        iretb = PostMessage(wnd%hWnd, WM_DESTROY, NULL, NULL)
        wnd%hThread = NULL
        ! Delete the pen only after the window thread (and with it the memory
        ! DC the pen may be selected into) is gone.
        iretb = DeleteObject(wnd%hPen)
        call DeleteCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_off


    !> Invalidates the whole window so that WM_PAINT copies the current image.
    subroutine gr_show(self)
        class(t_win32), intent(in) :: self
        integer (BOOL):: iretb

        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        iretb = InvalidateRect(wnd%hWnd, NULL, FALSE)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_show


    !> Updates the stored pen width/colour (when present), installs a matching
    !! solid pen in the memory DC and resets the current position to (0, 0).
    subroutine gr_pen(self, line_width, rgb)
        class(t_win32), intent(in out) :: self
        integer, intent(in), optional :: line_width
        type (t_rgb), intent(in), optional :: rgb
        integer (BOOL)   :: iretb
        integer (HANDLE) :: hNewPen

        associate( rgb_ => self%rgb, line_width_ => self%line_width )
            if ( present(rgb) ) rgb_ = rgb
            if ( present(line_width) ) line_width_ = line_width
            call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
            ! Select the new pen first so the old one is no longer selected
            ! into the DC at the moment it is deleted.
            hNewPen  = CreatePen(PS_SOLID, line_width_, irgb(rgb_))
            iretb    = SelectObject(wnd%hDC, hNewPen)
            iretb    = DeleteObject(wnd%hPen)
            wnd%hPen = hNewPen
            iretb    = MoveToEx(wnd%hDC, 0, 0, NULL)
            call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        end associate
    contains
        !> Packs a t_rgb into one integer: ir + 256*ig + 65536*ib.
        integer function irgb(rgb)
            type(t_rgb), intent(in) :: rgb
            irgb = rgb%ir + (rgb%ig + (rgb%ib * 256)) * 256
        end function irgb
    end subroutine gr_pen


    !> Moves the current position of the memory DC to (ix, iy).
    subroutine gr_moveTo(self, ix, iy)
        class(t_win32), intent(in) :: self
        integer, intent(in) :: ix, iy
        integer (BOOL):: iretb

        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        iretb = MoveToEx(wnd%hDC, ix, iy, NULL)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_moveTo


    !> Draws a line in the memory DC from the current position to (ix, iy).
    subroutine gr_lineTo(self, ix, iy)
        class(t_win32), intent(in) :: self
        integer, intent(in) :: ix, iy
        integer (BOOL):: iretb

        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        iretb = LineTo(wnd%hDC, ix, iy)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_lineTo


    !> Sets the pixel (ix, iy) of the memory DC to the packed colour icol.
    subroutine gr_dot(self, ix, iy, icol)
        class(t_win32), intent(in) :: self
        integer, intent(in) :: ix, iy, icol
        integer (BOOL):: iretb

        call EnterCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
        iretb = SetPixel(wnd%hDC, ix, iy, icol)
        call LeaveCriticalSection( loc(wnd%lpCriticalSection) ) ! non-standard Fortran :: LOC
    end subroutine gr_dot
end module m_win32


!> Facade module: re-exports only the types a plotting client needs.
module m_plot
    use m_oop
    use m_win32
    implicit none
    private
    public :: t_rgb, t_device, t_win32
end module m_plot
program Mandel
    ! Escape-time rendering of the Mandelbrot set.
    ! The per-point iteration counts are computed in a DO CONCURRENT construct
    ! and afterwards drawn pixel by pixel through the t_device plotter that
    ! module m_plot exports.
    implicit none
    integer, parameter :: kd = kind(0.0e0)   ! kind of all reals/complex here (default real)
    integer, parameter :: m = 256            ! largest maxiter accepted by the input check
    integer :: nwinx = 1024, nwiny = 1024    ! requested image size in pixels
    integer :: i, j, imax, jmax, maxiter
    real (kd) :: xmin, xmax, ymin, ymax, dx, dy
    real (kd) :: t0, t1
    complex (kd) :: c, z
    integer, allocatable :: ic(:, :)         ! iteration count stored per grid point
    ! Colour table indexed by iteration count. A point that never escapes leaves
    ! the inner loop with i = maxiter + 1, so the table extends to m + 1.
    integer :: icol(0:m + 1), it0, it1
    !
    xmin = -2.0_kd !1.10950d0
    xmax =  2.0_kd !1.10951d0
    ymin = -2.0_kd !0.24758d0
    ymax =  2.0_kd !0.24759d0
    maxiter = 253
    !
    dx = xmax - xmin
    dy = ymax - ymin
    if (dx <= 0.0_kd .or. dy <= 0.0_kd .or. maxiter <= 0 .or. maxiter > m) error stop 'input error'

    ! Fit the region into the window while keeping its aspect ratio.
    if (dx * nwinx > dy * nwiny) then
        imax = nwinx
        jmax = nint(nwinx * dy / dx)
    else
        imax = nint(nwiny * dx / dy)
        jmax = nwiny
    end if
    !
    ! From here on dx, dy are the step sizes per grid point.
    dx = dx / real(imax, kd)
    dy = dy / real(jmax, kd)

    ! Every entry starts as black (0) so that the indices 0 and maxiter + 1,
    ! which the gradient loop below does not touch, are well defined.
    icol = 0
    j = irgb(255, 255, 255)
    do i = maxiter, 1, -1
        icol(i) = j
        if (j > 1) j = j - irgb(255, 255, 255) / maxiter
    end do
    !
    allocate( ic(0:imax, 0:jmax), source = 0 )
    !
    print *, 'before do concurrent'
    call system_clock(it0)
    call cpu_time(t0)
    GPU_offload: do concurrent (integer :: ix = 0:imax, iy = 0:jmax) local(i, c, z) !shared(ic)
        c = cmplx(xmin + ix * dx, ymax - iy * dy, kind = kd)
        z = c
        do i = 0, maxiter
            if (abs(z) > 2.0_kd) exit
            z = z * z + c
        end do
        ic(ix, iy) = i          ! 0 .. maxiter on escape, maxiter + 1 otherwise
    end do GPU_offload
    call cpu_time(t1)
    call system_clock(it1)
    ! First value: cpu_time difference; second: raw system_clock count
    ! difference (system_clock is called without count_rate).
    print *, ' do concurrent time =', t1 - t0, it1 - it0
    !
    ! plotter
    !
    plot: block
        use m_plot
        class(t_device), allocatable :: fig
        type(t_rgb), parameter :: rgb_black = t_rgb(0, 0, 0)
        fig = t_win32('Mandelbrot 1', imax, jmax, 1, rgb_black)
        call fig%on()
        do i = 0, imax
            do j = 0, jmax
                call fig%dot(i, j, icol(ic(i, j)))
            end do
            call fig%show()     ! refresh after each finished column
        end do
        call fig%off()
    end block plot

contains

    ! Packs three colour components into one integer: ir + 256*ig + 65536*ib.
    integer function irgb(ir, ig, ib)
        integer, intent(in) :: ir, ig, ib
        irgb = ir + (ig + (ib * 256)) * 256
    end function irgb
end program Mandel
出力
C:>ifx mandel.f90 /O2 /Qxalderlake /Qopenmp-targets:spir64 /Qiopenmp /Qopt-report:2 /Qopenmp-target-do-concurrent Intel(R) Fortran Compiler for applications running on Intel(R) 64, Version 2022.2.0 Build 20220730 Copyright (C) 1985-2022 Intel Corporation. All rights reserved. Microsoft (R) Incremental Linker Version 14.33.31630.0 Copyright (C) Microsoft Corporation. All rights reserved. -out:mandel.exe -debug -pdb:mandel.pdb -subsystem:console -defaultlib:libiomp5md.lib -nodefaultlib:vcomp.lib -nodefaultlib:vcompd.lib C:\Temp\754833.obj C:\Temp\7548345.o -defaultlib:omptarget.lib C:>set OMP_TARGET_OFFLOAD=disabled C:>mandel Libomptarget --> Init target library! Libomptarget --> No RTL found for image 0x00007ff7b9567000! Libomptarget --> Done registering entries! start before do concurrent Libomptarget --> Entering target region with entry point 0x00007ff7b94fe8c6 and device Id 0 Libomptarget --> Offload is disabled Libomptarget --> Not offloading to device 0 do concurrent time = 9.3750000E-02 2470 Libomptarget --> Unloading target library! Libomptarget --> No RTLs in use support the image 0x00007ff7b9567000! Libomptarget --> Done unregistering images! Libomptarget --> Translation table for descriptor 0x00007ff7b9566000 cannot be found, probably it has been already removed. Libomptarget --> Done unregistering library! Libomptarget --> Deinit target library!
C:>set OMP_TARGET_OFFLOAD=MANDATORY C:>mandel Libomptarget --> Init target library! Libomptarget --> Initialized OMPT Libomptarget --> Loading RTLs... Libomptarget --> Loading library 'omptarget.rtl.level0.dll'... Target LEVEL0 RTL --> Init Level0 plugin! Target LEVEL0 RTL --> omp_get_thread_limit() returned 2147483647 Target LEVEL0 RTL --> omp_get_max_teams() returned 0 Libomptarget --> Successfully loaded library 'omptarget.rtl.level0.dll'! Target LEVEL0 RTL --> Looking for Level0 devices... Target LEVEL0 RTL --> Found a GPU device, Name = Intel(R) Iris(R) Xe Graphics Target LEVEL0 RTL --> Found 1 root devices, 1 total devices. Target LEVEL0 RTL --> List of devices (DeviceID[.SubID[.CCSID]]) Target LEVEL0 RTL --> -- 0 Target LEVEL0 RTL --> Root Device Information Target LEVEL0 RTL --> Device 0 Target LEVEL0 RTL --> -- Name : Intel(R) Iris(R) Xe Graphics Target LEVEL0 RTL --> -- PCI ID : 0x46a6 Target LEVEL0 RTL --> -- Number of total EUs : 96 Target LEVEL0 RTL --> -- Number of threads per EU : 7 Target LEVEL0 RTL --> -- EU SIMD width : 8 Target LEVEL0 RTL --> -- Number of EUs per subslice : 8 Target LEVEL0 RTL --> -- Number of subslices per slice: 12 Target LEVEL0 RTL --> -- Number of slices : 1 Target LEVEL0 RTL --> -- Local memory size (bytes) : 65536 Target LEVEL0 RTL --> -- Global memory size (bytes) : 6722351104 Target LEVEL0 RTL --> -- Cache size (bytes) : 1048576 Target LEVEL0 RTL --> -- Max clock frequency (MHz) : 1400 Target LEVEL0 RTL --> Driver API version is 10003 Target LEVEL0 RTL --> Interop property IDs, Names, Descriptions Target LEVEL0 RTL --> -- 0, device_num_eus, intptr_t, total number of EUs Target LEVEL0 RTL --> -- 1, device_num_threads_per_eu, intptr_t, number of threads per EU Target LEVEL0 RTL --> -- 2, device_eu_simd_width, intptr_t, physical EU simd width Target LEVEL0 RTL --> -- 3, device_num_eus_per_subslice, intptr_t, number of EUs per sub-slice Target LEVEL0 RTL --> -- 4, device_num_subslices_per_slice, intptr_t, number of sub-slices 
per slice Target LEVEL0 RTL --> -- 5, device_num_slices, intptr_t, number of slices Target LEVEL0 RTL --> -- 6, device_local_mem_size, intptr_t, local memory size in bytes Target LEVEL0 RTL --> -- 7, device_global_mem_size, intptr_t, global memory size in bytes Target LEVEL0 RTL --> -- 8, device_global_mem_cache_size, intptr_t, global memory cache size in bytes Target LEVEL0 RTL --> -- 9, device_max_clock_frequency, intptr_t, max clock frequency in MHz Target LEVEL0 RTL --> Found driver extensions: Target LEVEL0 RTL --> -- ZE_extension_float_atomics Target LEVEL0 RTL --> -- ZE_experimental_relaxed_allocation_limits Target LEVEL0 RTL --> -- ZE_experimental_module_program Target LEVEL0 RTL --> -- ZE_experimental_scheduling_hints Target LEVEL0 RTL --> -- ZE_experimental_global_offset Target LEVEL0 RTL --> -- ZE_extension_pci_properties Target LEVEL0 RTL --> -- ZE_extension_memory_compression_hints Target LEVEL0 RTL --> -- ZE_extension_memory_free_policies Target LEVEL0 RTL --> -- ZE_extension_device_memory_properties Target LEVEL0 RTL --> Returning 1 top-level devices Libomptarget --> Registering RTL omptarget.rtl.level0.dll supporting 1 devices! 
Libomptarget --> Optional interface: __tgt_rtl_data_alloc_base Libomptarget --> Optional interface: __tgt_rtl_data_alloc_managed Libomptarget --> Optional interface: __tgt_rtl_data_realloc Libomptarget --> Optional interface: __tgt_rtl_data_aligned_alloc Libomptarget --> Optional interface: __tgt_rtl_register_host_pointer Libomptarget --> Optional interface: __tgt_rtl_unregister_host_pointer Libomptarget --> Optional interface: __tgt_rtl_get_context_handle Libomptarget --> Optional interface: __tgt_rtl_init_ompt Libomptarget --> Optional interface: __tgt_rtl_requires_mapping Libomptarget --> Optional interface: __tgt_rtl_push_subdevice Libomptarget --> Optional interface: __tgt_rtl_pop_subdevice Libomptarget --> Optional interface: __tgt_rtl_add_build_options Libomptarget --> Optional interface: __tgt_rtl_is_supported_device Libomptarget --> Optional interface: __tgt_rtl_deinit Libomptarget --> Optional interface: __tgt_rtl_create_interop Libomptarget --> Optional interface: __tgt_rtl_release_interop Libomptarget --> Optional interface: __tgt_rtl_use_interop Libomptarget --> Optional interface: __tgt_rtl_get_num_interop_properties Libomptarget --> Optional interface: __tgt_rtl_get_interop_property_value Libomptarget --> Optional interface: __tgt_rtl_get_interop_property_info Libomptarget --> Optional interface: __tgt_rtl_get_interop_rc_desc Libomptarget --> Optional interface: __tgt_rtl_get_num_sub_devices Libomptarget --> Optional interface: __tgt_rtl_is_accessible_addr_range Libomptarget --> Optional interface: __tgt_rtl_notify_indirect_access Libomptarget --> Optional interface: __tgt_rtl_is_private_arg_on_host Libomptarget --> Optional interface: __tgt_rtl_command_batch_begin Libomptarget --> Optional interface: __tgt_rtl_command_batch_end Libomptarget --> Optional interface: __tgt_rtl_kernel_batch_begin Libomptarget --> Optional interface: __tgt_rtl_kernel_batch_end Libomptarget --> Optional interface: __tgt_rtl_alloc_per_hw_thread_scratch Libomptarget --> 
Optional interface: __tgt_rtl_free_per_hw_thread_scratch Libomptarget --> Optional interface: __tgt_rtl_run_target_team_nd_region Libomptarget --> Optional interface: __tgt_rtl_get_device_info Target LEVEL0 RTL --> Initialized OMPT Libomptarget --> Loading library 'omptarget.rtl.opencl.dll'... Target OPENCL RTL --> Init OpenCL plugin! Target OPENCL RTL --> omp_get_thread_limit() returned 2147483647 Target OPENCL RTL --> omp_get_max_teams() returned 0 Target OPENCL RTL --> Target device type is set to GPU Libomptarget --> Successfully loaded library 'omptarget.rtl.opencl.dll'! Target OPENCL RTL --> Start initializing OpenCL Target OPENCL RTL --> Platform OpenCL 3.0 has 1 Devices Target OPENCL RTL --> Extension clGetMemAllocInfoINTEL is found. Target OPENCL RTL --> Extension clHostMemAllocINTEL is found. Target OPENCL RTL --> Extension clDeviceMemAllocINTEL is found. Target OPENCL RTL --> Extension clSharedMemAllocINTEL is found. Target OPENCL RTL --> Extension clMemFreeINTEL is found. Target OPENCL RTL --> Extension clSetKernelArgMemPointerINTEL is found. Target OPENCL RTL --> Extension clEnqueueMemcpyINTEL is found. Target OPENCL RTL --> Extension clSetProgramSpecializationConstant is found. Target OPENCL RTL --> Extension clGetDeviceGlobalVariablePointerINTEL is found. Target OPENCL RTL --> Extension clGetKernelSuggestedLocalWorkSizeINTEL is found. Target OPENCL RTL --> Warning: Extension clGitsIndirectAllocationOffsets is not found. Target OPENCL RTL --> Device 0: Intel(R) Iris(R) Xe Graphics Target OPENCL RTL --> Number of execution units on the device is 96 Target OPENCL RTL --> Maximum work group size for the device is 256 Target OPENCL RTL --> Maximum memory allocation size is 3361175552 Target OPENCL RTL --> Device local mem size: 65536 Libomptarget --> Registering RTL omptarget.rtl.opencl.dll supporting 1 devices! 
Libomptarget --> Optional interface: __tgt_rtl_data_alloc_base Libomptarget --> Optional interface: __tgt_rtl_data_alloc_managed Libomptarget --> Optional interface: __tgt_rtl_data_realloc Libomptarget --> Optional interface: __tgt_rtl_data_aligned_alloc Libomptarget --> Optional interface: __tgt_rtl_get_device_name Libomptarget --> Optional interface: __tgt_rtl_get_context_handle Libomptarget --> Optional interface: __tgt_rtl_get_data_alloc_info Libomptarget --> Optional interface: __tgt_rtl_init_ompt Libomptarget --> Optional interface: __tgt_rtl_requires_mapping Libomptarget --> Optional interface: __tgt_rtl_manifest_data_for_region Libomptarget --> Optional interface: __tgt_rtl_add_build_options Libomptarget --> Optional interface: __tgt_rtl_is_supported_device Libomptarget --> Optional interface: __tgt_rtl_deinit Libomptarget --> Optional interface: __tgt_rtl_create_interop Libomptarget --> Optional interface: __tgt_rtl_release_interop Libomptarget --> Optional interface: __tgt_rtl_use_interop Libomptarget --> Optional interface: __tgt_rtl_get_num_interop_properties Libomptarget --> Optional interface: __tgt_rtl_get_interop_property_value Libomptarget --> Optional interface: __tgt_rtl_get_interop_property_info Libomptarget --> Optional interface: __tgt_rtl_get_interop_rc_desc Libomptarget --> Optional interface: __tgt_rtl_is_accessible_addr_range Libomptarget --> Optional interface: __tgt_rtl_notify_indirect_access Libomptarget --> Optional interface: __tgt_rtl_is_private_arg_on_host Libomptarget --> Optional interface: __tgt_rtl_alloc_per_hw_thread_scratch Libomptarget --> Optional interface: __tgt_rtl_free_per_hw_thread_scratch Libomptarget --> Optional interface: __tgt_rtl_run_target_team_nd_region Target OPENCL RTL --> Initialized OMPT Libomptarget --> Loading library 'libomptarget.rtl.ppc64.so'... Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e Libomptarget --> Unable to load library 'libomptarget.rtl.ppc64.so': c喙U0・_0・ク0・・・L! 
Libomptarget --> Loading library 'omptarget.rtl.x86_64.dll'... Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e Libomptarget --> Unable to load library 'omptarget.rtl.x86_64.dll': c喙U0・_0・ク0・・・L! Libomptarget --> Loading library 'libomptarget.rtl.cuda.so'... Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e Libomptarget --> Unable to load library 'libomptarget.rtl.cuda.so': c喙U0・_0・ク0・・・L! Libomptarget --> Loading library 'libomptarget.rtl.aarch64.so'... Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e Libomptarget --> Unable to load library 'libomptarget.rtl.aarch64.so': c喙U0・_0・ク0・・・L! Libomptarget --> Loading library 'libomptarget.rtl.ve.so'... Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e Libomptarget --> Unable to load library 'libomptarget.rtl.ve.so': c喙U0・_0・ク0・・・L! Libomptarget --> Loading library 'libomptarget.rtl.amdgpu.so'... Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e Libomptarget --> Unable to load library 'libomptarget.rtl.amdgpu.so': c喙U0・_0・ク0・・・L! Libomptarget --> Loading library 'libomptarget.rtl.rpc.so'... Libomptarget --> Call to LoadLibray() was unsuccessful with code 0x7e Libomptarget --> Unable to load library 'libomptarget.rtl.rpc.so': c喙U0・_0・ク0・・・L! Libomptarget --> RTLs loaded! Target LEVEL0 RTL --> Target binary is a valid oneAPI OpenMP image. Libomptarget --> Image 0x00007ff7b9567000 is compatible with RTL omptarget.rtl.level0.dll! Libomptarget --> RTL 0x00007ffa1fb70000 has index 0! Libomptarget --> Registering image 0x00007ff7b9567000 with RTL omptarget.rtl.level0.dll! Libomptarget --> Done registering entries! 
start before do concurrent Libomptarget --> Entering target region with entry point 0x00007ff7b94fe8c6 and device Id 0 Libomptarget --> Call to omp_get_num_devices returning 1 Libomptarget --> Call to omp_get_num_devices returning 1 Libomptarget --> Call to omp_get_initial_device returning 1 Libomptarget --> Checking whether device 0 is ready. Libomptarget --> Is the device 0 (local ID 0) initialized? 0 Target LEVEL0 RTL --> Initialize requires flags to 0 Target LEVEL0 RTL --> Allocated a device memory 0xffffb80200010000 Target LEVEL0 RTL --> Initialized device memory pool for device 0x00000147c4e45418: AllocUnit = 65536, AllocMax = 1048576, Capacity = 4, PoolSizeMax = 268435456 Target LEVEL0 RTL --> Allocated a shared memory object 0x00000147c6160000 Target LEVEL0 RTL --> Initialized shared memory pool for device 0x00000147c4e45418: AllocUnit = 65536, AllocMax = 8388608, Capacity = 4, PoolSizeMax = 268435456 Target LEVEL0 RTL --> Allocated a host memory 0x00000147c6160000 Target LEVEL0 RTL --> Initialized host memory pool for device 0x00000147c4e45418: AllocUnit = 65536, AllocMax = 1048576, Capacity = 4, PoolSizeMax = 268435456 Target LEVEL0 RTL --> Created a command queue 0x00000147c4e81d68 (Ordinal: 0, Index: 0) for device 0. Target LEVEL0 RTL --> Initialized Level0 device 0 Libomptarget --> Device 0 is ready to use. Target LEVEL0 RTL --> Device 0: Loading binary from 0x00007ff7b9567000 Target LEVEL0 RTL --> Expecting to have 10 entries defined Target LEVEL0 RTL --> Base L0 module compilation options: -cl-std=CL2.0 Target LEVEL0 RTL --> Found a single section in the image Target LEVEL0 RTL --> Created module from image #0. Target LEVEL0 RTL --> Module link is not required Target LEVEL0 RTL --> Looking up device global variable '__omp_offloading_entries_table_size' of size 8 bytes on device 0. Target LEVEL0 RTL --> Global variable lookup succeeded (size: 8 bytes). Target LEVEL0 RTL --> Created a command list 0x00000147c5c1bac8 (Ordinal: 0) for device 0. 
Target LEVEL0 RTL --> Warning: number of entries in host and device offload tables mismatch (10 != 2). Target LEVEL0 RTL --> Looking up device global variable '__omp_offloading_entries_table' of size 80 bytes on device 0. Target LEVEL0 RTL --> Global variable lookup succeeded (size: 80 bytes). Target LEVEL0 RTL --> Device offload table loaded: Target LEVEL0 RTL --> 0: _ZL7pone_ld_3d4ae508d8dbf78737978824de0e0216 Target LEVEL0 RTL --> 1: __omp_offloading_56af2408_2649_MAIN___l379 Target LEVEL0 RTL --> Looking up device global variable '__omp_offloading_56af2408_2649_MAIN___l379_kernel_info' of unknown size on device 0. Target LEVEL0 RTL --> Global variable lookup succeeded (size: 176 bytes). Target LEVEL0 RTL --> Kernel 0: Entry = 0x00007ff7b94fe8c6, Name = __omp_offloading_56af2408_2649_MAIN___l379, NumArgs = 18, Handle = 0x00000147c4f32760 Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!! Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!! Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!! Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!! Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!! Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!! Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!! Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!! Target LEVEL0 RTL --> Warning: Entry with a nullptr name!!! Target LEVEL0 RTL --> Looking up device global variable '__omp_spirv_program_data' of size 56 bytes on device 0. Target LEVEL0 RTL --> Global variable lookup succeeded (size: 56 bytes). 
Libomptarget --> Entry 0: Base=0x0000000dbd18f940, Begin=0x0000000dbd18f940, Size=96, Type=0x20, Name=MANDEL$IC Libomptarget --> Entry 1: Base=0x0000000dbd18f940, Begin=0x00000147c5d501c0, Size=4202500, Type=0x1000000000017, Name=MANDEL$IC_addr_a0 Libomptarget --> Entry 2: Base=0x0000000dbd18f940, Begin=0x0000000dbd18f948, Size=88, Type=0x1000000000005, Name=MANDEL$IC_dv_len Libomptarget --> Entry 3: Base=0x00000000000000fd, Begin=0x00000000000000fd, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 4: Base=0x000000003b800000, Begin=0x000000003b800000, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 5: Base=0x0000000040000000, Begin=0x0000000040000000, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 6: Base=0x000000003b800000, Begin=0x000000003b800000, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 7: Base=0x00000000c0000000, Begin=0x00000000c0000000, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 8: Base=0x0000000000000400, Begin=0x0000000000000400, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 9: Base=0x0000000000000000, Begin=0x0000000000000000, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 10: Base=0x0000000000000001, Begin=0x0000000000000001, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 11: Base=0x0000000000000400, Begin=0x0000000000000400, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 12: Base=0x0000000000000000, Begin=0x0000000000000000, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 13: Base=0x0000000000000400, Begin=0x0000000000000400, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 14: Base=0x0000000000000000, Begin=0x0000000000000000, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 15: Base=0x0000000000000001, Begin=0x0000000000000001, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 16: Base=0x0000000000000400, Begin=0x0000000000000400, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 17: Base=0x0000000000000000, 
Begin=0x0000000000000000, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 18: Base=0x0000000000000000, Begin=0x0000000000000000, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 19: Base=0x0000000000100800, Begin=0x0000000000100800, Size=0, Type=0x120, Name=unknown Libomptarget --> Entry 20: Base=0x0000000dbd18f1d0, Begin=0x0000000dbd18f1d0, Size=32, Type=0x800, Name=unknown Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f940, Size=96)... Target LEVEL0 RTL --> Ptr 0x0000000dbd18f940 requires mapping Target LEVEL0 RTL --> Allocated a shared memory object 0x00000147c6170000 Target LEVEL0 RTL --> New block allocation for shared memory pool: base = 0x00000147c6170000, size = 65536, pool size = 65536 Libomptarget --> Creating new map entry with HstPtrBegin=0x0000000dbd18f940, TgtPtrBegin=0x00000147c6170000, Size=96, DynRefCount=1, HoldRefCount=0, Name=MANDEL$IC Libomptarget --> There are 96 bytes allocated at target address 0x00000147c6170000 - is new Libomptarget --> Has a pointer entry: Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f940, Size=8)... Libomptarget --> Mapping exists with HstPtrBegin=0x0000000dbd18f940, TgtPtrBegin=0x00000147c6170000, Size=8, DynRefCount=1 (update suppressed), HoldRefCount=0, Name=unknown Libomptarget --> There are 8 bytes allocated at target address 0x00000147c6170000 - is new Libomptarget --> Looking up mapping(HstPtrBegin=0x00000147c5d501c0, Size=4202500)... 
Target LEVEL0 RTL --> Ptr 0x00000147c5d501c0 requires mapping Target LEVEL0 RTL --> Allocated a shared memory object 0x00000147d9970000 Target LEVEL0 RTL --> New block allocation for shared memory pool: base = 0x00000147d9970000, size = 33554432, pool size = 33619968 Libomptarget --> Creating new map entry with HstPtrBegin=0x00000147c5d501c0, TgtPtrBegin=0x00000147d9970000, Size=4202500, DynRefCount=1, HoldRefCount=0, Name=MANDEL$IC_addr_a0 Libomptarget --> Moving 4202500 bytes (hst:0x00000147c5d501c0) -> (tgt:0x00000147d9970000) Target LEVEL0 RTL --> Copied 4202500 bytes (hst:0x00000147c5d501c0) -> (tgt:0x00000147d9970000) Libomptarget --> There are 4202500 bytes allocated at target address 0x00000147d9970000 - is new Libomptarget --> Update pointer (0x00000147c6170000) -> [0x00000147d9970000] Target LEVEL0 RTL --> Copied 8 bytes (hst:0x00000147c8435f60) -> (tgt:0x00000147c6170000) Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f940, Size=8)... Target LEVEL0 RTL --> Notifying indirect access: 0x00000147c6170000 + 0 Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f948, Size=88)... Libomptarget --> Mapping exists with HstPtrBegin=0x0000000dbd18f948, TgtPtrBegin=0x00000147c6170008, Size=88, DynRefCount=1 (update suppressed), HoldRefCount=0, Name=MANDEL$IC_dv_len Libomptarget --> Moving 88 bytes (hst:0x0000000dbd18f948) -> (tgt:0x00000147c6170008) Target LEVEL0 RTL --> Copied 88 bytes (hst:0x0000000dbd18f948) -> (tgt:0x00000147c6170008) Libomptarget --> There are 88 bytes allocated at target address 0x00000147c6170008 - is new Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f940, Size=96)... 
Libomptarget --> Mapping exists with HstPtrBegin=0x0000000dbd18f940, TgtPtrBegin=0x00000147c6170000, Size=96, DynRefCount=1 (update suppressed), HoldRefCount=0 Libomptarget --> Obtained target argument (Begin: 0x00000147c6170000, Offset: 0) from host pointer 0x0000000dbd18f940 Libomptarget --> Forwarding first-private value 0x00000000000000fd to the target construct Libomptarget --> Forwarding first-private value 0x000000003b800000 to the target construct Libomptarget --> Forwarding first-private value 0x0000000040000000 to the target construct Libomptarget --> Forwarding first-private value 0x000000003b800000 to the target construct Libomptarget --> Forwarding first-private value 0x00000000c0000000 to the target construct Libomptarget --> Forwarding first-private value 0x0000000000000400 to the target construct Libomptarget --> Forwarding first-private value 0x0000000000000000 to the target construct Libomptarget --> Forwarding first-private value 0x0000000000000001 to the target construct Libomptarget --> Forwarding first-private value 0x0000000000000400 to the target construct Libomptarget --> Forwarding first-private value 0x0000000000000000 to the target construct Libomptarget --> Forwarding first-private value 0x0000000000000400 to the target construct Libomptarget --> Forwarding first-private value 0x0000000000000000 to the target construct Libomptarget --> Forwarding first-private value 0x0000000000000001 to the target construct Libomptarget --> Forwarding first-private value 0x0000000000000400 to the target construct Libomptarget --> Forwarding first-private value 0x0000000000000000 to the target construct Libomptarget --> Forwarding first-private value 0x0000000000000000 to the target construct Libomptarget --> Forwarding first-private value 0x0000000000100800 to the target construct Libomptarget --> Launching target execution __omp_offloading_56af2408_2649_MAIN___l379 with pointer 0x00000147c8a42d10 (index=0). 
Target LEVEL0 RTL --> Executing a kernel 0x00000147c8a42d10... Target LEVEL0 RTL --> Assumed kernel SIMD width is 16 Target LEVEL0 RTL --> Preferred group size is multiple of 32 Target LEVEL0 RTL --> Loop 0: lower bound = 0, upper bound = 1050624, Stride = 1 Target LEVEL0 RTL --> Team sizes = {32, 1, 1} Target LEVEL0 RTL --> Number of teams = {32833, 1, 1} Target LEVEL0 RTL --> Kernel Pointer argument 0 (value: 0x00000147c6170000) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 1 (value: 0x00000000000000fd) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 2 (value: 0x000000003b800000) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 3 (value: 0x0000000040000000) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 4 (value: 0x000000003b800000) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 5 (value: 0x00000000c0000000) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 6 (value: 0x0000000000000400) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 7 (value: 0x0000000000000000) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 8 (value: 0x0000000000000001) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 9 (value: 0x0000000000000400) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 10 (value: 0x0000000000000000) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 11 (value: 0x0000000000000400) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 12 (value: 0x0000000000000000) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 13 (value: 0x0000000000000001) was set successfully for device 0. 
Target LEVEL0 RTL --> Kernel Scalar argument 14 (value: 0x0000000000000400) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 15 (value: 0x0000000000000000) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 16 (value: 0x0000000000000000) was set successfully for device 0. Target LEVEL0 RTL --> Kernel Scalar argument 17 (value: 0x0000000000100800) was set successfully for device 0. Target LEVEL0 RTL --> Setting indirect access flags 0x0000000000000004 Target LEVEL0 RTL --> Submitted kernel 0x00000147c4f32760 to device 0 Target LEVEL0 RTL --> Executed kernel entry 0x00000147c8a42d10 on device 0 Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f948, Size=88)... Libomptarget --> Mapping exists with HstPtrBegin=0x0000000dbd18f948, TgtPtrBegin=0x00000147c6170008, Size=88, DynRefCount=1 (update suppressed), HoldRefCount=0 Libomptarget --> There are 88 bytes allocated at target address 0x00000147c6170008 - is last Libomptarget --> Looking up mapping(HstPtrBegin=0x00000147c5d501c0, Size=4202500)... Libomptarget --> Mapping exists with HstPtrBegin=0x00000147c5d501c0, TgtPtrBegin=0x00000147d9970000, Size=4202500, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 Libomptarget --> There are 4202500 bytes allocated at target address 0x00000147d9970000 - is last Libomptarget --> Moving 4202500 bytes (tgt:0x00000147d9970000) -> (hst:0x00000147c5d501c0) Target LEVEL0 RTL --> Copied 4202500 bytes (tgt:0x00000147d9970000) -> (hst:0x00000147c5d501c0) Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f940, Size=96)... Libomptarget --> Mapping exists with HstPtrBegin=0x0000000dbd18f940, TgtPtrBegin=0x00000147c6170000, Size=96, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0 Libomptarget --> There are 96 bytes allocated at target address 0x00000147c6170000 - is last Libomptarget --> Looking up mapping(HstPtrBegin=0x00000147c5d501c0, Size=4202500)... 
Libomptarget --> Deleting tgt data 0x00000147d9970000 of size 4202500 Libomptarget --> Removing map entry with HstPtrBegin=0x00000147c5d501c0, TgtPtrBegin=0x00000147d9970000, Size=4202500, Name=MANDEL$IC_addr_a0 Libomptarget --> Looking up mapping(HstPtrBegin=0x0000000dbd18f940, Size=96)... Libomptarget --> Removing shadow pointer 0x0000000dbd18f940 Libomptarget --> Deleting tgt data 0x00000147c6170000 of size 96 Libomptarget --> Removing map entry with HstPtrBegin=0x0000000dbd18f940, TgtPtrBegin=0x00000147c6170000, Size=96, Name=MANDEL$IC do concurrent time = 0.6250000 18170 Libomptarget --> Unloading target library! Target LEVEL0 RTL --> Target binary is a valid oneAPI OpenMP image. Libomptarget --> Image 0x00007ff7b9567000 is compatible with RTL 0x00007ffa1fb70000! Libomptarget --> Unregistered image 0x00007ff7b9567000 from RTL 0x00007ffa1fb70000! Libomptarget --> Done unregistering images! Libomptarget --> Removing translation table for descriptor 0x00007ff7b9566000 Target LEVEL0 RTL --> MemPool usage for shared memory, device 0x00000147c4e45418 Target LEVEL0 RTL --> -- AllocMax=8(MB), Capacity=4, PoolSizeMax=256(MB) Target LEVEL0 RTL --> -- : NewAlloc Reuse Hit(%) Target LEVEL0 RTL --> -- Bucket[ 128]: 1 0 0.00 Target LEVEL0 RTL --> -- Bucket[ 8388608]: 1 0 0.00 Target LEVEL0 RTL --> MemPool usage for device memory, device 0x00000147c4e45418 Target LEVEL0 RTL --> -- Not used Target LEVEL0 RTL --> Memory usage for device memory, device 0x00000147c4e45418 Target LEVEL0 RTL --> -- Not used Target LEVEL0 RTL --> Memory usage for shared memory, device 0x00000147c4e45418 Target LEVEL0 RTL --> -- Allocator: Native, Pool Target LEVEL0 RTL --> -- Requested: 33619968, 4202596 Target LEVEL0 RTL --> -- Allocated: 33619968, 8388736 Target LEVEL0 RTL --> -- Freed : 33619968, 8388736 Target LEVEL0 RTL --> -- InUse : 0, 0 Target LEVEL0 RTL --> -- PeakUse : 33619968, 8388736 Target LEVEL0 RTL --> -- NumAllocs: 2, 2 Target LEVEL0 RTL --> MemPool usage for host memory, device 
0x00000147c4e45418 Target LEVEL0 RTL --> -- Not used Target LEVEL0 RTL --> Memory usage for host memory, device 0x00000147c4e45418 Target LEVEL0 RTL --> -- Not used Target LEVEL0 RTL --> Closed RTL successfully Target LEVEL0 RTL --> Deinit Level0 plugin! Libomptarget --> Done unregistering library! Libomptarget --> Deinit target library!