diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index f0c286e0c8..df23b690ae 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -3449,7 +3449,7 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue, } retVal = pCommandQueue->enqueueKernel( - kernel, + pKernel, workDim, globalWorkOffset, globalWorkSize, @@ -5904,7 +5904,7 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue, } retVal = pCommandQueue->enqueueKernel( - kernel, + pKernel, workDim, globalWorkOffset, globalWorkSize, diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 019721a08b..2182c03340 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -85,7 +85,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { virtual cl_int enqueueFillBuffer(Buffer *buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; - virtual cl_int enqueueKernel(cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, + virtual cl_int enqueueKernel(Kernel *kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueBarrierWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 15532d8d1e..21b8488660 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -144,7 +144,7 @@ class CommandQueueHw : public CommandQueue { const cl_event *eventWaitList, cl_event *event) override; - cl_int enqueueKernel(cl_kernel kernel, + cl_int enqueueKernel(Kernel *kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, diff --git a/opencl/source/command_queue/enqueue_kernel.h b/opencl/source/command_queue/enqueue_kernel.h index 14a109c816..e14bdecbe3 100644 --- a/opencl/source/command_queue/enqueue_kernel.h +++ b/opencl/source/command_queue/enqueue_kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -22,7 +22,7 @@ namespace NEO { template cl_int CommandQueueHw::enqueueKernel( - cl_kernel clKernel, + Kernel *pKernel, cl_uint workDim, const size_t *globalWorkOffsetIn, const size_t *globalWorkSizeIn, @@ -36,7 +36,7 @@ cl_int CommandQueueHw::enqueueKernel( size_t workGroupSize[3] = {1, 1, 1}; size_t enqueuedLocalWorkSize[3] = {0, 0, 0}; - auto &kernel = *castToObjectOrAbort(clKernel); + auto &kernel = *pKernel; auto rootDeviceIndex = device->getRootDeviceIndex(); const auto &kernelInfo = kernel.getKernelInfo(rootDeviceIndex); diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index 92b6446fa1..9d49e3e3ca 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -88,7 +88,7 @@ class MockCommandQueue : public CommandQueue { size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } - cl_int enqueueKernel(cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, + cl_int enqueueKernel(Kernel *kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index 593aa1faff..68775cb3c0 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -151,10 +151,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfolingWhenWa size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; - cl_kernel clKernel = &kernel; static_cast *>(pCmdQ)->enqueueKernel( - clKernel, + &kernel, dimensions, globalOffsets, workItems, @@ -198,10 +197,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNo size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; - cl_kernel clKernel = &kernel; static_cast *>(pCmdQ)->enqueueKernel( - clKernel, + &kernel, dimensions, globalOffsets, workItems, @@ -687,9 +685,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GivenCommandQueueWit size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; - cl_kernel clKernel = kernel->mockKernel; - static_cast *>(pCmdQ.get())->enqueueKernel(clKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); + static_cast *>(pCmdQ.get())->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); HardwareParse parse; auto &cmdList = parse.cmdList; @@ -739,9 +736,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GivenCommandQueueWit size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; - cl_kernel clKernel = kernel->mockKernel; - static_cast *>(pCmdQ.get())->enqueueKernel(clKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); + static_cast *>(pCmdQ.get())->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); HardwareParse parse; auto &cmdList = parse.cmdList; @@ -792,8 +788,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GivenCommandQueueBlo uint32_t dimensions = 1; cl_event event; cl_event ue = new UserEvent(); - cl_kernel clKernel = kernel->mockKernel; - static_cast *>(pCmdQ.get())->enqueueKernel(clKernel, dimensions, globalOffsets, workItems, nullptr, + static_cast *>(pCmdQ.get())->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 1, // one user event to block queue &ue, // user event not signaled &event); @@ -849,9 +844,8 @@ HWTEST_F(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCount size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; - cl_kernel clKernel = kernel->mockKernel; - static_cast *>(pCmdQ.get())->enqueueKernel(clKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, nullptr); + static_cast *>(pCmdQ.get())->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, nullptr); HardwareParse parse; auto &cmdList = parse.cmdList; @@ -917,9 +911,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GivenCommandQueueWit size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; - cl_kernel clKernel = kernel->mockKernel; - static_cast *>(pCmdQ.get())->enqueueKernel(clKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); + static_cast *>(pCmdQ.get())->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); auto pEvent = static_cast *>(event); EXPECT_EQ(pEvent->getHwTimeStampNode()->getGpuAddress(), timeStampGpuAddress); @@ -972,10 +965,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersOnCCSTests, givenCommandQue uint32_t dimensions = 1; cl_event event; cl_event userEvent = clCreateUserEvent(context.get(), nullptr); - cl_kernel clKernel = kernel->mockKernel; CommandQueueHw *cmdQHw = static_cast *>(pCmdQ.get()); - cmdQHw->enqueueKernel(clKernel, dimensions, globalOffsets, workItems, nullptr, 1, &userEvent, &event); + cmdQHw->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 1, &userEvent, &event); ASSERT_NE(nullptr, pCmdQ->virtualEvent); ASSERT_NE(nullptr, pCmdQ->virtualEvent->peekCommand()); NEO::LinearStream *eventCommandStream = pCmdQ->virtualEvent->peekCommand()->getCommandStream(); @@ -1026,10 +1018,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersOnCCSTests, givenCommandQue size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; - cl_kernel clKernel = kernel->mockKernel; CommandQueueHw *cmdQHw = static_cast *>(pCmdQ.get()); - cmdQHw->enqueueKernel(clKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); + cmdQHw->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); HardwareParse parse; auto &cmdList = parse.cmdList;