/* * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "runtime/built_ins/built_ins.h" #include "runtime/command_stream/thread_arbitration_policy.h" #include "runtime/gen_common/reg_configs.h" #include "runtime/helpers/preamble.h" #include "runtime/memory_manager/graphics_allocation.h" #include "runtime/memory_manager/memory_constants.h" #include "unit_tests/command_queue/enqueue_fixture.h" #include "unit_tests/fixtures/hello_world_fixture.h" #include "unit_tests/fixtures/memory_management_fixture.h" #include "unit_tests/helpers/hw_parse.h" #include "unit_tests/helpers/debug_manager_state_restore.h" #include "unit_tests/mocks/mock_csr.h" #include "unit_tests/mocks/mock_buffer.h" #include "unit_tests/mocks/mock_submissions_aggregator.h" #include "runtime/helpers/hw_info.h" using namespace OCLRT; typedef HelloWorldFixture EnqueueKernelFixture; typedef Test EnqueueKernelTest; TEST_F(EnqueueKernelTest, clEnqueueNDRangeKernel_null_kernel) { size_t globalWorkSize[3] = {1, 1, 1}; auto retVal = clEnqueueNDRangeKernel( pCmdQ, nullptr, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(EnqueueKernelTest, clEnqueueNDRangeKernel_SetCompletionStampTaskCountOnBuffersAndDcFlushOnMapBufferIfItsTaskCountIsGreaterThanLatestDcFlushTaskCount) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pDevice, 0); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); retVal = clSetKernelArg(pKernel, 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel, 1, sizeof(cl_mem), &b1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueNDRangeKernel(pCmdQ2, pKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto taskCount0 = castToObject(b0)->getCompletionStamp().taskCount; auto taskCount1 = castToObject(b1)->getCompletionStamp().taskCount; EXPECT_EQ(1u, taskCount0); EXPECT_EQ(1u, taskCount1); auto ptrResult = clEnqueueMapBuffer(pCmdQ, b1, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, 8, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); clEnqueueUnmapMemObject(pCmdQ, b1, ptrResult, 0, nullptr, nullptr); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDRangeKernelReturnsSuccess) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pDevice, 0); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &b1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalledTwiceThenClEnqueueNDRangeKernelReturnsError) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pDevice, 0); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &b1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenSetKernelArgIsCalledForEachArgButAtLeastFailsThenClEnqueueNDRangeKernelReturnsError) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pDevice, 0); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 1, 2 * sizeof(cl_mem), &b1); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, clEnqueueNDRangeKernel_invalid_event_list_count) { size_t globalWorkSize[3] = {1, 1, 1}; auto retVal = clEnqueueNDRangeKernel( pCmdQ, pKernel, 1, nullptr, globalWorkSize, nullptr, 1, nullptr, nullptr); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(EnqueueKernelTest, clEnqueueNDRangeKernel_invalidWorkGroupSize) { size_t globalWorkSize[3] = {12, 12, 12}; size_t localWorkSize[3] = {11, 12, 12}; auto retVal = clEnqueueNDRangeKernel( pCmdQ, pKernel, 3, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal); } struct TestParam2 { cl_uint ScratchSize; } TestParamTable2[] = {{1024}, {2048}, {4096}, {8192}, {16384}, {32768}, {65536}, {131072}}; struct TestParam { cl_uint globalWorkSizeX; cl_uint globalWorkSizeY; cl_uint globalWorkSizeZ; cl_uint localWorkSizeX; cl_uint localWorkSizeY; cl_uint localWorkSizeZ; } TestParamTable[] = { {1, 1, 1, 1, 1, 1}, {16, 1, 1, 1, 1, 1}, {16, 1, 1, 16, 1, 1}, {32, 1, 1, 1, 1, 1}, {32, 1, 1, 16, 1, 1}, {32, 1, 1, 32, 1, 1}, {64, 1, 1, 1, 1, 1}, {64, 1, 1, 16, 1, 1}, {64, 1, 1, 32, 1, 1}, {64, 1, 1, 64, 1, 1}, {190, 1, 1, 95, 1, 1}, {510, 1, 1, 255, 1, 1}, {512, 1, 1, 256, 1, 1}}; template struct EnqueueKernelTypeTest : public HelloWorldFixture, public HardwareParse, ::testing::TestWithParam { typedef HelloWorldFixture ParentClass; using ParentClass::pCmdBuffer; using ParentClass::pCS; EnqueueKernelTypeTest() { } void FillValues() { globalWorkSize[0] = 1; globalWorkSize[1] = 1; globalWorkSize[2] = 1; localWorkSize[0] = 1; localWorkSize[1] = 1; localWorkSize[2] = 1; }; template void enqueueKernel(Kernel *inputKernel = nullptr) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; FillValues(); // Compute # of expected work items expectedWorkItems = 1; for (auto i = 0u; i < workDim; i++) { expectedWorkItems *= globalWorkSize[i]; } auto usedKernel = inputKernel ? inputKernel : pKernel; auto retVal = pCmdQ->enqueueKernel( usedKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } void SetUp() override { ParentClass::SetUp(); HardwareParse::SetUp(); } void TearDown() override { HardwareParse::TearDown(); ParentClass::TearDown(); } size_t globalWorkSize[3]; size_t localWorkSize[3]; size_t expectedWorkItems = 0; }; template <> void EnqueueKernelTypeTest::FillValues() { const TestParam ¶m = GetParam(); globalWorkSize[0] = param.globalWorkSizeX; globalWorkSize[1] = param.globalWorkSizeY; globalWorkSize[2] = param.globalWorkSizeZ; localWorkSize[0] = param.localWorkSizeX; localWorkSize[1] = param.localWorkSizeY; localWorkSize[2] = param.localWorkSizeZ; } typedef EnqueueKernelTypeTest EnqueueWorkItemTests; HWTEST_P(EnqueueWorkItemTests, GPGPUWalker) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::GPGPU_WALKER GPGPU_WALKER; enqueueKernel(); ASSERT_NE(cmdList.end(), itorWalker); auto *cmd = (GPGPU_WALKER *)*itorWalker; // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = (1ull << simd) - 1; // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } auto numWorkItems = ((cmd->getThreadWidthCounterMaximum() - 1) * simd + lanesPerThreadX) * cmd->getThreadGroupIdXDimension(); EXPECT_EQ(expectedWorkItems, numWorkItems); } HWTEST_P(EnqueueWorkItemTests, bumpsTaskLevel) { auto taskLevelBefore = pCmdQ->taskLevel; enqueueKernel(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_P(EnqueueWorkItemTests, alignsToCSR) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; enqueueKernel(); EXPECT_EQ(pCmdQ->taskCount, csr.peekTaskCount()); EXPECT_EQ(pCmdQ->taskLevel + 1, csr.peekTaskLevel()); } HWTEST_P(EnqueueWorkItemTests, addsCommands) { auto usedCmdBufferBefore = pCS->getUsed(); enqueueKernel(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_P(EnqueueWorkItemTests, addsIndirectData) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto ihBefore = pIH->getUsed(); auto sshBefore = pSSH->getUsed(); enqueueKernel(); EXPECT_NE(dshBefore, pDSH->getUsed()); EXPECT_NE(iohBefore, pIOH->getUsed()); EXPECT_NE(ihBefore, pIH->getUsed()); if (pKernel->requiresSshForBuffers() || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } HWTEST_P(EnqueueWorkItemTests, LoadRegisterImmediateL3CNTLREG) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; enqueueKernel(); // All state should be programmed before walker auto itorCmd = findMmio(cmdList.begin(), itorWalker, L3CNTLRegisterOffset::registerOffset); ASSERT_NE(itorWalker, itorCmd); auto *cmd = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmd); auto RegisterOffset = L3CNTLRegisterOffset::registerOffset; EXPECT_EQ(RegisterOffset, cmd->getRegisterOffset()); auto l3Cntlreg = cmd->getDataDword(); auto numURBWays = (l3Cntlreg >> 1) & 0x7f; auto L3ClientPool = (l3Cntlreg >> 25) & 0x7f; EXPECT_NE(0u, numURBWays); EXPECT_NE(0u, L3ClientPool); } HWTEST_P(EnqueueWorkItemTests, StateBaseAddress) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; enqueueKernel(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmd = (STATE_BASE_ADDRESS *)*itorCmd; bool force32BitAddressing = context->getMemoryManager()->peekForce32BitAllocations(); // Verify all addresses are getting programmed EXPECT_TRUE(cmd->getDynamicStateBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getGeneralStateBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getSurfaceStateBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getIndirectObjectBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getInstructionBaseAddressModifyEnable()); EXPECT_EQ((uintptr_t)pDSH->getBase(), cmd->getDynamicStateBaseAddress()); // Stateless accesses require GSH.base to be 0. if (force32BitAddressing) { EXPECT_EQ(context->getMemoryManager()->allocator32Bit->getBase(), cmd->getGeneralStateBaseAddress()); } else { EXPECT_EQ(0u, cmd->getGeneralStateBaseAddress()); } EXPECT_EQ((uintptr_t)pSSH->getBase(), cmd->getSurfaceStateBaseAddress()); EXPECT_EQ((uintptr_t)pIOH->getBase(), cmd->getIndirectObjectBaseAddress()); EXPECT_EQ((uintptr_t)pIH->getBase(), cmd->getInstructionBaseAddress()); // Verify all sizes are getting programmed EXPECT_TRUE(cmd->getDynamicStateBufferSizeModifyEnable()); EXPECT_TRUE(cmd->getGeneralStateBufferSizeModifyEnable()); EXPECT_TRUE(cmd->getIndirectObjectBufferSizeModifyEnable()); EXPECT_TRUE(cmd->getInstructionBufferSizeModifyEnable()); EXPECT_EQ(pDSH->getMaxAvailableSpace(), cmd->getDynamicStateBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, cmd->getGeneralStateBufferSize()); EXPECT_EQ(pIOH->getMaxAvailableSpace(), cmd->getIndirectObjectBufferSize() * MemoryConstants::pageSize); EXPECT_EQ(pIH->getMaxAvailableSpace(), cmd->getInstructionBufferSize() * MemoryConstants::pageSize); // Generically validate this command PARSE::template validateCommand(cmdList.begin(), itorCmd); } HWTEST_P(EnqueueWorkItemTests, MediaInterfaceDescriptorLoad) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename PARSE::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueKernel(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmd = genCmdCast(*itorCmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command PARSE::template validateCommand(cmdList.begin(), itorCmd); } HWTEST_P(EnqueueWorkItemTests, InterfaceDescriptorData) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename PARSE::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueKernel(); // Extract the MIDL command auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Extract the SBA command itorCmd = find(cmdList.begin(), itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd; // Extrach the DSH auto DSH = cmdSBA->getDynamicStateBaseAddress(); ASSERT_NE(0u, DSH); // IDD should be located within DSH auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress(); auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength(); ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize); // Extract the IDD auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)(DSH + iddStart); // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_P(EnqueueWorkItemTests, PipelineSelect) { enqueueKernel(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWTEST_P(EnqueueWorkItemTests, MediaVFEState) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; enqueueKernel(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmd = (MEDIA_VFE_STATE *)*itorCmd; // Verify we have a valid length EXPECT_EQ(pDevice->getHardwareInfo().pSysInfo->ThreadCount, cmd->getMaximumNumberOfThreads()); EXPECT_EQ(1u, cmd->getNumberOfUrbEntries()); uint32_t expectedVal = PreambleHelper::getUrbEntryAllocationSize(); EXPECT_EQ(expectedVal, cmd->getUrbEntryAllocationSize()); EXPECT_EQ(0u, cmd->getScratchSpaceBasePointer()); EXPECT_EQ(0u, cmd->getPerThreadScratchSpace()); EXPECT_EQ(0u, cmd->getStackSize()); // Generically validate this command PARSE::template validateCommand(cmdList.begin(), itorCmd); } INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueWorkItemTests, ::testing::ValuesIn(TestParamTable)); typedef EnqueueKernelTypeTest EnqueueScratchSpaceTests; HWTEST_P(EnqueueScratchSpaceTests, GivenKernelRequiringScratchWhenItIsEnqueuedWithDifferentScratchSizesThenMediaVFEStateAndStateBaseAddressAreProperlyProgrammed) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; auto &csr = pDevice->getUltCommandStreamReceiver(); csr.getMemoryManager()->setForce32BitAllocations(false); EXPECT_TRUE(csr.getMemoryManager()->allocationsForReuse.peekIsEmpty()); SPatchMediaVFEState mediaVFEstate; auto scratchSize = GetParam().ScratchSize; mediaVFEstate.PerThreadScratchSpace = scratchSize; MockKernelWithInternals mockKernel(*pDevice); mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; auto sizeToProgram = (scratchSize / MemoryConstants::kiloByte); auto bitValue = 0u; while (sizeToProgram >>= 1) { bitValue++; } auto valueToProgram = Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize); EXPECT_EQ(bitValue, valueToProgram); enqueueKernel(mockKernel); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); auto itorCmdForStateBase = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); ASSERT_NE(itorWalker, itorCmdForStateBase); auto *cmd = (MEDIA_VFE_STATE *)*itorCmd; auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; // Verify we have a valid length EXPECT_EQ(pDevice->getHardwareInfo().pSysInfo->ThreadCount, cmd->getMaximumNumberOfThreads()); EXPECT_NE(0u, cmd->getNumberOfUrbEntries()); EXPECT_NE(0u, cmd->getUrbEntryAllocationSize()); EXPECT_EQ(bitValue, cmd->getPerThreadScratchSpace()); EXPECT_EQ(bitValue, cmd->getStackSize()); auto graphicsAllocation = csr.getScratchAllocation(); auto GSHaddress = (uintptr_t)sba->getGeneralStateBaseAddress(); if (is32bit) { EXPECT_NE(0u, cmd->getScratchSpaceBasePointer()); EXPECT_EQ(0u, GSHaddress); } else { EXPECT_EQ(PreambleHelper::getScratchSpaceOffsetFor64bit(), cmd->getScratchSpaceBasePointer()); EXPECT_EQ(GSHaddress + PreambleHelper::getScratchSpaceOffsetFor64bit(), (uintptr_t)graphicsAllocation->getUnderlyingBuffer()); } auto allocationSize = scratchSize * pDevice->getDeviceInfo().computeUnitsUsedForScratch; EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), allocationSize); // Generically validate this command PARSE::template validateCommand(cmdList.begin(), itorCmd); scratchSize *= 2; //skip if size to big 4MB, no point in stressing memory allocator. if (allocationSize > 4194304) { return; } mediaVFEstate.PerThreadScratchSpace = scratchSize; auto itorfirstBBEnd = find(itorWalker, cmdList.end()); ASSERT_NE(cmdList.end(), itorfirstBBEnd); enqueueKernel(mockKernel); bitValue++; itorCmd = find(itorfirstBBEnd, cmdList.end()); itorCmdForStateBase = find(itorWalker, cmdList.end()); ASSERT_NE(itorWalker, itorCmd); if (is64bit) { ASSERT_NE(itorCmdForStateBase, itorCmd); } else { //no SBA not dirty ASSERT_EQ(itorCmdForStateBase, cmdList.end()); } auto *cmd2 = (MEDIA_VFE_STATE *)*itorCmd; // Verify we have a valid length EXPECT_EQ(pDevice->getHardwareInfo().pSysInfo->ThreadCount, cmd2->getMaximumNumberOfThreads()); EXPECT_NE(0u, cmd2->getNumberOfUrbEntries()); EXPECT_NE(0u, cmd2->getUrbEntryAllocationSize()); EXPECT_EQ(bitValue, cmd2->getPerThreadScratchSpace()); EXPECT_EQ(bitValue, cmd2->getStackSize()); auto graphicsAllocation2 = csr.getScratchAllocation(); if (is32bit) { auto scratchBase = (uintptr_t)cmd2->getScratchSpaceBasePointer(); EXPECT_NE(0u, scratchBase); auto graphicsAddress = (uintptr_t)graphicsAllocation2->getUnderlyingBuffer(); EXPECT_EQ(graphicsAddress, scratchBase); } else { auto *sba2 = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress2 = sba2->getGeneralStateBaseAddress(); EXPECT_NE(0u, GSHaddress2); EXPECT_EQ(PreambleHelper::getScratchSpaceOffsetFor64bit(), cmd2->getScratchSpaceBasePointer()); EXPECT_NE(GSHaddress2, GSHaddress); } EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), allocationSize); EXPECT_NE(graphicsAllocation2, graphicsAllocation); // Generically validate this command PARSE::template validateCommand(cmdList.begin(), itorCmd); // Trigger SBA generation LinearStream dirtyDsh; csr.dshState.updateAndCheck(&dirtyDsh); enqueueKernel(mockKernel); auto finalItorToSBA = find(itorCmd, cmdList.end()); ASSERT_NE(finalItorToSBA, cmdList.end()); auto *finalSba2 = (STATE_BASE_ADDRESS *)*finalItorToSBA; auto GSBaddress = finalSba2->getGeneralStateBaseAddress(); if (is32bit) { EXPECT_EQ(0u, GSBaddress); } else if (is64bit) { EXPECT_EQ((uintptr_t)graphicsAllocation2->getUnderlyingBuffer(), GSBaddress + PreambleHelper::getScratchSpaceOffsetFor64bit()); } EXPECT_TRUE(csr.getMemoryManager()->allocationsForReuse.peekIsEmpty()); } INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueScratchSpaceTests, ::testing::ValuesIn(TestParamTable2)); typedef EnqueueKernelTypeTest EnqueueKernelWithScratch; HWTEST_P(EnqueueKernelWithScratch, GivenKernelRequiringScratchWhenItIsEnqueuedWithDifferentScratchSizesThenPreviousScratchAllocationIsMadeNonResidentPriorStoringOnResueList) { auto mockCsr = new MockCsrHw(pDevice->getHardwareInfo()); pDevice->resetCommandStreamReceiver(mockCsr); SPatchMediaVFEState mediaVFEstate; auto scratchSize = 1024; mediaVFEstate.PerThreadScratchSpace = scratchSize; MockKernelWithInternals mockKernel(*pDevice); mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; auto sizeToProgram = (scratchSize / MemoryConstants::kiloByte); auto bitValue = 0u; while (sizeToProgram >>= 1) { bitValue++; } auto valueToProgram = Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize); EXPECT_EQ(bitValue, valueToProgram); enqueueKernel(mockKernel); auto graphicsAllocation = mockCsr->getScratchAllocation(); EXPECT_TRUE(mockCsr->isMadeResident(graphicsAllocation)); // Enqueue With ScratchSize bigger then previous scratchSize = 8196; mediaVFEstate.PerThreadScratchSpace = scratchSize; enqueueKernel(mockKernel); EXPECT_TRUE(mockCsr->isMadeNonResident(graphicsAllocation)); } HWTEST_P(EnqueueKernelWithScratch, givenDeviceForcing32bitAllocationsWhenKernelWithScratchIsEnqueuedThenGeneralStateHeapBaseAddressIsCorrectlyProgrammedAndMediaVFEStateContainsProgramming) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; if (is64bit) { CommandStreamReceiver *csr = &pDevice->getCommandStreamReceiver(); auto memoryManager = csr->getMemoryManager(); memoryManager->setForce32BitAllocations(true); SPatchMediaVFEState mediaVFEstate; auto scratchSize = 1024; mediaVFEstate.PerThreadScratchSpace = scratchSize; MockKernelWithInternals mockKernel(*pDevice); mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; enqueueKernel(mockKernel); auto graphicsAllocation = csr->getScratchAllocation(); EXPECT_TRUE(graphicsAllocation->is32BitAllocation); auto graphicsAddress = (uint64_t)graphicsAllocation->getGpuAddress(); auto baseAddress = graphicsAllocation->gpuBaseAddress; // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); auto itorCmdForStateBase = find(itorPipelineSelect, itorWalker); auto *mediaVfeState = (MEDIA_VFE_STATE *)*itorCmd; auto scratchBaseLowPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointer(); auto scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh(); uint64_t scratchBaseAddr = scratchBaseHighPart << 32 | scratchBaseLowPart; EXPECT_EQ(graphicsAddress - baseAddress, scratchBaseAddr); ASSERT_NE(itorCmdForStateBase, itorWalker); auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress = (uintptr_t)sba->getGeneralStateBaseAddress(); EXPECT_EQ(memoryManager->allocator32Bit->getBase(), GSHaddress); //now re-try to see if SBA is not programmed scratchSize *= 2; mediaVFEstate.PerThreadScratchSpace = scratchSize; enqueueKernel(mockKernel); itorCmdForStateBase = find(itorWalker, cmdList.end()); EXPECT_EQ(itorCmdForStateBase, cmdList.end()); } } INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueKernelWithScratch, testing::Values(1)); TestParam TestParamPrintf[] = { {1, 1, 1, 1, 1, 1}}; typedef EnqueueKernelTypeTest EnqueueKernelPrintfTest; HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfThenPatchCrossTHreadData) { typedef typename FamilyType::PARSE PARSE; SPatchAllocateStatelessPrintfSurface patchData; patchData.SurfaceStateHeapOffset = 0; patchData.Size = 256; patchData.DataParamOffset = 64; MockKernelWithInternals mockKernel(*pDevice); mockKernel.crossThreadData[64] = 0; mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &patchData; enqueueKernel(mockKernel); EXPECT_EQ(mockKernel.crossThreadData[64], 0); } HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfWhenBeingDispatchedThenL3CacheIsFlushed) { typedef typename FamilyType::PARSE PARSE; SPatchAllocateStatelessPrintfSurface patchData; patchData.Size = 256; patchData.DataParamOffset = 64; MockKernelWithInternals mockKernel(*pDevice); mockKernel.crossThreadData[64] = 0; mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &patchData; auto &csr = pCmdQ->getDevice().getCommandStreamReceiver(); auto latestSentTaskCount = csr.peekTaskCount(); enqueueKernel(mockKernel); auto newLatestSentTaskCount = csr.peekTaskCount(); EXPECT_GT(newLatestSentTaskCount, latestSentTaskCount); EXPECT_EQ(pCmdQ->latestTaskCountWaited, newLatestSentTaskCount); } HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUnblockedThenL3CacheIsFlushed) { typedef typename FamilyType::PARSE PARSE; UserEvent userEvent(context); SPatchAllocateStatelessPrintfSurface patchData; patchData.Size = 256; patchData.DataParamOffset = 64; MockKernelWithInternals mockKernel(*pDevice); mockKernel.crossThreadData[64] = 0; mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &patchData; auto &csr = pCmdQ->getDevice().getCommandStreamReceiver(); auto latestSentDcFlushTaskCount = csr.peekTaskCount(); cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; FillValues(); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( mockKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); auto newLatestSentDCFlushTaskCount = csr.peekTaskCount(); EXPECT_GT(newLatestSentDCFlushTaskCount, latestSentDcFlushTaskCount); EXPECT_EQ(pCmdQ->latestTaskCountWaited, newLatestSentDCFlushTaskCount); } HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUnblockedThenOutputPrinted) { typedef typename FamilyType::PARSE PARSE; // In scenarios with 32bit allocator and 64 bit tests this code won't work // due to inability to retrieve original buffer pointer as it is done in this test. if (!pDevice->getMemoryManager()->peekForce32BitAllocations()) { testing::internal::CaptureStdout(); UserEvent userEvent(context); SPatchAllocateStatelessPrintfSurface patchData; patchData.Size = 256; patchData.DataParamSize = 8; patchData.DataParamOffset = 0; MockKernelWithInternals mockKernel(*pDevice); mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &patchData; auto crossThreadData = reinterpret_cast(mockKernel.mockKernel->getCrossThreadData()); char *testString = new char[sizeof("test")]; strcpy_s(testString, sizeof("test"), "test"); PrintfStringInfo printfStringInfo; printfStringInfo.SizeInBytes = sizeof("test"); printfStringInfo.pStringData = testString; mockKernel.kernelInfo.patchInfo.stringDataMap.insert(std::make_pair(0, printfStringInfo)); cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; FillValues(); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( mockKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); auto printfAllocation = reinterpret_cast(*crossThreadData); printfAllocation[0] = 8; printfAllocation[1] = 0; userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ("test", output.c_str()); } } INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueKernelPrintfTest, ::testing::ValuesIn(TestParamPrintf)); TEST_F(EnqueueKernelTest, GivenKernelWithBuiltinDispatchInfoBuilderWhenBeingDispatchedThenBuiltinDispatcherIsUsedForDispatchValidation) { struct MockBuiltinDispatchBuilder : BuiltinDispatchInfoBuilder { MockBuiltinDispatchBuilder(BuiltIns &builtins) : BuiltinDispatchInfoBuilder(builtins) { } cl_int validateDispatch(Kernel *kernel, uint32_t inworkDim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset) const override { receivedKernel = kernel; receivedWorkDim = inworkDim; receivedGws = gws; receivedElws = elws; receivedOffset = offset; wasValidateDispatchCalled = true; return valueToReturn; } cl_int valueToReturn = CL_SUCCESS; mutable Kernel *receivedKernel = nullptr; mutable uint32_t receivedWorkDim = 0; mutable Vec3 receivedGws = {0, 0, 0}; mutable Vec3 receivedElws = {0, 0, 0}; mutable Vec3 receivedOffset = {0, 0, 0}; mutable bool wasValidateDispatchCalled = false; }; MockBuiltinDispatchBuilder mockNuiltinDispatchBuilder(BuiltIns::getInstance()); MockKernelWithInternals mockKernel(*pDevice); mockKernel.kernelInfo.builtinDispatchBuilder = &mockNuiltinDispatchBuilder; EXPECT_FALSE(mockNuiltinDispatchBuilder.wasValidateDispatchCalled); mockNuiltinDispatchBuilder.valueToReturn = CL_SUCCESS; size_t gws[2] = {10, 1}; size_t lws[2] = {5, 1}; size_t off[2] = {7, 0}; uint32_t dim = 1; auto ret = pCmdQ->enqueueKernel(mockKernel.mockKernel, dim, off, gws, lws, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(mockNuiltinDispatchBuilder.wasValidateDispatchCalled); EXPECT_EQ(mockKernel.mockKernel, mockNuiltinDispatchBuilder.receivedKernel); EXPECT_EQ(gws[0], mockNuiltinDispatchBuilder.receivedGws.x); EXPECT_EQ(lws[0], mockNuiltinDispatchBuilder.receivedElws.x); EXPECT_EQ(off[0], mockNuiltinDispatchBuilder.receivedOffset.x); EXPECT_EQ(dim, mockNuiltinDispatchBuilder.receivedWorkDim); mockNuiltinDispatchBuilder.wasValidateDispatchCalled = false; gws[0] = 26; lws[0] = 13; off[0] = 17; dim = 2; cl_int forcedErr = 37; mockNuiltinDispatchBuilder.valueToReturn = forcedErr; ret = pCmdQ->enqueueKernel(mockKernel.mockKernel, dim, off, gws, lws, 0, nullptr, nullptr); EXPECT_EQ(forcedErr, ret); EXPECT_TRUE(mockNuiltinDispatchBuilder.wasValidateDispatchCalled); EXPECT_EQ(mockKernel.mockKernel, mockNuiltinDispatchBuilder.receivedKernel); EXPECT_EQ(gws[0], mockNuiltinDispatchBuilder.receivedGws.x); EXPECT_EQ(lws[0], mockNuiltinDispatchBuilder.receivedElws.x); EXPECT_EQ(off[0], mockNuiltinDispatchBuilder.receivedOffset.x); EXPECT_EQ(dim, mockNuiltinDispatchBuilder.receivedWorkDim); BuiltIns::shutDown(); } HWTEST_F(EnqueueKernelTest, givenSecondEnqueueWithTheSameScratchRequirementWhenPreemptionIsEnabledThenDontProgramMVSAgain) { typedef typename FamilyType::MEDIA_VFE_STATE MEDIA_VFE_STATE; pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); auto &csr = pDevice->getCommandStreamReceiver(); csr.getMemoryManager()->setForce32BitAllocations(false); HardwareParse hwParser; size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; SPatchMediaVFEState mediaVFEstate; uint32_t scratchSize = 4096u; mediaVFEstate.PerThreadScratchSpace = scratchSize; MockKernelWithInternals mockKernel(*pDevice); mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; auto sizeToProgram = Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwParser.parseCommands(*pCmdQ); // All state should be programmed before walker auto itorCmd = find(hwParser.itorPipelineSelect, hwParser.itorWalker); ASSERT_NE(hwParser.itorWalker, itorCmd); auto *cmd = (MEDIA_VFE_STATE *)*itorCmd; EXPECT_EQ(sizeToProgram, cmd->getPerThreadScratchSpace()); EXPECT_EQ(sizeToProgram, cmd->getStackSize()); auto scratchAlloc = csr.getScratchAllocation(); auto itorfirstBBEnd = find(hwParser.itorWalker, hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorfirstBBEnd); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwParser.parseCommands(*pCmdQ); itorCmd = find(itorfirstBBEnd, hwParser.cmdList.end()); ASSERT_EQ(hwParser.cmdList.end(), itorCmd); EXPECT_EQ(csr.getScratchAllocation(), scratchAlloc); } HWTEST_F(EnqueueKernelTest, givenEnqueueWithGlobalWorkSizeWhenZeroValueIsPassedInDimensionThenTheKernelCommandWillTriviallySucceed) { size_t gws[3] = {0, 0, 0}; MockKernelWithInternals mockKernel(*pDevice); auto ret = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, ret); } HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueKernelIsCalledThenKernelIsRecorded) { auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; auto ret = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 1 : 0; EXPECT_EQ(0, mockCsr->flushCalledCount); EXPECT_EQ(6u + csrSurfaceCount, cmdBuffer->surfaces.size()); } HWTEST_F(EnqueueKernelTest, givenDefaultCommandStreamReceiverWhenClFlushIsCalledThenSuccessIsReturned) { MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto ret = clFlush(pCmdQ); EXPECT_EQ(CL_SUCCESS, ret); } HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeAndBatchedKernelWhenFlushIsCalledThenKernelIsSubmitted) { auto mockCsrmockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsrmockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsrmockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsrmockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto ret = clFlush(pCmdQ); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); EXPECT_EQ(1, mockCsrmockCsr->flushCalledCount); } HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeAndBatchedKernelWhenFlushIsCalledTwiceThenNothingChanges) { auto mockCsrmockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsrmockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsrmockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsrmockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto ret = clFlush(pCmdQ); EXPECT_EQ(CL_SUCCESS, ret); ret = clFlush(pCmdQ); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); EXPECT_EQ(1, mockCsrmockCsr->flushCalledCount); } HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenKernelIsEnqueuedTwiceThenTwoSubmissionsAreRecorded) { auto mockCsrmockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsrmockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsrmockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsrmockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; //make sure csr emits something mockCsrmockCsr->overrideMediaVFEStateDirty(true); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); mockCsrmockCsr->overrideMediaVFEStateDirty(true); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto &cmdBufferList = mockedSubmissionsAggregator->peekCmdBufferList(); EXPECT_NE(nullptr, cmdBufferList.peekHead()); EXPECT_NE(cmdBufferList.peekTail(), cmdBufferList.peekHead()); auto cmdBuffer1 = cmdBufferList.peekHead(); auto cmdBuffer2 = cmdBufferList.peekTail(); EXPECT_EQ(cmdBuffer1->surfaces.size(), cmdBuffer2->surfaces.size()); EXPECT_EQ(cmdBuffer1->batchBuffer.commandBufferAllocation, cmdBuffer2->batchBuffer.commandBufferAllocation); EXPECT_GT(cmdBuffer2->batchBuffer.startOffset, cmdBuffer1->batchBuffer.startOffset); } HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenFlushIsCalledOnTwoBatchedKernelsThenTheyAreExecutedInOrder) { auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->flush(); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); EXPECT_EQ(1, mockCsr->flushCalledCount); } HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledThenBatchesSubmissionsAreFlushed) { auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->finish(false); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); EXPECT_EQ(1, mockCsr->flushCalledCount); } HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenThressEnqueueKernelsAreCalledThenBatchesSubmissionsAreFlushed) { auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->finish(false); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); EXPECT_EQ(1, mockCsr->flushCalledCount); } HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenWaitForEventsIsCalledThenBatchedSubmissionsAreFlushed) { auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); cl_event event; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); auto status = clWaitForEvents(1, &event); EXPECT_EQ(CL_SUCCESS, status); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); EXPECT_EQ(1, mockCsr->flushCalledCount); status = clReleaseEvent(event); EXPECT_EQ(CL_SUCCESS, status); } HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenCommandIsFlushedThenFlushStampIsUpdatedInCommandQueueCsrAndEvent) { auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); cl_event event; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); auto neoEvent = castToObject(event); EXPECT_EQ(0u, mockCsr->flushStamp->peekStamp()); EXPECT_EQ(0u, neoEvent->flushStamp->peekStamp()); EXPECT_EQ(0u, pCmdQ->flushStamp->peekStamp()); auto status = clWaitForEvents(1, &event); EXPECT_EQ(1, neoEvent->getRefInternalCount()); EXPECT_EQ(1u, mockCsr->flushStamp->peekStamp()); EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp()); EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp()); status = clFinish(pCmdQ); EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp()); status = clReleaseEvent(event); } HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenCommandWithEventIsFollowedByCommandWithoutEventThenFlushStampIsUpdatedInCommandQueueCsrAndEvent) { auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; cl_event event; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto neoEvent = castToObject(event); EXPECT_EQ(0u, mockCsr->flushStamp->peekStamp()); EXPECT_EQ(0u, neoEvent->flushStamp->peekStamp()); EXPECT_EQ(0u, pCmdQ->flushStamp->peekStamp()); auto status = clWaitForEvents(1, &event); EXPECT_EQ(1, neoEvent->getRefInternalCount()); EXPECT_EQ(1u, mockCsr->flushStamp->peekStamp()); EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp()); EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp()); status = clFinish(pCmdQ); EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp()); status = clReleaseEvent(event); } HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenClFlushIsCalledThenQueueFlushStampIsUpdated) { auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, mockCsr->flushStamp->peekStamp()); EXPECT_EQ(0u, pCmdQ->flushStamp->peekStamp()); clFlush(pCmdQ); EXPECT_EQ(1u, mockCsr->flushStamp->peekStamp()); EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp()); } HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenWaitForEventsIsCalledWithUnflushedTaskCountThenBatchedSubmissionsAreFlushed) { auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; cl_event event; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event); auto status = clWaitForEvents(1, &event); EXPECT_EQ(CL_SUCCESS, status); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); EXPECT_EQ(1, mockCsr->flushCalledCount); status = clReleaseEvent(event); EXPECT_EQ(CL_SUCCESS, status); } HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledWithUnflushedTaskCountThenBatchedSubmissionsAreFlushed) { auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; cl_event event; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event); auto status = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, status); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); EXPECT_EQ(1, mockCsr->flushCalledCount); status = clReleaseEvent(event); EXPECT_EQ(CL_SUCCESS, status); } HWTEST_F(EnqueueKernelTest, givenOutOfOrderCommandQueueWhenEnqueueKernelIsMadeThenPipeControlPositionIsRecorded) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto ooq = clCreateCommandQueueWithProperties(context, pDevice, props, nullptr); auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; clEnqueueNDRangeKernel(ooq, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); EXPECT_NE(nullptr, cmdBuffer->pipeControlLocation); clReleaseCommandQueue(ooq); } HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelIsMadeThenPipeControlPositionIsRecorded) { const cl_queue_properties props[] = {0}; auto inOrderQueue = clCreateCommandQueueWithProperties(context, pDevice, props, nullptr); auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); EXPECT_NE(nullptr, cmdBuffer->pipeControlLocation); clReleaseCommandQueue(inOrderQueue); } HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelThatHasSharedObjectsAsArgIsMadeThenPipeControlPositionIsNotRecorded) { const cl_queue_properties props[] = {0}; auto inOrderQueue = clCreateCommandQueueWithProperties(context, pDevice, props, nullptr); auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; mockKernel.mockKernel->setUsingSharedArgs(true); clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); EXPECT_EQ(nullptr, cmdBuffer->pipeControlLocation); clReleaseCommandQueue(inOrderQueue); } HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeThenPipeControlPositionIsNotRecorded) { const cl_queue_properties props[] = {0}; auto inOrderQueue = clCreateCommandQueueWithProperties(context, pDevice, props, nullptr); auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; cl_event event; clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); EXPECT_EQ(nullptr, cmdBuffer->pipeControlLocation); clReleaseCommandQueue(inOrderQueue); clReleaseEvent(event); } HWTEST_F(EnqueueKernelTest, givenOutOfOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeThenPipeControlPositionIsRecorded) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto inOrderQueue = clCreateCommandQueueWithProperties(context, pDevice, props, nullptr); auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; cl_event event; clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); EXPECT_NE(nullptr, cmdBuffer->pipeControlLocation); clReleaseCommandQueue(inOrderQueue); clReleaseEvent(event); } HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenBlockingCallIsMadeThenEventAssociatedWithCommandHasProperFlushStamp) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; cl_event event; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); auto neoEvent = castToObject(event); EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp()); EXPECT_EQ(1, mockCsr->flushCalledCount); auto status = clReleaseEvent(event); EXPECT_EQ(CL_SUCCESS, status); } HWTEST_F(EnqueueKernelTest, givenKernelWhenItIsEnqueuedThenAllResourceGraphicsAllocationsAreUpdatedWithCsrTaskCount) { auto mockCsr = new MockCsrHw2(pDevice->getHardwareInfo()); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1, mockCsr->flushCalledCount); auto csrTaskCount = mockCsr->peekTaskCount(); auto &passedAllocationPack = mockCsr->copyOfAllocations; for (auto &allocation : passedAllocationPack) { EXPECT_EQ(csrTaskCount, allocation->taskCount); } } TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqueueNDRangeKernelReturnsInvalidKernelArgsAndSetEventToNull) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pDevice, 0); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); cl_event event; retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, &event); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); clReleaseCommandQueue(pCmdQ2); }