/*
 * Copyright (C) 2017-2018 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "runtime/kernel/kernel.h"
#include "runtime/program/printf_handler.h"
#include "runtime/sampler/sampler.h"
#include "unit_tests/fixtures/execution_model_fixture.h"
#include "unit_tests/fixtures/execution_model_kernel_fixture.h"
#include "unit_tests/fixtures/image_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/helpers/gtest_helpers.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_program.h"
#include "unit_tests/mocks/mock_context.h"
#include "unit_tests/mocks/mock_device_queue.h"
#include "unit_tests/mocks/mock_mdi.h"
#include "unit_tests/mocks/mock_sampler.h"
#include "patch_list.h"
#include "runtime/execution_model/device_enqueue.h"
#include "matchers.h"
// NOTE(review): the names of the two standard headers below and the template
// arguments used throughout (std::vector<IGIL_KernelCurbeParams>,
// std::vector<uint32_t>, reinterpret_cast<void *>) were reconstructed from how
// the code uses them - confirm against the project history.
#include <algorithm>
#include <memory>

using namespace OCLRT;

using KernelReflectionSurfaceTest = ExecutionModelKernelFixture;
using KernelReflectionSurfaceWithQueueTest = ExecutionModelKernelTest;

// Patch-token structures in the tests below are value-initialized ("= {}") so
// that every field a test does not assign explicitly is zero instead of
// indeterminate when the structure is handed to KernelInfo::storeKernelArgument.

// A freshly created kernel must not own a reflection surface yet.
// Only checked when the device reports an "OpenCL 2." version string.
TEST_P(KernelReflectionSurfaceTest, CreatedKernelHasNullKernelReflectionSurface) {
    const std::string clVersion(pPlatform->getDevice(0)->getDeviceInfo().clVersion);
    if (clVersion.find("OpenCL 2.") == std::string::npos) {
        return;
    }
    EXPECT_EQ(nullptr, pKernel->getKernelReflectionSurface());
}

// Stores an image, a sampler and a buffer argument without any additional
// offsets and expects getCurbeParams to return exactly three entries, all of
// parameter type 1024, with the first SSH token at index 0.
// Note: despite the test name, the returned vector is expected to hold three entries.
TEST_P(KernelReflectionSurfaceTest, GivenEmptyKernelInfoWhenPassedToGetCurbeParamsThenEmptyVectorIsReturned) {
    KernelInfo info;

    SPatchImageMemoryObjectKernelArgument imageMemObjKernelArg = {};
    imageMemObjKernelArg.ArgumentNumber = 0;
    imageMemObjKernelArg.Offset = 32;
    imageMemObjKernelArg.Size = 4;
    imageMemObjKernelArg.Type = iOpenCL::IMAGE_MEMORY_OBJECT_2D;
    info.storeKernelArgument(&imageMemObjKernelArg);

    SPatchSamplerKernelArgument samplerMemObjKernelArg = {};
    samplerMemObjKernelArg.ArgumentNumber = 1;
    samplerMemObjKernelArg.Offset = 32;
    samplerMemObjKernelArg.Size = 4;
    samplerMemObjKernelArg.Type = iOpenCL::SAMPLER_OBJECT_TEXTURE;
    info.storeKernelArgument(&samplerMemObjKernelArg);

    SPatchDataParameterBuffer bufferMemObjKernelArg = {};
    bufferMemObjKernelArg.ArgumentNumber = 2;
    bufferMemObjKernelArg.Offset = 32;
    bufferMemObjKernelArg.Size = 4;
    info.storeKernelArgument(&bufferMemObjKernelArg);

    std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
    uint64_t tokenMask = 0;
    uint32_t firstSSHTokenIndex = 0;
    MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, info, pPlatform->getDevice(0)->getHardwareInfo());

    // 3 params with Binding Table index of type 1024
    EXPECT_EQ(3u, curbeParamsForBlock.size());
    for (const auto &param : curbeParamsForBlock) {
        EXPECT_EQ(1024u, param.m_parameterType);
    }
    EXPECT_EQ(0u, firstSSHTokenIndex);
}

// Fills every image-related offset of a single image argument and checks that
// getCurbeParams returns one entry per supported image parameter type
// (token value + 50) carrying the matching patch offset, followed by a single
// type-1024 entry of size 8.
TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithCorrectlyFilledImageArgumentWhenPassedToGetCurbeParamsThenImageCurbeParamsAreReturned) {
    KernelInfo info;

    SPatchImageMemoryObjectKernelArgument imageMemObjKernelArg = {};
    imageMemObjKernelArg.ArgumentNumber = 0;
    imageMemObjKernelArg.Offset = 32;
    imageMemObjKernelArg.Size = 4;
    imageMemObjKernelArg.Type = iOpenCL::IMAGE_MEMORY_OBJECT_2D;

    const uint32_t offsetDataType = 4;
    const uint32_t offsetChannelOrder = 8;
    const uint32_t offsetHeap = 12;
    const uint32_t offsetDepth = 16;
    const uint32_t offsetWidth = 20;
    const uint32_t offsetHeight = 24;
    const uint32_t offsetObjectID = 28;
    const uint32_t offsetArraySize = 32;

    info.storeKernelArgument(&imageMemObjKernelArg);

    auto &imageArg = info.kernelArgInfo[0];
    imageArg.accessQualifier = CL_KERNEL_ARG_ACCESS_READ_ONLY;
    imageArg.accessQualifierStr = "read_only";
    imageArg.isImage = true;
    imageArg.name = "img";
    imageArg.offsetChannelDataType = offsetDataType;
    imageArg.offsetChannelOrder = offsetChannelOrder;
    imageArg.offsetHeap = offsetHeap;
    imageArg.offsetImgDepth = offsetDepth;
    imageArg.offsetImgWidth = offsetWidth;
    imageArg.offsetImgHeight = offsetHeight;
    imageArg.offsetObjectId = offsetObjectID;
    imageArg.offsetArraySize = offsetArraySize;

    info.gpuPointerSize = 8;

    std::vector<IGIL_KernelCurbeParams> curbeParams;
    uint64_t tokenMask = 0;
    uint32_t firstSSHTokenIndex = 0;
    MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getDevice(0)->getHardwareInfo());

    std::vector<uint32_t> supportedImageParamTypes = {iOpenCL::DATA_PARAMETER_IMAGE_WIDTH,
                                                      iOpenCL::DATA_PARAMETER_IMAGE_HEIGHT,
                                                      iOpenCL::DATA_PARAMETER_IMAGE_DEPTH,
                                                      iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE,
                                                      iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_ORDER,
                                                      iOpenCL::DATA_PARAMETER_IMAGE_ARRAY_SIZE,
                                                      iOpenCL::DATA_PARAMETER_OBJECT_ID,
                                                      1024}; // type for Binding Table Index
    std::sort(supportedImageParamTypes.begin(), supportedImageParamTypes.end());

    const size_t paramCount = supportedImageParamTypes.size();
    EXPECT_EQ(paramCount, curbeParams.size());

    // Bounded by the shorter of the two so a size mismatch cannot index out of range.
    const size_t checkedCount = std::min(paramCount, curbeParams.size());
    for (size_t i = 0; i < checkedCount; i++) {
        const auto &param = curbeParams[i];

        if (i < paramCount - 1) {
            EXPECT_EQ(supportedImageParamTypes[i] + 50, param.m_parameterType);
            EXPECT_EQ(sizeof(uint32_t), param.m_parameterSize);
        } else {
            // The last (largest) sorted type is the 1024 entry.
            EXPECT_EQ(1024u, param.m_parameterType);
            EXPECT_EQ(8u, param.m_parameterSize);
        }

        switch (param.m_parameterType - 50) {
        case iOpenCL::DATA_PARAMETER_IMAGE_WIDTH:
            EXPECT_EQ(offsetWidth, param.m_patchOffset);
            break;
        case iOpenCL::DATA_PARAMETER_IMAGE_HEIGHT:
            EXPECT_EQ(offsetHeight, param.m_patchOffset);
            break;
        case iOpenCL::DATA_PARAMETER_IMAGE_DEPTH:
            EXPECT_EQ(offsetDepth, param.m_patchOffset);
            break;
        case iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE:
            EXPECT_EQ(offsetDataType, param.m_patchOffset);
            break;
        case iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_ORDER:
            EXPECT_EQ(offsetChannelOrder, param.m_patchOffset);
            break;
        case iOpenCL::DATA_PARAMETER_IMAGE_ARRAY_SIZE:
            EXPECT_EQ(offsetArraySize, param.m_patchOffset);
            break;
        case iOpenCL::DATA_PARAMETER_OBJECT_ID:
            EXPECT_EQ(offsetObjectID, param.m_patchOffset);
            break;
        }
    }

    EXPECT_EQ(curbeParams.size() - 1, firstSSHTokenIndex);
}

// With a four-entry binding table whose entry 3 holds the image offset, the
// type-1024 entry returned for the image argument is expected to carry
// patch offset 3 (the binding table index), size 8 and source offset 0.
HWTEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithSetBindingTableStateAndImageArgumentWhenPassedToGetCurbeParamsThenProperCurbeParamIsReturned) {
    using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;

    KernelInfo info;
    uint32_t imageOffset = 32;
    uint32_t btIndex = 3;

    info.gpuPointerSize = 8;

    SPatchImageMemoryObjectKernelArgument imageMemObjKernelArg = {};
    imageMemObjKernelArg.ArgumentNumber = 0;
    imageMemObjKernelArg.Offset = imageOffset;
    imageMemObjKernelArg.Size = 4;
    imageMemObjKernelArg.Type = iOpenCL::IMAGE_MEMORY_OBJECT_2D;
    info.storeKernelArgument(&imageMemObjKernelArg);

    SPatchBindingTableState bindingTableStateInfo = {};
    bindingTableStateInfo.Offset = 0;
    bindingTableStateInfo.Count = 4;
    info.patchInfo.bindingTableState = &bindingTableStateInfo;

    BINDING_TABLE_STATE bindingTableState[4];
    memset(bindingTableState, 0, sizeof(bindingTableState));
    bindingTableState[btIndex].getRawData(0) = imageOffset;
    info.heapInfo.pSsh = reinterpret_cast<void *>(bindingTableState);

    std::vector<IGIL_KernelCurbeParams> curbeParams;
    uint64_t tokenMask = 0;
    uint32_t firstSSHTokenIndex = 0;
    MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getDevice(0)->getHardwareInfo());

    EXPECT_NE(0u, curbeParams.size());

    // Only the first type-1024 entry is inspected.
    bool foundProperParam = false;
    for (const auto &param : curbeParams) {
        if (param.m_parameterType != 1024u) {
            continue;
        }
        EXPECT_EQ(btIndex, param.m_patchOffset);
        EXPECT_EQ(8u, param.m_parameterSize);
        EXPECT_EQ(0u, param.m_sourceOffset);
        foundProperParam = true;
        break;
    }
    EXPECT_TRUE(foundProperParam);
}

// Same setup as above but the binding table state reports Count == 0; the
// single returned type-1024 entry is then expected to carry patch offset 253.
HWTEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithBindingTableStateAndImageArgumentWhenCountIsZeroThenGetCurbeParamsReturnsMaxBTIndex) {
    using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;

    KernelInfo info;
    uint32_t imageOffset = 32;
    uint32_t btIndex = 0;
    uint32_t maxBTIndex = 253;

    info.gpuPointerSize = 8;

    SPatchImageMemoryObjectKernelArgument imageMemObjKernelArg = {};
    imageMemObjKernelArg.ArgumentNumber = 0;
    imageMemObjKernelArg.Offset = imageOffset;
    imageMemObjKernelArg.Size = 4;
    imageMemObjKernelArg.Type = iOpenCL::IMAGE_MEMORY_OBJECT_2D;
    info.storeKernelArgument(&imageMemObjKernelArg);

    SPatchBindingTableState bindingTableStateInfo = {};
    bindingTableStateInfo.Offset = 0;
    bindingTableStateInfo.Count = 0;
    info.patchInfo.bindingTableState = &bindingTableStateInfo;

    BINDING_TABLE_STATE bindingTableState[1];
    memset(bindingTableState, 0, sizeof(bindingTableState));
    bindingTableState[btIndex].getRawData(0) = imageOffset;
    info.heapInfo.pSsh = reinterpret_cast<void *>(bindingTableState);

    std::vector<IGIL_KernelCurbeParams> curbeParams;
    uint64_t tokenMask = 0;
    uint32_t firstSSHTokenIndex = 0;
    MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getDevice(0)->getHardwareInfo());

    EXPECT_EQ(1u, curbeParams.size());

    bool foundProperParam = false;
    for (const auto &param : curbeParams) {
        if (param.m_parameterType != 1024u) {
            continue;
        }
        EXPECT_EQ(maxBTIndex, param.m_patchOffset);
        EXPECT_EQ(8u, param.m_parameterSize);
        EXPECT_EQ(0u, param.m_sourceOffset);
        foundProperParam = true;
        break;
    }
    EXPECT_TRUE(foundProperParam);
}

// Fills the sampler-related offsets and checks that getCurbeParams returns one
// entry per supported sampler parameter type (token value + 100) with the
// matching patch offset.
// The sampler token is stored as argument number 1 while the offsets are
// written to kernelArgInfo[0]; two arguments therefore exist, which is why two
// extra entries are expected on top of the sampler parameters.
TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithCorrectlyFilledSamplerArgumentWhenPassedToGetCurbeParamsThenSamplerCurbeParamsAreReturned) {
    KernelInfo info;

    SPatchSamplerKernelArgument samplerMemObjKernelArg = {};
    samplerMemObjKernelArg.ArgumentNumber = 1;
    samplerMemObjKernelArg.Offset = 32;
    samplerMemObjKernelArg.Size = 4;
    samplerMemObjKernelArg.Type = iOpenCL::SAMPLER_OBJECT_TEXTURE;

    const uint32_t offsetSamplerAddressingMode = 4;
    const uint32_t offsetSamplerNormalizedCoords = 8;
    const uint32_t offsetSamplerSnapWa = 12;
    const uint32_t offsetObjectID = 28;

    info.storeKernelArgument(&samplerMemObjKernelArg);

    auto &samplerArg = info.kernelArgInfo[0];
    samplerArg.isSampler = true;
    samplerArg.name = "smp";
    samplerArg.offsetSamplerAddressingMode = offsetSamplerAddressingMode;
    samplerArg.offsetSamplerNormalizedCoords = offsetSamplerNormalizedCoords;
    samplerArg.offsetSamplerSnapWa = offsetSamplerSnapWa;
    samplerArg.offsetObjectId = offsetObjectID;

    std::vector<IGIL_KernelCurbeParams> curbeParams;
    uint64_t tokenMask = 0;
    uint32_t firstSSHTokenIndex = 0;
    MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getDevice(0)->getHardwareInfo());

    std::vector<uint32_t> supportedSamplerParamTypes = {iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE,
                                                        iOpenCL::DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS,
                                                        iOpenCL::DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED,
                                                        iOpenCL::DATA_PARAMETER_OBJECT_ID};
    std::sort(supportedSamplerParamTypes.begin(), supportedSamplerParamTypes.end());

    const size_t paramCount = supportedSamplerParamTypes.size();
    EXPECT_EQ(paramCount + 2, curbeParams.size()); // + 2 for 2 arguments' Binding Table Index params stored

    const size_t checkedCount = std::min(paramCount, curbeParams.size());
    for (size_t i = 0; i < checkedCount; i++) {
        const auto &param = curbeParams[i];

        EXPECT_EQ(supportedSamplerParamTypes[i] + 100, param.m_parameterType);
        EXPECT_EQ(sizeof(uint32_t), param.m_parameterSize);

        switch (param.m_parameterType - 100) {
        case iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE:
            EXPECT_EQ(offsetSamplerAddressingMode, param.m_patchOffset);
            break;
        case iOpenCL::DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS:
            EXPECT_EQ(offsetSamplerNormalizedCoords, param.m_patchOffset);
            break;
        case iOpenCL::DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED:
            EXPECT_EQ(offsetSamplerSnapWa, param.m_patchOffset);
            break;
        case iOpenCL::DATA_PARAMETER_OBJECT_ID:
            EXPECT_EQ(offsetObjectID, param.m_patchOffset);
            break;
        }
    }
}

// When the data-parameter buffer is registered both in
// patchInfo.dataParameterBuffers and as a kernel argument, getCurbeParams must
// return a DATA_PARAMETER_KERNEL_ARGUMENT entry with the token's size/offsets,
// set the corresponding bit in the token mask, and return a type-1024 entry
// whose fields are all zero.
TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithBufferAndDataParameterBuffersTokensWhenPassedToGetCurbeParamsThenCorrectCurbeParamsWithProperSizesAreReturned) {
    KernelInfo info;

    SPatchDataParameterBuffer dataParameterBuffer = {};
    dataParameterBuffer.ArgumentNumber = 0;
    dataParameterBuffer.DataSize = 8;
    dataParameterBuffer.Offset = 40;
    dataParameterBuffer.SourceOffset = 0;
    dataParameterBuffer.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT;

    info.patchInfo.dataParameterBuffers.push_back(&dataParameterBuffer);
    info.storeKernelArgument(&dataParameterBuffer);

    std::vector<IGIL_KernelCurbeParams> curbeParams;
    uint64_t tokenMask = 0;
    uint32_t firstSSHTokenIndex = 0;
    MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getDevice(0)->getHardwareInfo());

    EXPECT_LT(1u, curbeParams.size());

    bool kernelArgumentTokenFound = false;
    bool kernelArgumentSSHParamFound = false;
    for (const auto &param : curbeParams) {
        if (iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT == param.m_parameterType) {
            kernelArgumentTokenFound = true;
            EXPECT_EQ(0u, param.m_sourceOffset);
            EXPECT_EQ(8u, param.m_parameterSize);
            EXPECT_EQ(40u, param.m_patchOffset);
        }
        // kernel arg SSH param
        if (1024u == param.m_parameterType) {
            kernelArgumentSSHParamFound = true;
            EXPECT_EQ(0u, param.m_sourceOffset);
            EXPECT_EQ(0u, param.m_parameterSize);
            EXPECT_EQ(0u, param.m_patchOffset);
        }
    }

    EXPECT_TRUE(kernelArgumentTokenFound);
    EXPECT_TRUE((tokenMask & (static_cast<uint64_t>(1) << iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT)) > 0);
    EXPECT_TRUE(kernelArgumentSSHParamFound);
}

// When the buffer is stored only as a kernel argument (not added to
// patchInfo.dataParameterBuffers), no DATA_PARAMETER_KERNEL_ARGUMENT entry may
// be returned and its bit in the token mask must stay clear.
TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithBufferAndNoDataParameterBuffersTokenWhenPassedToGetCurbeParamsThenCurbeParamForDataKernelArgumentTokenIsNotReturned) {
    KernelInfo info;

    SPatchDataParameterBuffer dataParameterBuffer = {};
    dataParameterBuffer.ArgumentNumber = 0;
    dataParameterBuffer.DataSize = 8;
    dataParameterBuffer.Offset = 40;
    dataParameterBuffer.SourceOffset = 0;
    dataParameterBuffer.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT;

    info.storeKernelArgument(&dataParameterBuffer);

    std::vector<IGIL_KernelCurbeParams> curbeParams;
    uint64_t tokenMask = 0;
    uint32_t firstSSHTokenIndex = 0;
    MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getDevice(0)->getHardwareInfo());

    bool kernelArgumentTokenFound = false;
    for (const auto &param : curbeParams) {
        if (iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT == param.m_parameterType) {
            kernelArgumentTokenFound = true;
        }
    }

    EXPECT_FALSE(kernelArgumentTokenFound);
    EXPECT_TRUE((tokenMask & (static_cast<uint64_t>(1) << iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT)) == 0);
}

// An argument with a non-zero slmAlignment must produce a
// DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES entry whose source
// offset is the alignment, whose size is 0 and whose patch offset is the
// cross-thread offset; the matching token-mask bit must be set.
TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithLocalMemoryParameterWhenPassedToGetCurbeParamsThenCurbeParamForLocalMemoryArgIsReturned) {
    KernelInfo info;

    SPatchDataParameterBuffer dataParameterBuffer = {};

    const uint32_t crossThreadOffset = 10;
    const uint32_t dataSize = 8;
    const uint32_t slmAlignment = 80;

    dataParameterBuffer.ArgumentNumber = 0;
    dataParameterBuffer.DataSize = dataSize;
    dataParameterBuffer.Offset = crossThreadOffset;
    dataParameterBuffer.SourceOffset = 0;
    dataParameterBuffer.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT;

    info.storeKernelArgument(&dataParameterBuffer);

    KernelArgPatchInfo kernelArgPatchInfo;
    kernelArgPatchInfo.crossthreadOffset = crossThreadOffset;
    kernelArgPatchInfo.size = dataSize;
    kernelArgPatchInfo.sourceOffset = 76;

    info.kernelArgInfo[0].slmAlignment = slmAlignment;
    // Overwrites the first patch-info entry of argument 0.
    info.kernelArgInfo[0].kernelArgPatchInfoVector[0] = kernelArgPatchInfo;

    std::vector<IGIL_KernelCurbeParams> curbeParams;
    uint64_t tokenMask = 0;
    uint32_t firstSSHTokenIndex = 0;
    MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getDevice(0)->getHardwareInfo());

    bool localMemoryTokenFound = false;
    for (const auto &param : curbeParams) {
        if (iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES == param.m_parameterType) {
            localMemoryTokenFound = true;
            EXPECT_EQ(slmAlignment, param.m_sourceOffset);
            EXPECT_EQ(0u, param.m_parameterSize);
            EXPECT_EQ(crossThreadOffset, param.m_patchOffset);
        }
    }

    EXPECT_TRUE(localMemoryTokenFound);
    EXPECT_TRUE((tokenMask & (static_cast<uint64_t>(1) << iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES)) > 0);
}

// With slmAlignment == 0 no local-memory entry may be returned and the
// matching token-mask bit must stay clear.
TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithoutLocalMemoryParameterWhenPassedToGetCurbeParamsThenCurbeParamForLocalMemoryArgIsNotReturned) {
    KernelInfo info;

    SPatchDataParameterBuffer dataParameterBuffer = {};

    const uint32_t crossThreadOffset = 10;
    const uint32_t dataSize = 8;
    const uint32_t slmAlignment = 0;

    dataParameterBuffer.ArgumentNumber = 0;
    dataParameterBuffer.DataSize = dataSize;
    dataParameterBuffer.Offset = crossThreadOffset;
    dataParameterBuffer.SourceOffset = 0;
    dataParameterBuffer.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT;

    info.storeKernelArgument(&dataParameterBuffer);

    KernelArgPatchInfo kernelArgPatchInfo;
    kernelArgPatchInfo.crossthreadOffset = crossThreadOffset;
    kernelArgPatchInfo.size = dataSize;
    kernelArgPatchInfo.sourceOffset = 76;

    info.kernelArgInfo[0].slmAlignment = slmAlignment;
    info.kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo);

    std::vector<IGIL_KernelCurbeParams> curbeParams;
    uint64_t tokenMask = 0;
    uint32_t firstSSHTokenIndex = 0;
    MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getDevice(0)->getHardwareInfo());

    bool localMemoryTokenFound = false;
    for (const auto &param : curbeParams) {
        if (iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES == param.m_parameterType) {
            localMemoryTokenFound = true;
        }
    }

    EXPECT_FALSE(localMemoryTokenFound);
    EXPECT_TRUE((tokenMask & (static_cast<uint64_t>(1) << iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES)) == 0);
}

// For every block kernel of the program, the returned entries must be ordered
// by parameter type; entries of equal type are ordered by patch offset for
// DATA_PARAMETER_LOCAL_WORK_SIZE and by source offset otherwise. The first SSH
// token index must equal (entry count - argument count).
TEST_P(KernelReflectionSurfaceTest, getCurbeParamsReturnsSortedVector) {
    const std::string clVersion(pPlatform->getDevice(0)->getDeviceInfo().clVersion);
    if (clVersion.find("OpenCL 2.") == std::string::npos) {
        return;
    }

    EXPECT_TRUE(pKernel->isParentKernel);

    BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
    size_t blockCount = blockManager->getCount();
    EXPECT_NE(0u, blockCount);

    std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;

    for (size_t blockIndex = 0; blockIndex < blockCount; blockIndex++) {
        const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(blockIndex);

        uint64_t tokenMask = 0;
        uint32_t firstSSHTokenIndex = 0;
        MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());

        // Blocks whose name contains "simple_block_kernel" are exempt from the minimum-count check.
        if (pBlockInfo->name.find("simple_block_kernel") == std::string::npos) {
            EXPECT_LT(1u, curbeParamsForBlock.size());
        }

        for (size_t j = 1; j < curbeParamsForBlock.size(); j++) {
            const auto &previous = curbeParamsForBlock[j - 1];
            const auto &current = curbeParamsForBlock[j];

            EXPECT_LE(previous.m_parameterType, current.m_parameterType);

            if (previous.m_parameterType != current.m_parameterType) {
                continue;
            }
            if (previous.m_parameterType == iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_LOCAL_WORK_SIZE) {
                EXPECT_LE(previous.m_patchOffset, current.m_patchOffset);
            } else {
                EXPECT_LE(previous.m_sourceOffset, current.m_sourceOffset);
            }
        }

        EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex);
        curbeParamsForBlock.resize(0);
    }
}

// For the block whose name contains "kernel_reflection_dispatch_0", entries of
// the buffer (49), image (OBJECT_ID + 50) and sampler (OBJECT_ID + 100) types
// must all be present. The first-SSH-token relation is checked for every block.
TEST_P(KernelReflectionSurfaceTest, getCurbeParamsReturnsVectorWithExpectedParamTypes) {
    const std::string clVersion(pPlatform->getDevice(0)->getDeviceInfo().clVersion);
    if (clVersion.find("OpenCL 2.") == std::string::npos) {
        return;
    }

    EXPECT_TRUE(pKernel->isParentKernel);

    BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
    size_t blockCount = blockManager->getCount();
    EXPECT_NE(0u, blockCount);

    std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;

    for (size_t blockIndex = 0; blockIndex < blockCount; blockIndex++) {
        const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(blockIndex);

        uint64_t tokenMask = 0;
        uint32_t firstSSHTokenIndex = 0;
        MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());

        const uint32_t bufferType = 49;
        const uint32_t imageType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 50;
        const uint32_t samplerType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 100;

        bool bufferFound = false;
        bool imageFound = false;
        bool samplerFound = false;

        if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) {
            EXPECT_LT(1u, curbeParamsForBlock.size());

            for (const auto &param : curbeParamsForBlock) {
                switch (param.m_parameterType) {
                case bufferType:
                    bufferFound = true;
                    break;
                case imageType:
                    imageFound = true;
                    break;
                case samplerType:
                    samplerFound = true;
                    break;
                }
            }

            EXPECT_TRUE(bufferFound);
            EXPECT_TRUE(imageFound);
            EXPECT_TRUE(samplerFound);
        }

        EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex);
        curbeParamsForBlock.resize(0);
    }
}

// For the block whose name contains "kernel_reflection_dispatch_0", at least
// one of bits 63, 50 and 51 must be set in the returned token mask.
TEST_P(KernelReflectionSurfaceTest, getCurbeParamsReturnsTokenMask) {
    const std::string clVersion(pPlatform->getDevice(0)->getDeviceInfo().clVersion);
    if (clVersion.find("OpenCL 2.") == std::string::npos) {
        return;
    }

    EXPECT_TRUE(pKernel->isParentKernel);

    BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
    size_t blockCount = blockManager->getCount();
    EXPECT_NE(0u, blockCount);

    std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;

    for (size_t blockIndex = 0; blockIndex < blockCount; blockIndex++) {
        const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(blockIndex);

        uint64_t tokenMask = 0;
        uint32_t firstSSHTokenIndex = 0;
        MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());

        if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) {
            EXPECT_LT(1u, curbeParamsForBlock.size());

            const uint64_t bufferToken = static_cast<uint64_t>(1) << 63;
            const uint64_t imageToken = static_cast<uint64_t>(1) << 50;
            const uint64_t samplerToken = static_cast<uint64_t>(1) << 51;

            uint64_t expectedTokens = bufferToken | imageToken | samplerToken;
            EXPECT_NE(0u, tokenMask & expectedTokens);
        }

        curbeParamsForBlock.resize(0);
    }
}
// createReflectionSurface() on a kernel that is not a parent kernel must leave
// the kernel without a reflection surface.
TEST(KernelReflectionSurfaceTestSingle, CreateKernelReflectionSurfaceCalledOnNonParentKernelDoesNotCreateReflectionSurface) {
    MockDevice device(*platformDevices[0]);
    MockProgram program(*device.getExecutionEnvironment());
    KernelInfo info;
    MockKernel kernel(&program, info, device);

    EXPECT_FALSE(kernel.isParentKernel);

    kernel.createReflectionSurface();

    auto reflectionSurface = kernel.getKernelReflectionSurface();
    EXPECT_EQ(nullptr, reflectionSurface);
}

// Same as above, with the ForceDispatchScheduler debug flag enabled for the
// duration of the test (restored by DebugManagerStateRestore).
TEST(KernelReflectionSurfaceTestSingle, CreateKernelReflectionSurfaceCalledOnNonSchedulerKernelWithForcedSchedulerDispatchDoesNotCreateKRS) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.ForceDispatchScheduler.set(true);

    MockDevice device(*platformDevices[0]);
    MockProgram program(*device.getExecutionEnvironment());
    KernelInfo info;
    MockKernel kernel(&program, info, device);

    EXPECT_FALSE(kernel.isParentKernel);

    kernel.createReflectionSurface();

    auto reflectionSurface = kernel.getKernelReflectionSurface();
    EXPECT_EQ(nullptr, reflectionSurface);
}

// A parent kernel (HasDeviceEnqueue set) without kernel arguments gets a
// reflection surface; after patching, the default-queue, device-queue and
// event-pool records exposed by the mock helper keep the undefined offset and
// size 0.
// NOTE(review): the template arguments on std::unique_ptr and
// createWithNewExecutionEnvironment were reconstructed as MockDevice - confirm.
TEST(KernelReflectionSurfaceTestSingle, ObtainKernelReflectionSurfaceWithoutKernelArgs) {
    MockContext context;
    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockProgram program(*device->getExecutionEnvironment());

    // Handed to program.addBlockKernel() below.
    // NOTE(review): presumably the program takes ownership - confirm.
    KernelInfo *blockInfo = new KernelInfo;
    KernelInfo &info = *blockInfo;

    cl_queue_properties properties[1] = {0};
    DeviceQueue devQueue(&context, device.get(), properties[0]);

    SPatchExecutionEnvironment environment = {};
    environment.HasDeviceEnqueue = 1;
    info.patchInfo.executionEnvironment = &environment;

    // Value-initialized so fields not assigned in this test are zero rather than indeterminate.
    SKernelBinaryHeaderCommon kernelHeader = {};
    info.heapInfo.pKernelHeader = &kernelHeader;

    SPatchDataParameterStream dataParameterStream = {};
    dataParameterStream.Size = 0;
    dataParameterStream.DataParameterStreamSize = 0;
    info.patchInfo.dataParameterStream = &dataParameterStream;

    SPatchBindingTableState bindingTableState = {};
    bindingTableState.Count = 0;
    bindingTableState.Offset = 0;
    bindingTableState.Size = 0;
    bindingTableState.SurfaceStateOffset = 0;
    info.patchInfo.bindingTableState = &bindingTableState;

    MockKernel kernel(&program, info, *device.get());

    EXPECT_TRUE(kernel.isParentKernel);

    program.addBlockKernel(blockInfo);

    kernel.createReflectionSurface();
    auto reflectionSurface = kernel.getKernelReflectionSurface();
    EXPECT_NE(nullptr, reflectionSurface);

    kernel.patchReflectionSurface(&devQueue, nullptr);

    uint64_t undefinedOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset;

    EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::defaultQueue.offset);
    EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::devQueue.offset);
    EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::eventPool.offset);

    EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::defaultQueue.size);
    EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::devQueue.size);
    EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::eventPool.size);
}

// As above, but the block kernel has one device-queue argument patched at
// cross-thread offset 16 with size 4; after patching, only the device-queue
// record is expected to carry that offset and size.
TEST(KernelReflectionSurfaceTestSingle, ObtainKernelReflectionSurfaceWithDeviceQueueKernelArg) {
    MockContext context;
    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockProgram program(*device->getExecutionEnvironment());

    // Handed to program.addBlockKernel() below.
    // NOTE(review): presumably the program takes ownership - confirm.
    KernelInfo *blockInfo = new KernelInfo;
    KernelInfo &info = *blockInfo;

    cl_queue_properties properties[1] = {0};
    DeviceQueue devQueue(&context, device.get(), properties[0]);

    uint32_t devQueueCurbeOffset = 16;
    uint32_t devQueueCurbeSize = 4;

    SPatchExecutionEnvironment environment = {};
    environment.HasDeviceEnqueue = 1;
    info.patchInfo.executionEnvironment = &environment;

    // Value-initialized so fields not assigned in this test are zero rather than indeterminate.
    SKernelBinaryHeaderCommon kernelHeader = {};
    info.heapInfo.pKernelHeader = &kernelHeader;

    SPatchDataParameterStream dataParameterStream = {};
    dataParameterStream.Size = 0;
    dataParameterStream.DataParameterStreamSize = 0;
    info.patchInfo.dataParameterStream = &dataParameterStream;

    SPatchBindingTableState bindingTableState = {};
    bindingTableState.Count = 0;
    bindingTableState.Offset = 0;
    bindingTableState.Size = 0;
    bindingTableState.SurfaceStateOffset = 0;
    info.patchInfo.bindingTableState = &bindingTableState;

    KernelArgInfo argInfo;
    argInfo.isDeviceQueue = true;

    info.kernelArgInfo.resize(1);
    info.kernelArgInfo[0] = argInfo;
    info.kernelArgInfo[0].kernelArgPatchInfoVector.resize(1);
    info.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = devQueueCurbeOffset;
    info.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = devQueueCurbeSize;

    MockKernel kernel(&program, info, *device.get());

    EXPECT_TRUE(kernel.isParentKernel);

    program.addBlockKernel(blockInfo);

    kernel.createReflectionSurface();
    auto reflectionSurface = kernel.getKernelReflectionSurface();
    EXPECT_NE(nullptr, reflectionSurface);

    kernel.patchReflectionSurface(&devQueue, nullptr);

    uint64_t undefinedOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset;

    EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::defaultQueue.offset);
    EXPECT_EQ(devQueueCurbeOffset, MockKernel::ReflectionSurfaceHelperPublic::devQueue.offset);
    EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::eventPool.offset);

    EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::defaultQueue.size);
    EXPECT_EQ(4u, MockKernel::ReflectionSurfaceHelperPublic::devQueue.size);
    EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::eventPool.size);
}

// Recomputes the expected reflection surface size from the block kernels
// (header + address data, per-block kernel data, constant buffers, curbe
// params, parent image/sampler params, sampler state and border color) and
// compares it with the surface created by the kernel; then sanity-checks the
// header and kernel-data fields for being non-zero.
// Only runs when the device reports an "OpenCL 2." version string.
// NOTE(review): the element types of the std::vector declarations and the
// cast targets were reconstructed from usage - confirm.
TEST_P(KernelReflectionSurfaceTest, CreateKernelReflectionSurface) {
    const std::string clVersion(pPlatform->getDevice(0)->getDeviceInfo().clVersion);
    if (clVersion.find("OpenCL 2.") == std::string::npos) {
        return;
    }

    EXPECT_TRUE(pKernel->isParentKernel);

    BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
    size_t blockCount = blockManager->getCount();
    EXPECT_EQ(3u, blockCount);

    size_t maxConstantBufferSize = 0;

    // Only the parent kernel named "kernel_reflection" is expected to contribute one image and one sampler.
    size_t parentImageCount = 0;
    size_t parentSamplerCount = 0;
    if (pKernel->getKernelInfo().name == "kernel_reflection") {
        parentImageCount = 1;
        parentSamplerCount = 1;
    }

    size_t samplerStateArrayAndBorderColorTotalSize = 0;
    size_t totalCurbeParamsSize = 0;

    std::vector<size_t> blockCurbeParamCounts(blockCount);
    std::vector<size_t> samplerStateAndBorderColorSizes(blockCount);
    std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;

    for (size_t i = 0; i < blockCount; i++) {
        const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);

        uint64_t tokenMask = 0;
        uint32_t firstSSHTokenIndex = 0;
        MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());

        blockCurbeParamCounts[i] = curbeParamsForBlock.size();

        maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast<size_t>(pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize));

        // Accumulated as an entry count here; converted to bytes after the loop.
        totalCurbeParamsSize += blockCurbeParamCounts[i];

        size_t samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(pDevice->getHardwareInfo());
        samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, Sampler::samplerStateArrayAlignment);
        samplerStateAndBorderColorSize += pBlockInfo->getBorderColorStateSize();
        samplerStateAndBorderColorSizes[i] = samplerStateAndBorderColorSize;

        samplerStateArrayAndBorderColorTotalSize += alignUp(samplerStateAndBorderColorSizes[i], sizeof(void *));
        curbeParamsForBlock.clear();
    }

    totalCurbeParamsSize *= sizeof(IGIL_KernelCurbeParams);

    size_t expectedReflectionSurfaceSize = alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) * blockCount, sizeof(void *));
    expectedReflectionSurfaceSize += alignUp(sizeof(IGIL_KernelData), sizeof(void *)) * blockCount;
    expectedReflectionSurfaceSize += (parentSamplerCount * sizeof(IGIL_SamplerParams) + maxConstantBufferSize) * blockCount +
                                     totalCurbeParamsSize +
                                     parentImageCount * sizeof(IGIL_ImageParamters) +
                                     parentSamplerCount * sizeof(IGIL_ParentSamplerParams) +
                                     samplerStateArrayAndBorderColorTotalSize;

    pKernel->createReflectionSurface();
    auto reflectionSurface = pKernel->getKernelReflectionSurface();

    // Fatal on failure: everything below dereferences the surface.
    ASSERT_NE(nullptr, reflectionSurface);
    EXPECT_EQ(expectedReflectionSurfaceSize, reflectionSurface->getUnderlyingBufferSize());

    IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());

    uint32_t parentImages = 0;
    uint32_t parentSamplers = 0;
    if (pKernel->getKernelInfo().name == "kernel_reflection") {
        parentImages = 1;
        parentSamplers = 1;
        EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentSamplerParamsOffset);
    }

    EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);
    EXPECT_EQ(parentImages, pKernelHeader->m_ParentKernelImageCount);
    EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentImageDataOffset);
    EXPECT_EQ(parentSamplers, pKernelHeader->m_ParentSamplerCount);
    EXPECT_NE(pKernelHeader->m_ParentImageDataOffset, pKernelHeader->m_ParentSamplerParamsOffset);

    // Curbe tokens
    EXPECT_NE(0u, totalCurbeParamsSize);

    for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
        // NOTE(review): m_data is not indexed by i, so every iteration inspects
        // the first address-data entry - confirm whether &m_data[i] was intended.
        IGIL_KernelAddressData *addressData = pKernelHeader->m_data;

        EXPECT_NE(0u, addressData->m_KernelDataOffset);
        EXPECT_NE(0u, addressData->m_BTSize);
        EXPECT_NE(0u, addressData->m_SSHTokensOffset);
        EXPECT_NE(0u, addressData->m_ConstantBufferOffset);
        EXPECT_NE(0u, addressData->m_BTSoffset);

        IGIL_KernelData *kernelData = reinterpret_cast<IGIL_KernelData *>(ptrOffset(pKernelHeader, static_cast<size_t>(addressData->m_KernelDataOffset)));

        EXPECT_NE_VAL(0u, kernelData->m_SIMDSize);
        EXPECT_NE_VAL(0u, kernelData->m_PatchTokensMask);
        EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeParams);
        EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeTokens);
        EXPECT_NE_VAL(0u, kernelData->m_sizeOfConstantBuffer);

        for (uint32_t j = 0; j < kernelData->m_numberOfCurbeParams; j++) {
            EXPECT_NE_VAL(0u, kernelData->m_data[j].m_parameterType);
        }
    }
}

// With nine default-constructed kernel arguments, the first SSH token index
// must equal (number of returned entries - 9).
TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithArgsWhenPassedToGetCurbeParamsThenProperFirstSSHTokenIndexIsReturned) {
    KernelInfo info;
    info.kernelArgInfo.resize(9);

    std::vector<IGIL_KernelCurbeParams> curbeParams;
    uint64_t tokenMask = 0;
    uint32_t firstSSHTokenIndex = 0;
    MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pDevice->getHardwareInfo());

    EXPECT_EQ(curbeParams.size() - 9, firstSSHTokenIndex);
}

TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithExecutionParametersWhenPassedToGetCurbeParamsThenProperCurbeParamsAreReturned) { KernelInfo info; std::vector supportedExecutionParamTypes = {iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, iOpenCL::DATA_PARAMETER_GLOBAL_WORK_SIZE, iOpenCL::DATA_PARAMETER_NUM_WORK_GROUPS, iOpenCL::DATA_PARAMETER_WORK_DIMENSIONS, iOpenCL::DATA_PARAMETER_GLOBAL_WORK_OFFSET, iOpenCL::DATA_PARAMETER_NUM_HARDWARE_THREADS, iOpenCL::DATA_PARAMETER_PARENT_EVENT, iOpenCL::DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE}; std::sort(supportedExecutionParamTypes.begin(), supportedExecutionParamTypes.end()); const uint32_t workDimOffset = 80; const uint32_t numHwThreads = 84; const uint32_t parentEventOffset = 88; const uint32_t lwsOffsets[3] = {4, 8, 12}; const uint32_t lwsOffsets2[3] = {16, 20, 24}; const uint32_t gwsOffsets[3] = {28, 32, 36}; const uint32_t numOffsets[3] = {40, 44, 48}; const uint32_t globalOffsetOffsets[3] = {52, 56, 60}; const uint32_t enqueuedLocalWorkSizeOffsets[3] = {64, 68, 72}; info.workloadInfo.localWorkSizeOffsets[0] = lwsOffsets[0]; info.workloadInfo.localWorkSizeOffsets[1] = lwsOffsets[1]; info.workloadInfo.localWorkSizeOffsets[2] = lwsOffsets[2]; info.workloadInfo.localWorkSizeOffsets2[0] = lwsOffsets2[0]; info.workloadInfo.localWorkSizeOffsets2[1] = lwsOffsets2[1]; info.workloadInfo.localWorkSizeOffsets2[2] = lwsOffsets2[2]; info.workloadInfo.globalWorkSizeOffsets[0] = gwsOffsets[0]; info.workloadInfo.globalWorkSizeOffsets[1] = gwsOffsets[1]; info.workloadInfo.globalWorkSizeOffsets[2] = gwsOffsets[2]; info.workloadInfo.numWorkGroupsOffset[0] = numOffsets[0]; info.workloadInfo.numWorkGroupsOffset[1] = numOffsets[1]; info.workloadInfo.numWorkGroupsOffset[2] = numOffsets[2]; info.workloadInfo.globalWorkOffsetOffsets[0] = globalOffsetOffsets[0]; info.workloadInfo.globalWorkOffsetOffsets[1] = 
globalOffsetOffsets[1]; info.workloadInfo.globalWorkOffsetOffsets[2] = globalOffsetOffsets[2]; info.workloadInfo.enqueuedLocalWorkSizeOffsets[0] = enqueuedLocalWorkSizeOffsets[0]; info.workloadInfo.enqueuedLocalWorkSizeOffsets[1] = enqueuedLocalWorkSizeOffsets[1]; info.workloadInfo.enqueuedLocalWorkSizeOffsets[2] = enqueuedLocalWorkSizeOffsets[2]; info.workloadInfo.workDimOffset = workDimOffset; // NUM_HARDWARE_THREADS unsupported EXPECT_TRUE(numHwThreads > 0u); info.workloadInfo.parentEventOffset = parentEventOffset; std::vector curbeParams; uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getDevice(0)->getHardwareInfo()); EXPECT_LE(supportedExecutionParamTypes.size(), curbeParams.size()); uint32_t foundParams = 0; for (uint32_t i = 0; i < supportedExecutionParamTypes.size(); i++) { foundParams = 0; for (uint32_t j = 0; j < curbeParams.size(); j++) { if (supportedExecutionParamTypes[i] == curbeParams[j].m_parameterType) { foundParams++; uint32_t index = curbeParams[j].m_sourceOffset / sizeof(uint32_t); switch (curbeParams[j].m_parameterType) { case iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE: if (j < 3) { EXPECT_EQ(lwsOffsets[index], curbeParams[j].m_patchOffset); } else { EXPECT_EQ(lwsOffsets2[index], curbeParams[j].m_patchOffset); } break; case iOpenCL::DATA_PARAMETER_GLOBAL_WORK_SIZE: EXPECT_EQ(gwsOffsets[index], curbeParams[j].m_patchOffset); break; case iOpenCL::DATA_PARAMETER_NUM_WORK_GROUPS: EXPECT_EQ(numOffsets[index], curbeParams[j].m_patchOffset); break; case iOpenCL::DATA_PARAMETER_GLOBAL_WORK_OFFSET: EXPECT_EQ(globalOffsetOffsets[index], curbeParams[j].m_patchOffset); break; case iOpenCL::DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE: EXPECT_EQ(enqueuedLocalWorkSizeOffsets[index], curbeParams[j].m_patchOffset); break; } } } switch (supportedExecutionParamTypes[i]) { case iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE: EXPECT_EQ(6u, foundParams) 
<< "Parameter token: " << supportedExecutionParamTypes[i]; break; case iOpenCL::DATA_PARAMETER_GLOBAL_WORK_SIZE: case iOpenCL::DATA_PARAMETER_NUM_WORK_GROUPS: case iOpenCL::DATA_PARAMETER_GLOBAL_WORK_OFFSET: case iOpenCL::DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE: EXPECT_EQ(3u, foundParams) << "Parameter token: " << supportedExecutionParamTypes[i]; break; } } for (uint32_t i = 0; i < supportedExecutionParamTypes.size(); i++) { foundParams = 0; for (uint32_t j = 0; j < curbeParams.size(); j++) { if (supportedExecutionParamTypes[i] == curbeParams[j].m_parameterType) { switch (curbeParams[j].m_parameterType) { case iOpenCL::DATA_PARAMETER_PARENT_EVENT: EXPECT_EQ(parentEventOffset, curbeParams[j].m_patchOffset); foundParams++; break; case iOpenCL::DATA_PARAMETER_WORK_DIMENSIONS: EXPECT_EQ(workDimOffset, curbeParams[j].m_patchOffset); foundParams++; break; } } } switch (supportedExecutionParamTypes[i]) { case iOpenCL::DATA_PARAMETER_PARENT_EVENT: case iOpenCL::DATA_PARAMETER_WORK_DIMENSIONS: EXPECT_EQ(1u, foundParams); break; } } uint64_t expectedTokens = 0; for (uint32_t i = 0; i < supportedExecutionParamTypes.size(); i++) { expectedTokens = (uint64_t)1 << supportedExecutionParamTypes[i]; if (supportedExecutionParamTypes[i] != iOpenCL::DATA_PARAMETER_NUM_HARDWARE_THREADS) { EXPECT_TRUE((tokenMask & expectedTokens) > 0) << "Pramater Token: " << supportedExecutionParamTypes[i]; } } } static const char *binaryFile = "simple_block_kernel"; static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"}; INSTANTIATE_TEST_CASE_P(KernelReflectionSurfaceTest, KernelReflectionSurfaceTest, ::testing::Combine( ::testing::Values(binaryFile), ::testing::ValuesIn(KernelNames))); TEST_P(KernelReflectionSurfaceWithQueueTest, ObtainKernelReflectionSurfacePatchesBlocksCurbe) { if (std::string(pPlatform->getDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); size_t blockCount = 
blockManager->getCount(); EXPECT_NE(0u, blockCount); std::vector curbeParamsForBlock; pKernel->createReflectionSurface(); pKernel->patchReflectionSurface(pDevQueue, nullptr); auto *reflectionSurface = pKernel->getKernelReflectionSurface(); ASSERT_NE(nullptr, reflectionSurface); void *reflectionSurfaceMemory = reflectionSurface->getUnderlyingBuffer(); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels); for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); IGIL_KernelAddressData *addressData = pKernelHeader->m_data; EXPECT_NE(0u, addressData[i].m_ConstantBufferOffset); void *pCurbe = ptrOffset(reflectionSurfaceMemory, (size_t)(addressData[i].m_ConstantBufferOffset)); if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface) { auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset); if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint32_t)) { uint32_t *patchedValue = static_cast(patchedPointer); uint64_t patchedValue64 = *patchedValue; EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), patchedValue64); } else if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint64_t)) { uint64_t *patchedValue = static_cast(patchedPointer); EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), *patchedValue); } } if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset); if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint32_t)) { uint32_t *patchedValue = static_cast(patchedPointer); uint64_t patchedValue64 = *patchedValue; EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), 
patchedValue64); } else if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint64_t)) { uint64_t *patchedValue = static_cast(patchedPointer); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), *patchedValue); } } if (pBlockInfo->kernelArgInfo.size() > 0) { for (uint32_t i = 0; i < pBlockInfo->kernelArgInfo.size(); i++) { if (pBlockInfo->kernelArgInfo[i].isDeviceQueue) { auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->kernelArgInfo[i].kernelArgPatchInfoVector[0].crossthreadOffset); if (pBlockInfo->kernelArgInfo[i].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { uint32_t *patchedValue = static_cast(patchedPointer); uint64_t patchedValue64 = *patchedValue; EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), patchedValue64); } else if (pBlockInfo->kernelArgInfo[i].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { uint64_t *patchedValue = static_cast(patchedPointer); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), *patchedValue); } } } } } } } TEST_P(KernelReflectionSurfaceWithQueueTest, ObtainKernelReflectionSurfaceSetsParentImageAndSamplersParams) { if (std::string(pPlatform->getDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); size_t blockCount = blockManager->getCount(); EXPECT_NE(0u, blockCount); std::vector curbeParamsForBlock; std::unique_ptr image3d(ImageHelper::create(context)); std::unique_ptr sampler(new MockSampler(context, true, (cl_addressing_mode)CL_ADDRESS_CLAMP_TO_EDGE, (cl_filter_mode)CL_FILTER_LINEAR)); cl_sampler samplerCl = sampler.get(); cl_mem imageCl = image3d.get(); if (pKernel->getKernelInfo().name == "kernel_reflection") { pKernel->setArgSampler(0, sizeof(cl_sampler), &samplerCl); pKernel->setArgImage(1, sizeof(cl_mem), &imageCl); } pKernel->createReflectionSurface(); auto *reflectionSurface = pKernel->getKernelReflectionSurface(); ASSERT_NE(nullptr, reflectionSurface); 
IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); IGIL_ImageParamters *pParentImageParams = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentImageDataOffset)); IGIL_ParentSamplerParams *pParentSamplerParams = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentSamplerParamsOffset)); memset(pParentImageParams, 0xff, sizeof(IGIL_ImageParamters) * pKernelHeader->m_ParentKernelImageCount); memset(pParentSamplerParams, 0xff, sizeof(IGIL_ParentSamplerParams) * pKernelHeader->m_ParentSamplerCount); pKernel->patchReflectionSurface(pDevQueue, nullptr); EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels); for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) { if (pKernelHeader->m_ParentKernelImageCount > 0) { uint32_t imageIndex = 0; for (uint32_t i = 0; i < pKernel->getKernelInfo().kernelArgInfo.size(); i++) { if (pKernel->getKernelInfo().kernelArgInfo[i].isImage) { EXPECT_EQ(pKernel->getKernelInfo().kernelArgInfo[i].offsetHeap, pParentImageParams[imageIndex].m_ObjectID); imageIndex++; } } } if (pKernelHeader->m_ParentSamplerCount > 0) { uint32_t samplerIndex = 0; for (uint32_t i = 0; i < pKernel->getKernelInfo().kernelArgInfo.size(); i++) { if (pKernel->getKernelInfo().kernelArgInfo[i].isSampler) { EXPECT_EQ(OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(pKernel->getKernelInfo().kernelArgInfo[i].offsetHeap), pParentSamplerParams[samplerIndex].m_ObjectID); samplerIndex++; } } } } } } INSTANTIATE_TEST_CASE_P(KernelReflectionSurfaceWithQueueTest, KernelReflectionSurfaceWithQueueTest, ::testing::Combine( ::testing::Values(binaryFile), ::testing::ValuesIn(KernelNames))); class ReflectionSurfaceHelperTest : public testing::TestWithParam> { protected: ReflectionSurfaceHelperTest() { } void SetUp() override { } void TearDown() override { } }; TEST_P(ReflectionSurfaceHelperTest, ReflectionSurfaceHelperCompareFunction) { IGIL_KernelCurbeParams curbeParamFirst, curbeParamSecond; bool 
result; std::tie(curbeParamFirst, curbeParamSecond, result) = GetParam(); EXPECT_EQ(result, MockKernel::ReflectionSurfaceHelperPublic::compareFunction(curbeParamFirst, curbeParamSecond)); } // arg first, arg second, expected result static std::tuple Inputs[] = {std::make_tuple(IGIL_KernelCurbeParams{1, 0, 0, 0}, IGIL_KernelCurbeParams{1, 0, 0, 100}, true), std::make_tuple(IGIL_KernelCurbeParams{1, 0, 0, 100}, IGIL_KernelCurbeParams{1, 0, 0, 0}, false), std::make_tuple(IGIL_KernelCurbeParams{1, 0, 0, 100}, IGIL_KernelCurbeParams{100, 0, 0, 0}, true), std::make_tuple(IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 4, 100}, IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 8, 0}, true), std::make_tuple(IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 4, 0}, IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 8, 100}, true), std::make_tuple(IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 8, 100}, IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 4, 0}, false), std::make_tuple(IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 8, 0}, IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 4, 100}, false)}; INSTANTIATE_TEST_CASE_P(ReflectionSurfaceHelperTest, ReflectionSurfaceHelperTest, ::testing::ValuesIn(Inputs)); struct LocalIDPresent { bool x; bool y; bool z; bool flattend; }; class ReflectionSurfaceHelperFixture : public PlatformFixture, public ::testing::Test { protected: ReflectionSurfaceHelperFixture() { } void SetUp() override { PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } }; class ReflectionSurfaceHelperSetKernelDataTest : public testing::TestWithParam>, // LocalIDPresent, private surface size, public PlatformFixture { protected: ReflectionSurfaceHelperSetKernelDataTest() { } void SetUp() override { PlatformFixture::SetUp(); samplerStateArray.BorderColorOffset = 0x3; samplerStateArray.Count 
= 1; samplerStateArray.Offset = 5; samplerStateArray.Size = 16; samplerStateArray.Token = 1; info.patchInfo.samplerStateArray = &samplerStateArray; dataParameterStream.DataParameterStreamSize = 60; dataParameterStream.Size = 20; dataParameterStream.Token = 3; info.patchInfo.dataParameterStream = &dataParameterStream; executionEnvironment = {}; executionEnvironment.LargestCompiledSIMDSize = 16; executionEnvironment.HasBarriers = 1; info.patchInfo.executionEnvironment = &executionEnvironment; info.patchInfo.threadPayload = &threadPayload; info.patchInfo.pAllocateStatelessPrivateSurface = &privateSurface; info.reqdWorkGroupSize[0] = 4; info.reqdWorkGroupSize[1] = 8; info.reqdWorkGroupSize[2] = 2; info.workloadInfo.slmStaticSize = 1652; IGIL_KernelCurbeParams testParams[3] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}; curbeParams.push_back(testParams[0]); curbeParams.push_back(testParams[1]); curbeParams.push_back(testParams[2]); } void TearDown() override { PlatformFixture::TearDown(); } KernelInfo info; SPatchSamplerStateArray samplerStateArray; SPatchDataParameterStream dataParameterStream; SPatchExecutionEnvironment executionEnvironment; SPatchThreadPayload threadPayload; SPatchAllocateStatelessPrivateSurface privateSurface; std::vector curbeParams; }; // arg first, arg second, expected result static std::tuple InputsSetKernelData[] = {std::make_tuple(LocalIDPresent{1, 0, 0, 0}, 0), std::make_tuple(LocalIDPresent{0, 1, 0, 0}, 0), std::make_tuple(LocalIDPresent{0, 0, 1, 0}, 32), std::make_tuple(LocalIDPresent{0, 0, 0, 1}, 0), std::make_tuple(LocalIDPresent{0, 0, 0, 0}, 32)}; INSTANTIATE_TEST_CASE_P(ReflectionSurfaceHelperSetKernelDataTest, ReflectionSurfaceHelperSetKernelDataTest, ::testing::ValuesIn(InputsSetKernelData)); TEST_P(ReflectionSurfaceHelperSetKernelDataTest, setKernelData) { LocalIDPresent localIDPresent; uint32_t privateSurfaceSize; std::tie(localIDPresent, privateSurfaceSize) = GetParam(); threadPayload.LocalIDFlattenedPresent = 
localIDPresent.flattend; threadPayload.LocalIDXPresent = localIDPresent.x; threadPayload.LocalIDYPresent = localIDPresent.y; threadPayload.LocalIDZPresent = localIDPresent.z; privateSurface.PerThreadPrivateMemorySize = privateSurfaceSize; std::unique_ptr kernelDataMemory(new char[4096]); uint64_t tokenMask = 1 | 2 | 4; size_t maxConstantBufferSize = 32; size_t samplerCount = 1; size_t samplerHeapSize = alignUp(info.getSamplerStateArraySize(pPlatform->getDevice(0)->getHardwareInfo()), Sampler::samplerStateArrayAlignment) + info.getBorderColorStateSize(); uint32_t offsetInKernelDataMemory = 12; uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory, curbeParams, tokenMask, maxConstantBufferSize, samplerCount, info, pPlatform->getDevice(0)->getHardwareInfo()); IGIL_KernelData *kernelData = reinterpret_cast(kernelDataMemory.get() + offsetInKernelDataMemory); EXPECT_EQ(3u, kernelData->m_numberOfCurbeParams); EXPECT_EQ(3u, kernelData->m_numberOfCurbeTokens); EXPECT_EQ(samplerStateArray.Count, kernelData->m_numberOfSamplerStates); EXPECT_EQ(alignUp(samplerStateArray.Size, Sampler::samplerStateArrayAlignment) + samplerStateArray.Offset - samplerStateArray.BorderColorOffset, kernelData->m_SizeOfSamplerHeap); EXPECT_EQ(samplerStateArray.BorderColorOffset, kernelData->m_SamplerBorderColorStateOffsetOnDSH); EXPECT_EQ(samplerStateArray.Offset, kernelData->m_SamplerStateArrayOffsetOnDSH); EXPECT_EQ(dataParameterStream.DataParameterStreamSize, kernelData->m_sizeOfConstantBuffer); EXPECT_EQ(tokenMask, kernelData->m_PatchTokensMask); EXPECT_EQ(0u, kernelData->m_ScratchSpacePatchValue); EXPECT_EQ(executionEnvironment.LargestCompiledSIMDSize, kernelData->m_SIMDSize); EXPECT_EQ(executionEnvironment.HasBarriers, kernelData->m_HasBarriers); EXPECT_EQ(info.reqdWorkGroupSize[0], kernelData->m_RequiredWkgSizes[0]); EXPECT_EQ(info.reqdWorkGroupSize[1], kernelData->m_RequiredWkgSizes[1]); 
EXPECT_EQ(info.reqdWorkGroupSize[2], kernelData->m_RequiredWkgSizes[2]); EXPECT_EQ(info.workloadInfo.slmStaticSize, kernelData->m_InilineSLMSize); if (localIDPresent.flattend || localIDPresent.x || localIDPresent.y || localIDPresent.z) EXPECT_EQ(1u, kernelData->m_NeedLocalIDS); else EXPECT_EQ(0u, kernelData->m_NeedLocalIDS); EXPECT_EQ(0u, kernelData->m_DisablePreemption); if (privateSurfaceSize == 0) EXPECT_EQ(1u, kernelData->m_CanRunConcurently); else EXPECT_EQ(0u, kernelData->m_CanRunConcurently); size_t expectedOffset = offsetInKernelDataMemory; expectedOffset += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParams.size(), sizeof(void *)); expectedOffset += maxConstantBufferSize + alignUp(samplerHeapSize, sizeof(void *)) + samplerCount * sizeof(IGIL_SamplerParams); EXPECT_EQ(expectedOffset, offset); } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, nullExecutionEnvironment) { info.patchInfo.executionEnvironment = nullptr; std::unique_ptr kernelDataMemory(new char[4096]); std::vector curbeParams; uint64_t tokenMask = 1 | 2 | 4; size_t maxConstantBufferSize = 32; size_t samplerCount = 1; size_t samplerHeapSize = alignUp(info.getSamplerStateArraySize(pPlatform->getDevice(0)->getHardwareInfo()), Sampler::samplerStateArrayAlignment) + info.getBorderColorStateSize(); uint32_t offsetInKernelDataMemory = 0; uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory, curbeParams, tokenMask, maxConstantBufferSize, samplerCount, info, pPlatform->getDevice(0)->getHardwareInfo()); IGIL_KernelData *kernelData = reinterpret_cast(kernelDataMemory.get() + offsetInKernelDataMemory); EXPECT_EQ(0u, kernelData->m_SIMDSize); EXPECT_EQ(0u, kernelData->m_HasBarriers); size_t expectedOffset = offsetInKernelDataMemory; expectedOffset += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParams.size(), sizeof(void *)); expectedOffset += maxConstantBufferSize + 
alignUp(samplerHeapSize, sizeof(void *)) + samplerCount * sizeof(IGIL_SamplerParams); EXPECT_EQ(expectedOffset, offset); } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, nullThreadPayload) { info.patchInfo.threadPayload = nullptr; std::unique_ptr kernelDataMemory(new char[4096]); std::vector curbeParams; uint64_t tokenMask = 1 | 2 | 4; size_t maxConstantBufferSize = 32; size_t samplerCount = 1; size_t samplerHeapSize = alignUp(info.getSamplerStateArraySize(pPlatform->getDevice(0)->getHardwareInfo()), Sampler::samplerStateArrayAlignment) + info.getBorderColorStateSize(); uint32_t offsetInKernelDataMemory = 0; uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory, curbeParams, tokenMask, maxConstantBufferSize, samplerCount, info, pPlatform->getDevice(0)->getHardwareInfo()); IGIL_KernelData *kernelData = reinterpret_cast(kernelDataMemory.get() + offsetInKernelDataMemory); EXPECT_EQ(0u, kernelData->m_NeedLocalIDS); size_t expectedOffset = offsetInKernelDataMemory; expectedOffset += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParams.size(), sizeof(void *)); expectedOffset += maxConstantBufferSize + alignUp(samplerHeapSize, sizeof(void *)) + samplerCount * sizeof(IGIL_SamplerParams); EXPECT_EQ(expectedOffset, offset); } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, nullPrivateSurface) { info.patchInfo.pAllocateStatelessPrivateSurface = nullptr; std::unique_ptr kernelDataMemory(new char[4096]); std::vector curbeParams; uint64_t tokenMask = 1 | 2 | 4; size_t maxConstantBufferSize = 32; size_t samplerCount = 1; size_t samplerHeapSize = alignUp(info.getSamplerStateArraySize(pPlatform->getDevice(0)->getHardwareInfo()), Sampler::samplerStateArrayAlignment) + info.getBorderColorStateSize(); uint32_t offsetInKernelDataMemory = 0; uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory, curbeParams, tokenMask, 
maxConstantBufferSize, samplerCount, info, pPlatform->getDevice(0)->getHardwareInfo()); IGIL_KernelData *kernelData = reinterpret_cast(kernelDataMemory.get() + offsetInKernelDataMemory); EXPECT_EQ(1u, kernelData->m_CanRunConcurently); size_t expectedOffset = offsetInKernelDataMemory; expectedOffset += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParams.size(), sizeof(void *)); expectedOffset += maxConstantBufferSize + alignUp(samplerHeapSize, sizeof(void *)) + samplerCount * sizeof(IGIL_SamplerParams); EXPECT_EQ(expectedOffset, offset); } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, nullSamplerState) { info.patchInfo.samplerStateArray = nullptr; std::unique_ptr kernelDataMemory(new char[4096]); std::vector curbeParams; uint64_t tokenMask = 1 | 2 | 4; size_t maxConstantBufferSize = 32; size_t samplerCount = 1; size_t samplerHeapSize = alignUp(info.getSamplerStateArraySize(pPlatform->getDevice(0)->getHardwareInfo()), Sampler::samplerStateArrayAlignment) + info.getBorderColorStateSize(); uint32_t offsetInKernelDataMemory = 0; uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory, curbeParams, tokenMask, maxConstantBufferSize, samplerCount, info, pPlatform->getDevice(0)->getHardwareInfo()); IGIL_KernelData *kernelData = reinterpret_cast(kernelDataMemory.get() + offsetInKernelDataMemory); size_t expectedOffset = offsetInKernelDataMemory; expectedOffset += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParams.size(), sizeof(void *)); expectedOffset += maxConstantBufferSize + alignUp(samplerHeapSize, sizeof(void *)) + samplerCount * sizeof(IGIL_SamplerParams); EXPECT_EQ(0u, kernelData->m_numberOfSamplerStates); EXPECT_EQ(0u, kernelData->m_SizeOfSamplerHeap); EXPECT_EQ(expectedOffset, offset); } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, setKernelDataWithDisabledConcurrentExecutionDebugFlag) { DebugManagerStateRestore dbgRestorer; 
DebugManager.flags.DisableConcurrentBlockExecution.set(true); std::unique_ptr kernelDataMemory(new char[4096]); std::vector curbeParams; uint64_t tokenMask = 0; size_t maxConstantBufferSize = 0; size_t samplerCount = 0; uint32_t offsetInKernelDataMemory = 0; MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory, curbeParams, tokenMask, maxConstantBufferSize, samplerCount, info, pPlatform->getDevice(0)->getHardwareInfo()); IGIL_KernelData *kernelData = reinterpret_cast(kernelDataMemory.get() + offsetInKernelDataMemory); EXPECT_EQ(0u, kernelData->m_CanRunConcurently); } TEST_F(ReflectionSurfaceHelperFixture, setKernelAddressDataWithNullBindingTable) { KernelInfo info; info.patchInfo.bindingTableState = nullptr; std::unique_ptr kernelDataMemory(new char[200]); IGIL_KernelAddressData *kernalAddressData = reinterpret_cast(kernelDataMemory.get()); MockKernel::ReflectionSurfaceHelperPublic::setKernelAddressData(kernelDataMemory.get(), 0, 1, 2, 3, 4, 5, 6, info, pPlatform->getDevice(0)->getHardwareInfo()); EXPECT_EQ(1u, kernalAddressData->m_KernelDataOffset); EXPECT_EQ(2u, kernalAddressData->m_SamplerHeapOffset); EXPECT_EQ(4u, kernalAddressData->m_SamplerParamsOffset); EXPECT_EQ(3u, kernalAddressData->m_ConstantBufferOffset); EXPECT_EQ(5u, kernalAddressData->m_SSHTokensOffset); EXPECT_EQ(6u, kernalAddressData->m_BTSoffset); EXPECT_EQ(0u, kernalAddressData->m_BTSize); } TEST_F(ReflectionSurfaceHelperFixture, setKernelAddressDataWithSetBindingTable) { KernelInfo info; SPatchBindingTableState bindingTableStateInfo; bindingTableStateInfo.Offset = 0; bindingTableStateInfo.Count = 4; info.patchInfo.bindingTableState = &bindingTableStateInfo; std::unique_ptr kernelDataMemory(new char[200]); IGIL_KernelAddressData *kernalAddressData = reinterpret_cast(kernelDataMemory.get()); MockKernel::ReflectionSurfaceHelperPublic::setKernelAddressData(kernelDataMemory.get(), 0, 1, 2, 3, 4, 5, 6, info, 
pPlatform->getDevice(0)->getHardwareInfo()); EXPECT_EQ(1u, kernalAddressData->m_KernelDataOffset); EXPECT_EQ(2u, kernalAddressData->m_SamplerHeapOffset); EXPECT_EQ(4u, kernalAddressData->m_SamplerParamsOffset); EXPECT_EQ(3u, kernalAddressData->m_ConstantBufferOffset); EXPECT_EQ(5u, kernalAddressData->m_SSHTokensOffset); EXPECT_EQ(6u, kernalAddressData->m_BTSoffset); EXPECT_NE(0u, kernalAddressData->m_BTSize); } TEST_F(ReflectionSurfaceHelperFixture, patchBlocksCurbe) { KernelInfo info; info.patchInfo.bindingTableState = nullptr; std::unique_ptr refletionSurfaceMemory(new char[4096]); IGIL_KernelDataHeader *header = reinterpret_cast(refletionSurfaceMemory.get()); header->m_numberOfKernels = 2; IGIL_KernelAddressData *kernalAddressData = header->m_data; uint32_t offset = static_cast(alignUp(sizeof(IGIL_KernelDataHeader) + 2 * sizeof(IGIL_KernelAddressData) + 2 * sizeof(IGIL_KernelData), MemoryConstants::cacheLineSize)); for (uint32_t i = 0; i < 2; i++) { assert(offset < 4000); kernalAddressData[i].m_ConstantBufferOffset = offset; offset += 128; } uint64_t defaultQueueOffset = 8; uint64_t deviceQueueOffset = 16; uint64_t eventPoolOffset = 24; uint64_t printfOffset = 32; uint64_t privateSurfaceOffset = 40; uint64_t deviceQueueAddress = 0x12345678; uint64_t eventPoolAddress = 0x87654321; uint64_t printfAddress = 0x55667788; uint64_t privateSurfaceAddress = 0x19283746; for (uint32_t i = 0; i < 2; i++) { MockKernel::ReflectionSurfaceHelperPublic::patchBlocksCurbe(refletionSurfaceMemory.get(), i, defaultQueueOffset, sizeof(uint64_t), deviceQueueAddress, eventPoolOffset, sizeof(uint64_t), eventPoolAddress, deviceQueueOffset, sizeof(uint64_t), deviceQueueAddress, printfOffset, sizeof(uint64_t), printfAddress, privateSurfaceOffset, sizeof(uint64_t), privateSurfaceAddress); void *pCurbe = ptrOffset(refletionSurfaceMemory.get(), (size_t)(kernalAddressData[i].m_ConstantBufferOffset)); EXPECT_EQ(deviceQueueAddress, *static_cast(ptrOffset(pCurbe, (size_t)defaultQueueOffset))); 
EXPECT_EQ(eventPoolAddress, *static_cast(ptrOffset(pCurbe, (size_t)eventPoolOffset))); EXPECT_EQ(deviceQueueAddress, *static_cast(ptrOffset(pCurbe, (size_t)deviceQueueOffset))); EXPECT_EQ(printfAddress, *static_cast(ptrOffset(pCurbe, (size_t)printfOffset))); EXPECT_EQ(privateSurfaceAddress, *static_cast(ptrOffset(pCurbe, (size_t)privateSurfaceOffset))); } } TEST_F(ReflectionSurfaceHelperFixture, patchBlocksCurbeWithUndefinedOffsets) { KernelInfo info; info.patchInfo.bindingTableState = nullptr; std::unique_ptr refletionSurfaceMemory(new char[4096]); IGIL_KernelDataHeader *header = reinterpret_cast(refletionSurfaceMemory.get()); header->m_numberOfKernels = 2; IGIL_KernelAddressData *kernalAddressData = header->m_data; uint32_t offset = sizeof(IGIL_KernelDataHeader) + 2 * sizeof(IGIL_KernelAddressData) + 2 * sizeof(IGIL_KernelData); uint8_t pattern[100] = {0}; memset(pattern, 0, 100); memset(ptrOffset(refletionSurfaceMemory.get(), offset), 0, 200); for (uint32_t i = 0; i < 2; i++) { assert(offset < 4000); kernalAddressData[i].m_ConstantBufferOffset = offset; offset += 100; } uint64_t defaultQueueOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; uint64_t deviceQueueOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; uint64_t eventPoolOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; uint64_t printfOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; uint64_t privateSurfaceOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; uint64_t deviceQueueAddress = 0x12345678; uint64_t eventPoolAddress = 0x87654321; uint64_t printfAddress = 0x55667788; uint64_t privateSurfaceGpuAddress = 0x19283746; uint32_t privateSurfaceSize = 128; for (uint32_t i = 0; i < 2; i++) { MockKernel::ReflectionSurfaceHelperPublic::patchBlocksCurbe(refletionSurfaceMemory.get(), i, defaultQueueOffset, sizeof(uint64_t), deviceQueueAddress, eventPoolOffset, sizeof(uint64_t), eventPoolAddress, deviceQueueOffset, 
sizeof(uint64_t), deviceQueueAddress, printfOffset, sizeof(uint64_t), printfAddress, privateSurfaceOffset, privateSurfaceSize, privateSurfaceGpuAddress); void *pCurbe = ptrOffset(refletionSurfaceMemory.get(), (size_t)(kernalAddressData[i].m_ConstantBufferOffset)); // constant buffer should be intact EXPECT_EQ(0, memcmp(pattern, pCurbe, 100)); } } TEST_F(ReflectionSurfaceHelperFixture, setParentImageParams) { MockContext context; KernelInfo info; std::vector kernelArguments; std::unique_ptr image2d(ImageHelper::create(&context)); std::unique_ptr image1d(ImageHelper::create(&context)); Kernel::SimpleKernelArgInfo imgInfo; KernelArgInfo argInfo; uint32_t imageID[4] = {32, 64, 0, 0}; // Buffer Object should never be dereferenced by setParentImageParams imgInfo.type = Kernel::kernelArgType::BUFFER_OBJ; imgInfo.object = reinterpret_cast(0x0); kernelArguments.push_back(imgInfo); argInfo.offsetHeap = 0; argInfo.isBuffer = true; info.kernelArgInfo.push_back(argInfo); imgInfo.type = Kernel::kernelArgType::IMAGE_OBJ; imgInfo.object = (cl_mem)image2d.get(); kernelArguments.push_back(imgInfo); argInfo.offsetHeap = imageID[0]; argInfo.isImage = true; info.kernelArgInfo.push_back(argInfo); // Buffer Object should never be dereferenced by setParentImageParams imgInfo.type = Kernel::kernelArgType::BUFFER_OBJ; imgInfo.object = reinterpret_cast(0x0); kernelArguments.push_back(imgInfo); argInfo.offsetHeap = 0; argInfo.isBuffer = true; info.kernelArgInfo.push_back(argInfo); imgInfo.type = Kernel::kernelArgType::IMAGE_OBJ; imgInfo.object = (cl_mem)image1d.get(); kernelArguments.push_back(imgInfo); argInfo.offsetHeap = imageID[1]; argInfo.isImage = true; info.kernelArgInfo.push_back(argInfo); std::unique_ptr reflectionSurfaceMemory(new char[4096]); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurfaceMemory.get()); pKernelHeader->m_ParentImageDataOffset = 16; pKernelHeader->m_ParentKernelImageCount = 2; IGIL_ImageParamters *pImageParameters = 
reinterpret_cast(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentImageDataOffset)); MockKernel::ReflectionSurfaceHelperPublic::setParentImageParams(reflectionSurfaceMemory.get(), kernelArguments, info); auto numArgs = kernelArguments.size(); uint32_t imageIndex = 0; for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].type == Kernel::kernelArgType::IMAGE_OBJ) { const Image *image = const_cast(castToObject((cl_mem)kernelArguments[argIndex].object)); EXPECT_EQ(image->getImageDesc().image_array_size, pImageParameters->m_ArraySize); EXPECT_EQ(image->getImageDesc().image_depth, pImageParameters->m_Depth); EXPECT_EQ(image->getImageDesc().image_height, pImageParameters->m_Height); EXPECT_EQ(image->getImageDesc().image_width, pImageParameters->m_Width); EXPECT_EQ(image->getImageDesc().num_mip_levels, pImageParameters->m_NumMipLevels); EXPECT_EQ(image->getImageDesc().num_samples, pImageParameters->m_NumSamples); EXPECT_EQ(image->getImageFormat().image_channel_data_type, pImageParameters->m_ChannelDataType); EXPECT_EQ(image->getImageFormat().image_channel_data_type, pImageParameters->m_ChannelOrder); EXPECT_EQ(imageID[imageIndex], pImageParameters->m_ObjectID); pImageParameters++; imageIndex++; } } } /* Verifies ReflectionSurfaceHelperPublic::setParentSamplerParams: the argument list interleaves two null BUFFER_OBJ entries with two MockSampler arguments (heap offsets samplerID[0] and samplerID[1]); after the call, every SAMPLER_OBJ argument must be mirrored, in order, in the IGIL_ParentSamplerParams array located at m_ParentSamplerParamsOffset (addressing mode, normalized coords, snap WA value and the object ID derived from the heap offset). */ TEST_F(ReflectionSurfaceHelperFixture, setParentSamplerParams) { MockContext context; KernelInfo info; std::vector kernelArguments; std::unique_ptr sampler1(new MockSampler(&context, true, (cl_addressing_mode)CL_ADDRESS_CLAMP_TO_EDGE, (cl_filter_mode)CL_FILTER_LINEAR)); std::unique_ptr sampler2(new MockSampler(&context, false, (cl_addressing_mode)CL_ADDRESS_CLAMP, (cl_filter_mode)CL_FILTER_NEAREST)); Kernel::SimpleKernelArgInfo imgInfo; KernelArgInfo argInfo; uint32_t samplerID[4] = {32, 64, 0, 0}; // Buffer Object should never be dereferenced by setParentSamplerParams imgInfo.type = Kernel::kernelArgType::BUFFER_OBJ; imgInfo.object = reinterpret_cast(0x0); kernelArguments.push_back(imgInfo); argInfo.offsetHeap = 0; 
argInfo.isBuffer = true; info.kernelArgInfo.push_back(argInfo); imgInfo.type = Kernel::kernelArgType::SAMPLER_OBJ; imgInfo.object = (cl_sampler)sampler1.get(); kernelArguments.push_back(imgInfo); argInfo.offsetHeap = samplerID[0]; argInfo.isSampler = true; info.kernelArgInfo.push_back(argInfo); // Buffer Object should never be dereferenced by setParentSamplerParams imgInfo.type = Kernel::kernelArgType::BUFFER_OBJ; imgInfo.object = reinterpret_cast(0x0); kernelArguments.push_back(imgInfo); argInfo.offsetHeap = 0; argInfo.isBuffer = true; info.kernelArgInfo.push_back(argInfo); imgInfo.type = Kernel::kernelArgType::SAMPLER_OBJ; imgInfo.object = (cl_sampler)sampler2.get(); kernelArguments.push_back(imgInfo); argInfo.offsetHeap = samplerID[1]; argInfo.isSampler = true; info.kernelArgInfo.push_back(argInfo); std::unique_ptr reflectionSurfaceMemory(new char[4096]); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurfaceMemory.get()); pKernelHeader->m_ParentSamplerParamsOffset = 16; pKernelHeader->m_ParentSamplerCount = 2; IGIL_ParentSamplerParams *pParentSamplerParams = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentSamplerParamsOffset)); MockKernel::ReflectionSurfaceHelperPublic::setParentSamplerParams(reflectionSurfaceMemory.get(), kernelArguments, info); auto numArgs = kernelArguments.size(); uint32_t samplerIndex = 0; for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].type == Kernel::kernelArgType::SAMPLER_OBJ) { MockSampler *sampler = reinterpret_cast(castToObject((cl_sampler)kernelArguments[argIndex].object)); EXPECT_EQ((uint32_t)sampler->getAddressingMode(), pParentSamplerParams->m_AddressingMode); EXPECT_EQ((uint32_t)sampler->getNormalizedCoordinates(), pParentSamplerParams->NormalizedCoords); EXPECT_EQ(sampler->getSnapWaValue(), pParentSamplerParams->CoordinateSnapRequired); EXPECT_EQ(OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(samplerID[samplerIndex]), 
pParentSamplerParams->m_ObjectID); pParentSamplerParams++; samplerIndex++; } } } /* Verifies patchBlocksCurbeWithConstantValues with defined curbe offsets (8, 16 and 24, each with patch size 8): the global, constant and private GPU addresses must appear as three consecutive uint64_t values at constBufferOffset + 8, while the IGIL_KernelDataHeader and the IGIL_KernelData that follows it must stay byte-identical to their pre-call contents. */ TEST_F(ReflectionSurfaceHelperFixture, PatchBlocksCurbeWithConstantValuesWithDefinedOffsetsPatchesCurbeOnReflectionSurface) { IGIL_KernelDataHeader reflectionSurface[10]; IGIL_KernelDataHeader referenceHeader = {0}; memset(reflectionSurface, 0, sizeof(IGIL_KernelDataHeader) * 10); const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); reflectionSurface[0].m_numberOfKernels = 1; reflectionSurface[0].m_data[0].m_ConstantBufferOffset = constBufferOffset; reflectionSurface[0].m_data[0].m_KernelDataOffset = sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData); referenceHeader = reflectionSurface[0]; uint64_t inititalOffset = 8; uint64_t offset = inititalOffset; uint64_t globalMemoryCurbeOffset = offset; uint32_t globalMemoryPatchSize = 8; uint64_t globalMemoryGpuAddress = 0x12345678; offset += 8; uint64_t constantMemoryCurbeOffset = offset; uint32_t constantMemoryPatchSize = 8; uint64_t constantMemoryGpuAddress = 0x87654321; offset += 8; uint64_t privateMemoryCurbeOffset = offset; uint32_t privateMemoryPatchSize = 8; uint64_t privateMemoryGpuAddress = 0x22446688; MockKernel::ReflectionSurfaceHelperPublic::patchBlocksCurbeWithConstantValues((void *)reflectionSurface, 0, globalMemoryCurbeOffset, globalMemoryPatchSize, globalMemoryGpuAddress, constantMemoryCurbeOffset, constantMemoryPatchSize, constantMemoryGpuAddress, privateMemoryCurbeOffset, privateMemoryPatchSize, privateMemoryGpuAddress); uint64_t *patchedValues = reinterpret_cast(reinterpret_cast(reflectionSurface) + constBufferOffset + inititalOffset); EXPECT_EQ(patchedValues[0], globalMemoryGpuAddress); EXPECT_EQ(patchedValues[1], constantMemoryGpuAddress); EXPECT_EQ(patchedValues[2], privateMemoryGpuAddress); EXPECT_THAT(&referenceHeader, MemCompare(&reflectionSurface[0], 
sizeof(IGIL_KernelDataHeader))); IGIL_KernelData *kernelData = (IGIL_KernelData *)ptrOffset((char *)&reflectionSurface[0], sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData)); IGIL_KernelData referenceKerneldData = {0}; EXPECT_THAT(&referenceKerneldData, MemCompare(kernelData, sizeof(IGIL_KernelData))); } /* Verifies patchBlocksCurbeWithConstantValues when all three curbe offsets equal ReflectionSurfaceHelperPublic::undefinedOffset: everything from constBufferOffset to the end of the zero-filled 10-header buffer must still compare equal to zeros. */ TEST_F(ReflectionSurfaceHelperFixture, PatchBlocksCurbeWithConstantValuesWithUndefinedOffsetsDoesNotPatchCurbeOnReflectionSurface) { IGIL_KernelDataHeader reflectionSurface[10]; IGIL_KernelDataHeader referenceHeader = {0}; memset(reflectionSurface, 0, sizeof(IGIL_KernelDataHeader) * 10); const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); reflectionSurface[0].m_numberOfKernels = 1; reflectionSurface[0].m_data[0].m_ConstantBufferOffset = constBufferOffset; reflectionSurface[0].m_data[0].m_KernelDataOffset = sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData); referenceHeader = reflectionSurface[0]; uint64_t offset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; uint64_t globalMemoryCurbeOffset = offset; uint32_t globalMemoryPatchSize = 8; uint64_t globalMemoryGpuAddress = 0x12345678; uint64_t constantMemoryCurbeOffset = offset; uint32_t constantMemoryPatchSize = 8; uint64_t constantMemoryGpuAddress = 0x87654321; uint64_t privateMemoryCurbeOffset = offset; uint32_t privateMemoryPatchSize = 8; uint64_t privateMemoryGpuAddress = 0x22446688; MockKernel::ReflectionSurfaceHelperPublic::patchBlocksCurbeWithConstantValues((void *)reflectionSurface, 0, globalMemoryCurbeOffset, globalMemoryPatchSize, globalMemoryGpuAddress, constantMemoryCurbeOffset, constantMemoryPatchSize, constantMemoryGpuAddress, privateMemoryCurbeOffset, privateMemoryPatchSize, privateMemoryGpuAddress); uint64_t *patchedValues = reinterpret_cast(reinterpret_cast(reflectionSurface) + constBufferOffset); std::unique_ptr 
reference = std::unique_ptr(new char[10 * sizeof(IGIL_KernelDataHeader)]); memset(reference.get(), 0, 10 * sizeof(IGIL_KernelDataHeader)); EXPECT_THAT(patchedValues, MemCompare(reference.get(), 10 * sizeof(IGIL_KernelDataHeader) - constBufferOffset)); } /* Fixture alias used by the two printf-buffer patching tests that follow. */ typedef ParentKernelCommandQueueFixture ReflectionSurfaceTestForPrintfHandler; /* Verifies patchReflectionSurface when the PrintfHandler has gone through prepareDispatch: ReflectionSurfaceHelperPublic::printfBuffer must hold the offset and size taken from block kernel 0's pAllocateStatelessPrintfSurface patch token and the GPU address of the handler's surface. */ TEST_F(ReflectionSurfaceTestForPrintfHandler, PatchReflectionSurfacePatchesPrintfBufferWhenPrintfHandlerIsPassed) { MockContext context(device); cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(context); DeviceQueue devQueue(&context, device, properties[0]); parentKernel->createReflectionSurface(); context.setDefaultDeviceQueue(&devQueue); MockMultiDispatchInfo multiDispatchInfo(parentKernel); PrintfHandler *printfHandler = PrintfHandler::create(multiDispatchInfo, *device); printfHandler->prepareDispatch(multiDispatchInfo); parentKernel->patchReflectionSurface(&devQueue, printfHandler); uint64_t printfBufferOffset = parentKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(0)->patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset; uint64_t printfBufferPatchSize = parentKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(0)->patchInfo.pAllocateStatelessPrintfSurface->DataParamSize; EXPECT_EQ(printfBufferOffset, MockKernel::ReflectionSurfaceHelperPublic::printfBuffer.offset); EXPECT_EQ(printfHandler->getSurface()->getGpuAddress(), MockKernel::ReflectionSurfaceHelperPublic::printfBuffer.address); EXPECT_EQ(printfBufferPatchSize, MockKernel::ReflectionSurfaceHelperPublic::printfBuffer.size); delete printfHandler; delete parentKernel; } /* Same scenario, but prepareDispatch is never called on the PrintfHandler: offset and size are still expected to match the patch token, while the address is expected to be 0. */ TEST_F(ReflectionSurfaceTestForPrintfHandler, PatchReflectionSurfaceDoesNotPatchPrintfBufferWhenPrintfSurfaceIsNotCreated) { MockContext context(device); cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(context); DeviceQueue devQueue(&context, device, properties[0]); 
parentKernel->createReflectionSurface(); context.setDefaultDeviceQueue(&devQueue); MockMultiDispatchInfo multiDispatchInfo(parentKernel); PrintfHandler *printfHandler = PrintfHandler::create(multiDispatchInfo, *device); parentKernel->patchReflectionSurface(&devQueue, printfHandler); uint64_t printfBufferOffset = parentKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(0)->patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset; uint64_t printfBufferPatchSize = parentKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(0)->patchInfo.pAllocateStatelessPrintfSurface->DataParamSize; EXPECT_EQ(printfBufferOffset, MockKernel::ReflectionSurfaceHelperPublic::printfBuffer.offset); EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::printfBuffer.address); EXPECT_EQ(printfBufferPatchSize, MockKernel::ReflectionSurfaceHelperPublic::printfBuffer.size); delete printfHandler; delete parentKernel; } /* Test fixture that combines DeviceFixture with ::testing::Test; SetUp and TearDown simply forward to DeviceFixture. */ class ReflectionSurfaceConstantValuesPatchingTest : public DeviceFixture, public ::testing::Test { public: void SetUp() override { DeviceFixture::SetUp(); } void TearDown() override { DeviceFixture::TearDown(); } }; /* Verifies patchBlocksCurbeWithConstantValues on a parent kernel created with MockParentKernel::create(context, false, true, false): with a 4096-byte allocation installed as the program's global surface, the uint64_t at constBufferOffset + DataParamOffset (from pAllocateStatelessGlobalMemorySurfaceWithInitialization of block 0) must equal globalMemory->getGpuAddressToPatch(). */ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryWhenReflectionSurfaceIsPatchedWithConstantValuesThenProgramGlobalMemoryAddressIsPatched) { MockContext context(pDevice); MockParentKernel *parentKernel = MockParentKernel::create(context, false, true, false); // graphicsMemory is released by Program GraphicsAllocation *globalMemory = pDevice->getMemoryManager()->allocateGraphicsMemory(4096); parentKernel->mockProgram->setGlobalSurface(globalMemory); // Allocate reflectionSurface, 2 * 4096 should be enough GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemory(2 * 4096); parentKernel->setReflectionSurface(reflectionSurface); memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize()); const uint32_t constBufferOffset = 
(uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); pKernelHeader->m_numberOfKernels = 1; pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset; parentKernel->patchBlocksCurbeWithConstantValues(); auto *blockInfo = parentKernel->mockProgram->getBlockKernelInfo(0); uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset; uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); EXPECT_EQ(globalMemory->getGpuAddressToPatch(), *pCurbe); delete parentKernel; } /* Same global-memory setup, but any global surface on the program is freed and reset to nullptr first; the patched uint64_t is then expected to be 0. */ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryAndProgramWithoutGlobalMemortWhenReflectionSurfaceIsPatchedWithConstantValuesThenZeroAddressIsPatched) { MockContext context(pDevice); MockParentKernel *parentKernel = MockParentKernel::create(context, false, true, false); if (parentKernel->mockProgram->getGlobalSurface()) { pDevice->getMemoryManager()->freeGraphicsMemory(parentKernel->mockProgram->getGlobalSurface()); parentKernel->mockProgram->setGlobalSurface(nullptr); } // Allocate reflectionSurface, 2 * 4096 should be enough GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemory(2 * 4096); parentKernel->setReflectionSurface(reflectionSurface); memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize()); const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); pKernelHeader->m_numberOfKernels = 1; 
pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset; parentKernel->patchBlocksCurbeWithConstantValues(); auto *blockInfo = parentKernel->mockProgram->getBlockKernelInfo(0); uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset; uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); EXPECT_EQ(0u, *pCurbe); delete parentKernel; } /* Constant-memory counterpart (MockParentKernel::create(context, false, false, true)): the uint64_t at constBufferOffset + DataParamOffset (from pAllocateStatelessConstantMemorySurfaceWithInitialization of block 0) must equal constantMemory->getGpuAddressToPatch(), and the bytes before and after it (up to 4096 each) must still be zero. */ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemoryWhenReflectionSurfaceIsPatchedWithConstantValuesThenProgramConstantMemoryAddressIsPatched) { MockContext context(pDevice); MockParentKernel *parentKernel = MockParentKernel::create(context, false, false, true); // graphicsMemory is released by Program GraphicsAllocation *constantMemory = pDevice->getMemoryManager()->allocateGraphicsMemory(4096); parentKernel->mockProgram->setConstantSurface(constantMemory); // Allocate reflectionSurface, 2 * 4096 should be enough GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemory(2 * 4096); parentKernel->setReflectionSurface(reflectionSurface); memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize()); const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); pKernelHeader->m_numberOfKernels = 1; pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset; parentKernel->patchBlocksCurbeWithConstantValues(); auto *blockInfo = parentKernel->mockProgram->getBlockKernelInfo(0); uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset; uint64_t *pCurbe = (uint64_t 
*)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset); uint64_t *pCurbeToPatch = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); EXPECT_EQ(constantMemory->getGpuAddressToPatch(), *pCurbeToPatch); std::unique_ptr zeroMemory = std::unique_ptr(new char[4096]); memset(zeroMemory.get(), 0, 4096); // memory before is not written EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbe, std::min(4096u, blockPatchOffset))); // memory after is not written EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbeToPatch + 1, std::min(4096u, 8192u - constBufferOffset - blockPatchOffset - (uint32_t)sizeof(uint64_t)))); delete parentKernel; } /* Same constant-memory setup, but any constant surface on the program is freed and reset to nullptr first; the patched uint64_t is expected to be 0 and the surrounding bytes must still be zero. */ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemoryAndProgramWithoutConstantMemortWhenReflectionSurfaceIsPatchedWithConstantValuesThenZeroAddressIsPatched) { MockContext context(pDevice); MockParentKernel *parentKernel = MockParentKernel::create(context, false, false, true); if (parentKernel->mockProgram->getConstantSurface()) { pDevice->getMemoryManager()->freeGraphicsMemory(parentKernel->mockProgram->getConstantSurface()); parentKernel->mockProgram->setConstantSurface(nullptr); } // Allocate reflectionSurface, 2 * 4096 should be enough GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemory(2 * 4096); parentKernel->setReflectionSurface(reflectionSurface); memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize()); const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); pKernelHeader->m_numberOfKernels = 1; pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset; parentKernel->patchBlocksCurbeWithConstantValues(); auto *blockInfo = 
parentKernel->mockProgram->getBlockKernelInfo(0); uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset; uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset); uint64_t *pCurbeToPatch = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); EXPECT_EQ(0u, *pCurbeToPatch); std::unique_ptr zeroMemory = std::unique_ptr(new char[4096]); memset(zeroMemory.get(), 0, 4096); // memory before is not written EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbe, std::min(4096u, blockPatchOffset))); // memory after is not written EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbeToPatch + 1, std::min(4096u, 8192u - constBufferOffset - blockPatchOffset - (uint32_t)sizeof(uint64_t)))); delete parentKernel; }