diff --git a/runtime/command_stream/command_stream_receiver.h b/runtime/command_stream/command_stream_receiver.h index de08b70487..0ebe973a42 100644 --- a/runtime/command_stream/command_stream_receiver.h +++ b/runtime/command_stream/command_stream_receiver.h @@ -49,6 +49,11 @@ class CommandStreamReceiver { BatchedDispatch // dispatching is batched, explicit clFlush is required }; + enum class SamplerCacheFlushState { + samplerCacheFlushNotRequired, + samplerCacheFlushBefore, //add sampler cache flush before Walker with redescribed image + samplerCacheFlushAfter //add sampler cache flush after Walker with redescribed image + }; CommandStreamReceiver(); virtual ~CommandStreamReceiver(); @@ -115,6 +120,8 @@ class CommandStreamReceiver { // allows CommandStreamReceiver to prepopulate reserved block in instruction heap MOCKABLE_VIRTUAL void initializeInstructionHeapCmdStreamReceiverReservedBlock(LinearStream &ih) const; + void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; } + protected: // taskCount - # of tasks submitted uint32_t taskCount = 0; @@ -156,6 +163,7 @@ class CommandStreamReceiver { bool disableL3Cache = 0; uint32_t requiredScratchSize = 0; uint64_t totalMemoryUsed = 0u; + SamplerCacheFlushState samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired; }; typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(const HardwareInfo &hwInfoIn, bool withAubDump); diff --git a/runtime/command_stream/command_stream_receiver_hw.inl b/runtime/command_stream/command_stream_receiver_hw.inl index f11c64e2c0..d42268fcee 100644 --- a/runtime/command_stream/command_stream_receiver_hw.inl +++ b/runtime/command_stream/command_stream_receiver_hw.inl @@ -254,6 +254,17 @@ CompletionStamp CommandStreamReceiverHw::flushTask( DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskLevel", (uint32_t)this->taskLevel); + if (getMemoryManager()->device->getWaTable()->waSamplerCacheFlushBetweenRedescribedSurfaceReads) { + if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) { + auto pCmd = addPipeControlCmd(commandStreamCSR); + pCmd->setTextureCacheInvalidationEnable(true); + if (this->samplerCacheFlushRequired == SamplerCacheFlushState::samplerCacheFlushBefore) { + this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushAfter; + } else { + this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired; + } + } + } // Add a PC if we have a dependency on a previous walker to avoid concurrency issues. if (taskLevel > this->taskLevel) { //Some architectures (SKL) requires to have pipe control prior to pipe control with tag write, add it here @@ -272,6 +283,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( pCmd->setConstantCacheInvalidationEnable(true); pCmd->setStateCacheInvalidationEnable(true); } + auto address = (uint64_t)this->getTagAddress(); pCmd->setAddressHigh(address >> 32); pCmd->setAddress(address & (0xffffffff)); @@ -514,6 +526,11 @@ size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const Dispat size += getCmdSizeForMediaSampler(dispatchFlags.mediaSamplerRequired); size += PreemptionHelper::getRequiredCmdStreamSize(dispatchFlags.preemptionMode, this->lastPreemptionMode); + if (getMemoryManager()->device->getWaTable()->waSamplerCacheFlushBetweenRedescribedSurfaceReads) { + if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) { + size += sizeof(typename GfxFamily::PIPE_CONTROL); + } + } return size; } diff --git a/runtime/gen8/linux/hw_info_config_bdw.cpp b/runtime/gen8/linux/hw_info_config_bdw.cpp index e7d3fc55d1..d49ed06c7f 100644 --- a/runtime/gen8/linux/hw_info_config_bdw.cpp +++ b/runtime/gen8/linux/hw_info_config_bdw.cpp @@ -54,6 +54,7 @@ int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo pWaTable->waReportPerfCountUseGlobalContextID = 1; pWaTable->waUseVAlign16OnTileXYBpp816 = 1; pWaTable->waModifyVFEStateAfterGPGPUPreemption = 1; + pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1; if (hwInfo->pPlatform->usDeviceID == IBDW_GT3_HALO_MOBL_DEVICE_F0_ID || hwInfo->pPlatform->usDeviceID == IBDW_GT3_SERV_DEVICE_F0_ID) { diff --git a/runtime/gen9/linux/hw_info_config_bxt.cpp b/runtime/gen9/linux/hw_info_config_bxt.cpp index 2e2b705997..246e58a660 100644 --- a/runtime/gen9/linux/hw_info_config_bxt.cpp +++ b/runtime/gen9/linux/hw_info_config_bxt.cpp @@ -76,6 +76,7 @@ int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, pWaTable->waSendMIFLUSHBeforeVFE = 1; pWaTable->waForcePcBbFullCfgRestore = 1; pWaTable->waReportPerfCountUseGlobalContextID = 1; + pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1; int enabled = 0; int retVal = drm->getEnabledPooledEu(enabled); diff --git a/runtime/gen9/linux/hw_info_config_cfl.cpp b/runtime/gen9/linux/hw_info_config_cfl.cpp index e1222ce346..b81394c4dd 100644 --- a/runtime/gen9/linux/hw_info_config_cfl.cpp +++ b/runtime/gen9/linux/hw_info_config_cfl.cpp @@ -70,6 +70,7 @@ int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInf pWaTable->waLosslessCompressionSurfaceStride = 1; pWaTable->waFbcLinearSurfaceStride = 1; pWaTable->wa4kAlignUVOffsetNV12LinearSurface = 1; + pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1; if (hwInfo->pPlatform->usDeviceID == ICFL_GT3_ULT_28W_DEVICE_F0_ID || hwInfo->pPlatform->usDeviceID == ICFL_GT3_ULT_15W_DEVICE_F0_ID) { diff --git a/runtime/gen9/linux/hw_info_config_glk.cpp b/runtime/gen9/linux/hw_info_config_glk.cpp index 4f5968eee5..8f418cc9f9 100644 --- a/runtime/gen9/linux/hw_info_config_glk.cpp +++ b/runtime/gen9/linux/hw_info_config_glk.cpp @@ -65,6 +65,7 @@ int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInf pWaTable->waSendMIFLUSHBeforeVFE = 1; pWaTable->waForcePcBbFullCfgRestore = 1; pWaTable->waReportPerfCountUseGlobalContextID = 1; + pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1; int enabled = 0; int retVal = drm->getEnabledPooledEu(enabled); diff --git a/runtime/gen9/linux/hw_info_config_kbl.cpp b/runtime/gen9/linux/hw_info_config_kbl.cpp index 8d5015cf53..5c317c688c 100644 --- a/runtime/gen9/linux/hw_info_config_kbl.cpp +++ b/runtime/gen9/linux/hw_info_config_kbl.cpp @@ -71,6 +71,7 @@ int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, pWaTable->waLosslessCompressionSurfaceStride = 1; pWaTable->waFbcLinearSurfaceStride = 1; pWaTable->wa4kAlignUVOffsetNV12LinearSurface = 1; + pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1; if (pPlatform->usRevId <= 0x6) { pWaTable->waDisableLSQCROPERFforOCL = 1; diff --git a/runtime/gen9/linux/hw_info_config_skl.cpp b/runtime/gen9/linux/hw_info_config_skl.cpp index 7cfdc6651c..941d5bd2b5 100644 --- a/runtime/gen9/linux/hw_info_config_skl.cpp +++ b/runtime/gen9/linux/hw_info_config_skl.cpp @@ -81,6 +81,7 @@ int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, pWaTable->waEncryptedEdramOnlyPartials = 1; pWaTable->waDisableEdramForDisplayRT = 1; pWaTable->waForcePcBbFullCfgRestore = 1; + pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1; if ((1 << hwInfo->pPlatform->usRevId) & 0x0eu) { pWaTable->waCompressedResourceRequiresConstVA21 = 1; diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index 7a04472ecb..96e8e2e29d 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -882,6 +882,10 @@ inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceive } else if (Kernel::isMemObj(kernelArguments[argIndex].type)) { auto clMem = (const cl_mem)kernelArguments[argIndex].object; auto memObj = castToObjectOrAbort(clMem); + DEBUG_BREAK_IF(memObj == nullptr); + if (memObj->isImageFromImage()) { + commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); + } commandStreamReceiver.makeResident(*memObj->getGraphicsAllocation()); if (memObj->getMcsAllocation()) { commandStreamReceiver.makeResident(*memObj->getMcsAllocation()); diff --git a/runtime/mem_obj/image.cpp b/runtime/mem_obj/image.cpp index ee7c09392e..295c1c8f82 100644 --- a/runtime/mem_obj/image.cpp +++ b/runtime/mem_obj/image.cpp @@ -320,6 +320,9 @@ Image *Image::create(Context *context, if ((imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) || ((imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D) && (imageDesc->mem_object != nullptr))) { image->associatedMemObject = castToObject(imageDesc->mem_object); } + if (parentImage) { + image->isImageFromImageCreated = true; + } // Driver needs to store rowPitch passed by the app in order to synchronize the host_ptr later on map call image->setHostPtrRowPitch(imageDesc->image_row_pitch ? imageDesc->image_row_pitch : hostPtrRowPitch); image->setHostPtrSlicePitch(hostPtrSlicePitch); diff --git a/runtime/mem_obj/mem_obj.h b/runtime/mem_obj/mem_obj.h index caab777358..462aa6ecd4 100644 --- a/runtime/mem_obj/mem_obj.h +++ b/runtime/mem_obj/mem_obj.h @@ -100,8 +100,9 @@ class MemObj : public BaseObject<_cl_mem> { virtual bool allowTiling() { return false; } - CommandQueue *getAssociatedCommandQueue() { return cmdQueuePtr; }; - Device *getAssociatedDevice() { return device; }; + CommandQueue *getAssociatedCommandQueue() { return cmdQueuePtr; } + Device *getAssociatedDevice() { return device; } + bool isImageFromImage() const { return isImageFromImageCreated; } virtual cl_int unmapObj(CommandQueue *cmdQ, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { @@ -147,6 +148,7 @@ class MemObj : public BaseObject<_cl_mem> { bool isZeroCopy; bool isHostPtrSVM; bool isObjectRedescribed; + bool isImageFromImageCreated = false; MemoryManager *memoryManager = nullptr; GraphicsAllocation *graphicsAllocation; GraphicsAllocation *mcsAllocation = nullptr; diff --git a/runtime/sku_info/operations/sku_info_receiver.h b/runtime/sku_info/operations/sku_info_receiver.h index 5ced85e61d..f0468ca459 100644 --- a/runtime/sku_info/operations/sku_info_receiver.h +++ b/runtime/sku_info/operations/sku_info_receiver.h @@ -127,6 +127,7 @@ class SkuInfoReceiver { RECEIVE_WA(UseVAlign16OnTileXYBpp816); RECEIVE_WA(ModifyVFEStateAfterGPGPUPreemption); RECEIVE_WA(CSRUncachable); + RECEIVE_WA(SamplerCacheFlushBetweenRedescribedSurfaceReads); #undef RECEIVE_WA } }; diff --git a/runtime/sku_info/sku_info_base.h b/runtime/sku_info/sku_info_base.h index 8de5638c80..afe066f119 100644 --- a/runtime/sku_info/sku_info_base.h +++ b/runtime/sku_info/sku_info_base.h @@ -115,5 +115,6 @@ struct WorkaroundTableBase { bool waUseVAlign16OnTileXYBpp816 = false; bool waModifyVFEStateAfterGPGPUPreemption = false; bool waCSRUncachable = false; + bool waSamplerCacheFlushBetweenRedescribedSurfaceReads = false; }; } // namespace OCLRT diff --git a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp index 576f090a70..0909efd126 100644 --- a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp @@ -353,7 +353,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDeviceWithPreemptionSupportTh HWTEST_F(CommandStreamReceiverFlushTaskTests, higherTaskLevelShouldSendAPipeControl) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; - configureCSRHeapStatesToNonDirty(); + configureCSRtoNonDirtyState(); commandStreamReceiver.taskLevel = taskLevel / 2; flushTask(commandStreamReceiver); @@ -367,6 +367,105 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, higherTaskLevelShouldSendAPipeCont EXPECT_NE(cmdList.end(), itorPC); } +HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushNotRequiredThenDontSendPipecontrol) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + OCLRT::WorkaroundTable *waTable = nullptr; + waTable = const_cast(pDevice->getWaTable()); + + commandStreamReceiver.isPreambleSent = true; + commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired); + configureCSRtoNonDirtyState(); + commandStreamReceiver.taskLevel = taskLevel; + bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads; + waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; + flushTask(commandStreamReceiver); + + EXPECT_EQ(commandStreamReceiver.commandStream.getUsed(), 0u); + EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired, commandStreamReceiver.peekSamplerCacheFlushRequired()); + + parseCommands(commandStreamReceiver.commandStream, 0); + + auto itorPC = find(cmdList.begin(), cmdList.end()); + EXPECT_EQ(cmdList.end(), itorPC); + waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp; +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeThenSendPipecontrol) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.isPreambleSent = true; + commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); + configureCSRtoNonDirtyState(); + commandStreamReceiver.taskLevel = taskLevel; + OCLRT::WorkaroundTable *waTable = nullptr; + waTable = const_cast(pDevice->getWaTable()); + + bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads; + waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; + + flushTask(commandStreamReceiver); + + EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); + EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter, commandStreamReceiver.peekSamplerCacheFlushRequired()); + + parseCommands(commandStreamReceiver.commandStream, 0); + + auto itorPC = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itorPC); + auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*itorPC; + EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); + waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp; +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeAndWaSamplerCacheFlushBetweenRedescribedSurfaceReadsDasabledThenDontSendPipecontrol) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.isPreambleSent = true; + commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); + configureCSRtoNonDirtyState(); + commandStreamReceiver.taskLevel = taskLevel; + OCLRT::WorkaroundTable *waTable = nullptr; + waTable = const_cast(pDevice->getWaTable()); + + bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads; + waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = false; + + flushTask(commandStreamReceiver); + + EXPECT_EQ(commandStreamReceiver.commandStream.getUsed(), 0u); + EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.peekSamplerCacheFlushRequired()); + + parseCommands(commandStreamReceiver.commandStream, 0); + + auto itorPC = find(cmdList.begin(), cmdList.end()); + EXPECT_EQ(cmdList.end(), itorPC); + waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp; +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushAfterThenSendPipecontrol) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.isPreambleSent = true; + commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter); + configureCSRtoNonDirtyState(); + commandStreamReceiver.taskLevel = taskLevel; + OCLRT::WorkaroundTable *waTable = nullptr; + waTable = const_cast(pDevice->getWaTable()); + + bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads; + waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; + + flushTask(commandStreamReceiver); + + EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); + EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired, commandStreamReceiver.peekSamplerCacheFlushRequired()); + + parseCommands(commandStreamReceiver.commandStream, 0); + + auto itorPC = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itorPC); + auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*itorPC; + EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); + waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp; +} + HWTEST_F(CommandStreamReceiverFlushTaskTests, completionStampValid) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); //simulate our CQ is stale for 10 TL's @@ -1954,6 +2053,31 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleSentThenRequir EXPECT_EQ(sizeof(PIPELINE_SELECT), difference); } +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenSamplerCacheFlushSentThenRequiredCsrSizeContainsPipecontrolSize) { + typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; + UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getCommandStreamReceiver(); + CsrSizeRequestFlags csrSizeRequest = {}; + DispatchFlags flags; + commandStreamReceiver.isPreambleSent = true; + + commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); + commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired); + auto samplerCacheNotFlushedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags); + commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); + auto samplerCacheFlushBeforeSize = commandStreamReceiver.getRequiredCmdStreamSize(flags); + EXPECT_EQ(samplerCacheNotFlushedSize, samplerCacheFlushBeforeSize); + + OCLRT::WorkaroundTable *waTable = const_cast(pDevice->getWaTable()); + bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads; + waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; + + samplerCacheFlushBeforeSize = commandStreamReceiver.getRequiredCmdStreamSize(flags); + + auto difference = samplerCacheFlushBeforeSize - samplerCacheNotFlushedSize; + EXPECT_EQ(sizeof(typename FamilyType::PIPE_CONTROL), difference); + waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp; +} + HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInNonDirtyStateWhenflushTaskIsCalledThenNoFlushIsCalled) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u); diff --git a/unit_tests/gen8/linux/hw_info_config_tests.cpp b/unit_tests/gen8/linux/hw_info_config_tests.cpp index adc15a29b3..1225b16af5 100644 --- a/unit_tests/gen8/linux/hw_info_config_tests.cpp +++ b/unit_tests/gen8/linux/hw_info_config_tests.cpp @@ -135,6 +135,17 @@ BDWTEST_F(HwInfoConfigTestLinuxBdw, negativeFailedIoctlSsCount) { EXPECT_EQ(-5, ret); } +BDWTEST_F(HwInfoConfigTestLinuxBdw, configureHwInfoWaFlags) { + auto hwInfoConfig = HwInfoConfig::get(productFamily); + + drm->StoredDeviceRevID = 0; + int ret = hwInfoConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface); + EXPECT_EQ(0, ret); + EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads); + + ReleaseOutHwInfoStructs(); +} + BDWTEST_F(HwInfoConfigTestLinuxBdw, configureHwInfoEdram) { auto hwInfoConfig = HwInfoConfig::get(productFamily); diff --git a/unit_tests/gen9/bxt/linux/hw_info_config_tests.cpp b/unit_tests/gen9/bxt/linux/hw_info_config_tests.cpp index cd23fb2bcb..dc5502e402 100644 --- a/unit_tests/gen9/bxt/linux/hw_info_config_tests.cpp +++ b/unit_tests/gen9/bxt/linux/hw_info_config_tests.cpp @@ -241,6 +241,17 @@ BXTTEST_F(HwInfoConfigTestLinuxBxt, configureHwInfoInvalidMinEuInPool) { EXPECT_EQ((outHwInfo.pSysInfo->EUCount - outHwInfo.pSysInfo->EuCountPerPoolMin), outHwInfo.pSysInfo->EuCountPerPoolMax); } +BXTTEST_F(HwInfoConfigTestLinuxBxt, configureHwInfoWaFlags) { + auto hwInfoConfig = HwInfoConfig::get(productFamily); + + drm->StoredDeviceRevID = 0; + int ret = hwInfoConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface); + EXPECT_EQ(0, ret); + EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads); + + ReleaseOutHwInfoStructs(); +} + template class BxtHwInfoTests : public ::testing::Test { }; diff --git a/unit_tests/gen9/cfl/linux/hw_info_config_tests.cpp b/unit_tests/gen9/cfl/linux/hw_info_config_tests.cpp index 51972291ed..52d5ff99b9 100644 --- a/unit_tests/gen9/cfl/linux/hw_info_config_tests.cpp +++ b/unit_tests/gen9/cfl/linux/hw_info_config_tests.cpp @@ -158,6 +158,17 @@ CFLTEST_F(HwInfoConfigTestLinuxCfl, negativeFailedIoctlSsCount) { EXPECT_EQ(-5, ret); } +CFLTEST_F(HwInfoConfigTestLinuxCfl, configureHwInfoWaFlags) { + auto hwInfoConfig = HwInfoConfig::get(productFamily); + + drm->StoredDeviceRevID = 0; + int ret = hwInfoConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface); + EXPECT_EQ(0, ret); + EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads); + + ReleaseOutHwInfoStructs(); +} + CFLTEST_F(HwInfoConfigTestLinuxCfl, configureHwInfoEdram) { auto hwInfoConfig = HwInfoConfig::get(productFamily); diff --git a/unit_tests/gen9/glk/linux/hw_info_config_tests.cpp b/unit_tests/gen9/glk/linux/hw_info_config_tests.cpp index 48e8aa0539..b2c4c87a50 100644 --- a/unit_tests/gen9/glk/linux/hw_info_config_tests.cpp +++ b/unit_tests/gen9/glk/linux/hw_info_config_tests.cpp @@ -230,6 +230,17 @@ GLKTEST_F(HwInfoConfigTestLinuxGlk, configureHwInfoInvalidMinEuInPool) { EXPECT_EQ((outHwInfo.pSysInfo->EUCount - outHwInfo.pSysInfo->EuCountPerPoolMin), outHwInfo.pSysInfo->EuCountPerPoolMax); } +GLKTEST_F(HwInfoConfigTestLinuxGlk, configureHwInfoWaFlags) { + auto hwInfoConfig = HwInfoConfig::get(productFamily); + + drm->StoredDeviceRevID = 0; + int ret = hwInfoConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface); + EXPECT_EQ(0, ret); + EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads); + + ReleaseOutHwInfoStructs(); +} + template class GlkHwInfoTests : public ::testing::Test { }; diff --git a/unit_tests/gen9/kbl/linux/hw_info_config_tests.cpp b/unit_tests/gen9/kbl/linux/hw_info_config_tests.cpp index 70d58fe616..6a06022d88 100644 --- a/unit_tests/gen9/kbl/linux/hw_info_config_tests.cpp +++ b/unit_tests/gen9/kbl/linux/hw_info_config_tests.cpp @@ -211,6 +211,7 @@ KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoWaFlags) { EXPECT_EQ(1u, outHwInfo.pWaTable->waDisableLSQCROPERFforOCL); EXPECT_EQ(1u, outHwInfo.pWaTable->waEncryptedEdramOnlyPartials); EXPECT_EQ(1u, outHwInfo.pWaTable->waForcePcBbFullCfgRestore); + EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads); ReleaseOutHwInfoStructs(); @@ -220,6 +221,7 @@ KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoWaFlags) { EXPECT_EQ(0u, outHwInfo.pWaTable->waDisableLSQCROPERFforOCL); EXPECT_EQ(0u, outHwInfo.pWaTable->waEncryptedEdramOnlyPartials); EXPECT_EQ(1u, outHwInfo.pWaTable->waForcePcBbFullCfgRestore); + EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads); ReleaseOutHwInfoStructs(); @@ -229,6 +231,7 @@ KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoWaFlags) { EXPECT_EQ(0u, outHwInfo.pWaTable->waDisableLSQCROPERFforOCL); EXPECT_EQ(0u, outHwInfo.pWaTable->waEncryptedEdramOnlyPartials); EXPECT_EQ(0u, outHwInfo.pWaTable->waForcePcBbFullCfgRestore); + EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads); } KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoEdram) { diff --git a/unit_tests/gen9/skl/linux/hw_info_config_tests.cpp b/unit_tests/gen9/skl/linux/hw_info_config_tests.cpp index 30df2d838f..840a83c296 100644 --- a/unit_tests/gen9/skl/linux/hw_info_config_tests.cpp +++ b/unit_tests/gen9/skl/linux/hw_info_config_tests.cpp @@ -212,6 +212,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) { EXPECT_EQ(1u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(1u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl); EXPECT_EQ(1u, outHwInfo.pWaTable->waCSRUncachable); + EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads); ReleaseOutHwInfoStructs(); @@ -222,6 +223,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) { EXPECT_EQ(1u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(1u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl); EXPECT_EQ(1u, outHwInfo.pWaTable->waCSRUncachable); + EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads); ReleaseOutHwInfoStructs(); @@ -232,6 +234,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) { EXPECT_EQ(0u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(0u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl); EXPECT_EQ(1u, outHwInfo.pWaTable->waCSRUncachable); + EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads); ReleaseOutHwInfoStructs(); @@ -242,6 +245,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) { EXPECT_EQ(0u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(0u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl); EXPECT_EQ(0u, outHwInfo.pWaTable->waCSRUncachable); + EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads); } SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoEdram) { diff --git a/unit_tests/kernel/kernel_tests.cpp b/unit_tests/kernel/kernel_tests.cpp index a85765317b..fe47281b82 100644 --- a/unit_tests/kernel/kernel_tests.cpp +++ b/unit_tests/kernel/kernel_tests.cpp @@ -22,7 +22,9 @@ #include "runtime/command_stream/command_stream_receiver_hw.h" #include "runtime/helpers/options.h" +#include "runtime/helpers/surface_formats.h" #include "runtime/kernel/kernel.h" +#include "runtime/mem_obj/image.h" #include "runtime/memory_manager/os_agnostic_memory_manager.h" #include "runtime/os_interface/debug_settings_manager.h" #include "unit_tests/fixtures/device_fixture.h" @@ -1563,6 +1565,57 @@ TEST_F(KernelResidencyTest, test_MakeArgsResident) { delete pKernelInfo; } +HWTEST_F(KernelResidencyTest, test_MakeArgsResidentCheckImageFromImage) { + ASSERT_NE(nullptr, pDevice); + + //create NV12 image + cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS; + cl_image_format imageFormat; + imageFormat.image_channel_data_type = CL_UNORM_INT8; + imageFormat.image_channel_order = CL_NV12_INTEL; + auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat); + + cl_image_desc imageDesc = {}; + imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; + imageDesc.image_width = 16; + imageDesc.image_height = 16; + imageDesc.image_depth = 1; + + cl_int retVal; + MockContext context; + std::unique_ptr imageNV12(Image::create(&context, flags, surfaceFormat, &imageDesc, nullptr, retVal)); + + //create Y plane + imageFormat.image_channel_order = CL_R; + flags = CL_MEM_READ_ONLY; + surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat); + + imageDesc.image_width = 0; + imageDesc.image_height = 0; + imageDesc.image_depth = 0; + imageDesc.mem_object = imageNV12.get(); + + std::unique_ptr imageY(Image::create(&context, flags, surfaceFormat, &imageDesc, nullptr, retVal)); + std::unique_ptr pKernelInfo(KernelInfo::create()); + KernelArgInfo kernelArgInfo; + kernelArgInfo.isImage = true; + + pKernelInfo->kernelArgInfo.push_back(kernelArgInfo); + + std::unique_ptr program(new MockProgram); + std::unique_ptr pKernel(new MockKernel(program.get(), *pKernelInfo, *pDevice)); + + ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + pKernel->storeKernelArg(0, Kernel::IMAGE_OBJ, (cl_mem)imageY.get(), NULL, 0); + pKernel->makeResident(pDevice->getCommandStreamReceiver()); + + EXPECT_FALSE(imageNV12->isImageFromImage()); + EXPECT_TRUE(imageY->isImageFromImage()); + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.peekSamplerCacheFlushRequired()); +} + struct KernelExecutionEnvironmentTest : public Test { void SetUp() override { DeviceFixture::SetUp(); diff --git a/unit_tests/libult/ult_command_stream_receiver.h b/unit_tests/libult/ult_command_stream_receiver.h index 159fa0024a..e76ec7b9c7 100644 --- a/unit_tests/libult/ult_command_stream_receiver.h +++ b/unit_tests/libult/ult_command_stream_receiver.h @@ -79,6 +79,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { GraphicsAllocation *getPreemptionCsrAllocation() { return this->preemptionCsrAllocation; } + using SamplerCacheFlushState = CommandStreamReceiver::SamplerCacheFlushState; + SamplerCacheFlushState peekSamplerCacheFlushRequired() const { return this->samplerCacheFlushRequired; } protected: using BaseClass::CommandStreamReceiver::memoryManager; diff --git a/unit_tests/mem_obj/nv12_image_tests.cpp b/unit_tests/mem_obj/nv12_image_tests.cpp index b49b615aa1..dd8c91711b 100644 --- a/unit_tests/mem_obj/nv12_image_tests.cpp +++ b/unit_tests/mem_obj/nv12_image_tests.cpp @@ -268,7 +268,7 @@ TEST_F(Nv12ImageTest, createNV12YPlaneImage) { auto imageYPlane = createImageWithFlags(CL_MEM_READ_WRITE); ASSERT_NE(nullptr, imageYPlane); - + EXPECT_EQ(true, imageYPlane->isImageFromImage()); EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageYPlane->getGraphicsAllocation()); cl_image_desc parentDimensions, planeDimensions; @@ -311,6 +311,7 @@ TEST_F(Nv12ImageTest, createNV12UVPlaneImage) { ASSERT_NE(nullptr, imageUVPlane); + EXPECT_EQ(true, imageUVPlane->isImageFromImage()); EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageUVPlane->getGraphicsAllocation()); cl_image_desc parentDimensions, planeDimensions; @@ -359,6 +360,7 @@ TEST_F(Nv12ImageTest, createNV12UVPlaneImageWithOffsetOfUVPlane) { ASSERT_NE(nullptr, imageUVPlane); + EXPECT_EQ(true, imageUVPlane->isImageFromImage()); EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageUVPlane->getGraphicsAllocation()); cl_image_desc parentDimensions, planeDimensions; diff --git a/unit_tests/sku_info/sku_info_base_reference.h b/unit_tests/sku_info/sku_info_base_reference.h index 2663c21f02..1ff8457abc 100644 --- a/unit_tests/sku_info/sku_info_base_reference.h +++ b/unit_tests/sku_info/sku_info_base_reference.h @@ -147,6 +147,7 @@ struct SkuInfoBaseReference { refWaTable.waUseVAlign16OnTileXYBpp816 = true; refWaTable.waModifyVFEStateAfterGPGPUPreemption = true; refWaTable.waCSRUncachable = true; + refWaTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; } }; // namespace SkuInfoBaseReference } // namespace OCLRT