mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 20:39:56 +08:00
waSamplerCacheFlushBetweenRedescribedSurfaceReads fix
Change-Id: Id322f58ce6997c8710ed2d8faf4f3c3f3d2ef0c3
This commit is contained in:
@@ -49,6 +49,11 @@ class CommandStreamReceiver {
|
||||
BatchedDispatch // dispatching is batched, explicit clFlush is required
|
||||
};
|
||||
|
||||
// State of the pending sampler cache flush tracked by the command stream receiver.
// It is set through setSamplerCacheFlushRequired() and stored in samplerCacheFlushRequired.
enum class SamplerCacheFlushState {
|
||||
samplerCacheFlushNotRequired, // no sampler cache flush is pending
|
||||
samplerCacheFlushBefore, // add a sampler cache flush before the Walker that uses a redescribed image
|
||||
samplerCacheFlushAfter // add a sampler cache flush after the Walker that uses a redescribed image
|
||||
};
|
||||
CommandStreamReceiver();
|
||||
virtual ~CommandStreamReceiver();
|
||||
|
||||
@@ -115,6 +120,8 @@ class CommandStreamReceiver {
|
||||
// allows CommandStreamReceiver to prepopulate reserved block in instruction heap
|
||||
MOCKABLE_VIRTUAL void initializeInstructionHeapCmdStreamReceiverReservedBlock(LinearStream &ih) const;
|
||||
|
||||
// Stores the requested sampler cache flush state in samplerCacheFlushRequired.
void setSamplerCacheFlushRequired(SamplerCacheFlushState value) {
    samplerCacheFlushRequired = value;
}
|
||||
|
||||
protected:
|
||||
// taskCount - # of tasks submitted
|
||||
uint32_t taskCount = 0;
|
||||
@@ -156,6 +163,7 @@ class CommandStreamReceiver {
|
||||
bool disableL3Cache = 0;
|
||||
uint32_t requiredScratchSize = 0;
|
||||
uint64_t totalMemoryUsed = 0u;
|
||||
SamplerCacheFlushState samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired;
|
||||
};
|
||||
|
||||
typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(const HardwareInfo &hwInfoIn, bool withAubDump);
|
||||
|
||||
@@ -254,6 +254,17 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
|
||||
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskLevel", (uint32_t)this->taskLevel);
|
||||
|
||||
if (getMemoryManager()->device->getWaTable()->waSamplerCacheFlushBetweenRedescribedSurfaceReads) {
|
||||
if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) {
|
||||
auto pCmd = addPipeControlCmd(commandStreamCSR);
|
||||
pCmd->setTextureCacheInvalidationEnable(true);
|
||||
if (this->samplerCacheFlushRequired == SamplerCacheFlushState::samplerCacheFlushBefore) {
|
||||
this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushAfter;
|
||||
} else {
|
||||
this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add a PC if we have a dependency on a previous walker to avoid concurrency issues.
|
||||
if (taskLevel > this->taskLevel) {
|
||||
//Some architectures (SKL) requires to have pipe control prior to pipe control with tag write, add it here
|
||||
@@ -272,6 +283,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
pCmd->setConstantCacheInvalidationEnable(true);
|
||||
pCmd->setStateCacheInvalidationEnable(true);
|
||||
}
|
||||
|
||||
auto address = (uint64_t)this->getTagAddress();
|
||||
pCmd->setAddressHigh(address >> 32);
|
||||
pCmd->setAddress(address & (0xffffffff));
|
||||
@@ -514,6 +526,11 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
|
||||
size += getCmdSizeForMediaSampler(dispatchFlags.mediaSamplerRequired);
|
||||
|
||||
size += PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(dispatchFlags.preemptionMode, this->lastPreemptionMode);
|
||||
if (getMemoryManager()->device->getWaTable()->waSamplerCacheFlushBetweenRedescribedSurfaceReads) {
|
||||
if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) {
|
||||
size += sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
}
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
@@ -54,6 +54,7 @@ int HwInfoConfigHw<IGFX_BROADWELL>::configureHardwareCustom(HardwareInfo *hwInfo
|
||||
pWaTable->waReportPerfCountUseGlobalContextID = 1;
|
||||
pWaTable->waUseVAlign16OnTileXYBpp816 = 1;
|
||||
pWaTable->waModifyVFEStateAfterGPGPUPreemption = 1;
|
||||
pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1;
|
||||
|
||||
if (hwInfo->pPlatform->usDeviceID == IBDW_GT3_HALO_MOBL_DEVICE_F0_ID ||
|
||||
hwInfo->pPlatform->usDeviceID == IBDW_GT3_SERV_DEVICE_F0_ID) {
|
||||
|
||||
@@ -76,6 +76,7 @@ int HwInfoConfigHw<IGFX_BROXTON>::configureHardwareCustom(HardwareInfo *hwInfo,
|
||||
pWaTable->waSendMIFLUSHBeforeVFE = 1;
|
||||
pWaTable->waForcePcBbFullCfgRestore = 1;
|
||||
pWaTable->waReportPerfCountUseGlobalContextID = 1;
|
||||
pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1;
|
||||
|
||||
int enabled = 0;
|
||||
int retVal = drm->getEnabledPooledEu(enabled);
|
||||
|
||||
@@ -70,6 +70,7 @@ int HwInfoConfigHw<IGFX_COFFEELAKE>::configureHardwareCustom(HardwareInfo *hwInf
|
||||
pWaTable->waLosslessCompressionSurfaceStride = 1;
|
||||
pWaTable->waFbcLinearSurfaceStride = 1;
|
||||
pWaTable->wa4kAlignUVOffsetNV12LinearSurface = 1;
|
||||
pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1;
|
||||
|
||||
if (hwInfo->pPlatform->usDeviceID == ICFL_GT3_ULT_28W_DEVICE_F0_ID ||
|
||||
hwInfo->pPlatform->usDeviceID == ICFL_GT3_ULT_15W_DEVICE_F0_ID) {
|
||||
|
||||
@@ -65,6 +65,7 @@ int HwInfoConfigHw<IGFX_GEMINILAKE>::configureHardwareCustom(HardwareInfo *hwInf
|
||||
pWaTable->waSendMIFLUSHBeforeVFE = 1;
|
||||
pWaTable->waForcePcBbFullCfgRestore = 1;
|
||||
pWaTable->waReportPerfCountUseGlobalContextID = 1;
|
||||
pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1;
|
||||
|
||||
int enabled = 0;
|
||||
int retVal = drm->getEnabledPooledEu(enabled);
|
||||
|
||||
@@ -71,6 +71,7 @@ int HwInfoConfigHw<IGFX_KABYLAKE>::configureHardwareCustom(HardwareInfo *hwInfo,
|
||||
pWaTable->waLosslessCompressionSurfaceStride = 1;
|
||||
pWaTable->waFbcLinearSurfaceStride = 1;
|
||||
pWaTable->wa4kAlignUVOffsetNV12LinearSurface = 1;
|
||||
pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1;
|
||||
|
||||
if (pPlatform->usRevId <= 0x6) {
|
||||
pWaTable->waDisableLSQCROPERFforOCL = 1;
|
||||
|
||||
@@ -81,6 +81,7 @@ int HwInfoConfigHw<IGFX_SKYLAKE>::configureHardwareCustom(HardwareInfo *hwInfo,
|
||||
pWaTable->waEncryptedEdramOnlyPartials = 1;
|
||||
pWaTable->waDisableEdramForDisplayRT = 1;
|
||||
pWaTable->waForcePcBbFullCfgRestore = 1;
|
||||
pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1;
|
||||
|
||||
if ((1 << hwInfo->pPlatform->usRevId) & 0x0eu) {
|
||||
pWaTable->waCompressedResourceRequiresConstVA21 = 1;
|
||||
|
||||
@@ -882,6 +882,10 @@ inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceive
|
||||
} else if (Kernel::isMemObj(kernelArguments[argIndex].type)) {
|
||||
auto clMem = (const cl_mem)kernelArguments[argIndex].object;
|
||||
auto memObj = castToObjectOrAbort<MemObj>(clMem);
|
||||
DEBUG_BREAK_IF(memObj == nullptr);
|
||||
if (memObj->isImageFromImage()) {
|
||||
commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore);
|
||||
}
|
||||
commandStreamReceiver.makeResident(*memObj->getGraphicsAllocation());
|
||||
if (memObj->getMcsAllocation()) {
|
||||
commandStreamReceiver.makeResident(*memObj->getMcsAllocation());
|
||||
|
||||
@@ -320,6 +320,9 @@ Image *Image::create(Context *context,
|
||||
if ((imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) || ((imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D) && (imageDesc->mem_object != nullptr))) {
|
||||
image->associatedMemObject = castToObject<MemObj>(imageDesc->mem_object);
|
||||
}
|
||||
if (parentImage) {
|
||||
image->isImageFromImageCreated = true;
|
||||
}
|
||||
// Driver needs to store rowPitch passed by the app in order to synchronize the host_ptr later on map call
|
||||
image->setHostPtrRowPitch(imageDesc->image_row_pitch ? imageDesc->image_row_pitch : hostPtrRowPitch);
|
||||
image->setHostPtrSlicePitch(hostPtrSlicePitch);
|
||||
|
||||
@@ -100,8 +100,9 @@ class MemObj : public BaseObject<_cl_mem> {
|
||||
|
||||
virtual bool allowTiling() { return false; }
|
||||
|
||||
CommandQueue *getAssociatedCommandQueue() { return cmdQueuePtr; };
|
||||
Device *getAssociatedDevice() { return device; };
|
||||
CommandQueue *getAssociatedCommandQueue() { return cmdQueuePtr; }
|
||||
Device *getAssociatedDevice() { return device; }
|
||||
// Returns the isImageFromImageCreated flag of this memory object.
bool isImageFromImage() const {
    return isImageFromImageCreated;
}
|
||||
|
||||
virtual cl_int unmapObj(CommandQueue *cmdQ, void *ptr, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event) {
|
||||
@@ -147,6 +148,7 @@ class MemObj : public BaseObject<_cl_mem> {
|
||||
bool isZeroCopy;
|
||||
bool isHostPtrSVM;
|
||||
bool isObjectRedescribed;
|
||||
bool isImageFromImageCreated = false;
|
||||
MemoryManager *memoryManager = nullptr;
|
||||
GraphicsAllocation *graphicsAllocation;
|
||||
GraphicsAllocation *mcsAllocation = nullptr;
|
||||
|
||||
@@ -127,6 +127,7 @@ class SkuInfoReceiver {
|
||||
RECEIVE_WA(UseVAlign16OnTileXYBpp816);
|
||||
RECEIVE_WA(ModifyVFEStateAfterGPGPUPreemption);
|
||||
RECEIVE_WA(CSRUncachable);
|
||||
RECEIVE_WA(SamplerCacheFlushBetweenRedescribedSurfaceReads);
|
||||
#undef RECEIVE_WA
|
||||
}
|
||||
};
|
||||
|
||||
@@ -115,5 +115,6 @@ struct WorkaroundTableBase {
|
||||
bool waUseVAlign16OnTileXYBpp816 = false;
|
||||
bool waModifyVFEStateAfterGPGPUPreemption = false;
|
||||
bool waCSRUncachable = false;
|
||||
bool waSamplerCacheFlushBetweenRedescribedSurfaceReads = false;
|
||||
};
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -353,7 +353,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDeviceWithPreemptionSupportTh
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, higherTaskLevelShouldSendAPipeControl) {
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.isPreambleSent = true;
|
||||
configureCSRHeapStatesToNonDirty<FamilyType>();
|
||||
configureCSRtoNonDirtyState<FamilyType>();
|
||||
commandStreamReceiver.taskLevel = taskLevel / 2;
|
||||
|
||||
flushTask(commandStreamReceiver);
|
||||
@@ -367,6 +367,105 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, higherTaskLevelShouldSendAPipeCont
|
||||
EXPECT_NE(cmdList.end(), itorPC);
|
||||
}
|
||||
|
||||
// With the workaround enabled but no sampler cache flush requested, flushTask is
// expected to add nothing to the CSR command stream and to leave the state untouched.
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushNotRequiredThenDontSendPipecontrol) {
    using FlushState = CommandStreamReceiver::SamplerCacheFlushState;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto *workarounds = const_cast<WorkaroundTable *>(pDevice->getWaTable());

    csr.isPreambleSent = true;
    csr.setSamplerCacheFlushRequired(FlushState::samplerCacheFlushNotRequired);
    configureCSRtoNonDirtyState<FamilyType>();
    csr.taskLevel = taskLevel;

    // Force the workaround on for this test; the previous value is put back at the end.
    bool savedWaValue = workarounds->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    workarounds->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;
    flushTask(csr);

    EXPECT_EQ(csr.commandStream.getUsed(), 0u);
    EXPECT_EQ(FlushState::samplerCacheFlushNotRequired, csr.peekSamplerCacheFlushRequired());

    parseCommands<FamilyType>(csr.commandStream, 0);

    auto pipeControlIt = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), pipeControlIt);

    workarounds->waSamplerCacheFlushBetweenRedescribedSurfaceReads = savedWaValue;
}
|
||||
|
||||
// With the workaround enabled and the state set to samplerCacheFlushBefore, flushTask is
// expected to emit a PIPE_CONTROL with texture cache invalidation and to advance the
// state to samplerCacheFlushAfter.
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeThenSendPipecontrol) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore);
    configureCSRtoNonDirtyState<FamilyType>();
    commandStreamReceiver.taskLevel = taskLevel;

    auto *waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());

    // Force the workaround on for this test; the previous value is put back at the end.
    bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;

    flushTask(commandStreamReceiver);

    EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);
    EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter, commandStreamReceiver.peekSamplerCacheFlushRequired());

    parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);

    auto itorPC = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itorPC);
    // EXPECT_NE is non-fatal, so the iterator must be checked again before it is
    // dereferenced. A fatal ASSERT is avoided on purpose: it would return early and
    // skip restoring the workaround flag below.
    if (itorPC != cmdList.end()) {
        auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*itorPC;
        EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable());
    }

    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp;
}
|
||||
|
||||
// With the workaround disabled, a pending samplerCacheFlushBefore request is expected to be
// ignored by flushTask: nothing is added to the CSR command stream and the state is unchanged.
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeAndWaSamplerCacheFlushBetweenRedescribedSurfaceReadsDasabledThenDontSendPipecontrol) {
    using FlushState = CommandStreamReceiver::SamplerCacheFlushState;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.isPreambleSent = true;
    csr.setSamplerCacheFlushRequired(FlushState::samplerCacheFlushBefore);
    configureCSRtoNonDirtyState<FamilyType>();
    csr.taskLevel = taskLevel;

    auto *workarounds = const_cast<WorkaroundTable *>(pDevice->getWaTable());

    // Force the workaround off for this test; the previous value is put back at the end.
    bool savedWaValue = workarounds->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    workarounds->waSamplerCacheFlushBetweenRedescribedSurfaceReads = false;

    flushTask(csr);

    EXPECT_EQ(csr.commandStream.getUsed(), 0u);
    EXPECT_EQ(FlushState::samplerCacheFlushBefore, csr.peekSamplerCacheFlushRequired());

    parseCommands<FamilyType>(csr.commandStream, 0);

    auto pipeControlIt = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), pipeControlIt);

    workarounds->waSamplerCacheFlushBetweenRedescribedSurfaceReads = savedWaValue;
}
|
||||
|
||||
// With the workaround enabled and the state set to samplerCacheFlushAfter, flushTask is
// expected to emit a PIPE_CONTROL with texture cache invalidation and to reset the
// state to samplerCacheFlushNotRequired.
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushAfterThenSendPipecontrol) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter);
    configureCSRtoNonDirtyState<FamilyType>();
    commandStreamReceiver.taskLevel = taskLevel;

    auto *waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());

    // Force the workaround on for this test; the previous value is put back at the end.
    bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;

    flushTask(commandStreamReceiver);

    EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);
    EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired, commandStreamReceiver.peekSamplerCacheFlushRequired());

    parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);

    auto itorPC = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itorPC);
    // EXPECT_NE is non-fatal, so the iterator must be checked again before it is
    // dereferenced. A fatal ASSERT is avoided on purpose: it would return early and
    // skip restoring the workaround flag below.
    if (itorPC != cmdList.end()) {
        auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*itorPC;
        EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable());
    }

    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp;
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, completionStampValid) {
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
//simulate our CQ is stale for 10 TL's
|
||||
@@ -1954,6 +2053,31 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleSentThenRequir
|
||||
EXPECT_EQ(sizeof(PIPELINE_SELECT), difference);
|
||||
}
|
||||
|
||||
// getRequiredCmdStreamSize is expected to grow by exactly one PIPE_CONTROL when a sampler
// cache flush is pending and the workaround is enabled, and to stay unchanged when the
// workaround is disabled.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenSamplerCacheFlushSentThenRequiredCsrSizeContainsPipecontrolSize) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    CsrSizeRequestFlags csrSizeRequest = {};
    DispatchFlags flags;
    commandStreamReceiver.isPreambleSent = true;

    commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest);

    auto *waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());
    bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;

    // Disable the workaround explicitly so the first comparison does not depend on
    // whatever value the device's workaround table happens to carry.
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = false;

    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired);
    auto samplerCacheNotFlushedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags);
    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore);
    auto samplerCacheFlushBeforeSize = commandStreamReceiver.getRequiredCmdStreamSize(flags);
    EXPECT_EQ(samplerCacheNotFlushedSize, samplerCacheFlushBeforeSize);

    // Same pending request, now with the workaround enabled.
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;

    samplerCacheFlushBeforeSize = commandStreamReceiver.getRequiredCmdStreamSize(flags);

    auto difference = samplerCacheFlushBeforeSize - samplerCacheNotFlushedSize;
    EXPECT_EQ(sizeof(typename FamilyType::PIPE_CONTROL), difference);

    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp;
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInNonDirtyStateWhenflushTaskIsCalledThenNoFlushIsCalled) {
|
||||
CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
|
||||
auto &commandStream = commandQueue.getCS(4096u);
|
||||
|
||||
@@ -135,6 +135,17 @@ BDWTEST_F(HwInfoConfigTestLinuxBdw, negativeFailedIoctlSsCount) {
|
||||
EXPECT_EQ(-5, ret);
|
||||
}
|
||||
|
||||
// With drm->StoredDeviceRevID set to 0, configureHwInfo is expected to succeed and to
// report waSamplerCacheFlushBetweenRedescribedSurfaceReads as enabled.
BDWTEST_F(HwInfoConfigTestLinuxBdw, configureHwInfoWaFlags) {
    auto productConfig = HwInfoConfig::get(productFamily);
    drm->StoredDeviceRevID = 0;

    int status = productConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface);

    EXPECT_EQ(0, status);
    EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);

    ReleaseOutHwInfoStructs();
}
|
||||
|
||||
BDWTEST_F(HwInfoConfigTestLinuxBdw, configureHwInfoEdram) {
|
||||
auto hwInfoConfig = HwInfoConfig::get(productFamily);
|
||||
|
||||
|
||||
@@ -241,6 +241,17 @@ BXTTEST_F(HwInfoConfigTestLinuxBxt, configureHwInfoInvalidMinEuInPool) {
|
||||
EXPECT_EQ((outHwInfo.pSysInfo->EUCount - outHwInfo.pSysInfo->EuCountPerPoolMin), outHwInfo.pSysInfo->EuCountPerPoolMax);
|
||||
}
|
||||
|
||||
// With drm->StoredDeviceRevID set to 0, configureHwInfo is expected to succeed and to
// report waSamplerCacheFlushBetweenRedescribedSurfaceReads as enabled.
BXTTEST_F(HwInfoConfigTestLinuxBxt, configureHwInfoWaFlags) {
    auto productConfig = HwInfoConfig::get(productFamily);
    drm->StoredDeviceRevID = 0;

    int status = productConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface);

    EXPECT_EQ(0, status);
    EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);

    ReleaseOutHwInfoStructs();
}
|
||||
|
||||
template <typename T>
|
||||
class BxtHwInfoTests : public ::testing::Test {
|
||||
};
|
||||
|
||||
@@ -158,6 +158,17 @@ CFLTEST_F(HwInfoConfigTestLinuxCfl, negativeFailedIoctlSsCount) {
|
||||
EXPECT_EQ(-5, ret);
|
||||
}
|
||||
|
||||
// With drm->StoredDeviceRevID set to 0, configureHwInfo is expected to succeed and to
// report waSamplerCacheFlushBetweenRedescribedSurfaceReads as enabled.
CFLTEST_F(HwInfoConfigTestLinuxCfl, configureHwInfoWaFlags) {
    auto productConfig = HwInfoConfig::get(productFamily);
    drm->StoredDeviceRevID = 0;

    int status = productConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface);

    EXPECT_EQ(0, status);
    EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);

    ReleaseOutHwInfoStructs();
}
|
||||
|
||||
CFLTEST_F(HwInfoConfigTestLinuxCfl, configureHwInfoEdram) {
|
||||
auto hwInfoConfig = HwInfoConfig::get(productFamily);
|
||||
|
||||
|
||||
@@ -230,6 +230,17 @@ GLKTEST_F(HwInfoConfigTestLinuxGlk, configureHwInfoInvalidMinEuInPool) {
|
||||
EXPECT_EQ((outHwInfo.pSysInfo->EUCount - outHwInfo.pSysInfo->EuCountPerPoolMin), outHwInfo.pSysInfo->EuCountPerPoolMax);
|
||||
}
|
||||
|
||||
// With drm->StoredDeviceRevID set to 0, configureHwInfo is expected to succeed and to
// report waSamplerCacheFlushBetweenRedescribedSurfaceReads as enabled.
GLKTEST_F(HwInfoConfigTestLinuxGlk, configureHwInfoWaFlags) {
    auto productConfig = HwInfoConfig::get(productFamily);
    drm->StoredDeviceRevID = 0;

    int status = productConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface);

    EXPECT_EQ(0, status);
    EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);

    ReleaseOutHwInfoStructs();
}
|
||||
|
||||
template <typename T>
|
||||
class GlkHwInfoTests : public ::testing::Test {
|
||||
};
|
||||
|
||||
@@ -211,6 +211,7 @@ KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoWaFlags) {
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waDisableLSQCROPERFforOCL);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waEncryptedEdramOnlyPartials);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waForcePcBbFullCfgRestore);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
|
||||
|
||||
ReleaseOutHwInfoStructs();
|
||||
|
||||
@@ -220,6 +221,7 @@ KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoWaFlags) {
|
||||
EXPECT_EQ(0u, outHwInfo.pWaTable->waDisableLSQCROPERFforOCL);
|
||||
EXPECT_EQ(0u, outHwInfo.pWaTable->waEncryptedEdramOnlyPartials);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waForcePcBbFullCfgRestore);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
|
||||
|
||||
ReleaseOutHwInfoStructs();
|
||||
|
||||
@@ -229,6 +231,7 @@ KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoWaFlags) {
|
||||
EXPECT_EQ(0u, outHwInfo.pWaTable->waDisableLSQCROPERFforOCL);
|
||||
EXPECT_EQ(0u, outHwInfo.pWaTable->waEncryptedEdramOnlyPartials);
|
||||
EXPECT_EQ(0u, outHwInfo.pWaTable->waForcePcBbFullCfgRestore);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
|
||||
}
|
||||
|
||||
KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoEdram) {
|
||||
|
||||
@@ -212,6 +212,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) {
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waCSRUncachable);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
|
||||
|
||||
ReleaseOutHwInfoStructs();
|
||||
|
||||
@@ -222,6 +223,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) {
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waCSRUncachable);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
|
||||
|
||||
ReleaseOutHwInfoStructs();
|
||||
|
||||
@@ -232,6 +234,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) {
|
||||
EXPECT_EQ(0u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption);
|
||||
EXPECT_EQ(0u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waCSRUncachable);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
|
||||
|
||||
ReleaseOutHwInfoStructs();
|
||||
|
||||
@@ -242,6 +245,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) {
|
||||
EXPECT_EQ(0u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption);
|
||||
EXPECT_EQ(0u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl);
|
||||
EXPECT_EQ(0u, outHwInfo.pWaTable->waCSRUncachable);
|
||||
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
|
||||
}
|
||||
|
||||
SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoEdram) {
|
||||
|
||||
@@ -22,7 +22,9 @@
|
||||
|
||||
#include "runtime/command_stream/command_stream_receiver_hw.h"
|
||||
#include "runtime/helpers/options.h"
|
||||
#include "runtime/helpers/surface_formats.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/memory_manager/os_agnostic_memory_manager.h"
|
||||
#include "runtime/os_interface/debug_settings_manager.h"
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
@@ -1563,6 +1565,57 @@ TEST_F(KernelResidencyTest, test_MakeArgsResident) {
|
||||
delete pKernelInfo;
|
||||
}
|
||||
|
||||
// Creates an NV12 image and a Y-plane image on top of it, passes the plane as a kernel
// argument and checks that making the kernel resident requests a sampler cache flush
// (samplerCacheFlushBefore) on the device's command stream receiver.
HWTEST_F(KernelResidencyTest, test_MakeArgsResidentCheckImageFromImage) {
    ASSERT_NE(nullptr, pDevice);

    //create NV12 image
    cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_NV12_INTEL;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat);

    cl_image_desc imageDesc = {};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = 16;
    imageDesc.image_height = 16;
    imageDesc.image_depth = 1;

    cl_int retVal = CL_SUCCESS;
    MockContext context;
    std::unique_ptr<OCLRT::Image> imageNV12(Image::create(&context, flags, surfaceFormat, &imageDesc, nullptr, retVal));
    // The parent image is used as mem_object and dereferenced below; stop here if creation failed.
    ASSERT_NE(nullptr, imageNV12.get());

    //create Y plane
    imageFormat.image_channel_order = CL_R;
    flags = CL_MEM_READ_ONLY;
    surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat);

    imageDesc.image_width = 0;
    imageDesc.image_height = 0;
    imageDesc.image_depth = 0;
    imageDesc.mem_object = imageNV12.get();

    std::unique_ptr<OCLRT::Image> imageY(Image::create(&context, flags, surfaceFormat, &imageDesc, nullptr, retVal));
    // The plane image is stored as a kernel argument and dereferenced below.
    ASSERT_NE(nullptr, imageY.get());

    std::unique_ptr<KernelInfo> pKernelInfo(KernelInfo::create());
    KernelArgInfo kernelArgInfo;
    kernelArgInfo.isImage = true;

    pKernelInfo->kernelArgInfo.push_back(kernelArgInfo);

    std::unique_ptr<MockProgram> program(new MockProgram);
    std::unique_ptr<MockKernel> pKernel(new MockKernel(program.get(), *pKernelInfo, *pDevice));

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
    pKernel->storeKernelArg(0, Kernel::IMAGE_OBJ, (cl_mem)imageY.get(), NULL, 0);
    pKernel->makeResident(pDevice->getCommandStreamReceiver());

    EXPECT_FALSE(imageNV12->isImageFromImage());
    EXPECT_TRUE(imageY->isImageFromImage());

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.peekSamplerCacheFlushRequired());
}
|
||||
|
||||
struct KernelExecutionEnvironmentTest : public Test<DeviceFixture> {
|
||||
void SetUp() override {
|
||||
DeviceFixture::SetUp();
|
||||
|
||||
@@ -79,6 +79,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
|
||||
GraphicsAllocation *getPreemptionCsrAllocation() {
|
||||
return this->preemptionCsrAllocation;
|
||||
}
|
||||
using SamplerCacheFlushState = CommandStreamReceiver::SamplerCacheFlushState;
|
||||
SamplerCacheFlushState peekSamplerCacheFlushRequired() const { return this->samplerCacheFlushRequired; }
|
||||
|
||||
protected:
|
||||
using BaseClass::CommandStreamReceiver::memoryManager;
|
||||
|
||||
@@ -268,7 +268,7 @@ TEST_F(Nv12ImageTest, createNV12YPlaneImage) {
|
||||
auto imageYPlane = createImageWithFlags(CL_MEM_READ_WRITE);
|
||||
|
||||
ASSERT_NE(nullptr, imageYPlane);
|
||||
|
||||
EXPECT_EQ(true, imageYPlane->isImageFromImage());
|
||||
EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageYPlane->getGraphicsAllocation());
|
||||
|
||||
cl_image_desc parentDimensions, planeDimensions;
|
||||
@@ -311,6 +311,7 @@ TEST_F(Nv12ImageTest, createNV12UVPlaneImage) {
|
||||
|
||||
ASSERT_NE(nullptr, imageUVPlane);
|
||||
|
||||
EXPECT_EQ(true, imageUVPlane->isImageFromImage());
|
||||
EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageUVPlane->getGraphicsAllocation());
|
||||
|
||||
cl_image_desc parentDimensions, planeDimensions;
|
||||
@@ -359,6 +360,7 @@ TEST_F(Nv12ImageTest, createNV12UVPlaneImageWithOffsetOfUVPlane) {
|
||||
|
||||
ASSERT_NE(nullptr, imageUVPlane);
|
||||
|
||||
EXPECT_EQ(true, imageUVPlane->isImageFromImage());
|
||||
EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageUVPlane->getGraphicsAllocation());
|
||||
|
||||
cl_image_desc parentDimensions, planeDimensions;
|
||||
|
||||
@@ -147,6 +147,7 @@ struct SkuInfoBaseReference {
|
||||
refWaTable.waUseVAlign16OnTileXYBpp816 = true;
|
||||
refWaTable.waModifyVFEStateAfterGPGPUPreemption = true;
|
||||
refWaTable.waCSRUncachable = true;
|
||||
refWaTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;
|
||||
}
|
||||
}; // namespace SkuInfoBaseReference
|
||||
} // namespace OCLRT
|
||||
|
||||
Reference in New Issue
Block a user