waSamplerCacheFlushBetweenRedescribedSurfaceReads fix

Change-Id: Id322f58ce6997c8710ed2d8faf4f3c3f3d2ef0c3
This commit is contained in:
Jacek Danecki
2018-01-10 14:05:34 +01:00
parent e35a066f79
commit 73e2e72d07
24 changed files with 279 additions and 4 deletions

View File

@@ -49,6 +49,11 @@ class CommandStreamReceiver {
BatchedDispatch // dispatching is batched, explicit clFlush is required
};
// Pending sampler cache flush state for the waSamplerCacheFlushBetweenRedescribedSurfaceReads
// workaround. While the workaround is enabled, flushTask emits one PIPE_CONTROL with texture
// cache invalidation per call and advances the state:
// samplerCacheFlushBefore -> samplerCacheFlushAfter -> samplerCacheFlushNotRequired.
enum class SamplerCacheFlushState {
samplerCacheFlushNotRequired, // no sampler cache flush is pending
samplerCacheFlushBefore, //add sampler cache flush before Walker with redescribed image
samplerCacheFlushAfter //add sampler cache flush after Walker with redescribed image
};
CommandStreamReceiver();
virtual ~CommandStreamReceiver();
@@ -115,6 +120,8 @@ class CommandStreamReceiver {
// allows CommandStreamReceiver to prepopulate reserved block in instruction heap
MOCKABLE_VIRTUAL void initializeInstructionHeapCmdStreamReceiverReservedBlock(LinearStream &ih) const;
// Stores the requested sampler cache flush state; it is read later when the command stream
// size is computed and when flushTask programs the workaround PIPE_CONTROL.
void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; }
protected:
// taskCount - # of tasks submitted
uint32_t taskCount = 0;
@@ -156,6 +163,7 @@ class CommandStreamReceiver {
bool disableL3Cache = 0;
uint32_t requiredScratchSize = 0;
uint64_t totalMemoryUsed = 0u;
SamplerCacheFlushState samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired;
};
typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(const HardwareInfo &hwInfoIn, bool withAubDump);

View File

@@ -254,6 +254,17 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskLevel", (uint32_t)this->taskLevel);
// waSamplerCacheFlushBetweenRedescribedSurfaceReads: while a sampler cache flush is pending,
// add a PIPE_CONTROL with texture cache invalidation to the CSR stream and advance the state
// by one step (Before -> After, anything else -> NotRequired).
if (getMemoryManager()->device->getWaTable()->waSamplerCacheFlushBetweenRedescribedSurfaceReads) {
if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) {
auto pCmd = addPipeControlCmd(commandStreamCSR);
pCmd->setTextureCacheInvalidationEnable(true);
if (this->samplerCacheFlushRequired == SamplerCacheFlushState::samplerCacheFlushBefore) {
this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushAfter;
} else {
this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired;
}
}
}
// Add a PC if we have a dependency on a previous walker to avoid concurrency issues.
if (taskLevel > this->taskLevel) {
//Some architectures (SKL) requires to have pipe control prior to pipe control with tag write, add it here
@@ -272,6 +283,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
pCmd->setConstantCacheInvalidationEnable(true);
pCmd->setStateCacheInvalidationEnable(true);
}
auto address = (uint64_t)this->getTagAddress();
pCmd->setAddressHigh(address >> 32);
pCmd->setAddress(address & (0xffffffff));
@@ -514,6 +526,11 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
size += getCmdSizeForMediaSampler(dispatchFlags.mediaSamplerRequired);
size += PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(dispatchFlags.preemptionMode, this->lastPreemptionMode);
// Account for one extra PIPE_CONTROL when the sampler cache flush workaround is enabled and
// a flush is still pending (state other than samplerCacheFlushNotRequired).
if (getMemoryManager()->device->getWaTable()->waSamplerCacheFlushBetweenRedescribedSurfaceReads) {
if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) {
size += sizeof(typename GfxFamily::PIPE_CONTROL);
}
}
return size;
}

View File

@@ -54,6 +54,7 @@ int HwInfoConfigHw<IGFX_BROADWELL>::configureHardwareCustom(HardwareInfo *hwInfo
pWaTable->waReportPerfCountUseGlobalContextID = 1;
pWaTable->waUseVAlign16OnTileXYBpp816 = 1;
pWaTable->waModifyVFEStateAfterGPGPUPreemption = 1;
pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1;
if (hwInfo->pPlatform->usDeviceID == IBDW_GT3_HALO_MOBL_DEVICE_F0_ID ||
hwInfo->pPlatform->usDeviceID == IBDW_GT3_SERV_DEVICE_F0_ID) {

View File

@@ -76,6 +76,7 @@ int HwInfoConfigHw<IGFX_BROXTON>::configureHardwareCustom(HardwareInfo *hwInfo,
pWaTable->waSendMIFLUSHBeforeVFE = 1;
pWaTable->waForcePcBbFullCfgRestore = 1;
pWaTable->waReportPerfCountUseGlobalContextID = 1;
pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1;
int enabled = 0;
int retVal = drm->getEnabledPooledEu(enabled);

View File

@@ -70,6 +70,7 @@ int HwInfoConfigHw<IGFX_COFFEELAKE>::configureHardwareCustom(HardwareInfo *hwInf
pWaTable->waLosslessCompressionSurfaceStride = 1;
pWaTable->waFbcLinearSurfaceStride = 1;
pWaTable->wa4kAlignUVOffsetNV12LinearSurface = 1;
pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1;
if (hwInfo->pPlatform->usDeviceID == ICFL_GT3_ULT_28W_DEVICE_F0_ID ||
hwInfo->pPlatform->usDeviceID == ICFL_GT3_ULT_15W_DEVICE_F0_ID) {

View File

@@ -65,6 +65,7 @@ int HwInfoConfigHw<IGFX_GEMINILAKE>::configureHardwareCustom(HardwareInfo *hwInf
pWaTable->waSendMIFLUSHBeforeVFE = 1;
pWaTable->waForcePcBbFullCfgRestore = 1;
pWaTable->waReportPerfCountUseGlobalContextID = 1;
pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1;
int enabled = 0;
int retVal = drm->getEnabledPooledEu(enabled);

View File

@@ -71,6 +71,7 @@ int HwInfoConfigHw<IGFX_KABYLAKE>::configureHardwareCustom(HardwareInfo *hwInfo,
pWaTable->waLosslessCompressionSurfaceStride = 1;
pWaTable->waFbcLinearSurfaceStride = 1;
pWaTable->wa4kAlignUVOffsetNV12LinearSurface = 1;
pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1;
if (pPlatform->usRevId <= 0x6) {
pWaTable->waDisableLSQCROPERFforOCL = 1;

View File

@@ -81,6 +81,7 @@ int HwInfoConfigHw<IGFX_SKYLAKE>::configureHardwareCustom(HardwareInfo *hwInfo,
pWaTable->waEncryptedEdramOnlyPartials = 1;
pWaTable->waDisableEdramForDisplayRT = 1;
pWaTable->waForcePcBbFullCfgRestore = 1;
pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = 1;
if ((1 << hwInfo->pPlatform->usRevId) & 0x0eu) {
pWaTable->waCompressedResourceRequiresConstVA21 = 1;

View File

@@ -882,6 +882,10 @@ inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceive
} else if (Kernel::isMemObj(kernelArguments[argIndex].type)) {
auto clMem = (const cl_mem)kernelArguments[argIndex].object;
auto memObj = castToObjectOrAbort<MemObj>(clMem);
DEBUG_BREAK_IF(memObj == nullptr);
if (memObj->isImageFromImage()) {
commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore);
}
commandStreamReceiver.makeResident(*memObj->getGraphicsAllocation());
if (memObj->getMcsAllocation()) {
commandStreamReceiver.makeResident(*memObj->getMcsAllocation());

View File

@@ -320,6 +320,9 @@ Image *Image::create(Context *context,
if ((imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) || ((imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D) && (imageDesc->mem_object != nullptr))) {
image->associatedMemObject = castToObject<MemObj>(imageDesc->mem_object);
}
if (parentImage) {
image->isImageFromImageCreated = true;
}
// Driver needs to store rowPitch passed by the app in order to synchronize the host_ptr later on map call
image->setHostPtrRowPitch(imageDesc->image_row_pitch ? imageDesc->image_row_pitch : hostPtrRowPitch);
image->setHostPtrSlicePitch(hostPtrSlicePitch);

View File

@@ -100,8 +100,9 @@ class MemObj : public BaseObject<_cl_mem> {
virtual bool allowTiling() { return false; }
CommandQueue *getAssociatedCommandQueue() { return cmdQueuePtr; };
Device *getAssociatedDevice() { return device; };
CommandQueue *getAssociatedCommandQueue() { return cmdQueuePtr; }
Device *getAssociatedDevice() { return device; }
// Returns the isImageFromImageCreated flag (false unless it was set when the object was created).
bool isImageFromImage() const { return isImageFromImageCreated; }
virtual cl_int unmapObj(CommandQueue *cmdQ, void *ptr, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event) {
@@ -147,6 +148,7 @@ class MemObj : public BaseObject<_cl_mem> {
bool isZeroCopy;
bool isHostPtrSVM;
bool isObjectRedescribed;
bool isImageFromImageCreated = false;
MemoryManager *memoryManager = nullptr;
GraphicsAllocation *graphicsAllocation;
GraphicsAllocation *mcsAllocation = nullptr;

View File

@@ -127,6 +127,7 @@ class SkuInfoReceiver {
RECEIVE_WA(UseVAlign16OnTileXYBpp816);
RECEIVE_WA(ModifyVFEStateAfterGPGPUPreemption);
RECEIVE_WA(CSRUncachable);
RECEIVE_WA(SamplerCacheFlushBetweenRedescribedSurfaceReads);
#undef RECEIVE_WA
}
};

View File

@@ -115,5 +115,6 @@ struct WorkaroundTableBase {
bool waUseVAlign16OnTileXYBpp816 = false;
bool waModifyVFEStateAfterGPGPUPreemption = false;
bool waCSRUncachable = false;
bool waSamplerCacheFlushBetweenRedescribedSurfaceReads = false;
};
} // namespace OCLRT

View File

@@ -353,7 +353,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDeviceWithPreemptionSupportTh
HWTEST_F(CommandStreamReceiverFlushTaskTests, higherTaskLevelShouldSendAPipeControl) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.isPreambleSent = true;
configureCSRHeapStatesToNonDirty<FamilyType>();
configureCSRtoNonDirtyState<FamilyType>();
commandStreamReceiver.taskLevel = taskLevel / 2;
flushTask(commandStreamReceiver);
@@ -367,6 +367,105 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, higherTaskLevelShouldSendAPipeCont
EXPECT_NE(cmdList.end(), itorPC);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushNotRequiredThenDontSendPipecontrol) {
    // Workaround enabled but no flush pending: flushTask must not add anything to the CSR
    // stream and must leave the state at samplerCacheFlushNotRequired.
    using FlushState = CommandStreamReceiver::SamplerCacheFlushState;

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    OCLRT::WorkaroundTable *workaroundTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());

    ultCsr.isPreambleSent = true;
    ultCsr.setSamplerCacheFlushRequired(FlushState::samplerCacheFlushNotRequired);
    configureCSRtoNonDirtyState<FamilyType>();
    ultCsr.taskLevel = taskLevel;

    // Force the workaround on for the duration of the test; restored at the end.
    const bool savedWaValue = workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;

    flushTask(ultCsr);

    EXPECT_EQ(ultCsr.commandStream.getUsed(), 0u);
    EXPECT_EQ(FlushState::samplerCacheFlushNotRequired, ultCsr.peekSamplerCacheFlushRequired());

    parseCommands<FamilyType>(ultCsr.commandStream, 0);
    auto pipeControlIt = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), pipeControlIt);

    workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = savedWaValue;
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeThenSendPipecontrol) {
    // Workaround enabled and state samplerCacheFlushBefore: flushTask must emit a PIPE_CONTROL
    // with texture cache invalidation and advance the state to samplerCacheFlushAfter.
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore);
    configureCSRtoNonDirtyState<FamilyType>();
    commandStreamReceiver.taskLevel = taskLevel;

    OCLRT::WorkaroundTable *waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());
    const bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;

    flushTask(commandStreamReceiver);

    // Restore the flag right away: the checks below only inspect the recorded stream and
    // state, and the fatal assertion may leave the test body early.
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp;

    EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);
    EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter, commandStreamReceiver.peekSamplerCacheFlushRequired());

    parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
    auto itorPC = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: dereferencing the end iterator below would be undefined behavior.
    ASSERT_NE(cmdList.end(), itorPC);

    auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*itorPC;
    EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable());
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeAndWaSamplerCacheFlushBetweenRedescribedSurfaceReadsDasabledThenDontSendPipecontrol) {
    // Workaround disabled: even with a pending samplerCacheFlushBefore request, flushTask
    // must not add a PIPE_CONTROL and must not advance the state.
    using FlushState = CommandStreamReceiver::SamplerCacheFlushState;

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.isPreambleSent = true;
    ultCsr.setSamplerCacheFlushRequired(FlushState::samplerCacheFlushBefore);
    configureCSRtoNonDirtyState<FamilyType>();
    ultCsr.taskLevel = taskLevel;

    // Force the workaround off for the duration of the test; restored at the end.
    OCLRT::WorkaroundTable *workaroundTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());
    const bool savedWaValue = workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = false;

    flushTask(ultCsr);

    EXPECT_EQ(ultCsr.commandStream.getUsed(), 0u);
    EXPECT_EQ(FlushState::samplerCacheFlushBefore, ultCsr.peekSamplerCacheFlushRequired());

    parseCommands<FamilyType>(ultCsr.commandStream, 0);
    auto pipeControlIt = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), pipeControlIt);

    workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = savedWaValue;
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushAfterThenSendPipecontrol) {
    // Workaround enabled and state samplerCacheFlushAfter: flushTask must emit a PIPE_CONTROL
    // with texture cache invalidation and reset the state to samplerCacheFlushNotRequired.
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter);
    configureCSRtoNonDirtyState<FamilyType>();
    commandStreamReceiver.taskLevel = taskLevel;

    OCLRT::WorkaroundTable *waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());
    const bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;

    flushTask(commandStreamReceiver);

    // Restore the flag right away: the checks below only inspect the recorded stream and
    // state, and the fatal assertion may leave the test body early.
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp;

    EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u);
    EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired, commandStreamReceiver.peekSamplerCacheFlushRequired());

    parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
    auto itorPC = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: dereferencing the end iterator below would be undefined behavior.
    ASSERT_NE(cmdList.end(), itorPC);

    auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*itorPC;
    EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable());
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, completionStampValid) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
//simulate our CQ is stale for 10 TL's
@@ -1954,6 +2053,31 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleSentThenRequir
EXPECT_EQ(sizeof(PIPELINE_SELECT), difference);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenSamplerCacheFlushSentThenRequiredCsrSizeContainsPipecontrolSize) {
    // getRequiredCmdStreamSize must grow by exactly one PIPE_CONTROL when a sampler cache flush
    // is pending AND the workaround is enabled, and must not grow when the workaround is off.
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    CsrSizeRequestFlags csrSizeRequest = {};
    DispatchFlags flags;
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest);

    OCLRT::WorkaroundTable *waTable = const_cast<WorkaroundTable *>(pDevice->getWaTable());
    const bool tmp = waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads;

    // Disable the workaround explicitly so the baseline comparison does not depend on the
    // value the device under test happens to start with.
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = false;
    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired);
    auto samplerCacheNotFlushedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags);
    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore);
    auto samplerCacheFlushBeforeSize = commandStreamReceiver.getRequiredCmdStreamSize(flags);
    EXPECT_EQ(samplerCacheNotFlushedSize, samplerCacheFlushBeforeSize);

    // With the workaround enabled the pending flush must add one PIPE_CONTROL.
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;
    samplerCacheFlushBeforeSize = commandStreamReceiver.getRequiredCmdStreamSize(flags);
    waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = tmp;

    auto difference = samplerCacheFlushBeforeSize - samplerCacheNotFlushedSize;
    EXPECT_EQ(sizeof(typename FamilyType::PIPE_CONTROL), difference);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInNonDirtyStateWhenflushTaskIsCalledThenNoFlushIsCalled) {
CommandQueueHw<FamilyType> commandQueue(nullptr, pDevice, 0);
auto &commandStream = commandQueue.getCS(4096u);

View File

@@ -135,6 +135,17 @@ BDWTEST_F(HwInfoConfigTestLinuxBdw, negativeFailedIoctlSsCount) {
EXPECT_EQ(-5, ret);
}
BDWTEST_F(HwInfoConfigTestLinuxBdw, configureHwInfoWaFlags) {
    // With device revision 0, configureHwInfo must succeed and report the sampler cache
    // flush workaround as enabled.
    auto bdwConfig = HwInfoConfig::get(productFamily);
    drm->StoredDeviceRevID = 0;

    const int status = bdwConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface);

    EXPECT_EQ(0, status);
    EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);

    ReleaseOutHwInfoStructs();
}
BDWTEST_F(HwInfoConfigTestLinuxBdw, configureHwInfoEdram) {
auto hwInfoConfig = HwInfoConfig::get(productFamily);

View File

@@ -241,6 +241,17 @@ BXTTEST_F(HwInfoConfigTestLinuxBxt, configureHwInfoInvalidMinEuInPool) {
EXPECT_EQ((outHwInfo.pSysInfo->EUCount - outHwInfo.pSysInfo->EuCountPerPoolMin), outHwInfo.pSysInfo->EuCountPerPoolMax);
}
BXTTEST_F(HwInfoConfigTestLinuxBxt, configureHwInfoWaFlags) {
    // With device revision 0, configureHwInfo must succeed and report the sampler cache
    // flush workaround as enabled.
    auto bxtConfig = HwInfoConfig::get(productFamily);
    drm->StoredDeviceRevID = 0;

    const int status = bxtConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface);

    EXPECT_EQ(0, status);
    EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);

    ReleaseOutHwInfoStructs();
}
template <typename T>
class BxtHwInfoTests : public ::testing::Test {
};

View File

@@ -158,6 +158,17 @@ CFLTEST_F(HwInfoConfigTestLinuxCfl, negativeFailedIoctlSsCount) {
EXPECT_EQ(-5, ret);
}
CFLTEST_F(HwInfoConfigTestLinuxCfl, configureHwInfoWaFlags) {
    // With device revision 0, configureHwInfo must succeed and report the sampler cache
    // flush workaround as enabled.
    auto cflConfig = HwInfoConfig::get(productFamily);
    drm->StoredDeviceRevID = 0;

    const int status = cflConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface);

    EXPECT_EQ(0, status);
    EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);

    ReleaseOutHwInfoStructs();
}
CFLTEST_F(HwInfoConfigTestLinuxCfl, configureHwInfoEdram) {
auto hwInfoConfig = HwInfoConfig::get(productFamily);

View File

@@ -230,6 +230,17 @@ GLKTEST_F(HwInfoConfigTestLinuxGlk, configureHwInfoInvalidMinEuInPool) {
EXPECT_EQ((outHwInfo.pSysInfo->EUCount - outHwInfo.pSysInfo->EuCountPerPoolMin), outHwInfo.pSysInfo->EuCountPerPoolMax);
}
GLKTEST_F(HwInfoConfigTestLinuxGlk, configureHwInfoWaFlags) {
    // With device revision 0, configureHwInfo must succeed and report the sampler cache
    // flush workaround as enabled.
    auto glkConfig = HwInfoConfig::get(productFamily);
    drm->StoredDeviceRevID = 0;

    const int status = glkConfig->configureHwInfo(pInHwInfo, &outHwInfo, osInterface);

    EXPECT_EQ(0, status);
    EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);

    ReleaseOutHwInfoStructs();
}
template <typename T>
class GlkHwInfoTests : public ::testing::Test {
};

View File

@@ -211,6 +211,7 @@ KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoWaFlags) {
EXPECT_EQ(1u, outHwInfo.pWaTable->waDisableLSQCROPERFforOCL);
EXPECT_EQ(1u, outHwInfo.pWaTable->waEncryptedEdramOnlyPartials);
EXPECT_EQ(1u, outHwInfo.pWaTable->waForcePcBbFullCfgRestore);
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
ReleaseOutHwInfoStructs();
@@ -220,6 +221,7 @@ KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoWaFlags) {
EXPECT_EQ(0u, outHwInfo.pWaTable->waDisableLSQCROPERFforOCL);
EXPECT_EQ(0u, outHwInfo.pWaTable->waEncryptedEdramOnlyPartials);
EXPECT_EQ(1u, outHwInfo.pWaTable->waForcePcBbFullCfgRestore);
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
ReleaseOutHwInfoStructs();
@@ -229,6 +231,7 @@ KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoWaFlags) {
EXPECT_EQ(0u, outHwInfo.pWaTable->waDisableLSQCROPERFforOCL);
EXPECT_EQ(0u, outHwInfo.pWaTable->waEncryptedEdramOnlyPartials);
EXPECT_EQ(0u, outHwInfo.pWaTable->waForcePcBbFullCfgRestore);
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
}
KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoEdram) {

View File

@@ -212,6 +212,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) {
EXPECT_EQ(1u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption);
EXPECT_EQ(1u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl);
EXPECT_EQ(1u, outHwInfo.pWaTable->waCSRUncachable);
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
ReleaseOutHwInfoStructs();
@@ -222,6 +223,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) {
EXPECT_EQ(1u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption);
EXPECT_EQ(1u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl);
EXPECT_EQ(1u, outHwInfo.pWaTable->waCSRUncachable);
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
ReleaseOutHwInfoStructs();
@@ -232,6 +234,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) {
EXPECT_EQ(0u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption);
EXPECT_EQ(0u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl);
EXPECT_EQ(1u, outHwInfo.pWaTable->waCSRUncachable);
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
ReleaseOutHwInfoStructs();
@@ -242,6 +245,7 @@ SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) {
EXPECT_EQ(0u, outHwInfo.pWaTable->waModifyVFEStateAfterGPGPUPreemption);
EXPECT_EQ(0u, outHwInfo.pWaTable->waDisablePerCtxtPreemptionGranularityControl);
EXPECT_EQ(0u, outHwInfo.pWaTable->waCSRUncachable);
EXPECT_EQ(1u, outHwInfo.pWaTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads);
}
SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoEdram) {

View File

@@ -22,7 +22,9 @@
#include "runtime/command_stream/command_stream_receiver_hw.h"
#include "runtime/helpers/options.h"
#include "runtime/helpers/surface_formats.h"
#include "runtime/kernel/kernel.h"
#include "runtime/mem_obj/image.h"
#include "runtime/memory_manager/os_agnostic_memory_manager.h"
#include "runtime/os_interface/debug_settings_manager.h"
#include "unit_tests/fixtures/device_fixture.h"
@@ -1563,6 +1565,57 @@ TEST_F(KernelResidencyTest, test_MakeArgsResident) {
delete pKernelInfo;
}
HWTEST_F(KernelResidencyTest, test_MakeArgsResidentCheckImageFromImage) {
    // Making a kernel resident whose image argument was created from another image must
    // request a sampler cache flush (samplerCacheFlushBefore) on the command stream receiver.
    ASSERT_NE(nullptr, pDevice);

    //create NV12 image
    cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_NV12_INTEL;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat);

    cl_image_desc imageDesc = {};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = 16;
    imageDesc.image_height = 16;
    imageDesc.image_depth = 1;

    cl_int retVal = CL_SUCCESS;
    MockContext context;
    std::unique_ptr<OCLRT::Image> imageNV12(Image::create(&context, flags, surfaceFormat, &imageDesc, nullptr, retVal));
    // The parent image is handed to the next create call; stop here if it was not created.
    ASSERT_NE(nullptr, imageNV12.get());

    //create Y plane
    imageFormat.image_channel_order = CL_R;
    flags = CL_MEM_READ_ONLY;
    surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat);
    imageDesc.image_width = 0;
    imageDesc.image_height = 0;
    imageDesc.image_depth = 0;
    imageDesc.mem_object = imageNV12.get();
    std::unique_ptr<OCLRT::Image> imageY(Image::create(&context, flags, surfaceFormat, &imageDesc, nullptr, retVal));
    // The plane image is dereferenced below; stop here if it was not created.
    ASSERT_NE(nullptr, imageY.get());

    std::unique_ptr<KernelInfo> pKernelInfo(KernelInfo::create());
    KernelArgInfo kernelArgInfo;
    kernelArgInfo.isImage = true;
    pKernelInfo->kernelArgInfo.push_back(kernelArgInfo);

    std::unique_ptr<MockProgram> program(new MockProgram);
    std::unique_ptr<MockKernel> pKernel(new MockKernel(program.get(), *pKernelInfo, *pDevice));
    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    pKernel->storeKernelArg(0, Kernel::IMAGE_OBJ, (cl_mem)imageY.get(), nullptr, 0);
    pKernel->makeResident(pDevice->getCommandStreamReceiver());

    EXPECT_FALSE(imageNV12->isImageFromImage());
    EXPECT_TRUE(imageY->isImageFromImage());

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.peekSamplerCacheFlushRequired());
}
struct KernelExecutionEnvironmentTest : public Test<DeviceFixture> {
void SetUp() override {
DeviceFixture::SetUp();

View File

@@ -79,6 +79,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
GraphicsAllocation *getPreemptionCsrAllocation() {
return this->preemptionCsrAllocation;
}
// Test helper: returns the current samplerCacheFlushRequired state of the receiver so unit
// tests can assert on it; the alias only shortens the return type.
using SamplerCacheFlushState = CommandStreamReceiver::SamplerCacheFlushState;
SamplerCacheFlushState peekSamplerCacheFlushRequired() const { return this->samplerCacheFlushRequired; }
protected:
using BaseClass::CommandStreamReceiver::memoryManager;

View File

@@ -268,7 +268,7 @@ TEST_F(Nv12ImageTest, createNV12YPlaneImage) {
auto imageYPlane = createImageWithFlags(CL_MEM_READ_WRITE);
ASSERT_NE(nullptr, imageYPlane);
EXPECT_EQ(true, imageYPlane->isImageFromImage());
EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageYPlane->getGraphicsAllocation());
cl_image_desc parentDimensions, planeDimensions;
@@ -311,6 +311,7 @@ TEST_F(Nv12ImageTest, createNV12UVPlaneImage) {
ASSERT_NE(nullptr, imageUVPlane);
EXPECT_EQ(true, imageUVPlane->isImageFromImage());
EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageUVPlane->getGraphicsAllocation());
cl_image_desc parentDimensions, planeDimensions;
@@ -359,6 +360,7 @@ TEST_F(Nv12ImageTest, createNV12UVPlaneImageWithOffsetOfUVPlane) {
ASSERT_NE(nullptr, imageUVPlane);
EXPECT_EQ(true, imageUVPlane->isImageFromImage());
EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageUVPlane->getGraphicsAllocation());
cl_image_desc parentDimensions, planeDimensions;

View File

@@ -147,6 +147,7 @@ struct SkuInfoBaseReference {
refWaTable.waUseVAlign16OnTileXYBpp816 = true;
refWaTable.waModifyVFEStateAfterGPGPUPreemption = true;
refWaTable.waCSRUncachable = true;
refWaTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;
}
}; // namespace SkuInfoBaseReference
} // namespace OCLRT