mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 12:23:05 +08:00
Update disabling caching for a resource
Change-Id: I00eac0add01f75a1b82d04cf42652c15b776a457 Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
32ecd91401
commit
d30cc221df
@@ -527,6 +527,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
auto specialPipelineSelectMode = false;
|
||||
Kernel *kernel = nullptr;
|
||||
bool anyUncacheableArgs = false;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
if (kernel != dispatchInfo.getKernel()) {
|
||||
kernel = dispatchInfo.getKernel();
|
||||
@@ -539,6 +540,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
auto numGrfRequiredByKernel = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired;
|
||||
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
|
||||
specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode();
|
||||
if (kernel->hasUncacheableArgs()) {
|
||||
anyUncacheableArgs = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (mediaSamplerRequired) {
|
||||
@@ -579,6 +583,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
}
|
||||
}
|
||||
|
||||
if (anyUncacheableArgs) {
|
||||
getCommandStreamReceiver().setDisableL3Cache(true);
|
||||
}
|
||||
|
||||
DispatchFlags dispatchFlags;
|
||||
dispatchFlags.blocking = blocking;
|
||||
dispatchFlags.dcFlush = shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC;
|
||||
|
||||
@@ -170,12 +170,13 @@ class CommandStreamReceiver {
|
||||
|
||||
virtual void expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation);
|
||||
|
||||
protected:
|
||||
void cleanupResources();
|
||||
void setDisableL3Cache(bool val) {
|
||||
disableL3Cache = val;
|
||||
}
|
||||
|
||||
protected:
|
||||
void cleanupResources();
|
||||
|
||||
std::unique_ptr<FlushStampTracker> flushStamp;
|
||||
std::unique_ptr<SubmissionAggregator> submissionAggregator;
|
||||
std::unique_ptr<FlatBatchBufferHelper> flatBatchBufferHelper;
|
||||
|
||||
@@ -784,6 +784,7 @@ uint32_t Kernel::getScratchSizeValueToProgramMediaVfeState(int scratchSize) {
|
||||
cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
bool updateExposedKernel = true;
|
||||
auto argWasUncacheable = false;
|
||||
if (getKernelInfo().builtinDispatchBuilder != nullptr) {
|
||||
updateExposedKernel = getKernelInfo().builtinDispatchBuilder->setExplicitArg(argIndex, argSize, argVal, retVal);
|
||||
}
|
||||
@@ -791,6 +792,7 @@ cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) {
|
||||
if (argIndex >= kernelArgHandlers.size()) {
|
||||
return CL_INVALID_ARG_INDEX;
|
||||
}
|
||||
argWasUncacheable = kernelArguments[argIndex].isUncacheable;
|
||||
auto argHandler = kernelArgHandlers[argIndex];
|
||||
retVal = (this->*argHandler)(argIndex, argSize, argVal);
|
||||
}
|
||||
@@ -799,6 +801,8 @@ cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) {
|
||||
patchedArgumentsNum++;
|
||||
kernelArguments[argIndex].isPatched = true;
|
||||
}
|
||||
auto argIsUncacheable = kernelArguments[argIndex].isUncacheable;
|
||||
uncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 1 : 0);
|
||||
resolveArgs();
|
||||
}
|
||||
return retVal;
|
||||
@@ -1128,6 +1132,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
||||
if (requiresSshForBuffers()) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode);
|
||||
kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable();
|
||||
}
|
||||
addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation());
|
||||
return CL_SUCCESS;
|
||||
@@ -1433,6 +1438,10 @@ void Kernel::unsetArg(uint32_t argIndex) {
|
||||
if (kernelArguments[argIndex].isPatched) {
|
||||
patchedArgumentsNum--;
|
||||
kernelArguments[argIndex].isPatched = false;
|
||||
if (kernelArguments[argIndex].isUncacheable) {
|
||||
uncacheableArgsCount--;
|
||||
kernelArguments[argIndex].isUncacheable = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -57,6 +57,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
GraphicsAllocation *pSvmAlloc;
|
||||
cl_mem_flags svmFlags;
|
||||
bool isPatched = false;
|
||||
bool isUncacheable = false;
|
||||
};
|
||||
|
||||
typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex,
|
||||
@@ -286,6 +287,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
bool requiresCoherency();
|
||||
void resetSharedObjectsPatchAddresses();
|
||||
bool isUsingSharedObjArgs() const { return usingSharedObjArgs; }
|
||||
// True while uncacheableArgsCount is non-zero, i.e. at least one argument is
// currently counted as uncacheable.
bool hasUncacheableArgs() const {
    return 0 < uncacheableArgsCount;
}
|
||||
|
||||
bool hasPrintfOutput() const;
|
||||
|
||||
@@ -496,6 +498,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
bool auxTranslationRequired = false;
|
||||
uint32_t patchedArgumentsNum = 0;
|
||||
uint32_t startOffset = 0;
|
||||
uint32_t uncacheableArgsCount = 0;
|
||||
|
||||
std::vector<PatchInfoData> patchInfoDataList;
|
||||
std::unique_ptr<ImageTransformer> imageTransformer;
|
||||
|
||||
@@ -255,6 +255,7 @@ Buffer *Buffer::create(Context *context,
|
||||
}
|
||||
|
||||
pBuffer->setHostPtrMinSize(size);
|
||||
pBuffer->isUncacheable = isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_RESOURCE);
|
||||
|
||||
if (copyMemoryFromHostPtr) {
|
||||
if ((memory->gmm && memory->gmm->isRenderCompressed) || !MemoryPool::isSystemMemoryPool(memory->getMemoryPool())) {
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "common/helpers/bit_helpers.h"
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/execution_environment/execution_environment.h"
|
||||
#include "runtime/helpers/surface_formats.h"
|
||||
@@ -64,8 +65,9 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
|
||||
surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR);
|
||||
surfaceState->setVerticalLineStride(0);
|
||||
surfaceState->setVerticalLineStrideOffset(0);
|
||||
if ((isAligned<MemoryConstants::cacheLineSize>(bufferAddress) && isAligned<MemoryConstants::cacheLineSize>(bufferSize)) ||
|
||||
((getFlags() & CL_MEM_READ_ONLY)) != 0 || !this->isMemObjZeroCopy()) {
|
||||
if (((isAligned<MemoryConstants::cacheLineSize>(bufferAddress) && isAligned<MemoryConstants::cacheLineSize>(bufferSize)) ||
|
||||
isValueSet(getFlags(), CL_MEM_READ_ONLY) || !this->isMemObjZeroCopy()) &&
|
||||
!this->isUncacheable) {
|
||||
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
|
||||
} else {
|
||||
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -219,6 +219,10 @@ bool MemObj::isMemObjWithHostPtrSVM() const {
|
||||
return isHostPtrSVM;
|
||||
}
|
||||
|
||||
bool MemObj::isMemObjUncacheable() const {
|
||||
return isUncacheable;
|
||||
}
|
||||
|
||||
GraphicsAllocation *MemObj::getGraphicsAllocation() {
|
||||
return graphicsAllocation;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -71,6 +71,7 @@ class MemObj : public BaseObject<_cl_mem> {
|
||||
|
||||
bool isMemObjZeroCopy() const;
|
||||
bool isMemObjWithHostPtrSVM() const;
|
||||
bool isMemObjUncacheable() const;
|
||||
virtual void transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) { UNRECOVERABLE_IF(true); };
|
||||
virtual void transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) { UNRECOVERABLE_IF(true); };
|
||||
|
||||
@@ -126,6 +127,7 @@ class MemObj : public BaseObject<_cl_mem> {
|
||||
bool isZeroCopy;
|
||||
bool isHostPtrSVM;
|
||||
bool isObjectRedescribed;
|
||||
bool isUncacheable = false;
|
||||
MemoryManager *memoryManager = nullptr;
|
||||
GraphicsAllocation *graphicsAllocation;
|
||||
GraphicsAllocation *mcsAllocation = nullptr;
|
||||
|
||||
@@ -17,71 +17,172 @@
|
||||
#include "test.h"
|
||||
#include "unit_tests/fixtures/hello_world_fixture.h"
|
||||
#include "unit_tests/helpers/hw_parse.h"
|
||||
#include "unit_tests/utilities/base_object_utils.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
|
||||
namespace clMemLocallyUncachedResourceTests {
|
||||
|
||||
struct clMemLocallyUncachedResourceFixture : Test<HelloWorldFixture<HelloWorldFixtureFactory>>,
|
||||
::testing::WithParamInterface<bool> {};
|
||||
|
||||
HWTEST_P(clMemLocallyUncachedResourceFixture, GivenLocallyCachedOrUncachedBufferWhenItIsSetAndQueuedThenItIsCorrectlyCached) {
|
||||
template <typename FamilyType>
|
||||
uint32_t argMocs(Kernel &kernel, size_t argIndex) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap();
|
||||
auto surfaceStateHeapAddressOffset = kernel.getKernelInfo().kernelArgInfo[argIndex].offsetHeap;
|
||||
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset));
|
||||
return surfaceState->getMemoryObjectControlState();
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
uint32_t cmdQueueMocs(CommandQueue *pCmdQ) {
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
auto pCmdQHw = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
|
||||
auto &csr = pCmdQHw->getCommandStreamReceiver();
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(csr.getCS(0), 0);
|
||||
auto itorCmd = reverse_find<STATE_BASE_ADDRESS *>(hwParse.cmdList.rbegin(), hwParse.cmdList.rend());
|
||||
EXPECT_NE(hwParse.cmdList.rend(), itorCmd);
|
||||
auto sba = genCmdCast<STATE_BASE_ADDRESS *>(*itorCmd);
|
||||
EXPECT_NE(nullptr, sba);
|
||||
|
||||
return sba->getStatelessDataPortAccessMemoryObjectControlState();
|
||||
}
|
||||
|
||||
const size_t n = 512;
|
||||
size_t globalWorkSize[3] = {n, 1, 1};
|
||||
size_t localWorkSize[3] = {256, 1, 1};
|
||||
bool useUncachedFlag = GetParam();
|
||||
const size_t globalWorkSize[3] = {n, 1, 1};
|
||||
const size_t localWorkSize[3] = {256, 1, 1};
|
||||
|
||||
const cl_mem_properties_intel *propertiesCacheable = nullptr;
|
||||
const cl_mem_properties_intel propertiesUncacheable[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0};
|
||||
|
||||
using clMemLocallyUncachedResourceFixture = Test<HelloWorldFixture<HelloWorldFixtureFactory>>;
|
||||
|
||||
HWTEST_F(clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableResourceWhenSettingKernelArgumentsThenKernelIsUncacheable) {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
cl_mem_properties_intel propertiesUncached[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0};
|
||||
cl_mem_properties_intel *properties = (useUncachedFlag ? propertiesUncached : nullptr);
|
||||
auto buffer1 = clCreateBufferWithPropertiesINTEL(context, properties, n * sizeof(float), nullptr, nullptr);
|
||||
auto buffer2 = clCreateBufferWithPropertiesINTEL(context, properties, n * sizeof(float), nullptr, nullptr);
|
||||
auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
|
||||
auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
|
||||
auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
|
||||
auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &buffer1);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
auto surfaceStateHeapAddress = kernel.get()->getSurfaceStateHeap();
|
||||
auto surfaceStateHeapAddressOffset = kernel.get()->getKernelInfo().kernelArgInfo[0].offsetHeap;
|
||||
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset));
|
||||
auto expectedMocs = pDevice->getGmmHelper()->getMOCS(useUncachedFlag ? GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED
|
||||
: GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState());
|
||||
auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr);
|
||||
auto pBufferUncacheable1 = clUniquePtr(castToObject<Buffer>(bufferUncacheable1));
|
||||
auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr);
|
||||
auto pBufferUncacheable2 = clUniquePtr(castToObject<Buffer>(bufferUncacheable2));
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &buffer2);
|
||||
auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
surfaceStateHeapAddressOffset = kernel.get()->getKernelInfo().kernelArgInfo[1].offsetHeap;
|
||||
surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset));
|
||||
EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState());
|
||||
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
|
||||
|
||||
EXPECT_TRUE(kernel->isPatched());
|
||||
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||
|
||||
auto pCmdQHw = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
|
||||
ASSERT_NE(nullptr, pCmdQHw);
|
||||
auto &csr = pCmdQHw->getCommandStreamReceiver();
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(csr.getCS(0), 0);
|
||||
auto itorCmd = find<STATE_BASE_ADDRESS *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
|
||||
EXPECT_NE(hwParse.cmdList.end(), itorCmd);
|
||||
auto sba = genCmdCast<STATE_BASE_ADDRESS *>(*itorCmd);
|
||||
ASSERT_NE(nullptr, sba);
|
||||
|
||||
EXPECT_EQ(expectedMocs, sba->getStatelessDataPortAccessMemoryObjectControlState());
|
||||
|
||||
retVal = clReleaseMemObject(buffer1);
|
||||
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable1);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
retVal = clReleaseMemObject(buffer2);
|
||||
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
|
||||
|
||||
EXPECT_TRUE(kernel->isPatched());
|
||||
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferUncacheable2);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
|
||||
|
||||
EXPECT_TRUE(kernel->isPatched());
|
||||
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
|
||||
|
||||
EXPECT_TRUE(kernel->isPatched());
|
||||
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
|
||||
|
||||
EXPECT_TRUE(kernel->isPatched());
|
||||
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(clMemLocallyUncachedResourceTest,
|
||||
clMemLocallyUncachedResourceFixture,
|
||||
::testing::Bool());
|
||||
// Checks that the MOCS programmed for a kernel argument, and the MOCS seen in
// the command queue after an enqueue, keep following the cacheability of the
// buffer bound to argument 0 when that argument is unset and set again.
HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThanKernelContinuesToCorrectlySetMocs) {
    cl_int retVal = CL_SUCCESS;
    std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    const size_t bufferSize = n * sizeof(float);

    // The clUniquePtr wrappers exist only to release the buffers at scope exit.
    auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, bufferSize, nullptr, nullptr);
    auto ownerCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
    auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, bufferSize, nullptr, nullptr);
    auto ownerCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));

    auto bufferUncacheable = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, bufferSize, nullptr, nullptr);
    auto ownerUncacheable = clUniquePtr(castToObject<Buffer>(bufferUncacheable));

    auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
    auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);

    // Binds `memObj` to argument `argIndex` and verifies the argument's surface state MOCS.
    auto setArgAndExpectMocs = [&](cl_uint argIndex, const cl_mem &memObj, decltype(mocsCacheable) expectedMocs) {
        retVal = clSetKernelArg(kernel.get(), argIndex, sizeof(cl_mem), &memObj);
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(expectedMocs, argMocs<FamilyType>(*kernel, argIndex));
    };

    // Enqueues the fully patched kernel and verifies the MOCS found in the command queue.
    auto enqueueAndExpectMocs = [&](decltype(mocsCacheable) expectedMocs) {
        EXPECT_TRUE(kernel->isPatched());
        retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(expectedMocs, cmdQueueMocs<FamilyType>(pCmdQ));
    };

    // Both arguments cacheable.
    setArgAndExpectMocs(0, bufferCacheable1, mocsCacheable);
    setArgAndExpectMocs(1, bufferCacheable2, mocsCacheable);
    enqueueAndExpectMocs(mocsCacheable);

    // Argument 0 replaced with an uncacheable buffer.
    setArgAndExpectMocs(0, bufferUncacheable, mocsUncacheable);
    enqueueAndExpectMocs(mocsUncacheable);

    // Unset, then bind a cacheable buffer again.
    kernel->unsetArg(0);
    setArgAndExpectMocs(0, bufferCacheable1, mocsCacheable);
    enqueueAndExpectMocs(mocsCacheable);

    // Unset, then bind the uncacheable buffer again.
    kernel->unsetArg(0);
    setArgAndExpectMocs(0, bufferUncacheable, mocsUncacheable);
    enqueueAndExpectMocs(mocsUncacheable);
}
|
||||
|
||||
} // namespace clMemLocallyUncachedResourceTests
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation
|
||||
* Copyright (C) 2018-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -14,6 +14,9 @@ MockGmmClientContextBase::MockGmmClientContextBase(GMM_CLIENT clientType, GmmExp
|
||||
// Mock cache policy lookup: returns an all-zero MEMORY_OBJECT_CONTROL_STATE for
// GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED and DwordValue == 4 for
// every other usage, so the two cases are distinguishable.
// pResInfo is not consulted.
MEMORY_OBJECT_CONTROL_STATE MockGmmClientContextBase::cachePolicyGetMemoryObject(GMM_RESOURCE_INFO *pResInfo, GMM_RESOURCE_USAGE_TYPE usage) {
    MEMORY_OBJECT_CONTROL_STATE mocs = {};
    memset(&mocs, 0, sizeof(mocs));

    const bool isMisalignedUsage = (usage == GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
    if (!isMisalignedUsage) {
        mocs.DwordValue = 4u;
    }

    return mocs;
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user