refactor: remove unneeded code
Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
This commit is contained in:
parent
b5e9c10f64
commit
cac547946a
|
@ -796,10 +796,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||
}
|
||||
|
||||
bool anyUncacheableArgs = false;
|
||||
auto requiresCoherency = false;
|
||||
for (auto surface : createRange(surfaces, surfaceCount)) {
|
||||
surface->makeResident(getGpgpuCommandStreamReceiver());
|
||||
requiresCoherency |= surface->isCoherent;
|
||||
if (!surface->allowsL3Caching()) {
|
||||
anyUncacheableArgs = true;
|
||||
}
|
||||
|
@ -819,7 +817,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||
continue;
|
||||
}
|
||||
kernel->makeResident(getGpgpuCommandStreamReceiver());
|
||||
requiresCoherency |= kernel->requiresCoherency();
|
||||
mediaSamplerRequired |= kernel->isVmeKernel();
|
||||
auto numGrfRequiredByKernel = static_cast<uint32_t>(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired);
|
||||
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
|
||||
|
@ -886,7 +883,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||
multiDispatchInfo.usesSlm(), // useSLM
|
||||
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled() || commandType == CL_COMMAND_FILL_BUFFER, // guardCommandBufferWithPipeControl
|
||||
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
|
||||
requiresCoherency, // requiresCoherency
|
||||
false, // requiresCoherency
|
||||
(QueuePriority::LOW == priority), // lowPriority
|
||||
implicitFlush, // implicitFlush
|
||||
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
|
||||
|
|
|
@ -107,9 +107,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
|
|||
if (kernel.hasPrintfOutput()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str());
|
||||
}
|
||||
if (kernel.requiresCoherency()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, KERNEL_REQUIRES_COHERENCY, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (kernelInfo.builtinDispatchBuilder != nullptr) {
|
||||
|
|
|
@ -154,12 +154,10 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
|
|||
IndirectHeap *ioh = kernelOperation->ioh.get();
|
||||
IndirectHeap *ssh = kernelOperation->ssh.get();
|
||||
|
||||
auto requiresCoherency = false;
|
||||
auto anyUncacheableArgs = false;
|
||||
for (auto &surface : surfaces) {
|
||||
DEBUG_BREAK_IF(!surface);
|
||||
surface->makeResident(commandStreamReceiver);
|
||||
requiresCoherency |= surface->isCoherent;
|
||||
if (!surface->allowsL3Caching()) {
|
||||
anyUncacheableArgs = true;
|
||||
}
|
||||
|
@ -211,7 +209,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
|
|||
slmUsed, // useSLM
|
||||
!commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), // guardCommandBufferWithPipeControl
|
||||
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
|
||||
requiresCoherency, // requiresCoherency
|
||||
false, // requiresCoherency
|
||||
commandQueue.getPriority() == QueuePriority::LOW, // lowPriority
|
||||
false, // implicitFlush
|
||||
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
|
||||
|
|
|
@ -1398,29 +1398,6 @@ void Kernel::getResidency(std::vector<Surface *> &dst) {
|
|||
gtpinNotifyUpdateResidencyList(this, &dst);
|
||||
}
|
||||
|
||||
bool Kernel::requiresCoherency() {
|
||||
auto numArgs = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.size();
|
||||
for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) {
|
||||
if (kernelArguments[argIndex].object) {
|
||||
if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) {
|
||||
auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object;
|
||||
if (pSVMAlloc->isCoherent()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (Kernel::isMemObj(kernelArguments[argIndex].type)) {
|
||||
auto clMem = const_cast<cl_mem>(static_cast<const _cl_mem *>(kernelArguments[argIndex].object));
|
||||
auto memObj = castToObjectOrAbort<MemObj>(clMem);
|
||||
if (memObj->getMultiGraphicsAllocation().isCoherent()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
cl_int Kernel::setArgLocal(uint32_t argIndexIn,
|
||||
size_t argSize,
|
||||
const void *argVal) {
|
||||
|
|
|
@ -289,7 +289,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
|||
// residency for kernel surfaces
|
||||
MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
|
||||
MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
|
||||
bool requiresCoherency();
|
||||
void resetSharedObjectsPatchAddresses();
|
||||
bool isUsingSharedObjArgs() const { return usingSharedObjArgs; }
|
||||
bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; }
|
||||
|
|
|
@ -813,42 +813,6 @@ TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueK
|
|||
EXPECT_TRUE(containsHint(expectedHint, userData));
|
||||
}
|
||||
|
||||
// Enqueues a 2-dimensional kernel whose single argument is a buffer with a coherent
// graphics allocation and expects the context to report the KERNEL_REQUIRES_COHERENCY
// performance hint for that kernel's name.
TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIsCalledWithWorkDim2ThenContextProvidesProperHint) {
    auto pClDevice = pPlatform->getClDevice(0);
    auto maxWgSize = static_cast<uint32_t>(pClDevice->getSharedDeviceInfo().maxWorkGroupSize);

    size_t globalSize[3] = {1, 1, 1};
    size_t localSize[3];
    MockKernelWithInternals mockKernel(*pClDevice, context);

    // Pick the local work size with whichever algorithm the debug flag selects.
    if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
        auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();
        WorkSizeInfo wsInfo(maxWgSize, 0u, 32u, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false);
        computeWorkgroupSizeND(wsInfo, localSize, globalSize, 2);
    } else {
        computeWorkgroupSize2D(maxWgSize, localSize, globalSize, 32);
    }

    // Buffer whose allocation is flagged coherent; this is what should trigger the hint.
    auto coherentBuffer = new MockBuffer();
    coherentBuffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);

    Kernel::SimpleKernelArgInfo bufferArg;
    bufferArg.object = static_cast<cl_mem>(coherentBuffer);
    bufferArg.type = Kernel::kernelArgType::BUFFER_OBJ;

    // Install the argument directly and keep the descriptor's explicit-arg count in sync.
    std::vector<Kernel::SimpleKernelArgInfo> kernelArgs;
    kernelArgs.push_back(bufferArg);
    mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
    mockKernel.mockKernel->setKernelArguments(kernelArgs);

    retVal = pCmdQ->enqueueKernel(mockKernel.mockKernel, 2, nullptr, globalSize, localSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const auto &kernelName = mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY], kernelName.c_str());
    EXPECT_TRUE(containsHint(expectedHint, userData));

    delete coherentBuffer;
}
|
||||
|
||||
const int validDimensions[] = {0, 1, 2};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
|
|
|
@ -128,86 +128,6 @@ TEST_F(MultiDeviceKernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenB
|
|||
}
|
||||
}
|
||||
|
||||
// With the kernel's buffer addressing mode set to stateless, setting a MockBuffer
// as argument 0 must succeed and must not make the kernel require coherency.
TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    Buffer *mockBuffer = new MockBuffer();
    cl_mem memHandle = static_cast<cl_mem>(mockBuffer);

    pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;

    // setArg receives a pointer to the cl_mem handle, not the handle itself.
    const auto status = pKernel->setArg(0, sizeof(cl_mem *), &memHandle);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_FALSE(pKernel->requiresCoherency());

    delete mockBuffer;
}
|
||||
|
||||
// Setting a MockBuffer as argument 0 must succeed, must not make the kernel require
// coherency, and must write the buffer's GPU address into the surface state found at
// the argument's bindful offset in the kernel's surface state heap.
HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    Buffer *mockBuffer = new MockBuffer();
    cl_mem memHandle = static_cast<cl_mem>(mockBuffer);

    // setArg receives a pointer to the cl_mem handle, not the handle itself.
    const auto status = pKernel->setArg(0, sizeof(cl_mem *), &memHandle);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_FALSE(pKernel->requiresCoherency());

    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    // Locate the surface state programmed for argument 0 inside the heap.
    auto programmedState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
        ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful));

    auto programmedAddress = programmedState->getSurfaceBaseAddress();
    EXPECT_EQ(mockBuffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), programmedAddress);

    delete mockBuffer;
}
|
||||
|
||||
// Multi-device variant: after setting the fixture buffer as argument 0 on a
// MultiDeviceKernel, every per-root-device kernel must not require coherency and must
// have the buffer's GPU address for that root device programmed in its surface state.
HWTEST_F(MultiDeviceKernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    // Give argument 0 a bindful offset of 0 in every kernel info before the kernels are created.
    for (auto &pInfo : pKernelInfosStorage) {
        pInfo->argAsPtr(0).bindful = 0;
    }

    int32_t retVal = CL_INVALID_VALUE;
    std::unique_ptr<MultiDeviceKernel> multiDeviceKernel(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    // setArg receives a pointer to the cl_mem handle, not the handle itself.
    cl_mem memHandle = pBuffer.get();
    retVal = multiDeviceKernel->setArg(0, sizeof(cl_mem *), &memHandle);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
        auto deviceKernel = multiDeviceKernel->getKernel(rootDeviceIndex);
        EXPECT_FALSE(deviceKernel->requiresCoherency());
        EXPECT_NE(0u, deviceKernel->getSurfaceStateHeapSize());

        // Locate the surface state programmed for argument 0 on this root device.
        auto programmedState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
            ptrOffset(deviceKernel->getSurfaceStateHeap(), kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().bindful));

        auto programmedAddress = programmedState->getSurfaceBaseAddress();
        EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), programmedAddress);
    }
}
|
||||
|
||||
// Setting a buffer whose graphics allocation is flagged coherent as argument 0 must
// succeed and must make the kernel report that it requires coherency.
HWTEST_F(KernelArgBufferTest, GivenBufferFromSvmPtrWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    Buffer *coherentBuffer = new MockBuffer();
    coherentBuffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);

    // setArg receives a pointer to the cl_mem handle, not the handle itself.
    cl_mem memHandle = static_cast<cl_mem>(coherentBuffer);
    const auto status = pKernel->setArg(0, sizeof(cl_mem *), &memHandle);

    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_TRUE(pKernel->requiresCoherency());

    delete coherentBuffer;
}
|
||||
|
||||
TEST_F(KernelArgBufferTest, GivenInvalidBufferWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) {
|
||||
char *ptr = new char[sizeof(Buffer)];
|
||||
|
||||
|
|
Loading…
Reference in New Issue