refactor: remove not needed code

Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
This commit is contained in:
Mrozek, Michal 2023-09-11 17:12:15 +00:00 committed by Compute-Runtime-Automation
parent b5e9c10f64
commit cac547946a
7 changed files with 2 additions and 150 deletions

View File

@ -796,10 +796,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
}
bool anyUncacheableArgs = false;
auto requiresCoherency = false;
for (auto surface : createRange(surfaces, surfaceCount)) {
surface->makeResident(getGpgpuCommandStreamReceiver());
requiresCoherency |= surface->isCoherent;
if (!surface->allowsL3Caching()) {
anyUncacheableArgs = true;
}
@ -819,7 +817,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
continue;
}
kernel->makeResident(getGpgpuCommandStreamReceiver());
requiresCoherency |= kernel->requiresCoherency();
mediaSamplerRequired |= kernel->isVmeKernel();
auto numGrfRequiredByKernel = static_cast<uint32_t>(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired);
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
@ -886,7 +883,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
multiDispatchInfo.usesSlm(), // useSLM
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled() || commandType == CL_COMMAND_FILL_BUFFER, // guardCommandBufferWithPipeControl
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
requiresCoherency, // requiresCoherency
false, // requiresCoherency
(QueuePriority::LOW == priority), // lowPriority
implicitFlush, // implicitFlush
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed

View File

@ -107,9 +107,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
if (kernel.hasPrintfOutput()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str());
}
if (kernel.requiresCoherency()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, KERNEL_REQUIRES_COHERENCY, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str());
}
}
if (kernelInfo.builtinDispatchBuilder != nullptr) {

View File

@ -154,12 +154,10 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
IndirectHeap *ioh = kernelOperation->ioh.get();
IndirectHeap *ssh = kernelOperation->ssh.get();
auto requiresCoherency = false;
auto anyUncacheableArgs = false;
for (auto &surface : surfaces) {
DEBUG_BREAK_IF(!surface);
surface->makeResident(commandStreamReceiver);
requiresCoherency |= surface->isCoherent;
if (!surface->allowsL3Caching()) {
anyUncacheableArgs = true;
}
@ -211,7 +209,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
slmUsed, // useSLM
!commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), // guardCommandBufferWithPipeControl
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
requiresCoherency, // requiresCoherency
false, // requiresCoherency
commandQueue.getPriority() == QueuePriority::LOW, // lowPriority
false, // implicitFlush
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed

View File

@ -1398,29 +1398,6 @@ void Kernel::getResidency(std::vector<Surface *> &dst) {
gtpinNotifyUpdateResidencyList(this, &dst);
}
bool Kernel::requiresCoherency() {
auto numArgs = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.size();
for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) {
if (kernelArguments[argIndex].object) {
if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) {
auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object;
if (pSVMAlloc->isCoherent()) {
return true;
}
}
if (Kernel::isMemObj(kernelArguments[argIndex].type)) {
auto clMem = const_cast<cl_mem>(static_cast<const _cl_mem *>(kernelArguments[argIndex].object));
auto memObj = castToObjectOrAbort<MemObj>(clMem);
if (memObj->getMultiGraphicsAllocation().isCoherent()) {
return true;
}
}
}
}
return false;
}
cl_int Kernel::setArgLocal(uint32_t argIndexIn,
size_t argSize,
const void *argVal) {

View File

@ -289,7 +289,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
// residency for kernel surfaces
MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
bool requiresCoherency();
void resetSharedObjectsPatchAddresses();
bool isUsingSharedObjArgs() const { return usingSharedObjArgs; }
bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; }

View File

@ -813,42 +813,6 @@ TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueK
EXPECT_TRUE(containsHint(expectedHint, userData));
}
// Enqueues a 2D kernel whose single buffer argument is backed by an allocation marked
// coherent, then checks that the context reported the KERNEL_REQUIRES_COHERENCY hint
// formatted with the kernel name.
TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIsCalledWithWorkDim2ThenContextProvidesProperHint) {
    auto pClDevice = pPlatform->getClDevice(0);

    size_t localWorkSize[3];
    size_t globalWorkSize[3] = {1, 1, 1};
    auto maxWorkGroupSize = static_cast<uint32_t>(pClDevice->getSharedDeviceInfo().maxWorkGroupSize);

    MockKernelWithInternals mockKernel(*pClDevice, context);
    Kernel::SimpleKernelArgInfo coherentArg;

    // Pick the local work size with whichever work-size algorithm the debug flag selects.
    if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
        auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();
        WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false);
        computeWorkgroupSizeND(wsInfo, localWorkSize, globalWorkSize, 2);
    } else {
        computeWorkgroupSize2D(maxWorkGroupSize, localWorkSize, globalWorkSize, 32);
    }

    // Buffer whose graphics allocation is flagged as coherent.
    auto buffer = new MockBuffer();
    buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);
    auto clBuffer = (cl_mem)buffer;

    coherentArg.object = clBuffer;
    coherentArg.type = Kernel::kernelArgType::BUFFER_OBJ;

    std::vector<Kernel::SimpleKernelArgInfo> kernelArguments;
    kernelArguments.resize(1);
    kernelArguments[0] = coherentArg;

    mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
    mockKernel.mockKernel->setKernelArguments(kernelArguments);

    retVal = pCmdQ->enqueueKernel(mockKernel.mockKernel, 2, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const auto &kernelName = mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY], kernelName.c_str());
    EXPECT_TRUE(containsHint(expectedHint, userData));

    delete buffer;
}
const int validDimensions[] = {0, 1, 2};
INSTANTIATE_TEST_CASE_P(

View File

@ -128,86 +128,6 @@ TEST_F(MultiDeviceKernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenB
}
}
// With stateless buffer addressing selected, setting a MockBuffer as argument 0 must
// succeed and the kernel must not report that it requires coherency.
TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    Buffer *buffer = new MockBuffer();
    auto memHandle = (cl_mem)buffer;

    pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;

    auto status = this->pKernel->setArg(0, sizeof(cl_mem *), &memHandle);
    EXPECT_EQ(CL_SUCCESS, status);

    EXPECT_FALSE(pKernel->requiresCoherency());

    delete buffer;
}
// Setting a MockBuffer as argument 0 must succeed, leave the kernel without a coherency
// requirement, and write the buffer's GPU address into the surface state located at the
// argument's bindful offset in the surface state heap.
HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    Buffer *buffer = new MockBuffer();
    auto memHandle = (cl_mem)buffer;

    auto status = this->pKernel->setArg(0, sizeof(cl_mem *), &memHandle);
    EXPECT_EQ(CL_SUCCESS, status);

    EXPECT_FALSE(pKernel->requiresCoherency());
    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    // Locate the surface state programmed for argument 0.
    auto heapEntry = ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful);
    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(heapEntry);

    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
    EXPECT_EQ(buffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), surfaceAddress);

    delete buffer;
}
// Multi-device variant: after setting the shared buffer as argument 0 on a
// MultiDeviceKernel, every per-root-device kernel must report no coherency requirement
// and have that device's buffer GPU address in the surface state at the bindful offset.
HWTEST_F(MultiDeviceKernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    cl_mem memHandle = pBuffer.get();
    auto pMemHandle = &memHandle;
    int32_t retVal = CL_INVALID_VALUE;

    // Make argument 0 bindful at offset 0 for every kernel info before creating the kernel.
    for (auto &kernelInfo : pKernelInfosStorage) {
        kernelInfo->argAsPtr(0).bindful = 0;
    }

    auto pMultiDeviceKernel = std::unique_ptr<MultiDeviceKernel>(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pMemHandle);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
        auto deviceKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);

        EXPECT_FALSE(deviceKernel->requiresCoherency());
        EXPECT_NE(0u, deviceKernel->getSurfaceStateHeapSize());

        auto heapEntry = ptrOffset(deviceKernel->getSurfaceStateHeap(), kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().bindful);
        auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(heapEntry);

        auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
        EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), surfaceAddress);
    }
}
// When the buffer's graphics allocation is marked coherent, setting it as argument 0
// must succeed and the kernel must report that it requires coherency.
HWTEST_F(KernelArgBufferTest, GivenBufferFromSvmPtrWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    Buffer *buffer = new MockBuffer();
    buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);

    auto memHandle = (cl_mem)buffer;

    auto status = this->pKernel->setArg(0, sizeof(cl_mem *), &memHandle);
    EXPECT_EQ(CL_SUCCESS, status);

    EXPECT_TRUE(pKernel->requiresCoherency());

    delete buffer;
}
TEST_F(KernelArgBufferTest, GivenInvalidBufferWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) {
char *ptr = new char[sizeof(Buffer)];