refactor: remove unneeded code
Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
This commit is contained in:
parent
b5e9c10f64
commit
cac547946a
|
@ -796,10 +796,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||||
}
|
}
|
||||||
|
|
||||||
bool anyUncacheableArgs = false;
|
bool anyUncacheableArgs = false;
|
||||||
auto requiresCoherency = false;
|
|
||||||
for (auto surface : createRange(surfaces, surfaceCount)) {
|
for (auto surface : createRange(surfaces, surfaceCount)) {
|
||||||
surface->makeResident(getGpgpuCommandStreamReceiver());
|
surface->makeResident(getGpgpuCommandStreamReceiver());
|
||||||
requiresCoherency |= surface->isCoherent;
|
|
||||||
if (!surface->allowsL3Caching()) {
|
if (!surface->allowsL3Caching()) {
|
||||||
anyUncacheableArgs = true;
|
anyUncacheableArgs = true;
|
||||||
}
|
}
|
||||||
|
@ -819,7 +817,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
kernel->makeResident(getGpgpuCommandStreamReceiver());
|
kernel->makeResident(getGpgpuCommandStreamReceiver());
|
||||||
requiresCoherency |= kernel->requiresCoherency();
|
|
||||||
mediaSamplerRequired |= kernel->isVmeKernel();
|
mediaSamplerRequired |= kernel->isVmeKernel();
|
||||||
auto numGrfRequiredByKernel = static_cast<uint32_t>(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired);
|
auto numGrfRequiredByKernel = static_cast<uint32_t>(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired);
|
||||||
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
|
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
|
||||||
|
@ -886,7 +883,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||||
multiDispatchInfo.usesSlm(), // useSLM
|
multiDispatchInfo.usesSlm(), // useSLM
|
||||||
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled() || commandType == CL_COMMAND_FILL_BUFFER, // guardCommandBufferWithPipeControl
|
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled() || commandType == CL_COMMAND_FILL_BUFFER, // guardCommandBufferWithPipeControl
|
||||||
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
|
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
|
||||||
requiresCoherency, // requiresCoherency
|
false, // requiresCoherency
|
||||||
(QueuePriority::LOW == priority), // lowPriority
|
(QueuePriority::LOW == priority), // lowPriority
|
||||||
implicitFlush, // implicitFlush
|
implicitFlush, // implicitFlush
|
||||||
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
|
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
|
||||||
|
|
|
@ -107,9 +107,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
|
||||||
if (kernel.hasPrintfOutput()) {
|
if (kernel.hasPrintfOutput()) {
|
||||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str());
|
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str());
|
||||||
}
|
}
|
||||||
if (kernel.requiresCoherency()) {
|
|
||||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, KERNEL_REQUIRES_COHERENCY, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (kernelInfo.builtinDispatchBuilder != nullptr) {
|
if (kernelInfo.builtinDispatchBuilder != nullptr) {
|
||||||
|
|
|
@ -154,12 +154,10 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
|
||||||
IndirectHeap *ioh = kernelOperation->ioh.get();
|
IndirectHeap *ioh = kernelOperation->ioh.get();
|
||||||
IndirectHeap *ssh = kernelOperation->ssh.get();
|
IndirectHeap *ssh = kernelOperation->ssh.get();
|
||||||
|
|
||||||
auto requiresCoherency = false;
|
|
||||||
auto anyUncacheableArgs = false;
|
auto anyUncacheableArgs = false;
|
||||||
for (auto &surface : surfaces) {
|
for (auto &surface : surfaces) {
|
||||||
DEBUG_BREAK_IF(!surface);
|
DEBUG_BREAK_IF(!surface);
|
||||||
surface->makeResident(commandStreamReceiver);
|
surface->makeResident(commandStreamReceiver);
|
||||||
requiresCoherency |= surface->isCoherent;
|
|
||||||
if (!surface->allowsL3Caching()) {
|
if (!surface->allowsL3Caching()) {
|
||||||
anyUncacheableArgs = true;
|
anyUncacheableArgs = true;
|
||||||
}
|
}
|
||||||
|
@ -211,7 +209,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
|
||||||
slmUsed, // useSLM
|
slmUsed, // useSLM
|
||||||
!commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), // guardCommandBufferWithPipeControl
|
!commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), // guardCommandBufferWithPipeControl
|
||||||
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
|
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
|
||||||
requiresCoherency, // requiresCoherency
|
false, // requiresCoherency
|
||||||
commandQueue.getPriority() == QueuePriority::LOW, // lowPriority
|
commandQueue.getPriority() == QueuePriority::LOW, // lowPriority
|
||||||
false, // implicitFlush
|
false, // implicitFlush
|
||||||
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
|
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
|
||||||
|
|
|
@ -1398,29 +1398,6 @@ void Kernel::getResidency(std::vector<Surface *> &dst) {
|
||||||
gtpinNotifyUpdateResidencyList(this, &dst);
|
gtpinNotifyUpdateResidencyList(this, &dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Kernel::requiresCoherency() {
|
|
||||||
auto numArgs = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.size();
|
|
||||||
for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) {
|
|
||||||
if (kernelArguments[argIndex].object) {
|
|
||||||
if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) {
|
|
||||||
auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object;
|
|
||||||
if (pSVMAlloc->isCoherent()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Kernel::isMemObj(kernelArguments[argIndex].type)) {
|
|
||||||
auto clMem = const_cast<cl_mem>(static_cast<const _cl_mem *>(kernelArguments[argIndex].object));
|
|
||||||
auto memObj = castToObjectOrAbort<MemObj>(clMem);
|
|
||||||
if (memObj->getMultiGraphicsAllocation().isCoherent()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int Kernel::setArgLocal(uint32_t argIndexIn,
|
cl_int Kernel::setArgLocal(uint32_t argIndexIn,
|
||||||
size_t argSize,
|
size_t argSize,
|
||||||
const void *argVal) {
|
const void *argVal) {
|
||||||
|
|
|
@ -289,7 +289,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
||||||
// residency for kernel surfaces
|
// residency for kernel surfaces
|
||||||
MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
|
MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
|
||||||
MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
|
MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
|
||||||
bool requiresCoherency();
|
|
||||||
void resetSharedObjectsPatchAddresses();
|
void resetSharedObjectsPatchAddresses();
|
||||||
bool isUsingSharedObjArgs() const { return usingSharedObjArgs; }
|
bool isUsingSharedObjArgs() const { return usingSharedObjArgs; }
|
||||||
bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; }
|
bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; }
|
||||||
|
|
|
@ -813,42 +813,6 @@ TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueK
|
||||||
EXPECT_TRUE(containsHint(expectedHint, userData));
|
EXPECT_TRUE(containsHint(expectedHint, userData));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIsCalledWithWorkDim2ThenContextProvidesProperHint) {
|
|
||||||
size_t preferredWorkGroupSize[3];
|
|
||||||
size_t globalWorkGroupSize[3] = {1, 1, 1};
|
|
||||||
auto maxWorkGroupSize = static_cast<uint32_t>(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize);
|
|
||||||
MockKernelWithInternals mockKernel(*pPlatform->getClDevice(0), context);
|
|
||||||
Kernel::SimpleKernelArgInfo kernelArgInfo;
|
|
||||||
|
|
||||||
if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
|
|
||||||
auto &rootDeviceEnvironment = pPlatform->getClDevice(0)->getRootDeviceEnvironment();
|
|
||||||
WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false);
|
|
||||||
computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2);
|
|
||||||
} else
|
|
||||||
computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32);
|
|
||||||
|
|
||||||
auto buffer = new MockBuffer();
|
|
||||||
buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);
|
|
||||||
auto clBuffer = (cl_mem)buffer;
|
|
||||||
|
|
||||||
kernelArgInfo.object = clBuffer;
|
|
||||||
kernelArgInfo.type = Kernel::kernelArgType::BUFFER_OBJ;
|
|
||||||
|
|
||||||
std::vector<Kernel::SimpleKernelArgInfo> kernelArguments;
|
|
||||||
kernelArguments.resize(1);
|
|
||||||
kernelArguments[0] = kernelArgInfo;
|
|
||||||
mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
|
|
||||||
mockKernel.mockKernel->setKernelArguments(kernelArguments);
|
|
||||||
|
|
||||||
retVal = pCmdQ->enqueueKernel(mockKernel.mockKernel, 2, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr);
|
|
||||||
|
|
||||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
|
||||||
|
|
||||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str());
|
|
||||||
EXPECT_TRUE(containsHint(expectedHint, userData));
|
|
||||||
delete buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int validDimensions[] = {0, 1, 2};
|
const int validDimensions[] = {0, 1, 2};
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
|
|
@ -128,86 +128,6 @@ TEST_F(MultiDeviceKernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenB
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
|
|
||||||
Buffer *buffer = new MockBuffer();
|
|
||||||
|
|
||||||
auto val = (cl_mem)buffer;
|
|
||||||
auto pVal = &val;
|
|
||||||
|
|
||||||
pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
|
|
||||||
|
|
||||||
auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
|
|
||||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
|
||||||
EXPECT_FALSE(pKernel->requiresCoherency());
|
|
||||||
|
|
||||||
delete buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
|
|
||||||
Buffer *buffer = new MockBuffer();
|
|
||||||
|
|
||||||
auto val = (cl_mem)buffer;
|
|
||||||
auto pVal = &val;
|
|
||||||
|
|
||||||
auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
|
|
||||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
|
||||||
EXPECT_FALSE(pKernel->requiresCoherency());
|
|
||||||
|
|
||||||
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
|
|
||||||
|
|
||||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
|
||||||
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
|
|
||||||
ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful));
|
|
||||||
|
|
||||||
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
|
|
||||||
EXPECT_EQ(buffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), surfaceAddress);
|
|
||||||
|
|
||||||
delete buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST_F(MultiDeviceKernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
|
|
||||||
cl_mem val = pBuffer.get();
|
|
||||||
auto pVal = &val;
|
|
||||||
|
|
||||||
int32_t retVal = CL_INVALID_VALUE;
|
|
||||||
for (auto &kernelInfo : pKernelInfosStorage) {
|
|
||||||
kernelInfo->argAsPtr(0).bindful = 0;
|
|
||||||
}
|
|
||||||
auto pMultiDeviceKernel = std::unique_ptr<MultiDeviceKernel>(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, retVal));
|
|
||||||
|
|
||||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
|
||||||
|
|
||||||
retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
|
|
||||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
|
||||||
|
|
||||||
for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
|
|
||||||
auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
|
|
||||||
EXPECT_FALSE(pKernel->requiresCoherency());
|
|
||||||
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
|
|
||||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
|
||||||
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
|
|
||||||
ptrOffset(pKernel->getSurfaceStateHeap(), kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().bindful));
|
|
||||||
|
|
||||||
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
|
|
||||||
EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), surfaceAddress);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST_F(KernelArgBufferTest, GivenBufferFromSvmPtrWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
|
|
||||||
|
|
||||||
Buffer *buffer = new MockBuffer();
|
|
||||||
buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);
|
|
||||||
|
|
||||||
auto val = (cl_mem)buffer;
|
|
||||||
auto pVal = &val;
|
|
||||||
|
|
||||||
auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
|
|
||||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
|
||||||
EXPECT_TRUE(pKernel->requiresCoherency());
|
|
||||||
|
|
||||||
delete buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(KernelArgBufferTest, GivenInvalidBufferWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) {
|
TEST_F(KernelArgBufferTest, GivenInvalidBufferWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) {
|
||||||
char *ptr = new char[sizeof(Buffer)];
|
char *ptr = new char[sizeof(Buffer)];
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue