mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
Pass root device index to Buffer::setArgStateless
Related-To: NEO-4672
Change-Id: I3acb09b14588fd4aad253eb82c34d1c2d192a34e
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
1c312f76e1
commit
ec391d62ee
@@ -1292,7 +1292,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
||||
|
||||
auto patchSize = kernelArgInfo.kernelArgPatchInfoVector[0].size;
|
||||
|
||||
uint64_t addressToPatch = buffer->setArgStateless(patchLocation, patchSize, !this->isBuiltIn);
|
||||
uint64_t addressToPatch = buffer->setArgStateless(patchLocation, patchSize, getDevice().getRootDeviceIndex(), !this->isBuiltIn);
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
PatchInfoData patchInfoData(addressToPatch - buffer->getOffset(), static_cast<uint64_t>(buffer->getOffset()), PatchInfoAllocationType::KernelArg, reinterpret_cast<uint64_t>(getCrossThreadData()), static_cast<uint64_t>(kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset), PatchInfoAllocationType::IndirectObjectHeap, patchSize);
|
||||
|
||||
@@ -359,8 +359,8 @@ Buffer *Buffer::create(Context *context,
|
||||
}
|
||||
|
||||
if (DebugManager.flags.MakeAllBuffersResident.get()) {
|
||||
auto graphicsAllocation = pBuffer->multiGraphicsAllocation.getDefaultGraphicsAllocation();
|
||||
auto rootDeviceEnvironment = pBuffer->executionEnvironment->rootDeviceEnvironments[graphicsAllocation->getRootDeviceIndex()].get();
|
||||
auto graphicsAllocation = pBuffer->getGraphicsAllocation(rootDeviceIndex);
|
||||
auto rootDeviceEnvironment = pBuffer->executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].get();
|
||||
rootDeviceEnvironment->memoryOperationsInterface->makeResident(ArrayRef<GraphicsAllocation *>(&graphicsAllocation, 1));
|
||||
}
|
||||
|
||||
@@ -478,9 +478,9 @@ Buffer *Buffer::createSubBuffer(cl_mem_flags flags,
|
||||
return buffer;
|
||||
}
|
||||
|
||||
uint64_t Buffer::setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing) {
|
||||
uint64_t Buffer::setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing) {
|
||||
// Subbuffers have offset that graphicsAllocation is not aware of
|
||||
auto graphicsAllocation = multiGraphicsAllocation.getDefaultGraphicsAllocation();
|
||||
auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
|
||||
uintptr_t addressToPatch = ((set32BitAddressing) ? static_cast<uintptr_t>(graphicsAllocation->getGpuAddressToPatch()) : static_cast<uintptr_t>(graphicsAllocation->getGpuAddress())) + this->offset;
|
||||
DEBUG_BREAK_IF(!(graphicsAllocation->isLocked() || (addressToPatch != 0) || (graphicsAllocation->getGpuBaseAddress() != 0) ||
|
||||
(this->getCpuAddress() == nullptr && graphicsAllocation->peekSharedHandle())));
|
||||
|
||||
@@ -134,8 +134,7 @@ class Buffer : public MemObj {
|
||||
BufferCreatFunc createFunction = nullptr;
|
||||
bool isSubBuffer();
|
||||
bool isValidSubBufferOffset(size_t offset);
|
||||
uint64_t setArgStateless(void *memory, uint32_t patchSize) { return setArgStateless(memory, patchSize, false); }
|
||||
uint64_t setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing);
|
||||
uint64_t setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing);
|
||||
virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly) = 0;
|
||||
bool bufferRectPitchSet(const size_t *bufferOrigin,
|
||||
const size_t *region,
|
||||
|
||||
@@ -109,7 +109,7 @@ TEST_F(BufferSetArgTest, WhenSettingKernelArgBufferThenGpuAddressIsSet) {
|
||||
|
||||
auto tokenSize = pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size;
|
||||
|
||||
buffer->setArgStateless(pKernelArg, tokenSize);
|
||||
buffer->setArgStateless(pKernelArg, tokenSize, pClDevice->getRootDeviceIndex(), false);
|
||||
|
||||
EXPECT_EQ(reinterpret_cast<void *>(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), *pKernelArg);
|
||||
}
|
||||
@@ -213,7 +213,7 @@ TEST_F(BufferSetArgTest, Given32BitAddressingWhenSettingArgStatelessThenGpuAddre
|
||||
|
||||
auto gpuBase = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress() >> 2;
|
||||
buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setGpuBaseAddress(gpuBase);
|
||||
buffer->setArgStateless(pKernelArg, tokenSize, true);
|
||||
buffer->setArgStateless(pKernelArg, tokenSize, pClDevice->getRootDeviceIndex(), true);
|
||||
|
||||
EXPECT_EQ(reinterpret_cast<void *>(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress() - gpuBase), *pKernelArg);
|
||||
}
|
||||
@@ -234,7 +234,7 @@ TEST_F(BufferSetArgTest, givenBufferWhenOffsetedSubbufferIsPassedToSetKernelArgT
|
||||
|
||||
auto tokenSize = pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size;
|
||||
|
||||
subBuffer->setArgStateless(pKernelArg, tokenSize);
|
||||
subBuffer->setArgStateless(pKernelArg, tokenSize, pClDevice->getRootDeviceIndex(), false);
|
||||
|
||||
EXPECT_EQ(reinterpret_cast<void *>(subBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress() + region.origin), *pKernelArg);
|
||||
delete subBuffer;
|
||||
@@ -253,7 +253,7 @@ TEST_F(BufferSetArgTest, givenCurbeTokenThatSizeIs4BytesWhenStatelessArgIsPatche
|
||||
|
||||
pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeOf4Bytes;
|
||||
|
||||
buffer->setArgStateless(pKernelArg, sizeOf4Bytes);
|
||||
buffer->setArgStateless(pKernelArg, sizeOf4Bytes, pClDevice->getRootDeviceIndex(), false);
|
||||
|
||||
//make sure only 4 bytes are patched
|
||||
auto bufferAddress = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress();
|
||||
|
||||
Reference in New Issue
Block a user