Fixing IntDescr programming for blocked cmd and MT

Fixing InterfaceDescriptor programming for
blocked commands when MidThread preemption is
enabled
Additionally, fixing a couple of tests that block
enabling global preemption in ULTs

Change-Id: I454c9608f8606f23d7446785ac24c7c7d8701ae0
This commit is contained in:
Chodor, Jaroslaw
2018-01-16 13:58:48 +01:00
committed by sys_ocldev
parent 41f0ac3019
commit 044fd1ab81
26 changed files with 180 additions and 67 deletions

View File

@@ -213,6 +213,11 @@ uint32_t CommandQueue::getTaskLevelFromWaitList(uint32_t taskLevel,
return taskLevel;
}
// Returns the size reported by the command stream receiver's
// getInstructionHeapCmdStreamReceiverReservedSize(), rounded up with alignUp
// to MemoryConstants::cacheLineSize.
size_t CommandQueue::getInstructionHeapReservedBlockSize() const {
    const auto csrReservedSize = device->getCommandStreamReceiver().getInstructionHeapCmdStreamReceiverReservedSize();
    return alignUp(csrReservedSize, MemoryConstants::cacheLineSize);
}
IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType,
size_t minRequiredSize) {
DEBUG_BREAK_IF(static_cast<uint32_t>(heapType) >= ARRAY_COUNT(indirectHeap));
@@ -240,8 +245,7 @@ IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType,
size_t reservedSize = 0;
if (heapType == IndirectHeap::INSTRUCTION) {
reservedSize = alignUp(device->getCommandStreamReceiver().getInstructionHeapCmdStreamReceiverReservedSize(),
MemoryConstants::cacheLineSize);
reservedSize = getInstructionHeapReservedBlockSize();
}
minRequiredSize += reservedSize;

View File

@@ -341,6 +341,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
Context *getContextPtr() { return context; }
LinearStream &getCS(size_t minRequiredSize = 1024u);
size_t getInstructionHeapReservedBlockSize() const;
IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType,
size_t minRequiredSize = 0u);

View File

@@ -456,6 +456,7 @@ void dispatchWalker(
bool executionModelKernel = multiDispatchInfo.begin()->getKernel()->isParentKernel;
// Allocate command stream and indirect heaps
size_t cmdQInstructionHeapReservedBlockSize = 0;
if (blockQueue) {
using KCH = KernelCommandsHelper<GfxFamily>;
commandStream = new LinearStream(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize), MemoryConstants::pageSize);
@@ -471,12 +472,15 @@ void dispatchWalker(
ioh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredIOH(multiDispatchInfo); });
}
ish = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredIH(multiDispatchInfo); });
cmdQInstructionHeapReservedBlockSize = commandQueue.getInstructionHeapReservedBlockSize();
ssh = allocateIndirectHeap([&multiDispatchInfo] { return KCH::getTotalSizeRequiredSSH(multiDispatchInfo); });
using UniqueIH = std::unique_ptr<IndirectHeap>;
*blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh),
UniqueIH(ish), UniqueIH(ioh), UniqueIH(ssh));
if (executionModelKernel)
if (executionModelKernel) {
(*blockedCommandsData)->doNotFreeISH = true;
}
} else {
commandStream = &commandQueue.getCS(0);
if (executionModelKernel && (commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getUsed() > 0)) {
@@ -582,6 +586,7 @@ void dispatchWalker(
*commandStream,
*dsh,
*ish,
cmdQInstructionHeapReservedBlockSize,
*ioh,
*ssh,
kernel,
@@ -749,6 +754,7 @@ void dispatchScheduler(
*commandStream,
*dsh,
*ish,
0,
*ioh,
*ssh,
scheduler,

View File

@@ -36,17 +36,23 @@
#endif
template <typename T>
inline T alignUp(T ptrBefore, size_t alignment) {
auto addrBefore = (uintptr_t)ptrBefore;
auto addrAfter = (addrBefore + alignment - 1) & ~(alignment - 1);
return (T)addrAfter;
constexpr inline T alignUp(T before, size_t alignment) {
return static_cast<T>((static_cast<size_t>(before) + alignment - 1) & ~(alignment - 1));
}
template <typename T>
inline T alignDown(T ptrBefore, size_t alignment) {
auto addrBefore = (uintptr_t)ptrBefore;
auto addrAfter = addrBefore & ~(alignment - 1);
return (T)addrAfter;
constexpr inline T *alignUp(T *ptrBefore, size_t alignment) {
return reinterpret_cast<T *>(alignUp(reinterpret_cast<uintptr_t>(ptrBefore), alignment));
}
// Rounds an integral value down to the nearest lower multiple of `alignment`.
// The low bits selected by (alignment - 1) are subtracted from the value, which
// is bit-for-bit the same as masking them off with ~(alignment - 1).
// NOTE(review): the mask trick only yields a true multiple when `alignment` is a
// power of two - presumably all callers guarantee that; confirm.
// NOTE(review): the value is funnelled through size_t, so a T wider than size_t
// would be truncated - confirm no such instantiation exists.
template <typename T>
constexpr inline T alignDown(T before, size_t alignment) {
    return static_cast<T>(static_cast<size_t>(before) - (static_cast<size_t>(before) & (alignment - 1)));
}

// Pointer flavour: converts the address to uintptr_t, aligns it with the
// integral overload above and converts the result back to T *.
template <typename T>
constexpr inline T *alignDown(T *ptrBefore, size_t alignment) {
    return reinterpret_cast<T *>(alignDown<uintptr_t>(reinterpret_cast<uintptr_t>(ptrBefore), alignment));
}
inline void *alignedMalloc(size_t bytes, size_t alignment) {

View File

@@ -92,6 +92,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
LinearStream &commandStream,
IndirectHeap &dsh,
IndirectHeap &ih,
size_t ihReservedBlockSize,
IndirectHeap &ioh,
IndirectHeap &ssh,
const Kernel &kernel,

View File

@@ -319,6 +319,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
LinearStream &commandStream,
IndirectHeap &dsh,
IndirectHeap &ih,
size_t ihReservedBlockSize,
IndirectHeap &ioh,
IndirectHeap &ssh,
const Kernel &kernel,
@@ -401,7 +402,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
KernelCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
dsh,
offsetInterfaceDescriptor,
kernelStartOffset,
kernelStartOffset + ihReservedBlockSize,
kernel.getCrossThreadDataSize(),
sizePerThreadData,
dstBindingTablePointer,

View File

@@ -166,7 +166,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
IndirectHeap *dsh = nullptr;
IndirectHeap *ioh = nullptr;
IndirectHeap::Type trackedHeaps[] = {IndirectHeap::SURFACE_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::DYNAMIC_STATE, IndirectHeap::INSTRUCTION};
IndirectHeap::Type trackedHeaps[] = {IndirectHeap::SURFACE_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::DYNAMIC_STATE};
for (auto trackedHeap = 0u; trackedHeap < ARRAY_COUNT(trackedHeaps); trackedHeap++) {
if (commandQueue.getIndirectHeap(trackedHeaps[trackedHeap], 0).getUsed() > 0) {
@@ -174,6 +174,10 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
}
}
if (commandQueue.getIndirectHeap(IndirectHeap::INSTRUCTION, 0).getUsed() > commandQueue.getInstructionHeapReservedBlockSize()) {
commandQueue.releaseIndirectHeap(IndirectHeap::INSTRUCTION);
}
if (executionModelKernel) {
dsh = devQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
// In ExecutionModel IOH is the same as DSH to eliminate StateBaseAddress reprogramming for scheduler kernel and blocks.
@@ -195,7 +199,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
IndirectHeap &ish = commandQueue.getIndirectHeap(IndirectHeap::INSTRUCTION, requestedIshSize);
IndirectHeap &ssh = commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, requestedSshSize);
memcpy_s(ish.getBase(), requestedIshSize, kernelOperation->ish->getBase(), kernelOperation->ish->getUsed());
memcpy_s(ptrOffset(ish.getBase(), commandQueue.getInstructionHeapReservedBlockSize()), requestedIshSize, kernelOperation->ish->getBase(), kernelOperation->ish->getUsed());
ish.getSpace(kernelOperation->ish->getUsed());
memcpy_s(ssh.getBase(), requestedSshSize, kernelOperation->ssh->getBase(), kernelOperation->ssh->getUsed());

View File

@@ -33,6 +33,7 @@
#include "unit_tests/fixtures/context_fixture.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/fixtures/memory_management_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_context.h"
@@ -43,6 +44,8 @@
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "test.h"
#include "gmock/gmock-matchers.h"
using namespace OCLRT;
struct CommandQueueHwTest
@@ -376,39 +379,100 @@ HWTEST_F(CommandQueueHwTest, GivenNotCompleteUserEventPassedToEnqueueWhenEventIs
mockCSR->getMemoryManager()->freeGraphicsMemory(printfSurface);
mockCSR->getMemoryManager()->freeGraphicsMemory(constantSurface);
}
typedef CommandQueueHwTest BlockedCommandQueueTest;
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUsedHeapsWhenBlockedCommandIsBeingSubmittedItReloadsThemToZeroToKeepProperOffsets) {
DebugManagerStateRestore debugStateRestore;
bool oldMemsetAllocationsFlag = MemoryManagement::memsetNewAllocations;
MemoryManagement::memsetNewAllocations = true;
DebugManager.flags.ForcePreemptionMode.set(0); // allow default preemption mode
auto deviceWithDefaultPreemptionMode = std::unique_ptr<MockDevice>(DeviceHelper<>::create(nullptr));
this->pDevice->setPreemptionMode(deviceWithDefaultPreemptionMode->getPreemptionMode());
this->pDevice->getCommandStreamReceiver().setPreemptionCsrAllocation(deviceWithDefaultPreemptionMode->getPreemptionAllocation());
DebugManager.flags.DisableResourceRecycling.set(true);
UserEvent userEvent(context);
cl_event blockedEvent = &userEvent;
MockKernelWithInternals mockKernelWithInternals(*pDevice);
mockKernelWithInternals.kernelHeader.KernelHeapSize = sizeof(mockKernelWithInternals.kernelIsa);
auto mockKernel = mockKernelWithInternals.mockKernel;
IndirectHeap::Type heaps[] = {IndirectHeap::INSTRUCTION, IndirectHeap::INDIRECT_OBJECT,
IndirectHeap::DYNAMIC_STATE, IndirectHeap::SURFACE_STATE};
size_t prealocatedHeapSize = 2 * 64 * KB;
for (auto heapType : heaps) {
auto &heap = pCmdQ->getIndirectHeap(heapType, prealocatedHeapSize);
heap.getSpace(16);
memset(heap.getBase(), 0, prealocatedHeapSize);
}
// preallocating memsetted allocations to get predictable results
pCmdQ->getDevice().getMemoryManager()->cleanAllocationList(-1, REUSABLE_ALLOCATION);
DebugManager.flags.DisableResourceRecycling.set(false);
std::set<void *> reusableHeaps;
for (unsigned int i = 0; i < 5; ++i) {
void *mem = alignedMalloc(prealocatedHeapSize, 64);
reusableHeaps.insert(mem);
memset(mem, 0, prealocatedHeapSize);
std::unique_ptr<GraphicsAllocation> reusableAlloc{new MockGraphicsAllocation(mem, prealocatedHeapSize)};
pCmdQ->getDevice().getMemoryManager()->storeAllocation(std::move(reusableAlloc), REUSABLE_ALLOCATION);
}
// disable further allocation reuse
DebugManager.flags.DisableResourceRecycling.set(true);
size_t offset = 0;
size_t size = 1;
cl_event blockedEvent = &userEvent;
auto &ish = pCmdQ->getIndirectHeap(IndirectHeap::INSTRUCTION, 4096u);
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u);
auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u);
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u);
ssh.getSpace(1);
ish.getSpace(1);
ioh.getSpace(1);
dsh.getSpace(1);
auto ishBase = ish.getBase();
auto iohBase = ioh.getBase();
auto dshBase = dsh.getBase();
auto sshBase = ssh.getBase();
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr);
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // blocked command
userEvent.setStatus(CL_COMPLETE);
EXPECT_NE(ishBase, ish.getBase());
EXPECT_NE(iohBase, ioh.getBase());
EXPECT_NE(dshBase, dsh.getBase());
EXPECT_NE(sshBase, ssh.getBase());
// make sure used heaps are from preallocated pool
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::INSTRUCTION, 0).getBase()));
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0).getBase()));
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0).getBase()));
EXPECT_NE(reusableHeaps.end(), reusableHeaps.find(pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getBase()));
pCmdQ->getDevice().getMemoryManager()->cleanAllocationList(-1, REUSABLE_ALLOCATION);
std::unordered_map<int, std::vector<char>> blockedCommandHeaps;
int i = 0;
for (auto heapType : heaps) {
auto &heap = pCmdQ->getIndirectHeap(heapType, 0);
blockedCommandHeaps[static_cast<int>(heaps[i])].assign(reinterpret_cast<char *>(heap.getBase()), reinterpret_cast<char *>(heap.getBase()) + heap.getUsed());
// prepare new heaps for nonblocked command
pCmdQ->releaseIndirectHeap(heapType);
++i;
}
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 0, nullptr, nullptr); // nonblocked command
i = 0;
std::unordered_map<int, std::vector<char>> nonblockedCommandHeaps;
for (auto heapType : heaps) {
auto &heap = pCmdQ->getIndirectHeap(heapType, 0);
nonblockedCommandHeaps[static_cast<int>(heaps[i])].assign(reinterpret_cast<char *>(heap.getBase()), reinterpret_cast<char *>(heap.getBase()) + heap.getUsed());
++i;
}
// expecting blocked command to be programmed identically to a non-blocked counterpart
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::INSTRUCTION)],
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::INSTRUCTION)]));
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::INDIRECT_OBJECT)],
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::INDIRECT_OBJECT)]));
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::DYNAMIC_STATE)],
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::DYNAMIC_STATE)]));
EXPECT_THAT(nonblockedCommandHeaps[static_cast<int>(IndirectHeap::SURFACE_STATE)],
testing::ContainerEq(blockedCommandHeaps[static_cast<int>(IndirectHeap::SURFACE_STATE)]));
for (auto ptr : reusableHeaps) {
alignedFree(ptr);
}
BuiltIns::shutDown();
MemoryManagement::memsetNewAllocations = oldMemsetAllocationsFlag;
}
HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUnusedHeapsWhenBlockedCommandIsBeingSubmittedThenThoseHeapsAreBeingUsed) {

View File

@@ -744,25 +744,36 @@ HWTEST_F(KmdNotifyTests, givenMultipleCommandQueuesWhenMarkerIsEmittedThenGraphi
EXPECT_EQ(commandStreamGraphicsAllocation, commandStreamGraphicsAllocation2);
}
TEST(CommandQueueGetIndirectHeap, whenNewInstructionHeapIsBeingCreatedThenCommandStreamReceiverCanReserveAMemoryBlockAtItsBegining) {
char pattern[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 39, 41};
static_assert(false == isAligned<MemoryConstants::cacheLineSize>(sizeof(pattern)),
"Will be checking for automatic cacheline alignment, so pattern length must not be a multiple of cacheline");
size_t alignedPatternSize = alignUp(sizeof(pattern), MemoryConstants::cacheLineSize);
constexpr char sipPattern[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 39, 41};
static_assert(false == isAligned<MemoryConstants::cacheLineSize>(sizeof(sipPattern)),
"Will be checking for automatic cacheline alignment, so pattern length must not be a multiple of cacheline");
constexpr size_t alignedPatternSize = alignUp(sizeof(sipPattern), MemoryConstants::cacheLineSize);
TEST(CommandQueueGetIndirectHeap, whenNewInstructionHeapIsBeingCreatedThenCommandStreamReceiverCanReserveAMemoryBlockAtItsBegining) {
auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::create<MockDevice>(nullptr));
MockCommandStreamReceiver *csr = new MockCommandStreamReceiver;
mockDevice->resetCommandStreamReceiver(csr);
csr->instructionHeapReserveredData.assign(pattern, pattern + sizeof(pattern));
csr->instructionHeapReserveredData.assign(sipPattern, sipPattern + sizeof(sipPattern));
MockCommandQueue cmdQ{nullptr, mockDevice.get(), nullptr};
IndirectHeap &heap = cmdQ.getIndirectHeap(OCLRT::IndirectHeap::INSTRUCTION, 8192);
EXPECT_LE(8192U, heap.getAvailableSpace());
EXPECT_EQ(alignedPatternSize, heap.getUsed());
ASSERT_LE(sizeof(pattern), heap.getMaxAvailableSpace());
ASSERT_LE(sizeof(sipPattern), heap.getMaxAvailableSpace());
char *reservedBlock = reinterpret_cast<char *>(heap.getBase());
auto dataFoundInReservedBlock = ArrayRef<char>(reservedBlock, sizeof(pattern));
auto dataFoundInReservedBlock = ArrayRef<char>(reservedBlock, sizeof(sipPattern));
auto expectedData = ArrayRef<char>(csr->instructionHeapReserveredData);
EXPECT_THAT(dataFoundInReservedBlock, testing::ContainerEq(expectedData));
}
// Checks that the size the command queue reports for the CSR-reserved instruction
// heap block equals the cacheline-aligned size of the pattern handed to the CSR,
// and that the CSR's own reported size does not exceed that aligned size.
TEST(CommandQueueGetIndirectHeap, whenCheckingForCsrInstructionHeapReservedBlockSizeThenCachelineAlignmentIsExpected) {
    std::unique_ptr<MockDevice> mockDevice{MockDevice::create<MockDevice>(nullptr)};
    auto *csr = new MockCommandStreamReceiver;
    mockDevice->resetCommandStreamReceiver(csr);

    // Fill the CSR's reserved-data buffer with the (deliberately non-cacheline-sized) pattern.
    const char *patternBegin = sipPattern;
    const char *patternEnd = patternBegin + sizeof(sipPattern);
    csr->instructionHeapReserveredData.assign(patternBegin, patternEnd);

    MockCommandQueue cmdQ{nullptr, mockDevice.get(), nullptr};

    EXPECT_GE(alignedPatternSize, csr->getInstructionHeapCmdStreamReceiverReservedSize());
    EXPECT_EQ(alignedPatternSize, cmdQ.getInstructionHeapReservedBlockSize());
}

View File

@@ -292,7 +292,7 @@ HWTEST_F(EnqueueCopyBufferRectTest, 2D_InterfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());
@@ -481,7 +481,7 @@ HWTEST_F(EnqueueCopyBufferRectTest, 3D_InterfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());

View File

@@ -261,7 +261,7 @@ HWTEST_F(EnqueueCopyBufferTest, InterfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)cmdIDD->getKernelStartPointerHigh() << 32) + cmdIDD->getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
EXPECT_NE(0u, cmdIDD->getNumberOfThreadsInGpgpuThreadGroup());
EXPECT_NE(0u, cmdIDD->getCrossThreadConstantDataReadLength());

View File

@@ -192,7 +192,7 @@ HWTEST_F(EnqueueCopyBufferToImageTest, interfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
size_t maxLocalSize = 256u;
auto localWorkSize = std::min(

View File

@@ -195,7 +195,7 @@ HWTEST_F(EnqueueCopyImageTest, interfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
size_t maxLocalSize = 256u;
auto localWorkSize = std::min(maxLocalSize,

View File

@@ -193,7 +193,7 @@ HWTEST_F(EnqueueCopyImageToBufferTest, interfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
size_t maxLocalSize = 256u;
auto localWorkSize = std::min(

View File

@@ -311,7 +311,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, InterfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());

View File

@@ -204,7 +204,7 @@ HWTEST_F(EnqueueFillImageTest, interfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
size_t maxLocalSize = 256u;
auto localWorkSize = std::min(maxLocalSize,

View File

@@ -550,7 +550,7 @@ HWTEST_P(EnqueueWorkItemTests, InterfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());
@@ -1114,9 +1114,10 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueK
EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead();
size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 1 : 0;
EXPECT_EQ(0, mockCsr->flushCalledCount);
EXPECT_EQ(6u, cmdBuffer->surfaces.size());
EXPECT_EQ(6u + csrSurfaceCount, cmdBuffer->surfaces.size());
}
HWTEST_F(EnqueueKernelTest, givenDefaultCommandStreamReceiverWhenClFlushIsCalledThenSuccessIsReturned) {

View File

@@ -329,7 +329,7 @@ HWTEST_F(EnqueueReadBufferRectTest, 2D_InterfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());

View File

@@ -296,7 +296,7 @@ HWTEST_F(EnqueueReadBufferTypeTest, InterfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());

View File

@@ -203,7 +203,7 @@ HWTEST_F(EnqueueReadImageTest, interfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
auto localWorkSize = 4u;
auto simd = 32u;

View File

@@ -302,7 +302,7 @@ HWTEST_F(EnqueueWriteBufferRectTest, 2D_InterfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());

View File

@@ -291,7 +291,7 @@ HWTEST_F(EnqueueWriteBufferTypeTest, InterfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());

View File

@@ -204,7 +204,7 @@ HWTEST_F(EnqueueWriteImageTest, interfaceDescriptorData) {
// Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value.
auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer();
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize());
EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);
// EnqueueWriteImage uses a byte copy. Need to convert to bytes.
auto localWorkSize = 2 * 2 * sizeof(float);

View File

@@ -265,6 +265,7 @@ HWTEST_F(KernelCommandsTest, sendIndirectStateResourceUsage) {
commandStream,
dsh,
ih,
0,
ioh,
ssh,
*kernel,
@@ -346,6 +347,7 @@ HWTEST_F(KernelCommandsTest, usedBindingTableStatePointer) {
commandStream,
dsh,
ih,
0,
ioh,
ssh,
*kernel,
@@ -499,6 +501,7 @@ HWTEST_F(KernelCommandsTest, usedBindingTableStatePointersForGlobalAndConstantAn
commandStream,
dsh,
ih,
0,
ioh,
ssh,
*pKernel,
@@ -729,6 +732,7 @@ HWTEST_F(KernelCommandsTest, GivenKernelWithSamplersWhenIndirectStateIsProgramme
commandStream,
dsh,
ih,
0,
ioh,
ssh,
*kernel,

View File

@@ -50,6 +50,7 @@ std::atomic<size_t> indexAllocation(0);
std::atomic<size_t> indexDeallocation(0);
bool logTraces = false;
int fastLeakDetectionMode = 0;
bool memsetNewAllocations = false;
AllocationEvent eventsAllocated[maxEvents];
AllocationEvent eventsDeallocated[maxEvents];
@@ -241,8 +242,12 @@ using MemoryManagement::allocate;
using MemoryManagement::deallocate;
NO_SANITIZE
inline static void *debugInitMemory(void *p, size_t size) {
return p;
// Zero-fills `size` bytes at `p`, but only when `p` is non-null and
// MemoryManagement::memsetNewAllocations is set; otherwise does nothing.
inline void initMemory(void *p, size_t size) {
    const bool shouldClear = (p != nullptr) && MemoryManagement::memsetNewAllocations;
    if (shouldClear) {
        memset(p, 0, size);
    }
}
#if defined(_WIN32)
@@ -250,22 +255,26 @@ inline static void *debugInitMemory(void *p, size_t size) {
#endif
void *operator new(size_t size) {
void *p = allocate<AllocationEvent::EVENT_NEW, AllocationEvent::EVENT_NEW_FAIL>(size);
return debugInitMemory(p, size);
initMemory(p, size);
return p;
}
void *operator new(size_t size, const std::nothrow_t &) NOEXCEPT {
void *p = allocate<AllocationEvent::EVENT_NEW_NOTHROW, AllocationEvent::EVENT_NEW_NOTHROW_FAIL>(size, std::nothrow);
return debugInitMemory(p, size);
initMemory(p, size);
return p;
}
void *operator new[](size_t size) {
void *p = allocate<AllocationEvent::EVENT_NEW_ARRAY, AllocationEvent::EVENT_NEW_ARRAY_FAIL>(size);
return debugInitMemory(p, size);
initMemory(p, size);
return p;
}
void *operator new[](size_t size, const std::nothrow_t &t) NOEXCEPT {
void *p = allocate<AllocationEvent::EVENT_NEW_ARRAY_NOTHROW, AllocationEvent::EVENT_NEW_ARRAY_NOTHROW_FAIL>(size, std::nothrow);
return debugInitMemory(p, size);
initMemory(p, size);
return p;
}
void operator delete(void *p) throw() {

View File

@@ -76,6 +76,7 @@ extern void *fastEventsDeallocated[maxEvents];
extern std::atomic<int> fastEventsAllocatedCount;
extern std::atomic<int> fastEventsDeallocatedCount;
extern std::atomic<int> fastLeaksDetectionMode;
extern bool memsetNewAllocations;
extern size_t failingAllocation;
extern std::atomic<size_t> numAllocations;