Fixing IntDescr programming for blocked cmd and MT

Fixing InterfaceDescriptor programming for
blocked commands when MidThread preemption is
enabled
Additionally, fixing a couple of tests that block
global preemption enabling in ULTs

Change-Id: I454c9608f8606f23d7446785ac24c7c7d8701ae0
This commit is contained in:
Chodor, Jaroslaw
2018-01-16 13:58:48 +01:00
committed by sys_ocldev
parent 41f0ac3019
commit 044fd1ab81
26 changed files with 180 additions and 67 deletions

View File

@@ -36,17 +36,23 @@
#endif
template <typename T>
inline T alignUp(T ptrBefore, size_t alignment) {
auto addrBefore = (uintptr_t)ptrBefore;
auto addrAfter = (addrBefore + alignment - 1) & ~(alignment - 1);
return (T)addrAfter;
constexpr inline T alignUp(T before, size_t alignment) {
return static_cast<T>((static_cast<size_t>(before) + alignment - 1) & ~(alignment - 1));
}
template <typename T>
inline T alignDown(T ptrBefore, size_t alignment) {
auto addrBefore = (uintptr_t)ptrBefore;
auto addrAfter = addrBefore & ~(alignment - 1);
return (T)addrAfter;
constexpr inline T *alignUp(T *ptrBefore, size_t alignment) {
return reinterpret_cast<T *>(alignUp(reinterpret_cast<uintptr_t>(ptrBefore), alignment));
}
// Rounds `before` down by clearing the bits selected by (alignment - 1).
// The value is converted to size_t for the arithmetic and converted back to T
// for the result.
// NOTE(review): the bit trick presumably expects `alignment` to be a power of
// two — confirm against callers.
template <typename T>
constexpr inline T alignDown(T before, size_t alignment) {
    // x & ~m is the same as x - (x & m): strip the low "remainder" bits.
    return static_cast<T>(static_cast<size_t>(before) -
                          (static_cast<size_t>(before) & (alignment - 1)));
}
// Pointer overload of alignDown: converts `ptrBefore` to uintptr_t, passes that
// integer and `alignment` to alignDown, and converts the result back to T *.
// NOTE(review): reinterpret_cast is not permitted in a constant expression, so
// despite the constexpr specifier this overload presumably only ever executes
// at run time — confirm that is the intent.
template <typename T>
constexpr inline T *alignDown(T *ptrBefore, size_t alignment) {
return reinterpret_cast<T *>(alignDown(reinterpret_cast<uintptr_t>(ptrBefore), alignment));
}
inline void *alignedMalloc(size_t bytes, size_t alignment) {

View File

@@ -92,6 +92,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
LinearStream &commandStream,
IndirectHeap &dsh,
IndirectHeap &ih,
size_t ihReservedBlockSize,
IndirectHeap &ioh,
IndirectHeap &ssh,
const Kernel &kernel,

View File

@@ -319,6 +319,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
LinearStream &commandStream,
IndirectHeap &dsh,
IndirectHeap &ih,
size_t ihReservedBlockSize,
IndirectHeap &ioh,
IndirectHeap &ssh,
const Kernel &kernel,
@@ -401,7 +402,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
KernelCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
dsh,
offsetInterfaceDescriptor,
kernelStartOffset,
kernelStartOffset + ihReservedBlockSize,
kernel.getCrossThreadDataSize(),
sizePerThreadData,
dstBindingTablePointer,

View File

@@ -166,7 +166,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
IndirectHeap *dsh = nullptr;
IndirectHeap *ioh = nullptr;
IndirectHeap::Type trackedHeaps[] = {IndirectHeap::SURFACE_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::DYNAMIC_STATE, IndirectHeap::INSTRUCTION};
IndirectHeap::Type trackedHeaps[] = {IndirectHeap::SURFACE_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::DYNAMIC_STATE};
for (auto trackedHeap = 0u; trackedHeap < ARRAY_COUNT(trackedHeaps); trackedHeap++) {
if (commandQueue.getIndirectHeap(trackedHeaps[trackedHeap], 0).getUsed() > 0) {
@@ -174,6 +174,10 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
}
}
if (commandQueue.getIndirectHeap(IndirectHeap::INSTRUCTION, 0).getUsed() > commandQueue.getInstructionHeapReservedBlockSize()) {
commandQueue.releaseIndirectHeap(IndirectHeap::INSTRUCTION);
}
if (executionModelKernel) {
dsh = devQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
// In ExecutionModel IOH is the same as DSH to eliminate StateBaseAddress reprogramming for scheduler kernel and blocks.
@@ -195,7 +199,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
IndirectHeap &ish = commandQueue.getIndirectHeap(IndirectHeap::INSTRUCTION, requestedIshSize);
IndirectHeap &ssh = commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, requestedSshSize);
memcpy_s(ish.getBase(), requestedIshSize, kernelOperation->ish->getBase(), kernelOperation->ish->getUsed());
memcpy_s(ptrOffset(ish.getBase(), commandQueue.getInstructionHeapReservedBlockSize()), requestedIshSize, kernelOperation->ish->getBase(), kernelOperation->ish->getUsed());
ish.getSpace(kernelOperation->ish->getUsed());
memcpy_s(ssh.getBase(), requestedSshSize, kernelOperation->ssh->getBase(), kernelOperation->ssh->getUsed());