Create commandContainer encoders

Change-Id: I2f27c4de6af9ebbc0210bc5e08bbfa9cb6beec0e
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2020-01-17 08:56:05 +01:00
committed by sys_ocldev
parent 278efbdfe6
commit 00f667723f
35 changed files with 2181 additions and 38 deletions

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2019 Intel Corporation
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -8,6 +8,9 @@ set(NEO_CORE_COMMAND_CONTAINER
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/cmdcontainer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cmdcontainer.h
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder.h
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_base.inl
)
add_subdirectories()
set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_CONTAINER ${NEO_CORE_COMMAND_CONTAINER})

View File

@@ -7,6 +7,7 @@
#include "core/command_container/cmdcontainer.h"
#include "core/command_container/command_encoder.h"
#include "core/command_stream/linear_stream.h"
#include "core/helpers/debug_helpers.h"
#include "core/helpers/heap_helper.h"
@@ -15,8 +16,6 @@
#include "runtime/device/device.h"
#include "runtime/memory_manager/memory_manager.h"
#include <cassert>
namespace NEO {
CommandContainer::~CommandContainer() {
@@ -34,7 +33,11 @@ CommandContainer::~CommandContainer() {
for (auto allocationIndirectHeap : allocationIndirectHeaps) {
heapHelper->storeHeapAllocation(allocationIndirectHeap);
}
for (auto deallocation : deallocationContainer) {
if (((deallocation->getAllocationType() == GraphicsAllocation::AllocationType::INTERNAL_HEAP) || (deallocation->getAllocationType() == GraphicsAllocation::AllocationType::LINEAR_STREAM))) {
getHeapHelper()->storeHeapAllocation(deallocation);
}
}
residencyContainer.clear();
deallocationContainer.clear();
}
@@ -49,13 +52,13 @@ bool CommandContainer::initialize(Device *device) {
heapHelper = std::unique_ptr<HeapHelper>(new HeapHelper(device->getMemoryManager(), device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage(), device->getNumAvailableDevices() > 1u));
size_t alignedSize = alignUp<size_t>(totalCmdBufferSize, MemoryConstants::pageSize64k);
NEO::AllocationProperties properties{0u,
true /* allocateMemory*/,
alignedSize,
GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
(device->getNumAvailableDevices() > 1u) /* multiOsContextCapable */,
false,
{}};
AllocationProperties properties{0u,
true /* allocateMemory*/,
alignedSize,
GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
(device->getNumAvailableDevices() > 1u) /* multiOsContextCapable */,
false,
{}};
auto cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
UNRECOVERABLE_IF(!cmdBufferAllocation);
@@ -80,10 +83,13 @@ bool CommandContainer::initialize(Device *device) {
instructionHeapBaseAddress = device->getMemoryManager()->getInternalHeapBaseAddress(0);
iddBlock = nullptr;
nextIddInBlock = numIddsPerBlock;
return true;
}
void CommandContainer::addToResidencyContainer(NEO::GraphicsAllocation *alloc) {
void CommandContainer::addToResidencyContainer(GraphicsAllocation *alloc) {
if (alloc == nullptr) {
return;
}
@@ -118,7 +124,31 @@ void CommandContainer::reset() {
}
}
IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(NEO::HeapType heapType, size_t sizeRequired, size_t alignment) {
// Reserves `size` bytes in the indirect heap of type `heapType` and returns a
// pointer to them. When the heap does not have `size` bytes available, its
// backing allocation is first replaced with a larger one obtained from the
// heap helper; the previous allocation is queued in the deallocation
// container and the heap type is marked dirty.
void *CommandContainer::getHeapSpaceAllowGrow(HeapType heapType,
                                              size_t size) {
    auto heap = getIndirectHeap(heapType);
    if (heap->getAvailableSpace() >= size) {
        return heap->getSpace(size);
    }

    // New size: twice the current capacity (used + available), but at least
    // available + size, rounded up to a whole page.
    size_t grownSize = heap->getUsed() + heap->getAvailableSpace();
    grownSize *= 2;
    grownSize = std::max(grownSize, heap->getAvailableSpace() + size);
    grownSize = alignUp(grownSize, MemoryConstants::pageSize);

    auto retiredAllocation = getIndirectHeapAllocation(heapType);
    auto freshAllocation = getHeapHelper()->getHeapAllocation(heapType, grownSize, MemoryConstants::pageSize, device->getRootDeviceIndex());
    UNRECOVERABLE_IF(!retiredAllocation);
    UNRECOVERABLE_IF(!freshAllocation);

    // Re-point the heap at the new allocation and its CPU buffer.
    heap->replaceGraphicsAllocation(freshAllocation);
    heap->replaceBuffer(freshAllocation->getUnderlyingBuffer(),
                        freshAllocation->getUnderlyingBufferSize());

    // Book-keeping: the new allocation is added to the residency list, the
    // old one is handed to the deallocation list.
    getResidencyContainer().push_back(freshAllocation);
    getDeallocationContainer().push_back(retiredAllocation);
    setIndirectHeapAllocation(heapType, freshAllocation);
    setHeapDirty(heapType);

    return heap->getSpace(size);
}
IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment) {
auto indirectHeap = getIndirectHeap(heapType);
auto sizeRequested = sizeRequired;
@@ -129,9 +159,9 @@ IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(NEO::HeapTyp
if (indirectHeap->getAvailableSpace() < sizeRequested) {
size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace();
newSize = alignUp(newSize, 4096U);
newSize = alignUp(newSize, MemoryConstants::pageSize);
auto oldAlloc = getIndirectHeapAllocation(heapType);
auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, 4096u, device->getRootDeviceIndex());
auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex());
UNRECOVERABLE_IF(!oldAlloc);
UNRECOVERABLE_IF(!newAlloc);
indirectHeap->replaceGraphicsAllocation(newAlloc);
@@ -151,13 +181,13 @@ IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(NEO::HeapTyp
void CommandContainer::allocateNextCommandBuffer() {
size_t alignedSize = alignUp<size_t>(totalCmdBufferSize, MemoryConstants::pageSize64k);
NEO::AllocationProperties properties{0u,
true /* allocateMemory*/,
alignedSize,
GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
(device->getNumAvailableDevices() > 1u) /* multiOsContextCapable */,
false,
{}};
AllocationProperties properties{0u,
true /* allocateMemory*/,
alignedSize,
GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
(device->getNumAvailableDevices() > 1u) /* multiOsContextCapable */,
false,
{}};
auto cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
UNRECOVERABLE_IF(!cmdBufferAllocation);

View File

@@ -20,6 +20,7 @@ namespace NEO {
class Device;
class GraphicsAllocation;
class LinearStream;
using ResidencyContainer = std::vector<GraphicsAllocation *>;
using CmdBufferContainer = std::vector<GraphicsAllocation *>;
using HeapType = IndirectHeap::Type;
@@ -30,9 +31,17 @@ class CommandContainer : public NonCopyableOrMovableClass {
static constexpr size_t totalCmdBufferSize =
defaultListCmdBufferSize +
MemoryConstants::cacheLineSize +
NEO::CSRequirements::csOverfetchSize;
CSRequirements::csOverfetchSize;
CommandContainer() = default;
CommandContainer() {
for (auto &indirectHeap : indirectHeaps) {
indirectHeap = nullptr;
}
for (auto &allocationIndirectHeap : allocationIndirectHeaps) {
allocationIndirectHeap = nullptr;
}
}
CmdBufferContainer &getCmdBufferAllocations() { return cmdBufferAllocations; }
@@ -54,15 +63,20 @@ class CommandContainer : public NonCopyableOrMovableClass {
uint64_t getInstructionHeapBaseAddress() const { return instructionHeapBaseAddress; }
void *getHeapSpaceAllowGrow(HeapType heapType, size_t size);
bool initialize(Device *device);
virtual ~CommandContainer();
uint32_t slmSize = std::numeric_limits<uint32_t>::max();
static const uint32_t numIddsPerBlock = 64;
uint32_t nextIddInBlock = 0;
uint32_t lastSentNumGrfRequired = 0;
Device *getDevice() const { return device; }
IndirectHeap *getHeapWithRequiredSizeAndAlignment(NEO::HeapType heapType, size_t sizeRequired, size_t alignment);
IndirectHeap *getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment);
void allocateNextCommandBuffer();
void reset();
@@ -71,19 +85,21 @@ class CommandContainer : public NonCopyableOrMovableClass {
bool isAnyHeapDirty() const { return dirtyHeaps != 0; }
void setHeapDirty(HeapType heapType) { dirtyHeaps |= (1u << heapType); }
void setDirtyStateForAllHeaps(bool dirty) { dirtyHeaps = dirty ? std::numeric_limits<uint32_t>::max() : 0; }
void setIddBlock(void *iddBlock) { this->iddBlock = iddBlock; }
void *getIddBlock() { return iddBlock; }
protected:
void *iddBlock = nullptr;
Device *device = nullptr;
std::unique_ptr<HeapHelper> heapHelper;
CmdBufferContainer cmdBufferAllocations;
GraphicsAllocation *allocationIndirectHeaps[HeapType::NUM_TYPES] = {};
uint64_t instructionHeapBaseAddress = 0u;
uint32_t dirtyHeaps = std::numeric_limits<uint32_t>::max();
std::unique_ptr<LinearStream> commandStream;
std::unique_ptr<IndirectHeap> indirectHeaps[HeapType::NUM_TYPES] = {};
std::unique_ptr<IndirectHeap> indirectHeaps[HeapType::NUM_TYPES];
ResidencyContainer residencyContainer;
std::vector<GraphicsAllocation *> deallocationContainer;
};

View File

@@ -0,0 +1,161 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "core/command_container/cmdcontainer.h"
#include "core/command_stream/linear_stream.h"
#include "core/helpers/simd_helper.h"
#include "core/kernel/dispatch_kernel_encoder_interface.h"
#include "runtime/execution_environment/execution_environment.h"
#include <algorithm>
namespace NEO {
// Static helpers, parameterized on the hardware family, for encoding a kernel
// dispatch into a CommandContainer. Only declarations live here; definitions
// are provided by the accompanying .inl files.
template <typename GfxFamily>
struct EncodeDispatchKernel {
using WALKER_TYPE = typename GfxFamily::WALKER_TYPE;
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
// Encodes one kernel dispatch described by dispatchInterface into container.
// When isIndirect is false, pThreadGroupDimensions must be non-null and is
// read as three uint32_t thread-group counts (X, Y, Z).
// NOTE(review): the roles of isPredicate and eventAllocation are not visible
// from this declaration - confirm against the definition.
static void encode(CommandContainer &container,
const void *pThreadGroupDimensions, bool isIndirect, bool isPredicate, DispatchKernelEncoderI *dispatchInterface,
GraphicsAllocation *eventAllocation, Device *device, PreemptionMode preemptionMode);
// Returns a pointer to the next free INTERFACE_DESCRIPTOR_DATA slot of the
// container's current descriptor block and writes that slot's index to
// iddOffset. A new block is taken from the DYNAMIC_STATE heap when the
// current one is exhausted.
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);
// Size estimate that encode() compares against the space left in the command
// stream before deciding to switch to a new command buffer.
// NOTE(review): presumably in bytes - confirm against the definition.
static size_t estimateEncodeDispatchKernelCmdsSize(Device *device);
};
// Static helpers, parameterized on the hardware family, for copying sampler
// state into a dynamic state heap and for compute-mode adjustments.
template <typename GfxFamily>
struct EncodeStates {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
// Alignment applied to the heap before border-color data is copied in.
static const uint32_t alignIndirectStatePointer = MemoryConstants::cacheLineSize;
// Copies the border-color bytes (the range from borderColorOffset up to
// samplerStateOffset in fnDynamicStateHeap) and then samplerCount
// SAMPLER_STATE entries into dsh, points every copied sampler at the copied
// border color, and returns the offset of the sampler states within dsh.
static uint32_t copySamplerState(IndirectHeap *dsh,
uint32_t samplerStateOffset,
uint32_t samplerCount,
uint32_t borderColorOffset,
const void *fnDynamicStateHeap);
// Hook invoked by the kernel-dispatch encoder before the interface
// descriptor is filled in.
static void adjustStateComputeMode(CommandContainer &container);
// Size counterpart of adjustStateComputeMode().
// NOTE(review): presumably the number of command bytes it emits - confirm.
static size_t getAdjustStateComputeModeSize();
};
// Static helpers, parameterized on the hardware family, that encode MI_MATH
// based arithmetic on MMIO registers into a CommandContainer.
template <typename GfxFamily>
struct EncodeMathMMIO {
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
using MI_MATH = typename GfxFamily::MI_MATH;
// Size in bytes of a single MI_STORE_REGISTER_MEM command.
static const size_t size = sizeof(MI_STORE_REGISTER_MEM);
// Encodes a multiplication of the register at `offset` by the immediate
// `val`, followed by a store of the result register to dstAddress.
static void encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress);
// Loads the value at memory address lhsVal and the immediate rhsVal into
// general purpose registers, encodes a subtract via MI_MATH and copies a
// register into the predicate-result register.
static void encodeGreaterThanPredicate(CommandContainer &container, uint64_t lhsVal, uint32_t rhsVal);
// Writes four consecutive ALU instructions starting at pAluParam:
// load srcA, load srcB, `op`, and a store with operands (dest, result).
static void encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, uint32_t srcA, uint32_t srcB, uint32_t op, uint32_t dest, uint32_t result);
// encodeAlu() with the subtract opcode and the carry-flag register as `dest`.
static void encodeAluSubStoreCarry(MI_MATH_ALU_INST_INLINE *pAluParam, uint32_t regA, uint32_t regB);
// encodeAlu() with the add opcode and the accumulator register as `dest`.
static void encodeAluAdd(MI_MATH_ALU_INST_INLINE *pAluParam, uint32_t regA, uint32_t regB);
};
// Static helpers, parameterized on the hardware family, that encode commands
// writing indirect-dispatch parameters into a kernel's cross-thread data.
template <typename GfxFamily>
struct EncodeIndirectParams {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM;
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
using MI_MATH = typename GfxFamily::MI_MATH;
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
// Encodes stores of the three dispatch-dimension registers (X, Y, Z) to
// crossThreadAddress + offsets[0..2].
static void setGroupCountIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress);
// For each of the three dimensions, encodes a multiplication of the
// dispatch-dimension register by lws[i] and a store of the result to
// crossThreadAddress + offsets[i].
static void setGroupSizeIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress, uint32_t lws[3]);
// Returns the byte size of three MI_LOAD_REGISTER_MEM commands.
static size_t getCmdsSizeForIndirectParams();
// Byte-size estimate for the commands emitted by setGroupSizeIndirect().
static size_t getCmdsSizeForSetGroupSizeIndirect();
// Returns the byte size of the three MI_STORE_REGISTER_MEM commands emitted
// by setGroupCountIndirect().
static size_t getCmdsSizeForSetGroupCountIndirect();
};
// Static helpers, parameterized on the hardware family, that encode
// PIPE_CONTROL commands into a CommandContainer's command stream.
template <typename GfxFamily>
struct EncodeFlush {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
// Appends a PIPE_CONTROL with command-streamer stall and DC flush enabled.
static void encode(CommandContainer &container);
// Appends a PIPE_CONTROL with command-streamer stall enabled whose post-sync
// operation writes the immediate `value` to gpuAddress; DC flush is enabled
// only when dcFlushEnable is true.
static void encodeWithQwordWrite(CommandContainer &container, uint64_t gpuAddress,
uint64_t value, bool dcFlushEnable);
};
// Static helpers, parameterized on the hardware family, that encode
// register-load commands (from an immediate, from memory, from another
// register) into a CommandContainer's command stream.
template <typename GfxFamily>
struct EncodeSetMMIO {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM;
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
// Byte sizes of the three command types encoded below.
static const size_t sizeIMM = sizeof(MI_LOAD_REGISTER_IMM);
static const size_t sizeMEM = sizeof(MI_LOAD_REGISTER_MEM);
static const size_t sizeREG = sizeof(MI_LOAD_REGISTER_REG);
// Appends an MI_LOAD_REGISTER_IMM that loads `data` into the register at `offset`.
static void encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data);
// Appends an MI_LOAD_REGISTER_MEM that loads the register at `offset` from
// memory at `address`.
static void encodeMEM(CommandContainer &container, uint32_t offset, uint64_t address);
// Appends an MI_LOAD_REGISTER_REG with srcOffset as the source register and
// dstOffset as the destination register.
static void encodeREG(CommandContainer &container, uint32_t dstOffset, uint32_t srcOffset);
};
// Per-hardware-family encoder for L3 state; only the declaration is given
// here. The kernel-dispatch encoder calls it with enableSLM set to
// "shared local memory size is non-zero" whenever that size changes.
template <typename GfxFamily>
struct EncodeL3State {
static void encode(CommandContainer &container, bool enableSLM);
};
// Per-hardware-family encoder for the media-interface-descriptor-load
// command; only the declaration is given here. It is invoked after a new
// interface-descriptor block has been taken from the dynamic state heap.
template <typename GfxFamily>
struct EncodeMediaInterfaceDescriptorLoad {
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
static void encode(CommandContainer &container);
};
// Per-hardware-family encoder for state base address programming; only the
// declaration is given here. The kernel-dispatch encoder calls it when any
// heap of the container is marked dirty.
template <typename GfxFamily>
struct EncodeStateBaseAddress {
static void encode(CommandContainer &container);
};
// Static helper, parameterized on the hardware family, that encodes a
// register-to-memory store into a CommandContainer's command stream.
template <typename GfxFamily>
struct EncodeStoreMMIO {
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
// Size in bytes of the single command emitted by encode().
static const size_t size = sizeof(MI_STORE_REGISTER_MEM);
// Appends an MI_STORE_REGISTER_MEM that stores the register at `offset` to
// memory at `address`.
static void encode(CommandContainer &container, uint32_t offset, uint64_t address);
};
// Static helpers, parameterized on the hardware family, for programming a
// RENDER_SURFACE_STATE that describes a raw buffer.
template <typename GfxFamily>
struct EncodeSurfaceState {
    using R_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
    using SURFACE_FORMAT = typename R_SURFACE_STATE::SURFACE_FORMAT;
    using AUXILIARY_SURFACE_MODE = typename R_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

    // Alignment value checked against the buffer size by encodeBuffer().
    static constexpr uintptr_t getSurfaceBaseAddressAlignment() { return 4; }

    // Mask with the low alignment bits cleared (complement of alignment - 1).
    static constexpr uintptr_t getSurfaceBaseAddressAlignmentMask() {
        return ~(getSurfaceBaseAddressAlignment() - 1);
    }

    // Fills the surface state at `dst` for a buffer of `size` bytes located at
    // `address`, using the given memory-object-control value and coherency.
    static void encodeBuffer(void *dst, void *address, size_t size, uint32_t mocs,
                             bool cpuCoherent);
};
// Per-hardware-family encoder for compute-mode adjustments; only the
// declaration is given here.
// NOTE(review): numGrfRequired is presumably the kernel's required GRF
// count - confirm against the definition.
template <typename GfxFamily>
struct EncodeComputeMode {
static void adjustComputeMode(CommandContainer &container, uint32_t numGrfRequired);
};
} // namespace NEO

View File

@@ -0,0 +1,294 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "core/command_container/command_encoder.h"
#include "core/command_stream/linear_stream.h"
#include "core/helpers/hw_helper.h"
#include "core/helpers/preamble.h"
#include "core/helpers/register_offsets.h"
#include "core/helpers/simd_helper.h"
#include "core/helpers/string.h"
#include "core/kernel/dispatch_kernel_encoder_interface.h"
#include "runtime/device/device.h"
#include "runtime/execution_environment/execution_environment.h"
#include "runtime/helpers/hardware_commands_helper.h"
#include <algorithm>
namespace NEO {
// Copies a kernel's border-color data and sampler table from
// fnDynamicStateHeap into dsh and returns the offset of the copied sampler
// table inside dsh. Every copied SAMPLER_STATE is patched to reference the
// copied border color. The bytes between borderColorOffset and
// samplerStateOffset are treated as the border-color data.
template <typename Family>
uint32_t EncodeStates<Family>::copySamplerState(IndirectHeap *dsh,
                                                uint32_t samplerStateOffset,
                                                uint32_t samplerCount,
                                                uint32_t borderColorOffset,
                                                const void *fnDynamicStateHeap) {
    auto samplerTableBytes = sizeof(SAMPLER_STATE) * samplerCount;
    auto borderColorBytes = samplerStateOffset - borderColorOffset;

    // Border color first, at indirect-state-pointer alignment.
    dsh->align(alignIndirectStatePointer);
    auto dshBorderColorOffset = static_cast<uint32_t>(dsh->getUsed());
    auto dstBorderColor = dsh->getSpace(borderColorBytes);
    memcpy_s(dstBorderColor, borderColorBytes,
             ptrOffset(fnDynamicStateHeap, borderColorOffset), borderColorBytes);

    // Sampler table next, at sampler-state-pointer alignment.
    dsh->align(INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE);
    auto dshSamplerTableOffset = static_cast<uint32_t>(dsh->getUsed());
    auto dstSamplerTable = dsh->getSpace(samplerTableBytes);
    memcpy_s(dstSamplerTable, samplerTableBytes,
             ptrOffset(fnDynamicStateHeap, samplerStateOffset), samplerTableBytes);

    // Redirect each copied sampler to the border color's new location.
    auto firstSampler = reinterpret_cast<SAMPLER_STATE *>(dstSamplerTable);
    for (auto sampler = firstSampler; sampler != firstSampler + samplerCount; ++sampler) {
        sampler->setIndirectStatePointer(dshBorderColorOffset);
    }

    return dshSamplerTableOffset;
}
// Encodes commands that multiply the MMIO register at `offset` by the
// immediate `val` and store the result register (GPR R1) to dstAddress.
//
// Scheme: GPR R0 is loaded from the source register and GPR R1 from the
// immediate 0. For every significant bit of `val`, lowest first, one
// encodeAluAdd(R0, R0) is emitted (intended as R0 += R0), preceded by an
// encodeAluAdd(R1, R0) (intended as R1 += R0) when that bit is set.
//
// container  - receives the commands in its command stream
// offset     - MMIO offset of the register to multiply
// val        - immediate multiplier; the full 32-bit range is supported
// dstAddress - GPU address that receives GPR R1
template <typename Family>
void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress) {
    constexpr uint32_t bitsInVal = 32u;

    // Count the significant bits of val and the adds they require. The shift
    // count is bounded explicitly: shifting a 32-bit value by 32 is undefined,
    // which would otherwise happen whenever bit 31 of val is set.
    uint32_t significantBits = 0u;
    uint32_t addsCount = 0u;
    while (significantBits < bitsInVal && (val >> significantBits) != 0u) {
        if (val & (1u << significantBits)) {
            addsCount++;
        }
        significantBits++;
        addsCount++;
    }

    EncodeSetMMIO<Family>::encodeREG(container, CS_GPR_R0, offset);
    EncodeSetMMIO<Family>::encodeIMM(container, CS_GPR_R1, 0);

    // val == 0 needs no arithmetic (R1 already holds 0). Emitting MI_MATH
    // with zero ALU instructions would make DwordLength (length - 1) wrap.
    if (addsCount > 0u) {
        const uint32_t length = NUM_ALU_INST_FOR_READ_MODIFY_WRITE * addsCount;

        auto cmd = reinterpret_cast<uint32_t *>(container.getCommandStream()->getSpace(sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * length));
        auto mathHeader = reinterpret_cast<MI_MATH *>(cmd);
        mathHeader->DW0.Value = 0x0;
        mathHeader->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
        mathHeader->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
        mathHeader->DW0.BitField.DwordLength = length - 1;

        // ALU instructions start one dword after the MI_MATH header.
        auto pAluParam = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(cmd + 1);
        for (uint32_t bit = 0u; bit < significantBits; bit++) {
            if (val & (1u << bit)) {
                encodeAluAdd(pAluParam, ALU_REGISTER_R_1, ALU_REGISTER_R_0);
                pAluParam += NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
            }
            encodeAluAdd(pAluParam, ALU_REGISTER_R_0, ALU_REGISTER_R_0);
            pAluParam += NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
        }
    }

    EncodeStoreMMIO<Family>::encode(container, CS_GPR_R1, dstAddress);
}
// Encodes a predicate setup: GPR R0 is loaded from memory at lhsVal, GPR R1
// from the immediate rhsVal, one MI_MATH subtract (R0, R1) is emitted via
// encodeAluSubStoreCarry, and GPR R0 is then copied into the
// predicate-result register.
template <typename Family>
void EncodeMathMMIO<Family>::encodeGreaterThanPredicate(CommandContainer &container, uint64_t lhsVal, uint32_t rhsVal) {
    EncodeSetMMIO<Family>::encodeMEM(container, CS_GPR_R0, lhsVal);
    EncodeSetMMIO<Family>::encodeIMM(container, CS_GPR_R1, rhsVal);

    // One MI_MATH header followed by a single read-modify-write ALU sequence.
    size_t mathCmdSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
    auto rawCmd = reinterpret_cast<uint32_t *>(container.getCommandStream()->getSpace(mathCmdSize));

    auto mathHeader = reinterpret_cast<MI_MATH *>(rawCmd);
    mathHeader->DW0.Value = 0x0;
    mathHeader->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
    mathHeader->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
    mathHeader->DW0.BitField.DwordLength = NUM_ALU_INST_FOR_READ_MODIFY_WRITE - 1;

    // ALU instructions begin one dword past the header.
    auto aluInstructions = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(rawCmd + 1);
    encodeAluSubStoreCarry(aluInstructions, ALU_REGISTER_R_0, ALU_REGISTER_R_1);

    EncodeSetMMIO<Family>::encodeREG(container, CS_PREDICATE_RESULT, CS_GPR_R0);
}
// Writes four consecutive ALU instructions starting at pAluParam:
//   [0] LOAD  SRCA <- srcA
//   [1] LOAD  SRCB <- srcB
//   [2] `op` with both operands zero
//   [3] STORE with operands (dest, result)
// The caller must provide room for four MI_MATH_ALU_INST_INLINE entries.
template <typename Family>
void EncodeMathMMIO<Family>::encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, uint32_t srcA, uint32_t srcB, uint32_t op, uint32_t dest, uint32_t result) {
    const auto writeInstruction = [](MI_MATH_ALU_INST_INLINE &slot, uint32_t opcode, uint32_t operand1, uint32_t operand2) {
        slot.DW0.BitField.ALUOpcode = opcode;
        slot.DW0.BitField.Operand1 = operand1;
        slot.DW0.BitField.Operand2 = operand2;
    };

    writeInstruction(pAluParam[0], ALU_OPCODE_LOAD, ALU_REGISTER_R_SRCA, srcA);
    writeInstruction(pAluParam[1], ALU_OPCODE_LOAD, ALU_REGISTER_R_SRCB, srcB);
    writeInstruction(pAluParam[2], op, 0, 0);
    writeInstruction(pAluParam[3], ALU_OPCODE_STORE, dest, result);
}
// Emits the four-instruction sequence of encodeAlu() for a subtract of
// (regA, regB), passing the carry-flag register as `dest` and regA as
// `result` of the trailing store.
template <typename Family>
void EncodeMathMMIO<Family>::encodeAluSubStoreCarry(MI_MATH_ALU_INST_INLINE *pAluParam, uint32_t regA, uint32_t regB) {
    const uint32_t storeDest = ALU_REGISTER_R_CF;
    const uint32_t storeResult = regA;
    encodeAlu(pAluParam, regA, regB, ALU_OPCODE_SUB, storeDest, storeResult);
}
// Emits the four-instruction sequence of encodeAlu() for an add of
// (regA, regB), passing the accumulator register as `dest` and regA as
// `result` of the trailing store.
template <typename Family>
void EncodeMathMMIO<Family>::encodeAluAdd(MI_MATH_ALU_INST_INLINE *pAluParam, uint32_t regA, uint32_t regB) {
    const uint32_t storeDest = ALU_REGISTER_R_ACCU;
    const uint32_t storeResult = regA;
    encodeAlu(pAluParam, regA, regB, ALU_OPCODE_ADD, storeDest, storeResult);
}
// Encodes three register-to-memory stores, one per dispatch dimension
// (X, Y, Z): each dispatch-dimension register is written to
// crossThreadAddress + offsets[dim].
template <typename Family>
void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress) {
    const uint32_t dispatchDimRegisters[3] = {GPUGPU_DISPATCHDIMX, GPUGPU_DISPATCHDIMY, GPUGPU_DISPATCHDIMZ};

    for (uint32_t dim = 0; dim < 3; dim++) {
        EncodeStoreMMIO<Family>::encode(container, dispatchDimRegisters[dim],
                                        ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[dim]));
    }
}
// For each dispatch dimension (X, Y, Z), encodes a multiplication of the
// dispatch-dimension register by lws[dim] whose result is stored to
// crossThreadAddress + offsets[dim].
template <typename Family>
void EncodeIndirectParams<Family>::setGroupSizeIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress, uint32_t lws[3]) {
    const uint32_t dispatchDimRegisters[3] = {GPUGPU_DISPATCHDIMX, GPUGPU_DISPATCHDIMY, GPUGPU_DISPATCHDIMZ};

    for (uint32_t dim = 0; dim < 3; dim++) {
        EncodeMathMMIO<Family>::encodeMulRegVal(container, dispatchDimRegisters[dim], lws[dim],
                                                ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[dim]));
    }
}
// Appends a PIPE_CONTROL with command-streamer stall and DC flush enabled to
// the container's command stream.
template <typename Family>
void EncodeFlush<Family>::encode(CommandContainer &container) {
    PIPE_CONTROL pipeControl = Family::cmdInitPipeControl;
    pipeControl.setCommandStreamerStallEnable(true);
    pipeControl.setDcFlushEnable(true);

    auto destination = reinterpret_cast<PIPE_CONTROL *>(container.getCommandStream()->getSpace(sizeof(pipeControl)));
    *destination = pipeControl;
}
// Appends a PIPE_CONTROL with command-streamer stall enabled whose post-sync
// operation writes the immediate `value` to gpuAddress. DC flush is switched
// on only when dcFlushEnable is true; otherwise the template default is kept.
template <typename Family>
void EncodeFlush<Family>::encodeWithQwordWrite(CommandContainer &container, uint64_t gpuAddress,
                                               uint64_t value, bool dcFlushEnable) {
    PIPE_CONTROL pipeControl = Family::cmdInitPipeControl;
    pipeControl.setPostSyncOperation(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
    pipeControl.setImmediateData(value);
    pipeControl.setCommandStreamerStallEnable(true);
    if (dcFlushEnable) {
        pipeControl.setDcFlushEnable(true);
    }

    // The 64-bit destination address is programmed as high and low halves.
    pipeControl.setAddressHigh(gpuAddress >> 32u);
    pipeControl.setAddress(static_cast<uint32_t>(gpuAddress));

    auto destination = reinterpret_cast<PIPE_CONTROL *>(container.getCommandStream()->getSpace(sizeof(pipeControl)));
    *destination = pipeControl;
}
// Appends an MI_LOAD_REGISTER_IMM that loads the immediate `data` into the
// register at MMIO `offset`.
template <typename Family>
void EncodeSetMMIO<Family>::encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data) {
    MI_LOAD_REGISTER_IMM loadImm = Family::cmdInitLoadRegisterImm;
    loadImm.setRegisterOffset(offset);
    loadImm.setDataDword(data);

    auto destination = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(container.getCommandStream()->getSpace(sizeof(loadImm)));
    *destination = loadImm;
}
// Appends an MI_LOAD_REGISTER_MEM that loads the register at MMIO `offset`
// from memory at `address`.
template <typename Family>
void EncodeSetMMIO<Family>::encodeMEM(CommandContainer &container, uint32_t offset, uint64_t address) {
    MI_LOAD_REGISTER_MEM loadMem = Family::cmdInitLoadRegisterMem;
    loadMem.setRegisterAddress(offset);
    loadMem.setMemoryAddress(address);

    auto destination = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(container.getCommandStream()->getSpace(sizeof(loadMem)));
    *destination = loadMem;
}
// Appends an MI_LOAD_REGISTER_REG with srcOffset as the source register and
// dstOffset as the destination register.
template <typename Family>
void EncodeSetMMIO<Family>::encodeREG(CommandContainer &container, uint32_t dstOffset, uint32_t srcOffset) {
    MI_LOAD_REGISTER_REG loadReg = Family::cmdInitLoadRegisterReg;
    loadReg.setSourceRegisterAddress(srcOffset);
    loadReg.setDestinationRegisterAddress(dstOffset);

    auto destination = reinterpret_cast<MI_LOAD_REGISTER_REG *>(container.getCommandStream()->getSpace(sizeof(loadReg)));
    *destination = loadReg;
}
// Appends an MI_STORE_REGISTER_MEM that stores the register at MMIO `offset`
// to memory at `address`.
template <typename Family>
void EncodeStoreMMIO<Family>::encode(CommandContainer &container, uint32_t offset, uint64_t address) {
    MI_STORE_REGISTER_MEM storeReg = Family::cmdInitStoreRegisterMem;
    storeReg.setRegisterAddress(offset);
    storeReg.setMemoryAddress(address);

    auto destination = reinterpret_cast<MI_STORE_REGISTER_MEM *>(container.getCommandStream()->getSpace(sizeof(storeReg)));
    *destination = storeReg;
}
// Programs the RENDER_SURFACE_STATE at `dst` as a raw, linear buffer surface.
//
// dst         - memory holding the surface state to fill in
// address     - buffer base address; nullptr selects the NULL surface type
// size        - buffer size in bytes; must satisfy the surface base address
//               alignment, otherwise UNRECOVERABLE_IF fires
// mocs        - memory object control state value
// cpuCoherent - selects IA-coherent (true) or GPU-coherent (false) coherency
template <typename Family>
void EncodeSurfaceState<Family>::encodeBuffer(void *dst, void *address, size_t size, uint32_t mocs,
                                              bool cpuCoherent) {
    auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(dst);
    UNRECOVERABLE_IF(!isAligned<getSurfaceBaseAddressAlignment()>(size));

    // size - 1 is split into the width/height/depth fields; each field is
    // programmed with its part plus one.
    SURFACE_STATE_BUFFER_LENGTH bufferLength = {0};
    bufferLength.Length = static_cast<uint32_t>(size - 1);
    surfaceState->setWidth(bufferLength.SurfaceState.Width + 1);
    surfaceState->setHeight(bufferLength.SurfaceState.Height + 1);
    surfaceState->setDepth(bufferLength.SurfaceState.Depth + 1);

    auto surfaceType = R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL;
    if (address != nullptr) {
        surfaceType = R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
    }
    surfaceState->setSurfaceType(surfaceType);

    surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
    surfaceState->setSurfaceVerticalAlignment(R_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
    surfaceState->setSurfaceHorizontalAlignment(R_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4);
    surfaceState->setTileMode(R_SURFACE_STATE::TILE_MODE_LINEAR);
    surfaceState->setVerticalLineStride(0);
    surfaceState->setVerticalLineStrideOffset(0);
    surfaceState->setMemoryObjectControlState(mocs);
    surfaceState->setSurfaceBaseAddress(reinterpret_cast<uintptr_t>(address));

    auto coherency = R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT;
    if (cpuCoherent) {
        coherency = R_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT;
    }
    surfaceState->setCoherencyType(coherency);

    surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
}
// No-op in this implementation: nothing is written to the container.
template <typename Family>
void EncodeStates<Family>::adjustStateComputeMode(CommandContainer &container) {
}
// Hands out the next INTERFACE_DESCRIPTOR_DATA slot of the container's
// current descriptor block. The slot index is written to iddOffset and a
// pointer to the slot is returned. When the current block is exhausted, a new
// block of numIddsPerBlock descriptors is taken from the DYNAMIC_STATE heap
// (growing it if required) and a media-interface-descriptor-load is encoded.
template <typename Family>
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) {
    const bool blockExhausted = (container.nextIddInBlock == container.numIddsPerBlock);
    if (blockExhausted) {
        auto dynamicStateHeap = container.getIndirectHeap(HeapType::DYNAMIC_STATE);
        dynamicStateHeap->align(HardwareCommandsHelper<Family>::alignInterfaceDescriptorData);

        void *freshBlock = container.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE,
                                                           sizeof(INTERFACE_DESCRIPTOR_DATA) * container.numIddsPerBlock);
        container.setIddBlock(freshBlock);
        container.nextIddInBlock = 0;

        EncodeMediaInterfaceDescriptorLoad<Family>::encode(container);
    }

    // Claim the slot and advance the container's cursor.
    const uint32_t slotIndex = container.nextIddInBlock++;
    iddOffset = slotIndex;

    auto descriptorBlock = static_cast<INTERFACE_DESCRIPTOR_DATA *>(container.getIddBlock());
    return descriptorBlock + slotIndex;
}
// Always returns 0 in this implementation.
template <typename Family>
size_t EncodeStates<Family>::getAdjustStateComputeModeSize() {
return 0;
}
// Returns the byte size of three MI_LOAD_REGISTER_MEM commands.
template <typename Family>
size_t EncodeIndirectParams<Family>::getCmdsSizeForIndirectParams() {
    const size_t singleLoadSize = sizeof(typename Family::MI_LOAD_REGISTER_MEM);
    return 3 * singleLoadSize;
}
// Returns the byte size of three MI_STORE_REGISTER_MEM commands, i.e. one
// store per dispatch dimension.
template <typename Family>
size_t EncodeIndirectParams<Family>::getCmdsSizeForSetGroupCountIndirect() {
    const size_t singleStoreSize = sizeof(MI_STORE_REGISTER_MEM);
    return 3 * singleStoreSize;
}
// Returns three times the summed byte size of one MI_LOAD_REGISTER_REG, one
// MI_LOAD_REGISTER_IMM, one MI_MATH header, one MI_MATH_ALU_INST_INLINE and
// one MI_STORE_REGISTER_MEM.
// NOTE(review): encodeMulRegVal emits NUM_ALU_INST_FOR_READ_MODIFY_WRITE ALU
// instructions per add, while a single ALU instruction is counted here -
// confirm this estimate is sufficient for callers.
template <typename Family>
size_t EncodeIndirectParams<Family>::getCmdsSizeForSetGroupSizeIndirect() {
    const size_t perDimensionSize = sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) + sizeof(MI_STORE_REGISTER_MEM);
    return 3 * perDimensionSize;
}
} // namespace NEO

View File

@@ -0,0 +1,266 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "core/command_container/command_encoder.h"
#include "core/command_stream/linear_stream.h"
#include "core/command_stream/preemption.h"
#include "core/gmm_helper/gmm_helper.h"
#include "core/helpers/simd_helper.h"
#include "core/helpers/state_base_address.h"
#include "core/kernel/dispatch_kernel_encoder_interface.h"
#include "runtime/execution_environment/execution_environment.h"
#include "runtime/helpers/hardware_commands_helper.h"
#include <algorithm>
namespace NEO {
template <typename Family>
void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
const void *pThreadGroupDimensions, bool isIndirect, bool isPredicate, DispatchKernelEncoderI *dispatchInterface,
GraphicsAllocation *eventAllocation, Device *device, PreemptionMode preemptionMode) {
using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END;
auto sizeCrossThreadData = dispatchInterface->getSizeCrossThreadData();
auto sizePerThreadData = dispatchInterface->getSizePerThreadData();
auto sizePerThreadDataForWholeGroup = dispatchInterface->getSizePerThreadDataForWholeGroup();
LinearStream *listCmdBufferStream = container.getCommandStream();
size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(device);
if (container.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) {
auto bbEnd = listCmdBufferStream->getSpaceForCmd<MI_BATCH_BUFFER_END>();
*bbEnd = Family::cmdInitBatchBufferEnd;
container.allocateNextCommandBuffer();
}
WALKER_TYPE cmd = Family::cmdInitGpgpuWalker;
auto idd = Family::cmdInitInterfaceDescriptorData;
{
auto alloc = dispatchInterface->getIsaAllocation();
UNRECOVERABLE_IF(nullptr == alloc);
auto offset = alloc->getGpuAddressToPatch();
idd.setKernelStartPointer(offset);
idd.setKernelStartPointerHigh(0u);
}
EncodeStates<Family>::adjustStateComputeMode(container);
auto threadsPerThreadGroup = dispatchInterface->getThreadsPerThreadGroupCount();
idd.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup);
idd.setBarrierEnable(dispatchInterface->hasBarriers());
idd.setSharedLocalMemorySize(
dispatchInterface->getSlmTotalSize() > 0
? static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(HardwareCommandsHelper<Family>::computeSlmValues(dispatchInterface->getSlmTotalSize()))
: INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K);
{
auto bindingTableStateCount = dispatchInterface->getNumSurfaceStates();
uint32_t bindingTablePointer = 0u;
if (bindingTableStateCount > 0u) {
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSizeSurfaceStateHeapData(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
bindingTablePointer = static_cast<uint32_t>(HardwareCommandsHelper<Family>::pushBindingTableAndSurfaceStates(
*ssh, bindingTableStateCount,
dispatchInterface->getSurfaceStateHeap(),
dispatchInterface->getSizeSurfaceStateHeapData(), bindingTableStateCount,
dispatchInterface->getBindingTableOffset()));
}
idd.setBindingTablePointer(bindingTablePointer);
auto bindingTableStatePrefetchCount = std::min(31u, bindingTableStateCount);
idd.setBindingTableEntryCount(bindingTableStatePrefetchCount);
}
PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, preemptionMode);
auto heap = container.getIndirectHeap(HeapType::DYNAMIC_STATE);
UNRECOVERABLE_IF(!heap);
uint32_t samplerStateOffset = 0;
uint32_t samplerCount = 0;
if (dispatchInterface->getNumSamplers() > 0) {
samplerCount = dispatchInterface->getNumSamplers();
samplerStateOffset = EncodeStates<Family>::copySamplerState(heap, dispatchInterface->getSamplerTableOffset(),
dispatchInterface->getNumSamplers(),
dispatchInterface->getBorderColor(),
dispatchInterface->getDynamicStateHeap());
}
idd.setSamplerStatePointer(samplerStateOffset);
auto samplerCountState =
static_cast<typename INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT>((samplerCount + 3) / 4);
idd.setSamplerCount(samplerCountState);
auto numGrfCrossThreadData = static_cast<uint32_t>(sizeCrossThreadData / sizeof(float[8]));
DEBUG_BREAK_IF(numGrfCrossThreadData <= 0u);
idd.setCrossThreadConstantDataReadLength(numGrfCrossThreadData);
auto numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / sizeof(float[8]));
DEBUG_BREAK_IF(numGrfPerThreadData <= 0u);
idd.setConstantIndirectUrbEntryReadLength(numGrfPerThreadData);
uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData;
uint64_t offsetThreadData = 0u;
{
auto heapIndirect = container.getIndirectHeap(HeapType::INDIRECT_OBJECT);
UNRECOVERABLE_IF(!(heapIndirect));
heapIndirect->align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
auto ptr = container.getHeapSpaceAllowGrow(HeapType::INDIRECT_OBJECT, sizeThreadData);
UNRECOVERABLE_IF(!(ptr));
offsetThreadData = heapIndirect->getHeapGpuStartOffset() + static_cast<uint64_t>(heapIndirect->getUsed() - sizeThreadData);
memcpy_s(ptr, sizeCrossThreadData,
dispatchInterface->getCrossThread(), sizeCrossThreadData);
if (isIndirect) {
void *gpuPtr = reinterpret_cast<void *>(heapIndirect->getHeapGpuBase() + heapIndirect->getUsed() - sizeThreadData);
if (dispatchInterface->hasGroupCounts()) {
EncodeIndirectParams<Family>::setGroupCountIndirect(container, dispatchInterface->getCountOffsets(), gpuPtr);
}
if (dispatchInterface->hasGroupSize()) {
EncodeIndirectParams<Family>::setGroupSizeIndirect(container, dispatchInterface->getSizeOffsets(), gpuPtr, dispatchInterface->getLocalWorkSize());
}
}
ptr = ptrOffset(ptr, sizeCrossThreadData);
memcpy_s(ptr, sizePerThreadDataForWholeGroup,
dispatchInterface->getPerThread(), sizePerThreadDataForWholeGroup);
}
auto slmSizeNew = dispatchInterface->getSlmTotalSize();
bool flush = container.slmSize != slmSizeNew || container.isAnyHeapDirty();
if (flush) {
EncodeFlush<Family>::encode(container);
if (container.slmSize != slmSizeNew) {
EncodeL3State<Family>::encode(container, slmSizeNew != 0u);
container.slmSize = slmSizeNew;
if (container.nextIddInBlock != container.numIddsPerBlock) {
EncodeMediaInterfaceDescriptorLoad<Family>::encode(container);
}
}
if (container.isAnyHeapDirty()) {
EncodeStateBaseAddress<Family>::encode(container);
container.setDirtyStateForAllHeaps(false);
}
}
uint32_t numIDD = 0u;
void *ptr = getInterfaceDescriptor(container, numIDD);
memcpy_s(ptr, sizeof(idd), &idd, sizeof(idd));
cmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
cmd.setIndirectDataLength(sizeThreadData);
cmd.setInterfaceDescriptorOffset(numIDD);
if (isIndirect) {
cmd.setIndirectParameterEnable(true);
} else {
UNRECOVERABLE_IF(!pThreadGroupDimensions);
auto threadDims = static_cast<const uint32_t *>(pThreadGroupDimensions);
cmd.setThreadGroupIdXDimension(threadDims[0]);
cmd.setThreadGroupIdYDimension(threadDims[1]);
cmd.setThreadGroupIdZDimension(threadDims[2]);
}
auto simdSize = dispatchInterface->getSimdSize();
auto simdSizeOp = getSimdConfig<WALKER_TYPE>(simdSize);
cmd.setSimdSize(simdSizeOp);
cmd.setRightExecutionMask(dispatchInterface->getPerThreadExecutionMask());
cmd.setBottomExecutionMask(0xffffffff);
cmd.setThreadWidthCounterMaximum(threadsPerThreadGroup);
cmd.setPredicateEnable(isPredicate);
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *device);
auto buffer = listCmdBufferStream->getSpace(sizeof(cmd));
*(decltype(cmd) *)buffer = cmd;
PreemptionHelper::applyPreemptionWaCmdsEnd<Family>(listCmdBufferStream, *device);
{
auto mediaStateFlush = listCmdBufferStream->getSpace(sizeof(MEDIA_STATE_FLUSH));
*reinterpret_cast<MEDIA_STATE_FLUSH *>(mediaStateFlush) = Family::cmdInitMediaStateFlush;
}
}
// Emits a MEDIA_STATE_FLUSH followed by a MEDIA_INTERFACE_DESCRIPTOR_LOAD into
// the container's command stream. The load command points at the container's
// current interface descriptor block, expressed as an offset from the CPU base
// of the dynamic state heap, and spans numIddsPerBlock descriptors.
template <typename Family>
void EncodeMediaInterfaceDescriptorLoad<Family>::encode(CommandContainer &container) {
    using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;

    auto dynamicStateHeap = container.getIndirectHeap(HeapType::DYNAMIC_STATE);

    // The flush is written first, directly in front of the descriptor load.
    auto flushSpace = container.getCommandStream()->getSpace(sizeof(MEDIA_STATE_FLUSH));
    *reinterpret_cast<MEDIA_STATE_FLUSH *>(flushSpace) = Family::cmdInitMediaStateFlush;

    const auto iddBlockOffset = static_cast<uint32_t>(ptrDiff(container.getIddBlock(), dynamicStateHeap->getCpuBase()));
    const auto iddBlockLength = sizeof(INTERFACE_DESCRIPTOR_DATA) * container.numIddsPerBlock;

    MEDIA_INTERFACE_DESCRIPTOR_LOAD loadCmd = Family::cmdInitMediaInterfaceDescriptorLoad;
    loadCmd.setInterfaceDescriptorDataStartAddress(iddBlockOffset);
    loadCmd.setInterfaceDescriptorTotalLength(iddBlockLength);

    auto loadSpace = container.getCommandStream()->getSpace(sizeof(loadCmd));
    *reinterpret_cast<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(loadSpace) = loadCmd;
}
// Programs the state base address through StateBaseAddressHelper using the
// container's command stream. A heap is handed to the helper only when the
// container reports it dirty; otherwise nullptr is passed in its place.
template <typename Family>
void EncodeStateBaseAddress<Family>::encode(CommandContainer &container) {
    auto heapIfDirty = [&container](HeapType heapType) {
        return container.isHeapDirty(heapType) ? container.getIndirectHeap(heapType) : nullptr;
    };

    auto gmmHelper = container.getDevice()->getExecutionEnvironment()->getGmmHelper();
    const auto mocsValue = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1;

    StateBaseAddressHelper<Family>::programStateBaseAddress(
        *container.getCommandStream(),
        heapIfDirty(HeapType::DYNAMIC_STATE),
        heapIfDirty(HeapType::INDIRECT_OBJECT),
        heapIfDirty(HeapType::SURFACE_STATE),
        0,
        mocsValue,
        container.getInstructionHeapBaseAddress(),
        gmmHelper,
        false);
}
// Writes the L3 configuration for the container's device (with or without
// SLM enabled) to the family's L3 control register as an immediate MMIO write.
template <typename Family>
void EncodeL3State<Family>::encode(CommandContainer &container, bool enableSLM) {
    const auto l3RegisterOffset = L3CNTLRegisterOffset<Family>::registerOffset;
    const auto l3Config = PreambleHelper<Family>::getL3Config(container.getDevice()->getHardwareInfo(), enableSLM);

    EncodeSetMMIO<Family>::encodeIMM(container, l3RegisterOffset, l3Config);
}
// Returns the sum of the command sizes accounted for a single kernel dispatch:
// the walker, preemption workaround commands, the trailing MEDIA_STATE_FLUSH,
// a MEDIA_STATE_FLUSH + MEDIA_INTERFACE_DESCRIPTOR_LOAD pair, state compute
// mode adjustment, the indirect parameter commands and an MI_BATCH_BUFFER_END.
template <typename Family>
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device) {
    using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END;

    // A descriptor load is always preceded by its own media state flush.
    const size_t mediaInterfaceDescriptorLoadSize = sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD);

    return sizeof(WALKER_TYPE) +
           PreemptionHelper::getPreemptionWaCsSize<Family>(*device) +
           sizeof(MEDIA_STATE_FLUSH) +
           mediaInterfaceDescriptorLoadSize +
           EncodeStates<Family>::getAdjustStateComputeModeSize() +
           EncodeIndirectParams<Family>::getCmdsSizeForIndirectParams() +
           EncodeIndirectParams<Family>::getCmdsSizeForSetGroupCountIndirect() +
           EncodeIndirectParams<Family>::getCmdsSizeForSetGroupSizeIndirect() +
           sizeof(MI_BATCH_BUFFER_END);
}
} // namespace NEO

View File

@@ -0,0 +1,29 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/command_container/command_encoder.h"
#include "core/command_container/command_encoder.inl"
#include "core/command_container/command_encoder_base.inl"
#include "core/gen11/hw_cmds_base.h"
#include "runtime/gen11/reg_configs.h"
// Gen11 encoder translation unit: the encoder templates whose definitions are
// pulled in through the .inl includes above are explicitly instantiated here.
namespace NEO {
// Hardware family that every instantiation below is generated for.
using Family = ICLFamily;
// Explicit instantiations of each encoder struct for ICLFamily.
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMathMMIO<Family>;
template struct EncodeIndirectParams<Family>;
template struct EncodeFlush<Family>;
template struct EncodeSetMMIO<Family>;
template struct EncodeL3State<Family>;
template struct EncodeMediaInterfaceDescriptorLoad<Family>;
template struct EncodeStateBaseAddress<Family>;
template struct EncodeStoreMMIO<Family>;
template struct EncodeSurfaceState<Family>;
} // namespace NEO

View File

@@ -0,0 +1,49 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/command_container/command_encoder.h"
#include "core/command_container/command_encoder.inl"
#include "core/command_container/command_encoder_base.inl"
#include "core/gen12lp/hw_cmds_base.h"
#include "runtime/gen12lp/reg_configs.h"
namespace NEO {
using Family = TGLLPFamily;
// TGLLP specialization: emits a STATE_COMPUTE_MODE command into the
// container's command stream that forces GPU non-coherent mode, with the
// matching mask bits set.
template <>
void EncodeStates<Family>::adjustStateComputeMode(CommandContainer &container) {
    using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE;
    using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT;

    STATE_COMPUTE_MODE computeModeCmd = Family::cmdInitStateComputeMode;
    computeModeCmd.setForceNonCoherent(FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT);
    computeModeCmd.setMaskBits(Family::stateComputeModeForceNonCoherentMask);

    // Commit the command to the command stream.
    auto cmdSpace = container.getCommandStream()->getSpace(sizeof(computeModeCmd));
    *reinterpret_cast<STATE_COMPUTE_MODE *>(cmdSpace) = computeModeCmd;
}
// TGLLP specialization: size of the single STATE_COMPUTE_MODE command.
template <>
size_t EncodeStates<Family>::getAdjustStateComputeModeSize() {
    using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE;
    return sizeof(STATE_COMPUTE_MODE);
}
// Explicit instantiations of each encoder struct for the Family alias of this
// file. They follow the EncodeStates specializations declared above them.
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMathMMIO<Family>;
template struct EncodeIndirectParams<Family>;
template struct EncodeFlush<Family>;
template struct EncodeSetMMIO<Family>;
template struct EncodeL3State<Family>;
template struct EncodeMediaInterfaceDescriptorLoad<Family>;
template struct EncodeStateBaseAddress<Family>;
template struct EncodeStoreMMIO<Family>;
template struct EncodeSurfaceState<Family>;
} // namespace NEO

View File

@@ -0,0 +1,30 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/command_container/command_encoder.h"
#include "core/command_container/command_encoder.inl"
#include "core/command_container/command_encoder_base.inl"
#include "core/gen8/hw_cmds_base.h"
#include "runtime/gen8/reg_configs.h"
// Gen8 encoder translation unit: the encoder templates whose definitions are
// pulled in through the .inl includes above are explicitly instantiated here.
namespace NEO {
// Hardware family that every instantiation below is generated for.
using Family = BDWFamily;
// Explicit instantiations of each encoder struct for BDWFamily.
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMathMMIO<Family>;
template struct EncodeIndirectParams<Family>;
template struct EncodeFlush<Family>;
template struct EncodeSetMMIO<Family>;
template struct EncodeL3State<Family>;
template struct EncodeMediaInterfaceDescriptorLoad<Family>;
template struct EncodeStateBaseAddress<Family>;
template struct EncodeStoreMMIO<Family>;
template struct EncodeSurfaceState<Family>;
} // namespace NEO

View File

@@ -0,0 +1,30 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/command_container/command_encoder.h"
#include "core/command_container/command_encoder.inl"
#include "core/command_container/command_encoder_base.inl"
#include "core/gen9/hw_cmds_base.h"
#include "runtime/gen9/reg_configs.h"
// Gen9 encoder translation unit: the encoder templates whose definitions are
// pulled in through the .inl includes above are explicitly instantiated here.
namespace NEO {
// Hardware family that every instantiation below is generated for.
using Family = SKLFamily;
// Explicit instantiations of each encoder struct for SKLFamily.
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMathMMIO<Family>;
template struct EncodeIndirectParams<Family>;
template struct EncodeFlush<Family>;
template struct EncodeSetMMIO<Family>;
template struct EncodeL3State<Family>;
template struct EncodeMediaInterfaceDescriptorLoad<Family>;
template struct EncodeStateBaseAddress<Family>;
template struct EncodeStoreMMIO<Family>;
template struct EncodeSurfaceState<Family>;
} // namespace NEO

View File

@@ -1,11 +1,12 @@
#
# Copyright (C) 2019 Intel Corporation
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Files from this directory collected into the NEO_CORE_KERNEL list.
set(NEO_CORE_KERNEL
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_kernel_encoder_interface.h
${CMAKE_CURRENT_SOURCE_DIR}/grf_config.h
)

View File

@@ -0,0 +1,49 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
class GraphicsAllocation;
// Abstract interface through which the command encoders query the kernel being
// dispatched. Every accessor is pure virtual; implementations supply the data.
// The notes below describe how EncodeDispatchKernel::encode consumes the
// values where that is visible; units and exact semantics are otherwise
// defined by the implementing class.
struct DispatchKernelEncoderI {
public:
virtual bool hasBarriers() = 0;
// Compared against the container's current SLM size by the dispatch encoder;
// a non-zero value makes it request an SLM-enabled L3 configuration.
virtual uint32_t getSlmTotalSize() = 0;
virtual uint32_t getBindingTableOffset() = 0;
// Border color and sampler table offset are forwarded to the sampler state
// copy when the kernel reports at least one sampler.
virtual uint32_t getBorderColor() = 0;
virtual uint32_t getSamplerTableOffset() = 0;
virtual uint32_t getNumSurfaceStates() = 0;
virtual uint32_t getNumSamplers() = 0;
// Translated by the dispatch encoder into the walker's SIMD size setting.
virtual uint32_t getSimdSize() = 0;
virtual uint32_t getSizeCrossThreadData() = 0;
virtual uint32_t getPerThreadScratchSize() = 0;
// Programmed by the dispatch encoder as the walker's right execution mask.
virtual uint32_t getPerThreadExecutionMask() = 0;
virtual uint32_t getSizePerThreadData() = 0;
virtual uint32_t getSizePerThreadDataForWholeGroup() = 0;
virtual uint32_t getSizeSurfaceStateHeapData() = 0;
// Offsets and local work size handed to the indirect-parameter encoders for
// indirect dispatches. NOTE(review): presumably each points at a 3-element
// (x, y, z) array such as the protected members below - confirm in implementations.
virtual uint32_t *getCountOffsets() = 0;
virtual uint32_t *getSizeOffsets() = 0;
virtual uint32_t *getLocalWorkSize() = 0;
virtual uint32_t getNumGrfRequired() = 0;
virtual uint32_t getThreadsPerThreadGroupCount() = 0;
virtual GraphicsAllocation *getIsaAllocation() = 0;
// For indirect dispatches these gate whether group counts / group sizes are
// encoded through the indirect-parameter encoders.
virtual bool hasGroupCounts() = 0;
virtual bool hasGroupSize() = 0;
virtual const void *getSurfaceStateHeap() = 0;
// Passed as the source heap to the sampler state copy.
virtual const void *getDynamicStateHeap() = 0;
// Source buffers the dispatch encoder copies into the indirect object heap:
// cross-thread data first, per-thread data directly after it.
virtual const void *getCrossThread() = 0;
virtual const void *getPerThread() = 0;
// Virtual so implementations can be destroyed through the interface.
virtual ~DispatchKernelEncoderI() = default;
protected:
// Zero-initialized storage available to implementations.
// NOTE(review): presumably backs getCountOffsets(), getSizeOffsets() and
// getLocalWorkSize() respectively - confirm in implementations.
uint32_t groupCountOffsets[3] = {};
uint32_t groupSizeOffsets[3] = {};
uint32_t localWorkSize[3] = {};
};
} // namespace NEO

View File

@@ -278,6 +278,7 @@ TEST_F(CommandContainerTest, givenNotEnoughSpaceWhenGetHeapWithRequiredSizeAndAl
for (auto deallocation : cmdContainer->getDeallocationContainer()) {
cmdContainer->getDevice()->getMemoryManager()->freeGraphicsMemory(deallocation);
}
cmdContainer->getDeallocationContainer().clear();
}
TEST_F(CommandContainerTest, whenAllocateNextCmdBufferIsCalledThenNewAllocationIsCreatedAndCommandStreamReplaced) {
@@ -329,3 +330,64 @@ TEST_F(CommandContainerTest, whenResettingCommandContainerThenStoredCmdBuffersAr
EXPECT_EQ(cmdContainer->getCmdBufferAllocations()[0]->getUnderlyingBuffer(), buffer);
EXPECT_EQ(cmdBufSize, stream->getMaxAvailableSpace());
}
// Value-parameterized fixture: every test runs once per IndirectHeap::Type
// and relies on DeviceFixture for device set-up and tear-down.
struct CommandContainerHeaps : DeviceFixture,
                               ::testing::TestWithParam<IndirectHeap::Type> {
    void SetUp() override { DeviceFixture::SetUp(); }

    void TearDown() override { DeviceFixture::TearDown(); }
};
// Instantiates the CommandContainerHeaps tests once for each of the three
// heap types listed below.
INSTANTIATE_TEST_CASE_P(
Device,
CommandContainerHeaps,
testing::Values(
IndirectHeap::DYNAMIC_STATE,
IndirectHeap::INDIRECT_OBJECT,
IndirectHeap::SURFACE_STATE));
// Requesting space from a heap must return a valid pointer and advance the
// heap's used size by exactly the requested amount.
TEST_P(CommandContainerHeaps, givenCommandContainerWhenGetAllowHeapGrowCalledThenHeapIsReturned) {
    const HeapType heapType = GetParam();
    const size_t requestedSize = 5000;

    CommandContainer container;
    container.initialize(pDevice);

    const auto usedBefore = container.getIndirectHeap(heapType)->getUsed();
    void *heapSpace = container.getHeapSpaceAllowGrow(heapType, requestedSize);
    ASSERT_NE(nullptr, heapSpace);

    const auto usedAfter = container.getIndirectHeap(heapType)->getUsed();
    ASSERT_EQ(usedBefore + requestedSize, usedAfter);
}
// Requesting one byte more than the heap has available must still succeed and
// leave the heap with a larger total (used + available) size than before.
TEST_P(CommandContainerHeaps, givenCommandContainerWhenGetingMoreThanAvailableSizeThenBiggerHeapIsReturned) {
    const HeapType heapType = GetParam();

    CommandContainer container;
    container.initialize(pDevice);

    const auto usedBefore = container.getIndirectHeap(heapType)->getUsed();
    const auto availableBefore = container.getIndirectHeap(heapType)->getAvailableSpace();

    void *heapSpace = container.getHeapSpaceAllowGrow(heapType, availableBefore + 1);
    ASSERT_NE(nullptr, heapSpace);

    const auto usedAfter = container.getIndirectHeap(heapType)->getUsed();
    const auto availableAfter = container.getIndirectHeap(heapType)->getAvailableSpace();
    EXPECT_GT(usedAfter + availableAfter, usedBefore + availableBefore);
}
// An allocation placed in the deallocation container must still report its
// size after the container has been destroyed.
TEST_F(CommandContainerTest, givenCommandContainerWhenDestructionThenNonHeapAllocationAreNotDestroyed) {
    auto container = std::unique_ptr<CommandContainer>(new CommandContainer());

    const size_t allocationSize = 0x1000;
    MockGraphicsAllocation mockAllocation;
    mockAllocation.setSize(allocationSize);

    container->initialize(pDevice);
    container->getDeallocationContainer().push_back(&mockAllocation);

    // Destroy the container, then inspect the allocation it was tracking.
    container.reset();
    EXPECT_EQ(mockAllocation.getUnderlyingBufferSize(), allocationSize);
}

View File

@@ -0,0 +1,17 @@
#
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Encoder unit test sources from this directory.
set(NEO_CORE_ENCODERS_TESTS
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_dispatch_kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_flush.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_math.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_media_interface_descriptor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_set_mmio.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_states.cpp
)
add_subdirectories()
# Publish the list as a global property of the same name.
set_property(GLOBAL PROPERTY NEO_CORE_ENCODERS_TESTS ${NEO_CORE_ENCODERS_TESTS})

View File

@@ -0,0 +1,308 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/helpers/ptr_math.h"
#include "core/unit_tests/fixtures/command_container_fixture.h"
#include "core/unit_tests/mocks/mock_dispatch_kernel_encoder_interface.h"
#include "runtime/helpers/hardware_commands_helper.h"
#include "unit_tests/gen_common/gen_cmd_parse.h"
using namespace NEO;
using CommandEncodeStatesTest = Test<CommandEncodeStatesFixture>;
// A direct dispatch must place a walker command in the command stream.
HWTEST_F(CommandEncodeStatesTest, givenenDispatchInterfaceWhenDispatchKernelThenWalkerCommandProgrammed) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;

    uint32_t threadGroupDims[] = {2, 1, 1};
    auto kernelEncoder = std::unique_ptr<MockDispatchKernelEncoder>(new MockDispatchKernelEncoder());

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, threadGroupDims, false, false, kernelEncoder.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    auto cmdStream = cmdContainer->getCommandStream();
    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, ptrOffset(cmdStream->getCpuBase(), 0), cmdStream->getUsed());

    auto walkerIt = find<WALKER_TYPE *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(walkerIt, parsedCmds.end());
}
// When the command stream only has room left for a batch buffer end, encoding
// a dispatch must make the container add another command buffer allocation.
HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhenDispatchKernelThenNextCommandBufferIsAdded) {
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    uint32_t threadGroupDims[] = {2, 1, 1};
    auto kernelEncoder = std::unique_ptr<MockDispatchKernelEncoder>(new MockDispatchKernelEncoder());

    const auto bufferCountBefore = cmdContainer->getCmdBufferAllocations().size();

    // Consume everything except the space of one MI_BATCH_BUFFER_END.
    auto cmdStream = cmdContainer->getCommandStream();
    cmdStream->getSpace(cmdStream->getAvailableSpace() - sizeof(MI_BATCH_BUFFER_END));

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, threadGroupDims, false, false, kernelEncoder.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    const auto bufferCountAfter = cmdContainer->getCmdBufferAllocations().size();
    EXPECT_GT(bufferCountAfter, bufferCountBefore);
}
// With a non-zero SLM size reported by the kernel, the interface descriptor
// must carry the SLM encoding computed by HardwareCommandsHelper.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThanZeroWhenDispatchingKernelThenSharedMemorySizeSetCorrectly) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    uint32_t dims[] = {2, 1, 1};
    std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());

    uint32_t slmTotalSize = 1;
    EXPECT_CALL(*dispatchInterface.get(), getSlmTotalSize()).WillRepeatedly(::testing::Return(slmTotalSize));

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    // The descriptor written by encode() is read back from the container's IDD block.
    auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());

    uint32_t expectedValue = static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(HardwareCommandsHelper<FamilyType>::computeSlmValues(slmTotalSize));
    EXPECT_EQ(expectedValue, interfaceDescriptorData->getSharedLocalMemorySize());
}
// With a zero SLM size reported by the kernel, the interface descriptor must
// carry the "0K" shared local memory encoding.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroWhenDispatchingKernelThenSharedMemorySizeSetCorrectly) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    uint32_t dims[] = {2, 1, 1};
    std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());

    uint32_t slmTotalSize = 0;
    EXPECT_CALL(*dispatchInterface.get(), getSlmTotalSize()).WillRepeatedly(::testing::Return(slmTotalSize));

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    // The descriptor written by encode() is read back from the container's IDD block.
    auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());

    uint32_t expectedValue = INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K;
    EXPECT_EQ(expectedValue, interfaceDescriptorData->getSharedLocalMemorySize());
}
// With one surface state and 0x20 bytes already used in the surface state
// heap, the descriptor's binding table pointer must equal that used size
// aligned up to the surface state pointer alignment.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givennumBindingTableOneWhenDispatchingKernelThenBindingTableOffsetIsCorrect) {
    using ::testing::Return;
    using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    const uint32_t surfaceStateCount = 1;
    BINDING_TABLE_STATE tableState;
    tableState.sInit();

    // Pre-occupy part of the surface state heap before dispatching.
    uint32_t preUsedSize = 0x20;
    auto surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE);
    surfaceStateHeap->getSpace(preUsedSize);
    auto expectedOffset = alignUp(preUsedSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);

    uint32_t threadGroupDims[] = {2, 1, 1};
    auto kernelEncoder = std::unique_ptr<MockDispatchKernelEncoder>(new MockDispatchKernelEncoder());
    EXPECT_CALL(*kernelEncoder, getNumSurfaceStates()).WillRepeatedly(Return(surfaceStateCount));
    EXPECT_CALL(*kernelEncoder, getSurfaceStateHeap()).WillRepeatedly(Return(&tableState));
    EXPECT_CALL(*kernelEncoder, getSizeSurfaceStateHeapData()).WillRepeatedly(Return(static_cast<uint32_t>(sizeof(BINDING_TABLE_STATE))));
    EXPECT_CALL(*kernelEncoder, getBindingTableOffset()).WillRepeatedly(Return(0));

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, threadGroupDims, false, false, kernelEncoder.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
    EXPECT_EQ(idd->getBindingTablePointer(), expectedOffset);
}
// With no surface states reported, the descriptor's binding table pointer
// must stay zero even though the surface state heap is partially used.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhenDispatchingKernelThenBindingTableOffsetIsZero) {
    using ::testing::Return;
    using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    const uint32_t surfaceStateCount = 0;
    BINDING_TABLE_STATE tableState;
    tableState.sInit();

    // Pre-occupy part of the surface state heap before dispatching.
    uint32_t preUsedSize = 0x20;
    auto surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE);
    surfaceStateHeap->getSpace(preUsedSize);

    uint32_t threadGroupDims[] = {2, 1, 1};
    auto kernelEncoder = std::unique_ptr<MockDispatchKernelEncoder>(new MockDispatchKernelEncoder());
    EXPECT_CALL(*kernelEncoder, getNumSurfaceStates()).WillRepeatedly(Return(surfaceStateCount));
    EXPECT_CALL(*kernelEncoder, getSurfaceStateHeap()).WillRepeatedly(Return(&tableState));
    EXPECT_CALL(*kernelEncoder, getSizeSurfaceStateHeapData()).WillRepeatedly(Return(static_cast<uint32_t>(sizeof(BINDING_TABLE_STATE))));
    EXPECT_CALL(*kernelEncoder, getBindingTableOffset()).WillRepeatedly(Return(0));

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, threadGroupDims, false, false, kernelEncoder.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
    EXPECT_EQ(idd->getBindingTablePointer(), 0u);
}
// With one sampler reported, the sampler state found at the descriptor's
// sampler state pointer in the dynamic state heap must match the source state
// once its indirect state pointer is set to the heap usage recorded before
// the dispatch.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchingKernelThensamplerStateWasCopied) {
    using ::testing::Return;
    using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    const uint32_t samplerCount = 1;
    SAMPLER_STATE sourceSampler;
    memset(&sourceSampler, 2, sizeof(SAMPLER_STATE));

    auto dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
    auto dshUsedBefore = dynamicStateHeap->getUsed();

    uint32_t threadGroupDims[] = {2, 1, 1};
    auto kernelEncoder = std::unique_ptr<MockDispatchKernelEncoder>(new MockDispatchKernelEncoder());
    EXPECT_CALL(*kernelEncoder, getNumSamplers()).WillRepeatedly(Return(samplerCount));
    EXPECT_CALL(*kernelEncoder, getSamplerTableOffset()).WillRepeatedly(Return(0));
    EXPECT_CALL(*kernelEncoder, getBorderColor()).WillRepeatedly(Return(0));
    EXPECT_CALL(*kernelEncoder, getDynamicStateHeap()).WillRepeatedly(Return(&sourceSampler));

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, threadGroupDims, false, false, kernelEncoder.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());

    // Patch the expected state with the heap offset recorded before dispatch.
    sourceSampler.setIndirectStatePointer(static_cast<uint32_t>(dshUsedBefore));

    auto samplerOffsetInDsh = idd->getSamplerStatePointer();
    auto copiedSampler = reinterpret_cast<SAMPLER_STATE *>(ptrOffset(dynamicStateHeap->getCpuBase(), samplerOffsetInDsh));
    EXPECT_EQ(memcmp(copiedSampler, &sourceSampler, sizeof(SAMPLER_STATE)), 0);
}
// With no samplers reported, the memory at the descriptor's sampler state
// pointer in the dynamic state heap must not match the source sampler state.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDispatchingKernelThensamplerStateWasNotCopied) {
    using ::testing::Return;
    using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    const uint32_t samplerCount = 0;
    SAMPLER_STATE sourceSampler;
    memset(&sourceSampler, 2, sizeof(SAMPLER_STATE));

    auto dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
    auto dshUsedBefore = dynamicStateHeap->getUsed();

    uint32_t threadGroupDims[] = {2, 1, 1};
    auto kernelEncoder = std::unique_ptr<MockDispatchKernelEncoder>(new MockDispatchKernelEncoder());
    EXPECT_CALL(*kernelEncoder, getNumSamplers()).WillRepeatedly(Return(samplerCount));
    EXPECT_CALL(*kernelEncoder, getSamplerTableOffset()).WillRepeatedly(Return(0));
    EXPECT_CALL(*kernelEncoder, getBorderColor()).WillRepeatedly(Return(0));
    EXPECT_CALL(*kernelEncoder, getDynamicStateHeap()).WillRepeatedly(Return(&sourceSampler));

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, threadGroupDims, false, false, kernelEncoder.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());

    // Same patching as in the copied case, so only a missing copy makes the compare differ.
    sourceSampler.setIndirectStatePointer(static_cast<uint32_t>(dshUsedBefore));

    auto samplerOffsetInDsh = idd->getSamplerStatePointer();
    auto heapSampler = reinterpret_cast<SAMPLER_STATE *>(ptrOffset(dynamicStateHeap->getCpuBase(), samplerOffsetInDsh));
    EXPECT_NE(memcmp(heapSampler, &sourceSampler, sizeof(SAMPLER_STATE)), 0);
}
// For an indirect dispatch whose kernel reports group counts, the command
// stream must contain three MI_STORE_REGISTER_MEM commands.
HWTEST_F(CommandEncodeStatesTest, givenIndarectOffsetsCountsWhenDispatchingKernelThenCorrestMIStoreOffsetsSet) {
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;

    uint32_t dims[] = {2, 1, 1};
    uint32_t offsets[] = {0x10, 0x20, 0x30};
    std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
    EXPECT_CALL(*dispatchInterface.get(), hasGroupCounts()).WillRepeatedly(::testing::Return(true));
    EXPECT_CALL(*dispatchInterface.get(), getCountOffsets()).WillRepeatedly(::testing::Return(offsets));
    EXPECT_CALL(*dispatchInterface.get(), hasGroupSize()).WillRepeatedly(::testing::Return(false));

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, true, false, dispatchInterface.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

    // Search from the current position (including the very first command),
    // assert each hit, and only then step past it. This way all three store
    // commands are verified, including the last one.
    auto itor = commands.begin();
    for (int i = 0; i < 3; i++) {
        itor = find<MI_STORE_REGISTER_MEM *>(itor, commands.end());
        ASSERT_NE(itor, commands.end());
        ++itor;
    }
}
// For an indirect dispatch whose kernel reports a group size, an MI_MATH
// command must be present in the command stream.
HWTEST_F(CommandEncodeStatesTest, givenIndarectOffsetsSizeWhenDispatchingKernelThenMiMathEncoded) {
    using ::testing::Return;
    using MI_MATH = typename FamilyType::MI_MATH;

    uint32_t threadGroupDims[] = {2, 1, 1};
    uint32_t sizeOffsets[] = {0x10, 0x20, 0x30};
    uint32_t localWorkSize[] = {1, 1, 1};

    auto kernelEncoder = std::unique_ptr<MockDispatchKernelEncoder>(new MockDispatchKernelEncoder());
    EXPECT_CALL(*kernelEncoder, hasGroupCounts()).WillRepeatedly(Return(false));
    EXPECT_CALL(*kernelEncoder, getSizeOffsets()).WillRepeatedly(Return(sizeOffsets));
    EXPECT_CALL(*kernelEncoder, hasGroupSize()).WillRepeatedly(Return(true));
    EXPECT_CALL(*kernelEncoder, getLocalWorkSize()).WillRepeatedly(Return(localWorkSize));

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, threadGroupDims, true, false, kernelEncoder.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    auto cmdStream = cmdContainer->getCommandStream();
    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, ptrOffset(cmdStream->getCpuBase(), 0), cmdStream->getUsed());

    auto miMathIt = find<MI_MATH *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(miMathIt, parsedCmds.end());
}
// With clean heaps and an unchanged SLM size, no PIPE_CONTROL may be emitted.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedWhenDispatchKernelThenFlushNotAdded) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    uint32_t threadGroupDims[] = {2, 1, 1};
    auto kernelEncoder = std::unique_ptr<MockDispatchKernelEncoder>(new MockDispatchKernelEncoder());

    // The kernel reports exactly the SLM size the container already tracks.
    cmdContainer->slmSize = 1;
    EXPECT_CALL(*kernelEncoder, getSlmTotalSize()).WillRepeatedly(::testing::Return(cmdContainer->slmSize));
    cmdContainer->setDirtyStateForAllHeaps(false);

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, threadGroupDims, false, false, kernelEncoder.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    auto cmdStream = cmdContainer->getCommandStream();
    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, ptrOffset(cmdStream->getCpuBase(), 0), cmdStream->getUsed());

    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_EQ(pipeControlIt, parsedCmds.end());
}
// With dirty heaps and an unchanged SLM size, a PIPE_CONTROL must be emitted
// and no heap may remain dirty afterwards.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotChangedWhenDispatchKernelThenHeapsAreCleanAndFlushAdded) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    uint32_t threadGroupDims[] = {2, 1, 1};
    auto kernelEncoder = std::unique_ptr<MockDispatchKernelEncoder>(new MockDispatchKernelEncoder());

    // The kernel reports exactly the SLM size the container already tracks.
    cmdContainer->slmSize = 1;
    EXPECT_CALL(*kernelEncoder, getSlmTotalSize()).WillRepeatedly(::testing::Return(cmdContainer->slmSize));
    cmdContainer->setDirtyStateForAllHeaps(true);

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, threadGroupDims, false, false, kernelEncoder.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    auto cmdStream = cmdContainer->getCommandStream();
    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, ptrOffset(cmdStream->getCpuBase(), 0), cmdStream->getUsed());

    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(pipeControlIt, parsedCmds.end());
    EXPECT_FALSE(cmdContainer->isAnyHeapDirty());
}
// With clean heaps but a changed SLM size, a PIPE_CONTROL must be emitted and
// the container must adopt the new SLM size.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChangedWhenDispatchKernelThenFlushAdded) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    uint32_t threadGroupDims[] = {2, 1, 1};
    auto kernelEncoder = std::unique_ptr<MockDispatchKernelEncoder>(new MockDispatchKernelEncoder());

    // The kernel reports an SLM size one larger than the container's current one.
    const auto slmSizeBefore = cmdContainer->slmSize;
    EXPECT_CALL(*kernelEncoder, getSlmTotalSize()).WillRepeatedly(::testing::Return(slmSizeBefore + 1));
    cmdContainer->setDirtyStateForAllHeaps(false);

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, threadGroupDims, false, false, kernelEncoder.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    auto cmdStream = cmdContainer->getCommandStream();
    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, ptrOffset(cmdStream->getCpuBase(), 0), cmdStream->getUsed());

    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(pipeControlIt, parsedCmds.end());
    EXPECT_EQ(slmSizeBefore + 1, cmdContainer->slmSize);
}
// With a freshly allocated IDD block and nextIddInBlock reset to zero, the
// dispatch must emit a MEDIA_INTERFACE_DESCRIPTOR_LOAD.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeorWhenDispatchKernelThenMediaInterfaceDescriptorEncoded) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;

    uint32_t dims[] = {2, 1, 1};
    std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());

    // Reserve an aligned block of interface descriptors in the dynamic state
    // heap and make it the container's current block, starting at index 0.
    cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE)->align(HardwareCommandsHelper<FamilyType>::alignInterfaceDescriptorData);
    cmdContainer->setIddBlock(cmdContainer->getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, sizeof(INTERFACE_DESCRIPTOR_DATA) * cmdContainer->numIddsPerBlock));
    cmdContainer->nextIddInBlock = 0;

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), nullptr, pDevice, NEO::PreemptionMode::Disabled);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

    auto itorMidl = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(commands.begin(), commands.end());
    ASSERT_NE(itorMidl, commands.end());
}

View File

@@ -0,0 +1,77 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/command_container/cmdcontainer.h"
#include "core/command_container/command_encoder.h"
#include "core/unit_tests/mocks/mock_dispatch_kernel_encoder_interface.h"
#include "test.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/gen_common/gen_cmd_parse.h"
using namespace NEO;
using EncodeFlushTest = Test<DeviceFixture>;
// Checks that EncodeFlush::encode adds a PIPE_CONTROL with command-streamer stall and
// DC flush both enabled.
HWTEST_F(EncodeFlushTest, givenCommandContainerWhenEncodeFluchCalledThenCommandIsAdded) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    CommandContainer cmdContainer;
    // Everything below dereferences the container's command stream, so stop here if setup failed.
    ASSERT_TRUE(cmdContainer.initialize(pDevice));

    EncodeFlush<FamilyType>::encode(cmdContainer);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());

    auto itorPC = find<PIPE_CONTROL *>(commands.begin(), commands.end());
    ASSERT_NE(itorPC, commands.end());
    {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
        EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
        EXPECT_TRUE(cmd->getDcFlushEnable());
    }
}
// Checks that encodeWithQwordWrite, called with its last argument set to true, adds a
// PIPE_CONTROL that stalls the command streamer, enables DC flush and carries the
// requested immediate value.
HWTEST_F(EncodeFlushTest, givenCommandContainerAndDcFlushEnabledWhenEncodeWithQWordCalledThenCommandIsAdded) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    CommandContainer cmdContainer;
    // Everything below dereferences the container's command stream, so stop here if setup failed.
    ASSERT_TRUE(cmdContainer.initialize(pDevice));

    uint64_t gpuAddress = 0;
    uint64_t value = 1;
    EncodeFlush<FamilyType>::encodeWithQwordWrite(cmdContainer, gpuAddress, value, true);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());

    auto itorPC = find<PIPE_CONTROL *>(commands.begin(), commands.end());
    ASSERT_NE(itorPC, commands.end());
    {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
        EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
        EXPECT_TRUE(cmd->getDcFlushEnable());
        EXPECT_EQ(cmd->getImmediateData(), value);
    }
}
// Checks that encodeWithQwordWrite, called with its last argument set to false, adds a
// PIPE_CONTROL that stalls the command streamer and carries the requested immediate
// value, but leaves DC flush disabled.
HWTEST_F(EncodeFlushTest, givenCommandContainerAndDcFlushDisabledWhenEncodeWithQWordCalledThenCommandIsAdded) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    CommandContainer cmdContainer;
    // Everything below dereferences the container's command stream, so stop here if setup failed.
    ASSERT_TRUE(cmdContainer.initialize(pDevice));

    uint64_t gpuAddress = 0;
    uint64_t value = 1;
    EncodeFlush<FamilyType>::encodeWithQwordWrite(cmdContainer, gpuAddress, value, false);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());

    auto itorPC = find<PIPE_CONTROL *>(commands.begin(), commands.end());
    ASSERT_NE(itorPC, commands.end());
    {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
        EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
        EXPECT_FALSE(cmd->getDcFlushEnable());
        EXPECT_EQ(cmd->getImmediateData(), value);
    }
}

View File

@@ -0,0 +1,183 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/command_container/command_encoder.h"
#include "core/helpers/register_offsets.h"
#include "test.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/gen_common/gen_cmd_parse.h"
using namespace NEO;
using EncodeMathMMIOTest = testing::Test;
// encodeAluAdd is handed five zeroed ALU slots; the test pins the opcode and operands of the
// first four (LOAD, LOAD, ADD, STORE) and requires the fifth to stay untouched.
HWTEST_F(EncodeMathMMIOTest, encodeAluAddHasCorrectOpcodesOperands) {
    using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;

    uint32_t regA = ALU_REGISTER_R_0;
    uint32_t regB = ALU_REGISTER_R_1;

    MI_MATH_ALU_INST_INLINE aluParam[5];
    memset(aluParam, 0, sizeof(aluParam));

    EncodeMathMMIO<FamilyType>::encodeAluAdd(aluParam, regA, regB);

    // Slot 0: load first source.
    EXPECT_EQ(aluParam[0].DW0.BitField.ALUOpcode, ALU_OPCODE_LOAD);
    EXPECT_EQ(aluParam[0].DW0.BitField.Operand1, ALU_REGISTER_R_SRCA);
    EXPECT_EQ(aluParam[0].DW0.BitField.Operand2, regA);

    // Slot 1: load second source.
    EXPECT_EQ(aluParam[1].DW0.BitField.ALUOpcode, ALU_OPCODE_LOAD);
    EXPECT_EQ(aluParam[1].DW0.BitField.Operand1, ALU_REGISTER_R_SRCB);
    EXPECT_EQ(aluParam[1].DW0.BitField.Operand2, regB);

    // Slot 2: the add itself, with both operand fields zero.
    EXPECT_EQ(aluParam[2].DW0.BitField.ALUOpcode, ALU_OPCODE_ADD);
    EXPECT_EQ(aluParam[2].DW0.BitField.Operand1, 0u);
    EXPECT_EQ(aluParam[2].DW0.BitField.Operand2, 0u);

    // Slot 3: store of the result.
    EXPECT_EQ(aluParam[3].DW0.BitField.ALUOpcode, ALU_OPCODE_STORE);
    EXPECT_EQ(aluParam[3].DW0.BitField.Operand1, ALU_REGISTER_R_ACCU);
    EXPECT_EQ(aluParam[3].DW0.BitField.Operand2, ALU_REGISTER_R_0);

    // Slot 4: must still hold the zero written by memset.
    EXPECT_EQ(aluParam[4].DW0.Value, 0u);
}
// encodeAluSubStoreCarry is handed five zeroed ALU slots; the test pins the opcode and operands
// of the first four (LOAD, LOAD, SUB, STORE of the carry flag) and requires the fifth to stay
// untouched.
HWTEST_F(EncodeMathMMIOTest, encodeAluSubStoreCarryHasCorrectOpcodesOperands) {
    using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;

    uint32_t regA = ALU_REGISTER_R_0;
    uint32_t regB = ALU_REGISTER_R_1;

    MI_MATH_ALU_INST_INLINE aluParam[5];
    memset(aluParam, 0, sizeof(aluParam));

    EncodeMathMMIO<FamilyType>::encodeAluSubStoreCarry(aluParam, regA, regB);

    // Slot 0: load first source.
    EXPECT_EQ(aluParam[0].DW0.BitField.ALUOpcode, ALU_OPCODE_LOAD);
    EXPECT_EQ(aluParam[0].DW0.BitField.Operand1, ALU_REGISTER_R_SRCA);
    EXPECT_EQ(aluParam[0].DW0.BitField.Operand2, regA);

    // Slot 1: load second source.
    EXPECT_EQ(aluParam[1].DW0.BitField.ALUOpcode, ALU_OPCODE_LOAD);
    EXPECT_EQ(aluParam[1].DW0.BitField.Operand1, ALU_REGISTER_R_SRCB);
    EXPECT_EQ(aluParam[1].DW0.BitField.Operand2, regB);

    // Slot 2: the subtraction, with both operand fields zero.
    EXPECT_EQ(aluParam[2].DW0.BitField.ALUOpcode, ALU_OPCODE_SUB);
    EXPECT_EQ(aluParam[2].DW0.BitField.Operand1, 0u);
    EXPECT_EQ(aluParam[2].DW0.BitField.Operand2, 0u);

    // Slot 3: store involving the carry-flag register.
    EXPECT_EQ(aluParam[3].DW0.BitField.ALUOpcode, ALU_OPCODE_STORE);
    EXPECT_EQ(aluParam[3].DW0.BitField.Operand1, ALU_REGISTER_R_CF);
    EXPECT_EQ(aluParam[3].DW0.BitField.Operand2, ALU_REGISTER_R_0);

    // Slot 4: must still hold the zero written by memset.
    EXPECT_EQ(aluParam[4].DW0.Value, 0u);
}
using CommandEncoderMathTest = Test<DeviceFixture>;
// Checks the command sequence produced by encodeGreaterThanPredicate: a register load from
// memory, a register load of the immediate, an MI_MATH and finally a register-to-register
// copy into the predicate result register. Each search continues from the previous match,
// so the commands must appear in this order.
HWTEST_F(CommandEncoderMathTest, appendsAGreaterThanPredicate) {
    using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;

    CommandContainer cmdContainer;
    // Everything below dereferences the container's command stream, so stop here if setup failed.
    ASSERT_TRUE(cmdContainer.initialize(pDevice));

    EncodeMathMMIO<FamilyType>::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());

    auto itor = commands.begin();

    // The memory operand goes into GPR R0.
    itor = find<MI_LOAD_REGISTER_MEM *>(itor, commands.end());
    ASSERT_NE(itor, commands.end());
    auto cmdMEM = genCmdCast<MI_LOAD_REGISTER_MEM *>(*itor);
    EXPECT_EQ(cmdMEM->getRegisterAddress(), CS_GPR_R0);
    EXPECT_EQ(cmdMEM->getMemoryAddress(), 0xDEADBEEFCAF0u);

    // The immediate operand goes into GPR R1.
    itor = find<MI_LOAD_REGISTER_IMM *>(itor, commands.end());
    ASSERT_NE(itor, commands.end());
    auto cmdIMM = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
    EXPECT_EQ(cmdIMM->getRegisterOffset(), CS_GPR_R1);
    EXPECT_EQ(cmdIMM->getDataDword(), 17u);

    itor = find<MI_MATH *>(itor, commands.end());
    ASSERT_NE(itor, commands.end());
    auto cmdMATH = genCmdCast<MI_MATH *>(*itor);
    EXPECT_EQ(cmdMATH->DW0.BitField.DwordLength, 3u);

    // The result is copied from GPR R0 into the predicate result register.
    itor = find<MI_LOAD_REGISTER_REG *>(itor, commands.end());
    ASSERT_NE(itor, commands.end());
    auto cmdREG = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
    EXPECT_EQ(cmdREG->getSourceRegisterAddress(), CS_GPR_R0);
    EXPECT_EQ(cmdREG->getDestinationRegisterAddress(), CS_PREDICATE_RESULT);

    // Three MI_MATH-sized steps past the MI_MATH header the test expects the SUB instruction.
    // NOTE(review): this relies on an ALU instruction being the same size as MI_MATH - confirm.
    auto cmdALU = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(cmdMATH + 3);
    EXPECT_EQ(cmdALU->DW0.BitField.ALUOpcode, ALU_OPCODE_SUB);
}
// Checks that setGroupSizeIndirect emits an MI_MATH and, at or after it, an
// MI_STORE_REGISTER_MEM.
HWTEST_F(CommandEncoderMathTest, setGroupSizeIndirect) {
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;

    CommandContainer cmdContainer;
    // Everything below dereferences the container's command stream, so stop here if setup failed.
    ASSERT_TRUE(cmdContainer.initialize(pDevice));

    uint32_t offsets[3] = {0, sizeof(uint32_t), 2 * sizeof(uint32_t)};
    uint32_t crossThreadAdress[3] = {};
    uint32_t lws[3] = {2, 1, 1};

    EncodeIndirectParams<FamilyType>::setGroupSizeIndirect(cmdContainer, offsets, crossThreadAdress, lws);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());

    auto itor = commands.begin();
    itor = find<MI_MATH *>(itor, commands.end());
    ASSERT_NE(itor, commands.end());
    itor = find<MI_STORE_REGISTER_MEM *>(itor, commands.end());
    ASSERT_NE(itor, commands.end());
}
// Checks that setGroupCountIndirect emits exactly three MI_STORE_REGISTER_MEM commands:
// three consecutive searches must succeed and a fourth must hit the end of the list.
HWTEST_F(CommandEncoderMathTest, setGroupCountIndirect) {
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;

    CommandContainer cmdContainer;
    // Everything below dereferences the container's command stream, so stop here if setup failed.
    ASSERT_TRUE(cmdContainer.initialize(pDevice));

    uint32_t offsets[3] = {0, sizeof(uint32_t), 2 * sizeof(uint32_t)};
    uint32_t crossThreadAdress[3] = {};

    EncodeIndirectParams<FamilyType>::setGroupCountIndirect(cmdContainer, offsets, crossThreadAdress);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());

    auto itor = commands.begin();
    itor = find<MI_STORE_REGISTER_MEM *>(itor, commands.end());
    ASSERT_NE(itor, commands.end());
    // Each following search starts one past the previous match.
    itor = find<MI_STORE_REGISTER_MEM *>(++itor, commands.end());
    ASSERT_NE(itor, commands.end());
    itor = find<MI_STORE_REGISTER_MEM *>(++itor, commands.end());
    ASSERT_NE(itor, commands.end());
    itor = find<MI_STORE_REGISTER_MEM *>(++itor, commands.end());
    ASSERT_EQ(itor, commands.end());
}

View File

@@ -0,0 +1,29 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/helpers/ptr_math.h"
#include "core/unit_tests/fixtures/command_container_fixture.h"
#include "unit_tests/gen_common/gen_cmd_parse.h"
using namespace NEO;
using CommandEncodeStatesTest = Test<CommandEncodeStatesFixture>;
// EncodeMediaInterfaceDescriptorLoad::encode is expected to emit a MEDIA_STATE_FLUSH and,
// somewhere after it, a MEDIA_INTERFACE_DESCRIPTOR_LOAD.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, ecodeMediaInterfaceDescriptor) {
    using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;

    EncodeMediaInterfaceDescriptorLoad<FamilyType>::encode(*cmdContainer);

    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

    // First the flush ...
    auto cursor = find<MEDIA_STATE_FLUSH *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(cursor, parsedCmds.end());

    // ... then the descriptor load, searched for strictly after the flush.
    cursor = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(++cursor, parsedCmds.end());
    ASSERT_NE(cursor, parsedCmds.end());
}

View File

@@ -0,0 +1,77 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/command_container/command_encoder.h"
#include "test.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/gen_common/gen_cmd_parse.h"
using namespace NEO;
class CommandSetMMIOFixture : public DeviceFixture {
public:
void SetUp() {
DeviceFixture::SetUp();
cmdContainer = std::make_unique<CommandContainer>();
cmdContainer->initialize(pDevice);
}
void TearDown() {
cmdContainer.reset();
DeviceFixture::TearDown();
}
std::unique_ptr<CommandContainer> cmdContainer;
};
using CommandSetMMIOTest = Test<CommandSetMMIOFixture>;
// encodeIMM(0xf00, 0xbaa) is expected to produce an MI_LOAD_REGISTER_IMM whose register
// offset and data dword echo those two arguments.
HWTEST_F(CommandSetMMIOTest, appendsAMI_LOAD_REGISTER_IMM) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    EncodeSetMMIO<FamilyType>::encodeIMM(*cmdContainer, 0xf00, 0xbaa);

    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

    auto lriIt = find<MI_LOAD_REGISTER_IMM *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(lriIt, parsedCmds.end());

    auto lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(*lriIt);
    EXPECT_EQ(lri->getRegisterOffset(), 0xf00u);
    EXPECT_EQ(lri->getDataDword(), 0xbaau);
}
// encodeMEM(0xf00, 0xDEADBEEFCAF0) is expected to produce an MI_LOAD_REGISTER_MEM whose
// register address and memory address echo those two arguments.
HWTEST_F(CommandSetMMIOTest, appendsAMI_LOAD_REGISTER_MEM) {
    using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;

    EncodeSetMMIO<FamilyType>::encodeMEM(*cmdContainer, 0xf00, 0xDEADBEEFCAF0);

    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

    auto lrmIt = find<MI_LOAD_REGISTER_MEM *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(lrmIt, parsedCmds.end());

    auto lrm = genCmdCast<MI_LOAD_REGISTER_MEM *>(*lrmIt);
    EXPECT_EQ(lrm->getRegisterAddress(), 0xf00u);
    EXPECT_EQ(lrm->getMemoryAddress(), 0xDEADBEEFCAF0u);
}
// encodeREG(0xf10, 0xaf0) is expected to produce an MI_LOAD_REGISTER_REG with 0xf10 as the
// destination register and 0xaf0 as the source register.
HWTEST_F(CommandSetMMIOTest, appendsAMI_LOAD_REGISTER_REG) {
    using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;

    EncodeSetMMIO<FamilyType>::encodeREG(*cmdContainer, 0xf10, 0xaf0);

    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

    auto lrrIt = find<MI_LOAD_REGISTER_REG *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(lrrIt, parsedCmds.end());

    auto lrr = genCmdCast<MI_LOAD_REGISTER_REG *>(*lrrIt);
    EXPECT_EQ(lrr->getDestinationRegisterAddress(), 0xf10u);
    EXPECT_EQ(lrr->getSourceRegisterAddress(), 0xaf0u);
}

View File

@@ -0,0 +1,147 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/helpers/ptr_math.h"
#include "core/unit_tests/fixtures/command_container_fixture.h"
#include "unit_tests/gen_common/gen_cmd_parse.h"
using namespace NEO;
using CommandEncodeStatesTest = Test<CommandEncodeStatesFixture>;
// Copies one sampler state into the dynamic state heap and checks that the copy's indirect
// state pointer equals the heap usage recorded before the call.
HWTEST_F(CommandEncodeStatesTest, encodeCopySamplerState) {
    using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;

    uint32_t numSamplers = 1;

    // The sampler state is the source of the copy, so give it defined contents rather than
    // handing indeterminate stack bytes to the encoder.
    SAMPLER_STATE samplerState;
    memset(&samplerState, 0, sizeof(samplerState));

    auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
    auto usedBefore = dsh->getUsed();

    auto samplerStateOffset = EncodeStates<FamilyType>::copySamplerState(dsh, 0, numSamplers, 0, &samplerState);

    auto pSmplr = reinterpret_cast<SAMPLER_STATE *>(ptrOffset(dsh->getCpuBase(), samplerStateOffset));
    EXPECT_EQ(pSmplr->getIndirectStatePointer(), usedBefore);
}
// Encodes a buffer surface state for a non-null GPU address and checks that the programmed
// width, height, depth and base address match the requested size and address.
HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationProvidedThenUseAllocationAsInput) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
    ASSERT_NE(nullptr, stateBuffer);
    RENDER_SURFACE_STATE *state = reinterpret_cast<RENDER_SURFACE_STATE *>(stateBuffer);
    memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));

    // encodeBuffer only takes the raw address and size, so no GraphicsAllocation object is
    // needed to drive it.
    uint64_t gpuAddr = 0x4000u;
    size_t allocSize = 0x1000;

    // Expected dimensions: (size - 1) written as a whole and read back field by field.
    SURFACE_STATE_BUFFER_LENGTH length;
    length.Length = static_cast<uint32_t>(allocSize - 1);

    EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, reinterpret_cast<void *>(gpuAddr), allocSize, 1,
                                                 RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT);

    EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
    EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
    EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
    EXPECT_EQ(gpuAddr, state->getSurfaceBaseAddress());

    alignedFree(stateBuffer);
}
// Encodes a buffer surface state for a null GPU address and checks that the surface type
// is programmed as SURFTYPE_NULL.
HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationNotProvidedThenStateTypeIsNull) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
    ASSERT_NE(nullptr, stateBuffer);
    RENDER_SURFACE_STATE *state = reinterpret_cast<RENDER_SURFACE_STATE *>(stateBuffer);
    memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));

    uint64_t gpuAddr = 0;
    size_t allocSize = 0x1000;

    EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, reinterpret_cast<void *>(gpuAddr), allocSize, 1,
                                                 RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT);

    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, state->getSurfaceType());

    alignedFree(stateBuffer);
}
// Encodes a buffer surface state requesting GPU coherency and checks that the same
// coherency type is read back from the encoded state.
HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenGpuCoherencyProvidedThenCoherencyGpuIsSet) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
    ASSERT_NE(nullptr, stateBuffer);
    RENDER_SURFACE_STATE *state = reinterpret_cast<RENDER_SURFACE_STATE *>(stateBuffer);
    memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));

    uint64_t gpuAddr = 0;
    size_t allocSize = 0x1000;

    EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, reinterpret_cast<void *>(gpuAddr), allocSize, 1,
                                                 RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);

    EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, state->getCoherencyType());

    alignedFree(stateBuffer);
}
// Marks the dynamic state, indirect object and surface state heaps dirty, encodes a
// STATE_BASE_ADDRESS and checks that its three base addresses equal the GPU bases of the
// container's heaps.
// NOTE(review): the test name ends in "AreNotSet" although the assertions require the
// addresses to be programmed; the name is kept so existing test filters keep matching.
HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithDirtyHeapsWhenSetStateBaseAddressCalledThenStateBaseAddressAreNotSet) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    cmdContainer->dirtyHeaps = 0;
    cmdContainer->setHeapDirty(NEO::HeapType::DYNAMIC_STATE);
    cmdContainer->setHeapDirty(NEO::HeapType::INDIRECT_OBJECT);
    cmdContainer->setHeapDirty(NEO::HeapType::SURFACE_STATE);

    EncodeStateBaseAddress<FamilyType>::encode(*cmdContainer.get());

    auto dsh = cmdContainer->getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
    auto ioh = cmdContainer->getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
    auto ssh = cmdContainer->getIndirectHeap(NEO::HeapType::SURFACE_STATE);

    // Locate the command by parsing the stream instead of assuming it sits at offset zero,
    // so the test does not depend on STATE_BASE_ADDRESS being the very first command.
    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
    auto itorCmd = find<STATE_BASE_ADDRESS *>(commands.begin(), commands.end());
    ASSERT_NE(itorCmd, commands.end());
    auto pCmd = genCmdCast<STATE_BASE_ADDRESS *>(*itorCmd);

    EXPECT_EQ(dsh->getHeapGpuBase(), pCmd->getDynamicStateBaseAddress());
    EXPECT_EQ(ioh->getHeapGpuBase(), pCmd->getIndirectObjectBaseAddress());
    EXPECT_EQ(ssh->getHeapGpuBase(), pCmd->getSurfaceStateBaseAddress());
}
// With no heap marked dirty, the encoded STATE_BASE_ADDRESS is expected NOT to carry the
// GPU bases of the container's dynamic state and surface state heaps.
// NOTE(review): the test name says "IsSetCorrectly" while the assertions check inequality.
HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWhenSetStateBaseAddressCalledThenStateBaseAddressIsSetCorrectly) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    // Clear every dirty flag before encoding.
    cmdContainer->dirtyHeaps = 0;
    EncodeStateBaseAddress<FamilyType>::encode(*cmdContainer);

    auto dynamicStateHeap = cmdContainer->getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
    auto surfaceStateHeap = cmdContainer->getIndirectHeap(NEO::HeapType::SURFACE_STATE);

    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

    auto sbaIt = find<STATE_BASE_ADDRESS *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(sbaIt, parsedCmds.end());

    auto sba = genCmdCast<STATE_BASE_ADDRESS *>(*sbaIt);
    EXPECT_NE(dynamicStateHeap->getHeapGpuBase(), sba->getDynamicStateBaseAddress());
    EXPECT_NE(surfaceStateHeap->getHeapGpuBase(), sba->getSurfaceStateBaseAddress());
}

View File

@@ -1,11 +1,12 @@
#
# Copyright (C) 2019 Intel Corporation
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Files that make up the core unit-test fixtures, collected in NEO_CORE_tests_fixtures.
set(NEO_CORE_tests_fixtures
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/command_container_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/preemption_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/preemption_fixture.cpp
)

View File

@@ -0,0 +1,30 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "core/command_container/command_encoder.h"
#include "test.h"
#include "unit_tests/fixtures/device_fixture.h"
class CommandEncodeStatesFixture : public DeviceFixture {
public:
class MyMockCommandContainer : public CommandContainer {
public:
using CommandContainer::dirtyHeaps;
};
void SetUp() {
DeviceFixture::SetUp();
cmdContainer = std::make_unique<MyMockCommandContainer>();
cmdContainer->initialize(pDevice);
}
void TearDown() {
cmdContainer.reset();
DeviceFixture::TearDown();
}
std::unique_ptr<MyMockCommandContainer> cmdContainer;
};

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2019 Intel Corporation
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -8,6 +8,7 @@ set(NEO_CORE_TESTS_GEN12LP
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/simd_helper_tests_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_preamble_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_command_encoder_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen12lp.inl
)
set_property(GLOBAL PROPERTY NEO_CORE_TESTS_GEN12LP ${NEO_CORE_TESTS_GEN12LP})

View File

@@ -0,0 +1,63 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/command_container/cmdcontainer.h"
#include "core/command_container/command_encoder.h"
#include "test.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/gen_common/gen_cmd_parse.h"
#include "reg_configs_common.h"
using namespace NEO;
using CommandEncoderTest = Test<DeviceFixture>;
// adjustStateComputeMode is expected to append exactly one STATE_COMPUTE_MODE command whose
// bytes match the family's initial command with force-GPU-non-coherent and the matching
// mask bits applied.
GEN12LPTEST_F(CommandEncoderTest, givenAdjustStateComputeModeStateComputeModeShowsNonCoherencySet) {
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
    using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT;

    CommandContainer cmdContainer;
    bool initialized = cmdContainer.initialize(pDevice);
    ASSERT_TRUE(initialized);

    // Measure how much the stream grows across the call under test.
    auto usedBeforeCall = cmdContainer.getCommandStream()->getUsed();
    EncodeStates<FamilyType>::adjustStateComputeMode(cmdContainer);
    auto usedAfterCall = cmdContainer.getCommandStream()->getUsed();
    ASSERT_GT(usedAfterCall, usedBeforeCall);

    auto expectedCmdSize = sizeof(STATE_COMPUTE_MODE);
    auto cmdAddedSize = usedAfterCall - usedBeforeCall;
    EXPECT_EQ(expectedCmdSize, cmdAddedSize);

    // Build the reference command.
    auto referenceCmd = FamilyType::cmdInitStateComputeMode;
    referenceCmd.setForceNonCoherent(FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT);
    referenceCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask);

    // The encoded command starts where the stream ended before the call.
    auto encodedCmd = reinterpret_cast<STATE_COMPUTE_MODE *>(ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), usedBeforeCall));
    EXPECT_EQ(0, memcmp(&referenceCmd, encodedCmd, sizeof(STATE_COMPUTE_MODE)));
}
// EncodeL3State::encode (second argument false) is expected to emit an MI_LOAD_REGISTER_IMM
// that writes 0xD0000020 to register offset 0xB134.
GEN12LPTEST_F(CommandEncoderTest, givenCommandContainerWhenEncodeL3StateThenSetCorrectMMIO) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    CommandContainer cmdContainer;
    // Everything below dereferences the container's command stream, so stop here if setup failed.
    ASSERT_TRUE(cmdContainer.initialize(pDevice));

    EncodeL3State<FamilyType>::encode(cmdContainer, false);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());

    auto itorLRI = find<MI_LOAD_REGISTER_IMM *>(commands.begin(), commands.end());
    ASSERT_NE(itorLRI, commands.end());

    auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorLRI);
    EXPECT_EQ(cmd->getRegisterOffset(), 0xB134u);
    EXPECT_EQ(cmd->getDataDword(), 0xD0000020u);
}

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2019 Intel Corporation
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -8,6 +8,7 @@ set(NEO_CORE_TESTS_GEN9
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/preamble_tests_gen9.cpp
${CMAKE_CURRENT_SOURCE_DIR}/simd_helper_tests_gen9.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_command_encoder_gen9.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen9.cpp
)
set_property(GLOBAL PROPERTY NEO_CORE_TESTS_GEN9 ${NEO_CORE_TESTS_GEN9})

View File

@@ -0,0 +1,64 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/command_container/cmdcontainer.h"
#include "core/command_container/command_encoder.h"
#include "test.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/gen_common/gen_cmd_parse.h"
#include "reg_configs_common.h"
using namespace NEO;
using CommandEncoderTest = Test<DeviceFixture>;
// EncodeL3State::encode is expected to put at least one MI_LOAD_REGISTER_IMM into the
// command stream.
GEN9TEST_F(CommandEncoderTest, appendsASetMMIO) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    CommandContainer cmdContainer;
    // Everything below dereferences the container's command stream, so stop here if setup failed.
    ASSERT_TRUE(cmdContainer.initialize(pDevice));

    EncodeL3State<FamilyType>::encode(cmdContainer, false);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());

    auto itorLRI = find<MI_LOAD_REGISTER_IMM *>(commands.begin(), commands.end());
    ASSERT_NE(itorLRI, commands.end());
}
// With the second encode argument false, the MI_LOAD_REGISTER_IMM is expected to target
// register offset 0x7034. The expected data is 0x80000340 when PreambleHelper reports the
// L3 as configurable for this hardware and 0x60000321 otherwise.
GEN9TEST_F(CommandEncoderTest, givenNoSLMSetCorrectMMIO) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    CommandContainer cmdContainer;
    // Everything below dereferences the container's command stream, so stop here if setup failed.
    ASSERT_TRUE(cmdContainer.initialize(pDevice));

    EncodeL3State<FamilyType>::encode(cmdContainer, false);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());

    auto itorLRI = find<MI_LOAD_REGISTER_IMM *>(commands.begin(), commands.end());
    ASSERT_NE(itorLRI, commands.end());

    auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorLRI);
    auto expectedData = PreambleHelper<FamilyType>::isL3Configurable(cmdContainer.getDevice()->getHardwareInfo()) ? 0x80000340u : 0x60000321u;
    EXPECT_EQ(cmd->getRegisterOffset(), 0x7034u);
    EXPECT_EQ(cmd->getDataDword(), expectedData);
}
// With the second encode argument true, the MI_LOAD_REGISTER_IMM is expected to write
// 0x60000321 to register offset 0x7034.
GEN9TEST_F(CommandEncoderTest, givenSLMSetCorrectMMIO) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    CommandContainer cmdContainer;
    // Everything below dereferences the container's command stream, so stop here if setup failed.
    ASSERT_TRUE(cmdContainer.initialize(pDevice));

    EncodeL3State<FamilyType>::encode(cmdContainer, true);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());

    auto itorLRI = find<MI_LOAD_REGISTER_IMM *>(commands.begin(), commands.end());
    ASSERT_NE(itorLRI, commands.end());

    auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorLRI);
    EXPECT_EQ(cmd->getRegisterOffset(), 0x7034u);
    EXPECT_EQ(cmd->getDataDword(), 0x60000321u);
}

View File

@@ -0,0 +1,13 @@
#
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Files that make up the core unit-test mocks, collected in NEO_CORE_tests_mocks.
set(NEO_CORE_tests_mocks
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/mock_dispatch_kernel_encoder_interface.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_dispatch_kernel_encoder_interface.h
)
# Store the list in a global property of the same name.
set_property(GLOBAL PROPERTY NEO_CORE_tests_mocks ${NEO_CORE_tests_mocks})

View File

@@ -0,0 +1,44 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/unit_tests/mocks/mock_dispatch_kernel_encoder_interface.h"
using namespace NEO;
using ::testing::Return;
// Wires up the default behaviour of the mock: the data getters hand out the mock's own
// buffers, sizes and allocation, and every remaining method is allowed any number of calls.
MockDispatchKernelEncoder::MockDispatchKernelEncoder() {
    // Kernel ISA allocation.
    EXPECT_CALL(*this, getIsaAllocation()).WillRepeatedly(Return(&mockAllocation));

    // Cross-thread data: size and backing buffer.
    EXPECT_CALL(*this, getSizeCrossThreadData()).WillRepeatedly(Return(crossThreadSize));
    EXPECT_CALL(*this, getCrossThread()).WillRepeatedly(Return(&dataCrossThread));

    // Per-thread data: size and backing buffer.
    EXPECT_CALL(*this, getSizePerThreadData()).WillRepeatedly(Return(perThreadSize));
    EXPECT_CALL(*this, getPerThread()).WillRepeatedly(Return(&dataPerThread));

    expectAnyMockFunctionCall();
}
void MockDispatchKernelEncoder::expectAnyMockFunctionCall() {
EXPECT_CALL(*this, hasBarriers()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getSlmTotalSize()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getBindingTableOffset()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getBorderColor()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getSamplerTableOffset()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getNumSurfaceStates()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getNumSamplers()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getSimdSize()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getPerThreadScratchSize()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getPerThreadExecutionMask()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getSizePerThreadDataForWholeGroup()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getSizeSurfaceStateHeapData()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getCountOffsets()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getSizeOffsets()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getLocalWorkSize()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getNumGrfRequired()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getThreadsPerThreadGroupCount()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, hasGroupCounts()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getSurfaceStateHeap()).Times(::testing::AnyNumber());
EXPECT_CALL(*this, getDynamicStateHeap()).Times(::testing::AnyNumber());
}

View File

@@ -0,0 +1,57 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "core/kernel/dispatch_kernel_encoder_interface.h"
#include "unit_tests/mocks/mock_graphics_allocation.h"
#include "gmock/gmock.h"
#include <stdint.h>
namespace NEO {
class GraphicsAllocation;
// gMock implementation of DispatchKernelEncoderI for encoder unit tests. Every interface
// method is mocked; the struct also owns the allocation and the data buffers that its
// constructor returns from the corresponding getters.
struct MockDispatchKernelEncoder : public DispatchKernelEncoderI {
    // Installs the default expectations and return actions (see the .cpp file).
    MockDispatchKernelEncoder();

    MOCK_METHOD0(hasBarriers, bool());
    MOCK_METHOD0(getSlmTotalSize, uint32_t());
    MOCK_METHOD0(getBindingTableOffset, uint32_t());
    MOCK_METHOD0(getBorderColor, uint32_t());
    MOCK_METHOD0(getSamplerTableOffset, uint32_t());
    MOCK_METHOD0(getNumSurfaceStates, uint32_t());
    MOCK_METHOD0(getNumSamplers, uint32_t());
    MOCK_METHOD0(getSimdSize, uint32_t());
    MOCK_METHOD0(getSizeCrossThreadData, uint32_t());
    MOCK_METHOD0(getPerThreadScratchSize, uint32_t());
    MOCK_METHOD0(getPerThreadExecutionMask, uint32_t());
    MOCK_METHOD0(getSizePerThreadData, uint32_t());
    MOCK_METHOD0(getSizePerThreadDataForWholeGroup, uint32_t());
    MOCK_METHOD0(getSizeSurfaceStateHeapData, uint32_t());
    MOCK_METHOD0(getCountOffsets, uint32_t *());
    MOCK_METHOD0(getSizeOffsets, uint32_t *());
    MOCK_METHOD0(getLocalWorkSize, uint32_t *());
    MOCK_METHOD0(getNumGrfRequired, uint32_t());
    MOCK_METHOD0(getThreadsPerThreadGroupCount, uint32_t());
    MOCK_METHOD0(getIsaAllocation, GraphicsAllocation *());
    MOCK_METHOD0(hasGroupCounts, bool());
    MOCK_METHOD0(hasGroupSize, bool());
    MOCK_METHOD0(getSurfaceStateHeap, const void *());
    MOCK_METHOD0(getDynamicStateHeap, const void *());
    MOCK_METHOD0(getCrossThread, const void *());
    MOCK_METHOD0(getPerThread, const void *());

    // Allows any number of calls on the methods that have no explicit return action.
    void expectAnyMockFunctionCall();

    // Allocation returned by getIsaAllocation().
    ::testing::NiceMock<MockGraphicsAllocation> mockAllocation;

    static constexpr uint32_t crossThreadSize = 0x40;
    static constexpr uint32_t perThreadSize = 0x20;

    // Buffers returned by getCrossThread() / getPerThread(). They are zero-filled so that
    // code reading them never sees indeterminate bytes.
    uint8_t dataCrossThread[crossThreadSize] = {};
    uint8_t dataPerThread[perThreadSize] = {};
};
} // namespace NEO