Reorganization directory structure [3/n]

Change-Id: If3dfa3f6007f8810a6a1ae1a4f0c7da38544648d
Author: kamdiedrich
Date: 2020-02-23 21:00:51 +01:00
Committed by: sys_ocldev
Parent: e177b4fc0f
Commit: e072275ae6
711 changed files with 94 additions and 94 deletions

View File

@@ -0,0 +1,91 @@
#
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/abort.h
${CMAKE_CURRENT_SOURCE_DIR}/address_patch.h
${CMAKE_CURRENT_SOURCE_DIR}/aligned_memory.h
${CMAKE_CURRENT_SOURCE_DIR}/array_count.h
${CMAKE_CURRENT_SOURCE_DIR}/aux_translation.h
${CMAKE_CURRENT_SOURCE_DIR}/basic_math.h
${CMAKE_CURRENT_SOURCE_DIR}/bit_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper_bdw_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/cache_policy.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cache_policy.h
${CMAKE_CURRENT_SOURCE_DIR}/common_types.h
${CMAKE_CURRENT_SOURCE_DIR}/completion_stamp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/completion_stamp.h
${CMAKE_CURRENT_SOURCE_DIR}/debug_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/dirty_state_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/dirty_state_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/engine_control.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/engine_node_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/engine_node_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/extendable_enum.h
${CMAKE_CURRENT_SOURCE_DIR}/file_io.cpp
${CMAKE_CURRENT_SOURCE_DIR}/file_io.h
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper_hw.inl
${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp.h
${CMAKE_CURRENT_SOURCE_DIR}/get_info.h
${CMAKE_CURRENT_SOURCE_DIR}/hash.h
${CMAKE_CURRENT_SOURCE_DIR}/heap_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/heap_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/hw_cmds.h
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_bdw_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/hw_helper_extended.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.h
${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.h
${CMAKE_CURRENT_SOURCE_DIR}/non_copyable_or_moveable.h
${CMAKE_CURRENT_SOURCE_DIR}/options.h
${CMAKE_CURRENT_SOURCE_DIR}/pipeline_select_args.h
${CMAKE_CURRENT_SOURCE_DIR}/pipeline_select_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/preamble.h
${CMAKE_CURRENT_SOURCE_DIR}/preamble_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/preamble_bdw_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}/ptr_math.h
${CMAKE_CURRENT_SOURCE_DIR}/register_offsets.h
${CMAKE_CURRENT_SOURCE_DIR}/registered_method_dispatcher.h
${CMAKE_CURRENT_SOURCE_DIR}/simd_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address.h
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_bdw_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}/state_compute_mode_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/stdio.h
${CMAKE_CURRENT_SOURCE_DIR}/string.h
${CMAKE_CURRENT_SOURCE_DIR}/surface_format_info.h
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.cpp
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.h
${CMAKE_CURRENT_SOURCE_DIR}/vec.h
)
set_property(GLOBAL PROPERTY NEO_CORE_HELPERS ${NEO_CORE_HELPERS})
if(WIN32)
set(NEO_CORE_SRCS_HELPERS_WINDOWS
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.cpp
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.h
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.inl
)
set_property(GLOBAL PROPERTY NEO_CORE_SRCS_HELPERS_WINDOWS ${NEO_CORE_SRCS_HELPERS_WINDOWS})
endif()
add_subdirectories()

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/abort.h"
#include <cstdlib>
namespace NEO {
void abortExecution() {
abort();
}
} // namespace NEO

View File

@@ -0,0 +1,12 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
namespace NEO {
[[noreturn]] void abortExecution();
}

View File

@@ -0,0 +1,80 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <stdint.h>
namespace NEO {
enum PatchInfoAllocationType {
Default = 0,
KernelArg,
GeneralStateHeap,
DynamicStateHeap,
IndirectObjectHeap,
SurfaceStateHeap,
InstructionHeap,
TagAddress,
TagValue,
GUCStartMessage,
ScratchSpace
};
struct PatchInfoData {
uint64_t sourceAllocation;
uint64_t sourceAllocationOffset;
PatchInfoAllocationType sourceType;
uint64_t targetAllocation;
uint64_t targetAllocationOffset;
PatchInfoAllocationType targetType;
uint32_t patchAddressSize;
PatchInfoData(uint64_t sourceAllocation,
uint64_t sourceAllocationOffset,
PatchInfoAllocationType sourceType,
uint64_t targetAllocation,
uint64_t targetAllocationOffset,
PatchInfoAllocationType targetType,
uint32_t patchAddressSize)
: sourceAllocation(sourceAllocation),
sourceAllocationOffset(sourceAllocationOffset),
sourceType(sourceType),
targetAllocation(targetAllocation),
targetAllocationOffset(targetAllocationOffset),
targetType(targetType),
patchAddressSize(patchAddressSize) {
}
PatchInfoData(uint64_t sourceAllocation,
uint64_t sourceAllocationOffset,
PatchInfoAllocationType sourceType,
uint64_t targetAllocation,
uint64_t targetAllocationOffset,
PatchInfoAllocationType targetType)
: sourceAllocation(sourceAllocation),
sourceAllocationOffset(sourceAllocationOffset),
sourceType(sourceType),
targetAllocation(targetAllocation),
targetAllocationOffset(targetAllocationOffset),
targetType(targetType),
patchAddressSize(sizeof(void *)) {
}
bool requiresIndirectPatching() {
return (targetType != PatchInfoAllocationType::Default && targetType != PatchInfoAllocationType::GUCStartMessage);
}
};
struct CommandChunk {
uint64_t baseAddressCpu = 0;
uint64_t baseAddressGpu = 0;
uint64_t startOffset = 0;
uint64_t endOffset = 0;
uint64_t batchBufferStartLocation = 0;
uint64_t batchBufferStartAddress = 0;
};
} // namespace NEO
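Illustrative sketch of constructing a patch entry with the six-argument constructor (addresses, offsets, and the wrapper function are invented for the example; not part of the changed files):

    void patchInfoExample() {
        uint64_t sourceGpuVa = 0x1000; // hypothetical GPU addresses
        uint64_t targetGpuVa = 0x2000;
        // patchAddressSize defaults to sizeof(void *) with this constructor.
        NEO::PatchInfoData patch(sourceGpuVa, 0u, NEO::PatchInfoAllocationType::KernelArg,
                                 targetGpuVa, 64u, NEO::PatchInfoAllocationType::IndirectObjectHeap);
        // True here: the target is neither Default nor GUCStartMessage.
        bool indirect = patch.requiresIndirectPatching();
        (void)indirect;
    }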

View File

@@ -0,0 +1,112 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/debug_helpers.h"
#include "memory_manager/memory_constants.h"
#include "opencl/source/utilities/logger.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <new>
#ifdef _MSC_VER
#define ALIGNAS(x) __declspec(align(x))
#else
#define ALIGNAS(x) alignas(x)
#endif
template <typename T, typename TNoRef = typename std::remove_reference<T>::type>
constexpr inline TNoRef alignUp(T before, size_t alignment) {
TNoRef mask = static_cast<TNoRef>(alignment - 1);
return (before + mask) & ~mask;
}
template <typename T>
constexpr inline T *alignUp(T *ptrBefore, size_t alignment) {
return reinterpret_cast<T *>(alignUp(reinterpret_cast<uintptr_t>(ptrBefore), alignment));
}
template <typename T, typename TNoRef = typename std::remove_reference<T>::type>
constexpr inline TNoRef alignDown(T before, size_t alignment) {
TNoRef mask = static_cast<TNoRef>(alignment - 1);
return before & ~mask;
}
template <typename T>
constexpr inline T *alignDown(T *ptrBefore, size_t alignment) {
return reinterpret_cast<T *>(alignDown(reinterpret_cast<uintptr_t>(ptrBefore), alignment));
}
inline void *alignedMalloc(size_t bytes, size_t alignment) {
DEBUG_BREAK_IF(alignment <= 0);
if (bytes == 0) {
bytes = sizeof(void *);
}
// Make sure our alignment is at least the size of a pointer
alignment = std::max(alignment, sizeof(void *));
// Allocate _bytes + _alignment
size_t sizeToAlloc = bytes + alignment;
auto pOriginalMemory = new (std::nothrow) char[sizeToAlloc];
// Add in the alignment
auto pAlignedMemory = reinterpret_cast<uintptr_t>(pOriginalMemory);
if (pAlignedMemory) {
pAlignedMemory += alignment;
pAlignedMemory -= pAlignedMemory % alignment;
// Store the original pointer to facilitate deallocation
reinterpret_cast<void **>(pAlignedMemory)[-1] = pOriginalMemory;
}
DBG_LOG(LogAlignedAllocations, __FUNCTION__, "Pointer:", reinterpret_cast<void *>(pOriginalMemory), "size:", sizeToAlloc);
// Return result
return reinterpret_cast<void *>(pAlignedMemory); // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks)
}
inline void alignedFree(void *ptr) {
if (ptr) {
auto originalPtr = reinterpret_cast<char **>(ptr)[-1];
DBG_LOG(LogAlignedAllocations, __FUNCTION__, "Pointer:", reinterpret_cast<void *>(originalPtr));
delete[] originalPtr;
}
}
inline size_t alignSizeWholePage(const void *ptr, size_t size) {
uintptr_t startPageMisalignedAddressOffset = reinterpret_cast<uintptr_t>(ptr) & MemoryConstants::pageMask;
size_t alignedSizeToPage = alignUp(startPageMisalignedAddressOffset + size, MemoryConstants::pageSize);
return alignedSizeToPage;
}
template <size_t alignment, typename T>
inline constexpr bool isAligned(T val) {
return (static_cast<size_t>(val) % alignment) == 0;
}
template <size_t alignment, typename T>
inline bool isAligned(T *ptr) {
return ((reinterpret_cast<uintptr_t>(ptr)) % alignment) == 0;
}
template <typename T1, typename T2>
inline bool isAligned(T1 ptr, T2 alignment) {
return ((static_cast<size_t>(ptr)) & (static_cast<size_t>(alignment) - 1u)) == 0;
}
template <typename T>
inline bool isAligned(T *ptr) {
return (reinterpret_cast<uintptr_t>(ptr) & (alignof(T) - 1)) == 0;
}
inline auto allocateAlignedMemory(size_t bytes, size_t alignment) {
return std::unique_ptr<void, std::function<decltype(alignedFree)>>(alignedMalloc(bytes, alignment), alignedFree);
}
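A minimal usage sketch of the aligned-memory helpers above (sizes and alignments are arbitrary example values; not part of the changed files):

    void alignedMemoryExample() {
        // 256 bytes aligned to a 64-byte cache line; freed with the matching helper.
        void *raw = alignedMalloc(256, 64);
        bool cacheLineAligned = isAligned<64>(raw); // true by construction
        alignedFree(raw);
        (void)cacheLineAligned;

        // RAII variant: the unique_ptr calls alignedFree automatically.
        auto managed = allocateAlignedMemory(256, 64);

        // Integer rounding helpers.
        size_t rounded = alignUp<size_t>(100, 64);   // 128
        size_t lowered = alignDown<size_t>(100, 64); // 64
        (void)rounded;
        (void)lowered;
    }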

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "debug_settings/debug_settings_manager.h"
#include "helpers/deferred_deleter_helper.h"
namespace NEO {
bool isDeferredDeleterEnabled() {
return DebugManager.flags.EnableDeferredDeleter.get();
}
} // namespace NEO

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <stddef.h>
template <typename T, size_t N>
constexpr size_t arrayCount(const T (&)[N]) {
return N;
}
template <typename T, size_t N>
constexpr bool isInRange(size_t idx, const T (&)[N]) {
return (idx < N);
}
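A tiny example of the two helpers above (array contents are irrelevant; values chosen for illustration):

    int lookupTable[8] = {};
    static_assert(arrayCount(lookupTable) == 8, "element count deduced at compile time");
    static_assert(isInRange(3, lookupTable), "index 3 lies within [0, 8)");
    static_assert(!isInRange(8, lookupTable), "index 8 is one past the end");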

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
enum class AuxTranslationDirection {
None,
AuxToNonAux,
NonAuxToAux
};
enum class AuxTranslationMode : int32_t {
Builtin = 0,
Blit = 1
};
} // namespace NEO

View File

@@ -0,0 +1,182 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/vec.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <stdio.h>
#define KB 1024uLL
#define MB (KB * KB)
#define GB (KB * MB)
namespace Math {
constexpr uint32_t nextPowerOfTwo(uint32_t value) {
--value;
value |= value >> 1;
value |= value >> 2;
value |= value >> 4;
value |= value >> 8;
value |= value >> 16;
++value;
return value;
}
constexpr uint64_t nextPowerOfTwo(uint64_t value) {
--value;
value |= value >> 1;
value |= value >> 2;
value |= value >> 4;
value |= value >> 8;
value |= value >> 16;
value |= value >> 32;
++value;
return value;
}
constexpr uint32_t prevPowerOfTwo(uint32_t value) {
value |= value >> 1;
value |= value >> 2;
value |= value >> 4;
value |= value >> 8;
value |= value >> 16;
return (value - (value >> 1));
}
constexpr uint64_t prevPowerOfTwo(uint64_t value) {
value |= value >> 1;
value |= value >> 2;
value |= value >> 4;
value |= value >> 8;
value |= value >> 16;
value |= value >> 32;
return (value - (value >> 1));
}
inline uint32_t getMinLsbSet(uint32_t value) {
static const uint8_t multiplyDeBruijnBitPosition[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
auto invert = -static_cast<int64_t>(value);
value &= static_cast<uint32_t>(invert);
return multiplyDeBruijnBitPosition[static_cast<uint32_t>(value * 0x077CB531U) >> 27];
}
constexpr uint32_t log2(uint32_t value) {
if (value == 0) {
return 32;
}
uint32_t exponent = 0u;
while (value >>= 1) {
exponent++;
}
return exponent;
}
constexpr uint32_t log2(uint64_t value) {
if (value == 0) {
return 64;
}
uint32_t exponent = 0;
while (value >>= 1) {
exponent++;
}
return exponent;
}
union FloatConversion {
uint32_t u;
float f;
};
// clang-format off
static const FloatConversion PosInfinity = {0x7f800000};
static const FloatConversion NegInfinity = {0xff800000};
static const FloatConversion Nan = {0x7fc00000};
// clang-format on
inline uint16_t float2Half(float f) {
FloatConversion u;
u.f = f;
uint32_t fsign = (u.u >> 16) & 0x8000;
float x = std::fabs(f);
// NaN
if (x != x) {
u.u >>= (24 - 11);
u.u &= 0x7fff;
u.u |= 0x0200; //silence the NaN
return u.u | fsign;
}
// overflow
if (x >= std::ldexp(1.0f, 16)) {
if (x == PosInfinity.f)
return 0x7c00 | fsign;
return 0x7bff | fsign;
}
// underflow
if (x < std::ldexp(1.0f, -24))
return fsign; // The halfway case can return 0x0001 or 0. 0 is even.
// half denormal
if (x < std::ldexp(1.0f, -14)) {
x *= std::ldexp(1.0f, 24);
return (uint16_t)((int)x | fsign);
}
u.u &= 0xFFFFE000U;
u.u -= 0x38000000U;
return (u.u >> (24 - 11)) | fsign;
}
constexpr bool isDivisibleByPowerOfTwoDivisor(uint32_t number, uint32_t divisor) {
return (number & (divisor - 1)) == 0;
}
constexpr size_t computeTotalElementsCount(const Vec3<size_t> &inputVector) {
size_t minElementCount = 1;
auto xDim = std::max(minElementCount, inputVector.x);
auto yDim = std::max(minElementCount, inputVector.y);
auto zDim = std::max(minElementCount, inputVector.z);
return xDim * yDim * zDim;
}
template <typename T>
constexpr bool isPow2(T val) {
return val != 0 && (val & (val - 1)) == 0;
}
template <typename T>
constexpr T ffs(T v) {
if (v == 0) {
return std::numeric_limits<T>::max();
}
for (T i = 0; i < sizeof(T) * 8; ++i) {
if (0 != (v & (1ULL << i))) {
return i;
}
}
std::abort();
}
constexpr size_t divideAndRoundUp(size_t dividend, size_t divisor) {
return (dividend + divisor - 1) / divisor;
}
} // namespace Math
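A few hand-checked values for the Math helpers above (illustrative only):

    static_assert(Math::nextPowerOfTwo(33u) == 64u, "rounds up to the next power of two");
    static_assert(Math::prevPowerOfTwo(33u) == 32u, "rounds down to the previous power of two");
    static_assert(Math::log2(64u) == 6u, "integer base-2 logarithm");
    static_assert(Math::isPow2(4096), "4096 is a power of two");
    static_assert(Math::divideAndRoundUp(10, 4) == 3, "ceiling division: 10 / 4 rounds up to 3");
    static_assert(Math::isDivisibleByPowerOfTwoDivisor(96, 32), "96 is a multiple of 32");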

View File

@@ -0,0 +1,40 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cassert>
#include <cstdint>
#include <limits>
namespace NEO {
constexpr bool isBitSet(uint64_t field, uint64_t bitPosition) {
assert(bitPosition < std::numeric_limits<uint64_t>::digits); // undefined behavior
return (field & (1ull << bitPosition));
}
constexpr bool isAnyBitSet(uint64_t field, uint64_t checkedBits) {
return ((field & checkedBits) != 0);
}
constexpr bool isValueSet(uint64_t field, uint64_t value) {
assert(value != 0);
return ((field & value) == value);
}
constexpr bool isFieldValid(uint64_t field, uint64_t acceptedBits) {
return ((field & (~acceptedBits)) == 0);
}
constexpr uint64_t setBits(uint64_t field, bool newValue, uint64_t bitsToModify) {
if (newValue) {
return (field | bitsToModify);
}
return (field & (~bitsToModify));
}
} // namespace NEO
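Hand-checked examples for the bit helpers above (masks are arbitrary illustration values):

    static_assert(NEO::isBitSet(0b1010, 1), "bit 1 of 0b1010 is set");
    static_assert(!NEO::isBitSet(0b1010, 2), "bit 2 of 0b1010 is clear");
    static_assert(NEO::isAnyBitSet(0b1010, 0b0110), "the two masks share bit 1");
    static_assert(NEO::isValueSet(0xF0, 0x30), "all bits of 0x30 are present in 0xF0");
    static_assert(NEO::isFieldValid(0x05, 0x0F), "no bits outside the accepted mask");
    static_assert(NEO::setBits(0x01, true, 0x04) == 0x05, "sets bit 2");
    static_assert(NEO::setBits(0x05, false, 0x04) == 0x01, "clears bit 2");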

View File

@@ -0,0 +1,125 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/blit_commands_helper.h"
#include "helpers/timestamp_packet.h"
#include "memory_manager/surface.h"
namespace NEO {
BlitProperties BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection blitDirection,
CommandStreamReceiver &commandStreamReceiver,
GraphicsAllocation *memObjAllocation,
GraphicsAllocation *preallocatedHostAllocation,
void *hostPtr, uint64_t memObjGpuVa,
uint64_t hostAllocGpuVa, size_t hostPtrOffset,
size_t copyOffset, uint64_t copySize) {
GraphicsAllocation *hostAllocation = nullptr;
if (preallocatedHostAllocation) {
hostAllocation = preallocatedHostAllocation;
UNRECOVERABLE_IF(hostAllocGpuVa == 0);
} else {
HostPtrSurface hostPtrSurface(hostPtr, static_cast<size_t>(copySize), true);
bool success = commandStreamReceiver.createAllocationForHostSurface(hostPtrSurface, false);
UNRECOVERABLE_IF(!success);
hostAllocation = hostPtrSurface.getAllocation();
hostAllocGpuVa = hostAllocation->getGpuAddress();
}
if (BlitterConstants::BlitDirection::HostPtrToBuffer == blitDirection) {
return {
nullptr, // outputTimestampPacket
blitDirection, // blitDirection
{}, // csrDependencies
AuxTranslationDirection::None, // auxTranslationDirection
memObjAllocation, // dstAllocation
hostAllocation, // srcAllocation
memObjGpuVa, // dstGpuAddress
hostAllocGpuVa, // srcGpuAddress
copySize, // copySize
copyOffset, // dstOffset
hostPtrOffset}; // srcOffset
} else {
return {
nullptr, // outputTimestampPacket
blitDirection, // blitDirection
{}, // csrDependencies
AuxTranslationDirection::None, // auxTranslationDirection
hostAllocation, // dstAllocation
memObjAllocation, // srcAllocation
hostAllocGpuVa, // dstGpuAddress
memObjGpuVa, // srcGpuAddress
copySize, // copySize
hostPtrOffset, // dstOffset
copyOffset}; // srcOffset
}
}
BlitProperties BlitProperties::constructPropertiesForCopyBuffer(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
size_t dstOffset, size_t srcOffset, uint64_t copySize) {
return {
nullptr, // outputTimestampPacket
BlitterConstants::BlitDirection::BufferToBuffer, // blitDirection
{}, // csrDependencies
AuxTranslationDirection::None, // auxTranslationDirection
dstAllocation, // dstAllocation
srcAllocation, // srcAllocation
dstAllocation->getGpuAddress(), // dstGpuAddress
srcAllocation->getGpuAddress(), // srcGpuAddress
copySize, // copySize
dstOffset, // dstOffset
srcOffset}; // srcOffset
}
BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection,
GraphicsAllocation *allocation) {
auto allocationSize = allocation->getUnderlyingBufferSize();
return {
nullptr, // outputTimestampPacket
BlitterConstants::BlitDirection::BufferToBuffer, // blitDirection
{}, // csrDependencies
auxTranslationDirection, // auxTranslationDirection
allocation, // dstAllocation
allocation, // srcAllocation
allocation->getGpuAddress(), // dstGpuAddress
allocation->getGpuAddress(), // srcGpuAddress
allocationSize, // copySize
0, // dstOffset
0 // srcOffset
};
}
void BlitProperties::setupDependenciesForAuxTranslation(BlitPropertiesContainer &blitPropertiesContainer, TimestampPacketDependencies &timestampPacketDependencies,
TimestampPacketContainer &kernelTimestamps, const CsrDependencies &depsFromEvents,
CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr) {
auto numObjects = blitPropertiesContainer.size() / 2;
for (size_t i = 0; i < numObjects; i++) {
blitPropertiesContainer[i].outputTimestampPacket = timestampPacketDependencies.auxToNonAuxNodes.peekNodes()[i];
blitPropertiesContainer[i + numObjects].outputTimestampPacket = timestampPacketDependencies.nonAuxToAuxNodes.peekNodes()[i];
}
gpguCsr.requestStallingPipeControlOnNextFlush();
auto nodesAllocator = gpguCsr.getTimestampPacketAllocator();
timestampPacketDependencies.barrierNodes.add(nodesAllocator->getTag());
// wait for barrier and events before AuxToNonAux
blitPropertiesContainer[0].csrDependencies.push_back(&timestampPacketDependencies.barrierNodes);
for (auto dep : depsFromEvents) {
blitPropertiesContainer[0].csrDependencies.push_back(dep);
}
// wait for NDR before NonAuxToAux
blitPropertiesContainer[numObjects].csrDependencies.push_back(&kernelTimestamps);
}
} // namespace NEO

View File

@@ -0,0 +1,73 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_stream/csr_deps.h"
#include "helpers/aux_translation.h"
#include "memory_manager/memory_constants.h"
#include "utilities/stackvec.h"
#include <cstdint>
namespace NEO {
class CommandStreamReceiver;
class GraphicsAllocation;
class LinearStream;
struct TimestampPacketStorage;
struct RootDeviceEnvironment;
template <typename TagType>
struct TagNode;
struct BlitProperties;
struct HardwareInfo;
struct TimestampPacketDependencies;
using BlitPropertiesContainer = StackVec<BlitProperties, 16>;
struct BlitProperties {
static BlitProperties constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection blitDirection,
CommandStreamReceiver &commandStreamReceiver,
GraphicsAllocation *memObjAllocation,
GraphicsAllocation *preallocatedHostAllocation,
void *hostPtr, uint64_t memObjGpuVa,
uint64_t hostAllocGpuVa, size_t hostPtrOffset,
size_t copyOffset, uint64_t copySize);
static BlitProperties constructPropertiesForCopyBuffer(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
size_t dstOffset, size_t srcOffset, uint64_t copySize);
static BlitProperties constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection,
GraphicsAllocation *allocation);
static void setupDependenciesForAuxTranslation(BlitPropertiesContainer &blitPropertiesContainer, TimestampPacketDependencies &timestampPacketDependencies,
TimestampPacketContainer &kernelTimestamps, const CsrDependencies &depsFromEvents,
CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr);
static BlitterConstants::BlitDirection obtainBlitDirection(uint32_t commandType);
TagNode<TimestampPacketStorage> *outputTimestampPacket = nullptr;
BlitterConstants::BlitDirection blitDirection;
CsrDependencies csrDependencies;
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;
GraphicsAllocation *dstAllocation = nullptr;
GraphicsAllocation *srcAllocation = nullptr;
uint64_t dstGpuAddress = 0;
uint64_t srcGpuAddress = 0;
uint64_t copySize = 0;
size_t dstOffset = 0;
size_t srcOffset = 0;
};
template <typename GfxFamily>
struct BlitCommandsHelper {
static size_t estimateBlitCommandsSize(uint64_t copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket);
static size_t estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, const HardwareInfo &hwInfo);
static void dispatchBlitCommandsForBuffer(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment);
static void appendBlitCommandsForBuffer(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment);
};
} // namespace NEO

View File

@@ -0,0 +1,91 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/blit_commands_helper.h"
#include "helpers/hw_helper.h"
#include "helpers/timestamp_packet.h"
namespace NEO {
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(uint64_t copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket) {
size_t numberOfBlits = 0;
uint64_t sizeToBlit = copySize;
uint64_t width = 1;
uint64_t height = 1;
while (sizeToBlit != 0) {
if (sizeToBlit > BlitterConstants::maxBlitWidth) {
// 2D: maxBlitWidth x (1 .. maxBlitHeight)
width = BlitterConstants::maxBlitWidth;
height = std::min((sizeToBlit / width), BlitterConstants::maxBlitHeight);
} else {
// 1D: (1 .. maxBlitWidth) x 1
width = sizeToBlit;
height = 1;
}
sizeToBlit -= (width * height);
numberOfBlits++;
}
return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) +
(sizeof(typename GfxFamily::XY_COPY_BLT) * numberOfBlits) +
(sizeof(typename GfxFamily::MI_FLUSH_DW) * static_cast<size_t>(updateTimestampPacket));
}
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, const HardwareInfo &hwInfo) {
size_t size = 0;
for (auto &blitProperties : blitPropertiesContainer) {
size += BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitProperties.copySize, blitProperties.csrDependencies,
blitProperties.outputTimestampPacket != nullptr);
}
size += MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(hwInfo);
size += sizeof(typename GfxFamily::MI_FLUSH_DW) + sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);
return alignUp(size, MemoryConstants::cacheLineSize);
}
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
uint64_t sizeToBlit = blitProperties.copySize;
uint64_t width = 1;
uint64_t height = 1;
uint64_t offset = 0;
while (sizeToBlit != 0) {
if (sizeToBlit > BlitterConstants::maxBlitWidth) {
// dispatch 2D blit: maxBlitWidth x (1 .. maxBlitHeight)
width = BlitterConstants::maxBlitWidth;
height = std::min((sizeToBlit / width), BlitterConstants::maxBlitHeight);
} else {
// dispatch 1D blit: (1 .. maxBlitWidth) x 1
width = sizeToBlit;
height = 1;
}
auto bltCmd = linearStream.getSpaceForCmd<typename GfxFamily::XY_COPY_BLT>();
*bltCmd = GfxFamily::cmdInitXyCopyBlt;
bltCmd->setTransferWidth(static_cast<uint32_t>(width));
bltCmd->setTransferHeight(static_cast<uint32_t>(height));
bltCmd->setDestinationPitch(static_cast<uint32_t>(width));
bltCmd->setSourcePitch(static_cast<uint32_t>(width));
bltCmd->setDestinationBaseAddress(blitProperties.dstGpuAddress + blitProperties.dstOffset + offset);
bltCmd->setSourceBaseAddress(blitProperties.srcGpuAddress + blitProperties.srcOffset + offset);
appendBlitCommandsForBuffer(blitProperties, *bltCmd, rootDeviceEnvironment);
auto blitSize = width * height;
sizeToBlit -= blitSize;
offset += blitSize;
}
}
} // namespace NEO
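To make the splitting loop above concrete, a standalone sketch with assumed limits (the real BlitterConstants::maxBlitWidth/maxBlitHeight values are hardware specific; 16384 x 16384 is used purely for illustration):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint64_t maxBlitWidth = 16384;  // assumed limit, illustration only
        const uint64_t maxBlitHeight = 16384; // assumed limit, illustration only
        uint64_t sizeToBlit = 300ull * 1024 * 1024; // a 300 MB copy
        size_t numberOfBlits = 0;
        while (sizeToBlit != 0) {
            uint64_t width, height;
            if (sizeToBlit > maxBlitWidth) {
                // full-width 2D blit, as tall as allowed
                width = maxBlitWidth;
                height = std::min(sizeToBlit / width, maxBlitHeight);
            } else {
                // 1D remainder
                width = sizeToBlit;
                height = 1;
            }
            sizeToBlit -= width * height;
            numberOfBlits++;
        }
        // Prints 2: one 16384 x 16384 blit (256 MB) plus one 16384 x 2816 blit (44 MB).
        printf("blits required: %zu\n", numberOfBlits);
        return 0;
    }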

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/blit_commands_helper_base.inl"
namespace NEO {
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::appendBlitCommandsForBuffer(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment) {}
} // namespace NEO

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/cache_policy.h"
#include "helpers/aligned_memory.h"
#include "memory_manager/graphics_allocation.h"
namespace NEO {
bool isL3Capable(void *ptr, size_t size) {
return isAligned<MemoryConstants::cacheLineSize>(ptr) &&
isAligned<MemoryConstants::cacheLineSize>(size);
}
bool isL3Capable(const NEO::GraphicsAllocation &graphicsAllocation) {
return isL3Capable(graphicsAllocation.getUnderlyingBuffer(), graphicsAllocation.getUnderlyingBufferSize());
}
} // namespace NEO

View File

@@ -0,0 +1,19 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "gmm_helper/gmm_lib.h"
namespace CacheSettings {
constexpr uint32_t unknownMocs = GMM_RESOURCE_USAGE_UNKNOWN;
} // namespace CacheSettings
namespace NEO {
class GraphicsAllocation;
bool isL3Capable(void *ptr, size_t size);
bool isL3Capable(const GraphicsAllocation &graphicsAllocation);
} // namespace NEO

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <bitset>
#include <memory>
#include <vector>
namespace NEO {
struct EngineControl;
using EngineControlContainer = std::vector<EngineControl>;
using DeviceBitfield = std::bitset<32>;
} // namespace NEO

View File

@@ -0,0 +1,14 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/completion_stamp.h"
namespace NEO {
const uint32_t CompletionStamp::levelNotReady = 0xFFFFFFF0;
} // namespace NEO

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
typedef uint64_t FlushStamp;
struct CompletionStamp {
uint32_t taskCount;
uint32_t taskLevel;
FlushStamp flushStamp;
static const uint32_t levelNotReady;
};
} // namespace NEO

View File

@@ -0,0 +1,26 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/debug_helpers.h"
#include "debug_settings/debug_settings_manager.h"
#include <assert.h>
#include <cstdio>
namespace NEO {
void debugBreak(int line, const char *file) {
if (DebugManager.flags.EnableDebugBreak.get()) {
printf("Assert was called at %d line in file:\n%s\n", line, file);
assert(false);
}
}
void abortUnrecoverable(int line, const char *file) {
printf("Abort was called at %d line in file:\n%s\n", line, file);
abortExecution();
}
} // namespace NEO

View File

@@ -0,0 +1,36 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/abort.h"
#define UNRECOVERABLE_IF(expression) \
\
if (expression) { \
NEO::abortUnrecoverable(__LINE__, __FILE__); \
}
#define UNREACHABLE(...) std::abort()
#ifndef DEBUG_BREAK_IF
#ifdef _DEBUG
#define DEBUG_BREAK_IF(expression) \
\
if (expression) { \
NEO::debugBreak(__LINE__, __FILE__); \
}
#else
#define DEBUG_BREAK_IF(expression) (void)0
#endif // _DEBUG
#endif // !DEBUG_BREAK_IF
#define UNUSED_VARIABLE(x) ((void)(x))
namespace NEO {
void debugBreak(int line, const char *file);
[[noreturn]] void abortUnrecoverable(int line, const char *file);
} // namespace NEO
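Typical use of the macros above (the validating function is hypothetical, shown only to illustrate the intended pattern):

    void *validateHandle(void *handle, size_t size) {
        UNRECOVERABLE_IF(handle == nullptr); // fatal invariant: aborts in every build
        DEBUG_BREAK_IF(size == 0);           // soft check: asserts only in _DEBUG builds
        UNUSED_VARIABLE(size);               // keeps release builds warning-free
        return handle;
    }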

View File

@@ -0,0 +1,10 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
namespace NEO {
bool isDeferredDeleterEnabled();
} // namespace NEO

View File

@@ -0,0 +1,25 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/dirty_state_helpers.h"
#include "indirect_heap/indirect_heap.h"
using namespace NEO;
bool HeapDirtyState::updateAndCheck(const IndirectHeap *heap) {
if (!heap->getGraphicsAllocation()) {
sizeInPages = 0llu;
return true;
}
bool dirty = gpuBaseAddress != heap->getHeapGpuBase() || sizeInPages != heap->getHeapSizeInPages();
if (dirty) {
gpuBaseAddress = heap->getHeapGpuBase();
sizeInPages = heap->getHeapSizeInPages();
}
return dirty;
}

View File

@@ -0,0 +1,23 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
#include <cstdlib>
namespace NEO {
class IndirectHeap;
class HeapDirtyState {
public:
bool updateAndCheck(const IndirectHeap *heap);
protected:
uint64_t gpuBaseAddress = 0llu;
size_t sizeInPages = 0u;
};
} // namespace NEO

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
namespace NEO {
class CommandStreamReceiver;
class OsContext;
struct EngineControl {
EngineControl() = default;
EngineControl(CommandStreamReceiver *commandStreamReceiver, OsContext *osContext)
: commandStreamReceiver(commandStreamReceiver), osContext(osContext){};
CommandStreamReceiver *commandStreamReceiver = nullptr;
OsContext *osContext = nullptr;
};
} // namespace NEO

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/engine_node_helper.h"
namespace NEO {
namespace EngineHelpers {
bool isCcs(aub_stream::EngineType engineType) {
return engineType == aub_stream::ENGINE_CCS;
}
bool isBcs(aub_stream::EngineType engineType) {
return engineType == aub_stream::ENGINE_BCS;
}
aub_stream::EngineType getBcsEngineType(const HardwareInfo &hwInfo) {
return aub_stream::EngineType::ENGINE_BCS;
}
} // namespace EngineHelpers
} // namespace NEO

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "engine_node.h"
namespace NEO {
struct HardwareInfo;
namespace EngineHelpers {
bool isCcs(aub_stream::EngineType engineType);
bool isBcs(aub_stream::EngineType engineType);
aub_stream::EngineType getBcsEngineType(const HardwareInfo &hwInfo);
}; // namespace EngineHelpers
} // namespace NEO

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
struct ExtendableEnum {
constexpr operator uint32_t() const {
return value;
}
constexpr ExtendableEnum(uint32_t val) : value(val) {}
protected:
uint32_t value;
};
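The pattern this type enables, sketched with invented names: a base set of enumerator objects that other components can extend without editing the defining header.

    // Hypothetical enumeration built on ExtendableEnum (all names are illustration only).
    struct CachePolicy : ExtendableEnum {
        constexpr CachePolicy(uint32_t val) : ExtendableEnum(val) {}
    };
    namespace CachePolicies {
    constexpr CachePolicy Uncached{0};
    constexpr CachePolicy WriteBack{1};
    } // namespace CachePolicies

    // Another component can introduce additional values without touching the code above.
    namespace ExtraCachePolicies {
    constexpr CachePolicy WriteCombined{2};
    } // namespace ExtraCachePolicies

    uint32_t toHardwareSetting(CachePolicy policy) {
        return policy; // implicit conversion through operator uint32_t()
    }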

View File

@@ -0,0 +1,98 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "file_io.h"
#include "helpers/debug_helpers.h"
#include "helpers/stdio.h"
#include <cstring>
#include <new>
std::unique_ptr<char[]> loadDataFromFile(
const char *filename,
size_t &retSize) {
FILE *fp = nullptr;
size_t nsize = 0;
std::unique_ptr<char[]> ret;
DEBUG_BREAK_IF(nullptr == filename);
// Open the file
fopen_s(&fp, filename, "rb");
if (fp) {
// Allocate a buffer for the file contents
fseek(fp, 0, SEEK_END);
nsize = (size_t)ftell(fp);
fseek(fp, 0, SEEK_SET);
ret.reset(new (std::nothrow) char[nsize + 1]);
if (ret) {
// we initialize to all zeroes before reading in data
memset(ret.get(), 0x00, nsize + 1);
auto read = fread(ret.get(), sizeof(unsigned char), nsize, fp);
DEBUG_BREAK_IF(read != nsize);
UNUSED_VARIABLE(read);
} else {
nsize = 0;
}
fclose(fp);
}
retSize = nsize;
return ret;
}
size_t writeDataToFile(
const char *filename,
const void *pData,
size_t dataSize) {
FILE *fp = nullptr;
size_t nsize = 0;
DEBUG_BREAK_IF(nullptr == pData);
DEBUG_BREAK_IF(nullptr == filename);
fopen_s(&fp, filename, "wb");
if (fp) {
nsize = fwrite(pData, sizeof(unsigned char), dataSize, fp);
fclose(fp);
}
return nsize;
}
bool fileExists(const std::string &fileName) {
FILE *pFile = nullptr;
DEBUG_BREAK_IF(fileName.empty());
DEBUG_BREAK_IF(fileName == "");
fopen_s(&pFile, fileName.c_str(), "rb");
if (pFile) {
fclose(pFile);
}
return pFile != nullptr;
}
bool fileExistsHasSize(const std::string &fileName) {
FILE *pFile = nullptr;
size_t nsize = 0;
DEBUG_BREAK_IF(fileName.empty());
DEBUG_BREAK_IF(fileName == "");
fopen_s(&pFile, fileName.c_str(), "rb");
if (pFile) {
fseek(pFile, 0, SEEK_END);
nsize = (size_t)ftell(pFile);
fclose(pFile);
}
return pFile != nullptr && nsize > 0;
}

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
#include <memory>
#include <string>
std::unique_ptr<char[]> loadDataFromFile(
const char *filename,
size_t &retSize);
size_t writeDataToFile(
const char *filename,
const void *pData,
size_t dataSize);
bool fileExists(const std::string &fileName);
bool fileExistsHasSize(const std::string &fileName);
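A usage sketch for the declarations above (file name and payload are examples; assumes this header is included):

    void fileIoExample() {
        const char payload[] = "example payload";
        size_t written = writeDataToFile("example.bin", payload, sizeof(payload));
        if (written == sizeof(payload) && fileExistsHasSize("example.bin")) {
            size_t readSize = 0;
            std::unique_ptr<char[]> data = loadDataFromFile("example.bin", readSize);
            // data now holds readSize bytes; the loader appends one extra zero byte.
            (void)data;
        }
    }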

View File

@@ -0,0 +1,70 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/flat_batch_buffer_helper.h"
#include "execution_environment/execution_environment.h"
#include "memory_manager/graphics_allocation.h"
namespace NEO {
bool FlatBatchBufferHelper::setPatchInfoData(const PatchInfoData &data) {
patchInfoCollection.push_back(data);
return true;
}
bool FlatBatchBufferHelper::removePatchInfoData(uint64_t targetLocation) {
for (auto it = patchInfoCollection.begin(); it != patchInfoCollection.end(); ++it) {
if (it->targetAllocation + it->targetAllocationOffset == targetLocation) {
patchInfoCollection.erase(it);
break;
}
}
return true;
}
bool FlatBatchBufferHelper::registerCommandChunk(uint64_t baseCpu, uint64_t baseGpu, uint64_t startOffset, uint64_t endOffset) {
CommandChunk commandChunk;
commandChunk.baseAddressGpu = baseGpu;
commandChunk.baseAddressCpu = baseCpu;
commandChunk.startOffset = startOffset;
commandChunk.endOffset = endOffset;
return registerCommandChunk(commandChunk);
}
bool FlatBatchBufferHelper::registerCommandChunk(BatchBuffer &batchBuffer, size_t batchBufferStartCommandSize) {
CommandChunk commandChunk;
commandChunk.baseAddressGpu = batchBuffer.stream->getGraphicsAllocation()->getGpuAddress();
commandChunk.baseAddressCpu = reinterpret_cast<uint64_t>(batchBuffer.stream->getCpuBase());
commandChunk.startOffset = batchBuffer.startOffset;
commandChunk.endOffset = batchBuffer.chainedBatchBufferStartOffset + batchBufferStartCommandSize;
return registerCommandChunk(commandChunk);
}
bool FlatBatchBufferHelper::registerCommandChunk(CommandChunk &commandChunk) {
commandChunkList.push_back(commandChunk);
return true;
}
bool FlatBatchBufferHelper::registerBatchBufferStartAddress(uint64_t commandAddress, uint64_t startAddress) {
batchBufferStartAddressSequence.insert(std::pair<uint64_t, uint64_t>(commandAddress, startAddress));
return true;
}
void FlatBatchBufferHelper::fixCrossThreadDataInfo(std::vector<PatchInfoData> &data, size_t offsetCrossThreadData, uint64_t gpuAddress) {
for (auto &patchInfoData : data) {
if (patchInfoData.sourceType == PatchInfoAllocationType::KernelArg) {
patchInfoData.targetAllocation = gpuAddress;
patchInfoData.targetAllocationOffset += offsetCrossThreadData;
}
}
}
MemoryManager *FlatBatchBufferHelper::getMemoryManager() const {
return executionEnvironemnt.memoryManager.get();
}
}; // namespace NEO

View File

@@ -0,0 +1,51 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_stream/submissions_aggregator.h"
#include "helpers/address_patch.h"
#include <map>
#include <vector>
namespace NEO {
enum class DispatchMode;
class MemoryManager;
class ExecutionEnvironment;
class FlatBatchBufferHelper {
public:
FlatBatchBufferHelper(ExecutionEnvironment &executionEnvironemnt) : executionEnvironemnt(executionEnvironemnt) {}
virtual ~FlatBatchBufferHelper(){};
MOCKABLE_VIRTUAL bool setPatchInfoData(const PatchInfoData &data);
MOCKABLE_VIRTUAL bool removePatchInfoData(uint64_t targetLocation);
MOCKABLE_VIRTUAL bool registerCommandChunk(uint64_t baseCpu, uint64_t baseGpu, uint64_t startOffset, uint64_t endOffset);
MOCKABLE_VIRTUAL bool registerCommandChunk(CommandChunk &commandChunk);
MOCKABLE_VIRTUAL bool registerCommandChunk(BatchBuffer &batchBuffer, size_t batchBufferStartCommandSize);
MOCKABLE_VIRTUAL bool registerBatchBufferStartAddress(uint64_t commandAddress, uint64_t startAddress);
virtual GraphicsAllocation *flattenBatchBuffer(uint32_t rootDeviceIndex, BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) = 0;
virtual char *getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) = 0;
virtual void removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping, const HardwareInfo &hwInfo) = 0;
virtual void collectScratchSpacePatchInfo(uint64_t scratchAddress, uint64_t commandOffset, const LinearStream &csr) = 0;
static void fixCrossThreadDataInfo(std::vector<PatchInfoData> &data, size_t offsetCrossThreadData, uint64_t gpuAddress);
std::vector<CommandChunk> &getCommandChunkList() { return commandChunkList; }
std::vector<PatchInfoData> &getPatchInfoCollection() { return patchInfoCollection; }
std::map<uint64_t, uint64_t> &getBatchBufferStartAddressSequence() { return batchBufferStartAddressSequence; }
protected:
MemoryManager *getMemoryManager() const;
ExecutionEnvironment &executionEnvironemnt;
std::vector<PatchInfoData> patchInfoCollection;
std::vector<CommandChunk> commandChunkList;
std::map<uint64_t, uint64_t> batchBufferStartAddressSequence;
};
} // namespace NEO

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/flat_batch_buffer_helper.h"
namespace NEO {
template <typename GfxFamily>
class FlatBatchBufferHelperHw : public FlatBatchBufferHelper {
public:
using FlatBatchBufferHelper::FlatBatchBufferHelper;
GraphicsAllocation *flattenBatchBuffer(uint32_t rootDeviceIndex, BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) override;
char *getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) override;
void removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping, const HardwareInfo &hwInfo) override;
void collectScratchSpacePatchInfo(uint64_t scratchAddress, uint64_t commandOffset, const LinearStream &csr) override;
};
} // namespace NEO

View File

@@ -0,0 +1,203 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "command_stream/command_stream_receiver.h"
#include "helpers/flat_batch_buffer_helper_hw.h"
#include "helpers/hw_helper.h"
#include "helpers/string.h"
#include "memory_manager/graphics_allocation.h"
#include "memory_manager/memory_manager.h"
namespace NEO {
template <typename GfxFamily>
GraphicsAllocation *FlatBatchBufferHelperHw<GfxFamily>::flattenBatchBuffer(uint32_t rootDeviceIndex, BatchBuffer &batchBuffer, size_t &sizeBatchBuffer,
DispatchMode dispatchMode) {
typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
typedef typename GfxFamily::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END;
typedef typename GfxFamily::MI_USER_INTERRUPT MI_USER_INTERRUPT;
GraphicsAllocation *flatBatchBuffer = nullptr;
size_t indirectPatchCommandsSize = 0u;
std::vector<PatchInfoData> indirectPatchInfo;
std::unique_ptr<char[]> indirectPatchCommands(getIndirectPatchCommands(indirectPatchCommandsSize, indirectPatchInfo));
if (dispatchMode == DispatchMode::ImmediateDispatch) {
if (batchBuffer.chainedBatchBuffer) {
batchBuffer.chainedBatchBuffer->setAubWritable(false, GraphicsAllocation::defaultBank);
auto sizeMainBatchBuffer = batchBuffer.chainedBatchBufferStartOffset - batchBuffer.startOffset;
auto alignedMainBatchBufferSize = alignUp(sizeMainBatchBuffer + indirectPatchCommandsSize + batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize(), MemoryConstants::pageSize);
AllocationProperties flatBatchBufferProperties(rootDeviceIndex, alignedMainBatchBufferSize, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY);
flatBatchBufferProperties.alignment = MemoryConstants::pageSize;
flatBatchBuffer =
getMemoryManager()->allocateGraphicsMemoryWithProperties(flatBatchBufferProperties);
UNRECOVERABLE_IF(flatBatchBuffer == nullptr);
// Copy main batchbuffer
memcpy_s(flatBatchBuffer->getUnderlyingBuffer(), sizeMainBatchBuffer,
ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset),
sizeMainBatchBuffer);
// Copy indirect patch commands
memcpy_s(ptrOffset(flatBatchBuffer->getUnderlyingBuffer(), sizeMainBatchBuffer), indirectPatchCommandsSize,
indirectPatchCommands.get(), indirectPatchCommandsSize);
// Copy chained batchbuffer
memcpy_s(ptrOffset(flatBatchBuffer->getUnderlyingBuffer(), sizeMainBatchBuffer + indirectPatchCommandsSize),
batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize(), batchBuffer.chainedBatchBuffer->getUnderlyingBuffer(),
batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize());
sizeBatchBuffer = flatBatchBufferProperties.size;
patchInfoCollection.insert(std::end(patchInfoCollection), std::begin(indirectPatchInfo), std::end(indirectPatchInfo));
}
} else if (dispatchMode == DispatchMode::BatchedDispatch) {
CommandChunk firstChunk;
for (auto &chunk : commandChunkList) {
bool found = false;
for (auto &batchBuffer : batchBufferStartAddressSequence) {
if ((batchBuffer.first <= chunk.baseAddressGpu + chunk.endOffset) && (batchBuffer.first >= chunk.baseAddressGpu + chunk.startOffset)) {
chunk.batchBufferStartLocation = batchBuffer.first;
chunk.batchBufferStartAddress = batchBuffer.second;
chunk.endOffset = chunk.batchBufferStartLocation - chunk.baseAddressGpu;
}
if (batchBuffer.second == chunk.baseAddressGpu + chunk.startOffset) {
found = true;
}
}
if (!found) {
firstChunk = chunk;
}
}
std::vector<CommandChunk> orderedChunks;
CommandChunk &nextChunk = firstChunk;
while (true) {
bool hasNextChunk = false;
for (auto &chunk : commandChunkList) {
if (nextChunk.batchBufferStartAddress == chunk.baseAddressGpu + chunk.startOffset) {
hasNextChunk = true;
orderedChunks.push_back(nextChunk);
nextChunk = chunk;
break;
}
}
if (!hasNextChunk) {
nextChunk.endOffset -= sizeof(MI_BATCH_BUFFER_START);
orderedChunks.push_back(nextChunk);
break;
}
}
uint64_t flatBatchBufferSize = 0u;
std::vector<PatchInfoData> patchInfoCopy = patchInfoCollection;
patchInfoCollection.clear();
for (auto &chunk : orderedChunks) {
for (auto &patch : patchInfoCopy) {
if (patch.targetAllocation + patch.targetAllocationOffset >= chunk.baseAddressGpu + chunk.startOffset && patch.targetAllocation + patch.targetAllocationOffset <= chunk.baseAddressGpu + chunk.endOffset) {
patch.targetAllocationOffset = patch.targetAllocationOffset - chunk.startOffset + flatBatchBufferSize + indirectPatchCommandsSize;
patchInfoCollection.push_back(patch);
}
}
flatBatchBufferSize += chunk.endOffset - chunk.startOffset;
}
patchInfoCollection.insert(std::end(patchInfoCollection), std::begin(indirectPatchInfo), std::end(indirectPatchInfo));
flatBatchBufferSize += sizeof(MI_USER_INTERRUPT);
flatBatchBufferSize += sizeof(MI_BATCH_BUFFER_END);
flatBatchBufferSize += indirectPatchCommandsSize;
flatBatchBufferSize = alignUp(flatBatchBufferSize, MemoryConstants::pageSize);
flatBatchBufferSize += CSRequirements::csOverfetchSize;
AllocationProperties flatBatchBufferProperties(rootDeviceIndex, static_cast<size_t>(flatBatchBufferSize), GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY);
flatBatchBufferProperties.alignment = MemoryConstants::pageSize;
flatBatchBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(flatBatchBufferProperties);
UNRECOVERABLE_IF(flatBatchBuffer == nullptr);
char *ptr = static_cast<char *>(flatBatchBuffer->getUnderlyingBuffer());
memcpy_s(ptr, indirectPatchCommandsSize, indirectPatchCommands.get(), indirectPatchCommandsSize);
ptr += indirectPatchCommandsSize;
for (auto &chunk : orderedChunks) {
size_t chunkSize = static_cast<size_t>(chunk.endOffset - chunk.startOffset);
memcpy_s(ptr,
chunkSize,
reinterpret_cast<char *>(ptrOffset(chunk.baseAddressCpu, static_cast<size_t>(chunk.startOffset))),
chunkSize);
ptr += chunkSize;
}
auto pCmdMui = reinterpret_cast<MI_USER_INTERRUPT *>(ptr);
*pCmdMui = GfxFamily::cmdInitUserInterrupt;
ptr += sizeof(MI_USER_INTERRUPT);
auto pCmdBBend = reinterpret_cast<MI_BATCH_BUFFER_END *>(ptr);
*pCmdBBend = GfxFamily::cmdInitBatchBufferEnd;
ptr += sizeof(MI_BATCH_BUFFER_END);
sizeBatchBuffer = static_cast<size_t>(flatBatchBufferSize);
commandChunkList.clear();
batchBufferStartAddressSequence.clear();
}
return flatBatchBuffer;
}
template <typename GfxFamily>
char *FlatBatchBufferHelperHw<GfxFamily>::getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) {
typedef typename GfxFamily::MI_STORE_DATA_IMM MI_STORE_DATA_IMM;
indirectPatchCommandsSize = 0;
for (auto &patchInfoData : patchInfoCollection) {
if (patchInfoData.requiresIndirectPatching()) {
indirectPatchCommandsSize += sizeof(MI_STORE_DATA_IMM);
}
}
uint64_t stiCommandOffset = 0;
std::vector<PatchInfoData> patchInfoCopy = patchInfoCollection;
std::unique_ptr<char[]> buffer(new char[indirectPatchCommandsSize]);
LinearStream indirectPatchCommandStream(buffer.get(), indirectPatchCommandsSize);
patchInfoCollection.clear();
for (auto &patchInfoData : patchInfoCopy) {
if (patchInfoData.requiresIndirectPatching()) {
auto storeDataImmediate = indirectPatchCommandStream.getSpaceForCmd<MI_STORE_DATA_IMM>();
*storeDataImmediate = GfxFamily::cmdInitStoreDataImm;
storeDataImmediate->setAddress(patchInfoData.targetAllocation + patchInfoData.targetAllocationOffset);
storeDataImmediate->setStoreQword(patchInfoData.patchAddressSize != sizeof(uint32_t));
storeDataImmediate->setDataDword0(static_cast<uint32_t>((patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset) & 0x0000FFFFFFFFULL));
storeDataImmediate->setDataDword1(static_cast<uint32_t>((patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset) >> 32));
PatchInfoData patchInfoForAddress(patchInfoData.targetAllocation, patchInfoData.targetAllocationOffset, patchInfoData.targetType, 0u, stiCommandOffset + sizeof(MI_STORE_DATA_IMM) - 2 * sizeof(uint64_t), PatchInfoAllocationType::Default);
PatchInfoData patchInfoForValue(patchInfoData.sourceAllocation, patchInfoData.sourceAllocationOffset, patchInfoData.sourceType, 0u, stiCommandOffset + sizeof(MI_STORE_DATA_IMM) - sizeof(uint64_t), PatchInfoAllocationType::Default);
indirectPatchInfo.push_back(patchInfoForAddress);
indirectPatchInfo.push_back(patchInfoForValue);
stiCommandOffset += sizeof(MI_STORE_DATA_IMM);
} else {
patchInfoCollection.push_back(patchInfoData);
}
}
return buffer.release();
}
template <typename GfxFamily>
void FlatBatchBufferHelperHw<GfxFamily>::removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping, const HardwareInfo &hwInfo) {
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
size_t numPipeControls = (pipeControlLocationSize - MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(hwInfo)) / (sizeof(PIPE_CONTROL));
for (size_t i = 0; i < numPipeControls; i++) {
PIPE_CONTROL *erasedPipeControl = reinterpret_cast<PIPE_CONTROL *>(pipeControlForNooping);
removePatchInfoData(reinterpret_cast<uint64_t>(erasedPipeControl) + (i + 1) * sizeof(PIPE_CONTROL) - 2 * sizeof(uint64_t));
removePatchInfoData(reinterpret_cast<uint64_t>(erasedPipeControl) + (i + 1) * sizeof(PIPE_CONTROL) - sizeof(uint64_t));
}
}
template <typename GfxFamily>
void FlatBatchBufferHelperHw<GfxFamily>::collectScratchSpacePatchInfo(uint64_t scratchAddress, uint64_t commandOffset, const LinearStream &csr) {
if (scratchAddress) {
auto scratchOffset = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(csr.getCpuBase()) + commandOffset)[0] & 0x3FF;
PatchInfoData patchInfoData(scratchAddress, scratchOffset, PatchInfoAllocationType::ScratchSpace, csr.getGraphicsAllocation()->getGpuAddress(), commandOffset, PatchInfoAllocationType::Default);
patchInfoCollection.push_back(patchInfoData);
}
}
}; // namespace NEO

View File

@@ -0,0 +1,65 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/flush_stamp.h"
using namespace NEO;
FlushStampTracker::FlushStampTracker(bool allocateStamp) {
if (allocateStamp) {
flushStampSharedHandle = new FlushStampTrackingObj();
flushStampSharedHandle->incRefInternal();
}
}
FlushStampTracker::~FlushStampTracker() {
if (flushStampSharedHandle) {
flushStampSharedHandle->decRefInternal();
}
}
FlushStamp FlushStampTracker::peekStamp() const {
if (flushStampSharedHandle->initialized) {
return flushStampSharedHandle->flushStamp;
} else {
return 0;
}
}
void FlushStampTracker::setStamp(FlushStamp stamp) {
if (stamp != 0) {
flushStampSharedHandle->flushStamp = stamp;
flushStampSharedHandle->initialized = true;
}
}
void FlushStampTracker::replaceStampObject(FlushStampTrackingObj *stampObj) {
if (stampObj) {
stampObj->incRefInternal();
if (flushStampSharedHandle) {
flushStampSharedHandle->decRefInternal();
}
flushStampSharedHandle = stampObj;
}
}
void FlushStampUpdateHelper::insert(FlushStampTrackingObj *stampObj) {
if (stampObj) {
flushStampsToUpdate.push_back(stampObj);
}
}
void FlushStampUpdateHelper::updateAll(const FlushStamp &flushStamp) {
for (const auto &stamp : flushStampsToUpdate) {
stamp->flushStamp = flushStamp;
stamp->initialized = true;
}
}
size_t FlushStampUpdateHelper::size() const {
return flushStampsToUpdate.size();
}

View File

@@ -0,0 +1,48 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/completion_stamp.h"
#include "utilities/reference_tracked_object.h"
#include "utilities/stackvec.h"
namespace NEO {
struct FlushStampTrackingObj : public ReferenceTrackedObject<FlushStampTrackingObj> {
FlushStamp flushStamp = 0;
std::atomic<bool> initialized{false};
};
class FlushStampTracker {
public:
FlushStampTracker() = delete;
FlushStampTracker(bool allocateStamp);
~FlushStampTracker();
FlushStamp peekStamp() const;
void setStamp(FlushStamp stamp);
void replaceStampObject(FlushStampTrackingObj *stampObj);
// Temporary. Method will be removed
FlushStampTrackingObj *getStampReference() {
return flushStampSharedHandle;
}
protected:
FlushStampTrackingObj *flushStampSharedHandle = nullptr;
};
class FlushStampUpdateHelper {
public:
void insert(FlushStampTrackingObj *stampObj);
void updateAll(const FlushStamp &flushStamp);
size_t size() const;
private:
StackVec<FlushStampTrackingObj *, 64> flushStampsToUpdate;
};
} // namespace NEO
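A minimal round trip through the flush-stamp API above (stamp values are arbitrary):

    void flushStampExample() {
        NEO::FlushStampTracker tracker(true); // allocates and owns a tracking object
        tracker.setStamp(42);
        NEO::FlushStamp last = tracker.peekStamp(); // 42 once a stamp has been set
        (void)last;

        NEO::FlushStampUpdateHelper updater;
        updater.insert(tracker.getStampReference());
        updater.updateAll(43); // propagates the new stamp to every registered object
    }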

View File

@@ -0,0 +1,96 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "get_info_status.h"
#include <cstring>
// Needed for Linux compatibility with memcpy_s
#include "helpers/string.h"
inline GetInfoStatus getInfo(void *destParamValue, size_t destParamValueSize,
const void *srcParamValue, size_t srcParamValueSize) {
auto retVal = GetInfoStatus::INVALID_VALUE;
if (srcParamValue && srcParamValueSize) {
if (!destParamValue && !destParamValueSize) {
// Report ok if they're looking for size.
retVal = GetInfoStatus::SUCCESS;
} else if (destParamValue && destParamValueSize >= srcParamValueSize) {
// Report ok if we can copy safely
retVal = GetInfoStatus::SUCCESS;
memcpy_s(destParamValue, destParamValueSize, srcParamValue, srcParamValueSize);
} else if (!destParamValue) {
// Report ok if destParamValue == nullptr and destParamValueSize > 0
retVal = GetInfoStatus::SUCCESS;
}
}
return retVal;
}
struct GetInfoHelper {
GetInfoHelper(void *dst, size_t dstSize, size_t *retSize, GetInfoStatus *retVal = nullptr)
: dst(dst), dstSize(dstSize), retSize(retSize), retVal(retVal) {
}
template <typename DataType>
GetInfoStatus set(const DataType &val) {
auto errCode = GetInfoStatus::SUCCESS;
if (retSize != nullptr) {
*retSize = sizeof(val);
}
if (dst != nullptr) {
if (dstSize >= sizeof(val)) {
*reinterpret_cast<DataType *>(dst) = val;
} else {
errCode = GetInfoStatus::INVALID_VALUE;
}
}
if (retVal)
*retVal = errCode;
return errCode;
}
template <typename DataType>
static void set(DataType *dst, DataType val) {
if (dst) {
*dst = val;
}
}
void *dst;
size_t dstSize;
size_t *retSize;
GetInfoStatus *retVal;
};
struct ErrorCodeHelper {
ErrorCodeHelper(int *errcodeRet, int defaultCode)
: errcodeRet(errcodeRet) {
set(defaultCode);
}
void set(int code) {
if (errcodeRet != nullptr) {
*errcodeRet = code;
}
localErrcode = code;
}
int *errcodeRet;
int localErrcode;
};
template <typename T>
T getValidParam(T param, T defaultVal = 1, T invalidVal = 0) {
if (param == invalidVal) {
return defaultVal;
}
return param;
}
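How these helpers are typically driven from a getInfo-style query (the wrapper function and the value it reports are invented for the example):

    GetInfoStatus queryExampleValue(void *paramValue, size_t paramValueSize, size_t *paramValueSizeRet) {
        const uint32_t value = 12; // hypothetical value being reported
        GetInfoStatus status = getInfo(paramValue, paramValueSize, &value, sizeof(value));
        GetInfoHelper::set(paramValueSizeRet, sizeof(value)); // report the required size if requested
        return status;
    }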

View File

@@ -0,0 +1,14 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
enum class GetInfoStatus {
INVALID_CONTEXT = -2,
INVALID_VALUE = -1,
SUCCESS = 0
};

View File

@@ -0,0 +1,117 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/aligned_memory.h"
#include "utilities/compiler_support.h"
#include <cstdint>
namespace NEO {
// clang-format off
#define HASH_JENKINS_MIX(a,b,c) \
{ \
a -= b; a -= c; a ^= (c>>13); \
b -= c; b -= a; b ^= (a<<8); \
c -= a; c -= b; c ^= (b>>13); \
a -= b; a -= c; a ^= (c>>12); \
b -= c; b -= a; b ^= (a<<16); \
c -= a; c -= b; c ^= (b>>5); \
a -= b; a -= c; a ^= (c>>3); \
b -= c; b -= a; b ^= (a<<10); \
c -= a; c -= b; c ^= (b>>15); \
}
// clang-format on
class Hash {
public:
Hash() {
reset();
};
uint32_t getValue(const char *data, size_t size) {
uint32_t value = 0;
switch (size) {
case 3:
value = static_cast<uint32_t>(*reinterpret_cast<const unsigned char *>(data++));
value <<= 8;
CPP_ATTRIBUTE_FALLTHROUGH;
case 2:
value |= static_cast<uint32_t>(*reinterpret_cast<const unsigned char *>(data++));
value <<= 8;
CPP_ATTRIBUTE_FALLTHROUGH;
case 1:
value |= static_cast<uint32_t>(*reinterpret_cast<const unsigned char *>(data++));
value <<= 8;
}
return value;
}
void update(const char *buff, size_t size) {
if (buff == nullptr)
return;
if ((reinterpret_cast<uintptr_t>(buff) & 0x3) != 0) {
const unsigned char *tmp = (const unsigned char *)buff;
while (size >= sizeof(uint32_t)) {
uint32_t value = (uint32_t)tmp[0] + (((uint32_t)tmp[1]) << 8) + ((uint32_t)tmp[2] << 16) + ((uint32_t)tmp[3] << 24);
a ^= value;
HASH_JENKINS_MIX(a, hi, lo);
size -= sizeof(uint32_t);
tmp += sizeof(uint32_t);
}
if (size > 0) {
uint32_t value = getValue((char *)tmp, size);
a ^= value;
HASH_JENKINS_MIX(a, hi, lo);
}
} else {
const uint32_t *tmp = reinterpret_cast<const uint32_t *>(buff);
while (size >= sizeof(*tmp)) {
a ^= *(tmp++);
HASH_JENKINS_MIX(a, hi, lo);
size -= sizeof(*tmp);
}
if (size > 0) {
uint32_t value = getValue((char *)tmp, size);
a ^= value;
HASH_JENKINS_MIX(a, hi, lo);
}
}
}
uint64_t finish() {
return (((uint64_t)hi) << 32) | lo;
}
void reset() {
a = 0x428a2f98;
hi = 0x71374491;
lo = 0xb5c0fbcf;
}
static uint64_t hash(const char *buff, size_t size) {
Hash hash;
hash.update(buff, size);
return hash.finish();
}
protected:
uint32_t a, hi, lo;
};
template <typename T>
uint32_t hashPtrToU32(const T *src) {
auto asInt = reinterpret_cast<uintptr_t>(src);
constexpr auto m = sizeof(uintptr_t) / 8;
asInt = asInt ^ ((asInt & ~(m - 1)) >> (m * 32));
return static_cast<uint32_t>(asInt);
}
} // namespace NEO
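An illustrative usage sketch (not part of the committed file), assuming the declarations from hash.h above.
// One-shot hashing via the static convenience wrapper:
const char payload[] = "neo-runtime";
uint64_t digest = NEO::Hash::hash(payload, sizeof(payload));
// Folding a pointer into 32 bits (on 64-bit builds the upper half is XOR-ed into the lower half):
int dummy = 0;
uint32_t key = NEO::hashPtrToU32(&dummy);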

View File

@@ -0,0 +1,35 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/heap_helper.h"
#include "indirect_heap/indirect_heap.h"
#include "memory_manager/graphics_allocation.h"
#include "memory_manager/internal_allocation_storage.h"
#include "memory_manager/memory_manager.h"
namespace NEO {
GraphicsAllocation *HeapHelper::getHeapAllocation(uint32_t heapType, size_t heapSize, size_t alignment, uint32_t rootDeviceIndex) {
auto allocationType = GraphicsAllocation::AllocationType::LINEAR_STREAM;
if (IndirectHeap::Type::INDIRECT_OBJECT == heapType) {
allocationType = GraphicsAllocation::AllocationType::INTERNAL_HEAP;
}
auto allocation = this->storageForReuse->obtainReusableAllocation(heapSize, allocationType);
if (allocation) {
return allocation.release();
}
NEO::AllocationProperties properties{rootDeviceIndex, true, heapSize, allocationType, isMultiOsContextCapable, false, {}};
properties.alignment = alignment;
return this->memManager->allocateGraphicsMemoryWithProperties(properties);
}
void HeapHelper::storeHeapAllocation(GraphicsAllocation *heapAllocation) {
this->storageForReuse->storeAllocation(std::unique_ptr<NEO::GraphicsAllocation>(heapAllocation), NEO::AllocationUsage::REUSABLE_ALLOCATION);
}
} // namespace NEO

View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <stddef.h>
#include <stdint.h>
namespace NEO {
class MemoryManager;
class GraphicsAllocation;
class InternalAllocationStorage;
class HeapHelper {
public:
HeapHelper(MemoryManager *memManager, InternalAllocationStorage *storageForReuse, bool isMultiOsContextCapable) : storageForReuse(storageForReuse),
memManager(memManager),
isMultiOsContextCapable(isMultiOsContextCapable) {}
GraphicsAllocation *getHeapAllocation(uint32_t heapType, size_t heapSize, size_t alignment, uint32_t rootDeviceIndex);
void storeHeapAllocation(GraphicsAllocation *heapAllocation);
protected:
InternalAllocationStorage *storageForReuse = nullptr;
MemoryManager *memManager = nullptr;
bool isMultiOsContextCapable = false;
};
} // namespace NEO

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#ifdef SUPPORT_GEN8
#include "gen8/hw_cmds.h"
#endif
#ifdef SUPPORT_GEN9
#include "gen9/hw_cmds.h"
#endif
#ifdef SUPPORT_GEN11
#include "gen11/hw_cmds.h"
#endif
#ifdef SUPPORT_GEN12LP
#include "gen12lp/hw_cmds.h"
#endif

View File

@@ -0,0 +1,52 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/hw_helper.h"
#include "debug_settings/debug_settings_manager.h"
namespace NEO {
HwHelper *hwHelperFactory[IGFX_MAX_CORE] = {};
HwHelper &HwHelper::get(GFXCORE_FAMILY gfxCore) {
return *hwHelperFactory[gfxCore];
}
bool HwHelper::renderCompressedBuffersSupported(const HardwareInfo &hwInfo) {
if (DebugManager.flags.RenderCompressedBuffersEnabled.get() != -1) {
return !!DebugManager.flags.RenderCompressedBuffersEnabled.get();
}
return hwInfo.capabilityTable.ftrRenderCompressedBuffers;
}
bool HwHelper::renderCompressedImagesSupported(const HardwareInfo &hwInfo) {
if (DebugManager.flags.RenderCompressedImagesEnabled.get() != -1) {
return !!DebugManager.flags.RenderCompressedImagesEnabled.get();
}
return hwInfo.capabilityTable.ftrRenderCompressedImages;
}
bool HwHelper::cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo) {
int32_t dbgFlag = DebugManager.flags.EnableCacheFlushAfterWalker.get();
if (dbgFlag == 1) {
return true;
} else if (dbgFlag == 0) {
return false;
}
return hwInfo.capabilityTable.supportCacheFlushAfterWalker;
}
uint32_t HwHelper::getMaxThreadsForVfe(const HardwareInfo &hwInfo) {
uint32_t threadsPerEU = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) + hwInfo.capabilityTable.extraQuantityThreadsPerEU;
return hwInfo.gtSystemInfo.EUCount * threadsPerEU;
}
uint32_t HwHelper::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
uint32_t numThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount;
return maxNumEUsPerSubSlice * numThreadsPerEU;
}
} // namespace NEO
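A worked example of the thread-count arithmetic above, using illustrative numbers rather than any real product configuration:
// ThreadCount = 224, EUCount = 32, extraQuantityThreadsPerEU = 0
// threadsPerEU = 224 / 32 + 0 = 7
// getMaxThreadsForVfe()                -> 32 * 7 = 224
// getMaxThreadsForWorkgroup(hwInfo, 8) -> 8 * 7  = 56 threads per sub-slice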

View File

@@ -0,0 +1,273 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_stream/linear_stream.h"
#include "helpers/aux_translation.h"
#include "helpers/hw_cmds.h"
#include "opencl/source/built_ins/sip.h"
#include "opencl/source/gen_common/aub_mapper.h"
#include "opencl/source/mem_obj/buffer.h"
#include <cstdint>
#include <string>
#include <type_traits>
namespace NEO {
class ExecutionEnvironment;
class GraphicsAllocation;
struct HardwareCapabilities;
class GmmHelper;
class HwHelper {
public:
static HwHelper &get(GFXCORE_FAMILY gfxCore);
virtual uint32_t getBindingTableStateSurfaceStatePointer(const void *pBindingTable, uint32_t index) = 0;
virtual size_t getBindingTableStateSize() const = 0;
virtual uint32_t getBindingTableStateAlignement() const = 0;
virtual size_t getInterfaceDescriptorDataSize() const = 0;
virtual size_t getMaxBarrierRegisterPerSlice() const = 0;
virtual uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const = 0;
virtual uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) = 0;
virtual void setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) = 0;
virtual void adjustDefaultEngineType(HardwareInfo *pHwInfo) = 0;
virtual void setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) = 0;
virtual bool isL3Configurable(const HardwareInfo &hwInfo) = 0;
virtual SipKernelType getSipKernelType(bool debuggingActive) = 0;
virtual bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const = 0;
virtual bool isPageTableManagerSupported(const HardwareInfo &hwInfo) const = 0;
virtual bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const = 0;
virtual const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const = 0;
virtual bool hvAlign4Required() const = 0;
virtual bool obtainRenderBufferCompressionPreference(const HardwareInfo &hwInfo, const size_t size) const = 0;
virtual bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) = 0;
static bool renderCompressedBuffersSupported(const HardwareInfo &hwInfo);
static bool renderCompressedImagesSupported(const HardwareInfo &hwInfo);
static bool cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo);
virtual bool timestampPacketWriteSupported() const = 0;
virtual size_t getRenderSurfaceStateSize() const = 0;
virtual void setRenderSurfaceStateForBuffer(ExecutionEnvironment &executionEnvironment,
void *surfaceStateBuffer,
size_t bufferSize,
uint64_t gpuVa,
size_t offset,
uint32_t pitch,
GraphicsAllocation *gfxAlloc,
bool isReadOnly,
uint32_t surfaceType,
bool forceNonAuxMode) = 0;
virtual const std::vector<aub_stream::EngineType> getGpgpuEngineInstances() const = 0;
virtual const StackVec<size_t, 3> getDeviceSubGroupSizes() const = 0;
virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0;
virtual std::string getExtensions() const = 0;
static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const;
virtual uint32_t getMetricsLibraryGenId() const = 0;
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
virtual bool requiresAuxResolves() const = 0;
virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0;
virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
uint32_t threadsPerEu) = 0;
virtual uint32_t alignSlmSize(uint32_t slmSize) = 0;
virtual bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) = 0;
virtual uint32_t getMinimalSIMDSize() = 0;
virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0;
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
static uint32_t getEnginesCount(const HardwareInfo &hwInfo);
static constexpr uint32_t lowPriorityGpgpuEngineIndex = 1;
static constexpr uint32_t internalUsageEngineIndex = 2;
protected:
HwHelper() = default;
};
template <typename GfxFamily>
class HwHelperHw : public HwHelper {
public:
static HwHelper &get() {
static HwHelperHw<GfxFamily> hwHelper;
return hwHelper;
}
static const aub_stream::EngineType lowPriorityEngineType;
uint32_t getBindingTableStateSurfaceStatePointer(const void *pBindingTable, uint32_t index) override {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
const BINDING_TABLE_STATE *bindingTableState = static_cast<const BINDING_TABLE_STATE *>(pBindingTable);
return bindingTableState[index].getRawData(0);
}
size_t getBindingTableStateSize() const override {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
return sizeof(BINDING_TABLE_STATE);
}
uint32_t getBindingTableStateAlignement() const override {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
return BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE;
}
size_t getInterfaceDescriptorDataSize() const override {
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
return sizeof(INTERFACE_DESCRIPTOR_DATA);
}
size_t getRenderSurfaceStateSize() const override {
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
return sizeof(RENDER_SURFACE_STATE);
}
const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const override;
size_t getMaxBarrierRegisterPerSlice() const override;
uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override;
uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override;
uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) override;
void setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) override;
void adjustDefaultEngineType(HardwareInfo *pHwInfo) override;
void setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) override;
bool isL3Configurable(const HardwareInfo &hwInfo) override;
SipKernelType getSipKernelType(bool debuggingActive) override;
bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const override;
bool hvAlign4Required() const override;
bool obtainRenderBufferCompressionPreference(const HardwareInfo &hwInfo, const size_t size) const override;
bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) override;
bool timestampPacketWriteSupported() const override;
bool isPageTableManagerSupported(const HardwareInfo &hwInfo) const override;
bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const override;
void setRenderSurfaceStateForBuffer(ExecutionEnvironment &executionEnvironment,
void *surfaceStateBuffer,
size_t bufferSize,
uint64_t gpuVa,
size_t offset,
uint32_t pitch,
GraphicsAllocation *gfxAlloc,
bool isReadOnly,
uint32_t surfaceType,
bool forceNonAuxMode) override;
const std::vector<aub_stream::EngineType> getGpgpuEngineInstances() const override;
const StackVec<size_t, 3> getDeviceSubGroupSizes() const override;
bool getEnableLocalMemory(const HardwareInfo &hwInfo) const override;
std::string getExtensions() const override;
uint32_t getMetricsLibraryGenId() const override;
uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const override;
bool requiresAuxResolves() const override;
bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) override;
uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) override;
uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) override;
uint32_t alignSlmSize(uint32_t slmSize) override;
static AuxTranslationMode getAuxTranslationMode();
static bool isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo);
bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override;
static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo);
bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) override;
uint32_t getMinimalSIMDSize() override;
protected:
static const AuxTranslationMode defaultAuxTranslationMode;
HwHelperHw() = default;
};
struct DwordBuilder {
static uint32_t build(uint32_t bitNumberToSet, bool masked, bool set = true, uint32_t initValue = 0) {
uint32_t dword = initValue;
if (set) {
dword |= (1 << bitNumberToSet);
}
if (masked) {
dword |= (1 << (bitNumberToSet + 16));
}
return dword;
};
};
template <typename GfxFamily>
struct LriHelper {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
static MI_LOAD_REGISTER_IMM *program(LinearStream *cmdStream, uint32_t address, uint32_t value) {
auto lri = (MI_LOAD_REGISTER_IMM *)cmdStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
*lri = GfxFamily::cmdInitLoadRegisterImm;
lri->setRegisterOffset(address);
lri->setDataDword(value);
return lri;
}
};
template <typename GfxFamily>
struct MemorySynchronizationCommands {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
static PIPE_CONTROL *obtainPipeControlAndProgramPostSyncOperation(LinearStream &commandStream,
POST_SYNC_OPERATION operation,
uint64_t gpuAddress,
uint64_t immediateData,
bool dcFlush, const HardwareInfo &hwInfo);
static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
static void addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
static void setExtraPipeControlProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo);
static PIPE_CONTROL *addPipeControl(LinearStream &commandStream, bool dcFlush);
static size_t getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo);
static size_t getSizeForSinglePipeControl();
static size_t getSizeForSingleSynchronization(const HardwareInfo &hwInfo);
static size_t getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo);
static PIPE_CONTROL *addFullCacheFlush(LinearStream &commandStream);
static size_t getSizeForFullCacheFlush();
static void setExtraCacheFlushFields(PIPE_CONTROL *pipeControl);
protected:
static PIPE_CONTROL *obtainPipeControl(LinearStream &commandStream, bool dcFlush);
};
union SURFACE_STATE_BUFFER_LENGTH {
uint32_t Length;
struct SurfaceState {
uint32_t Width : BITFIELD_RANGE(0, 6);
uint32_t Height : BITFIELD_RANGE(7, 20);
uint32_t Depth : BITFIELD_RANGE(21, 31);
} SurfaceState;
};
} // namespace NEO
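An illustrative sketch (not part of the committed header) of the DwordBuilder defined above, which encodes masked register writes: the value bit lives in the low word and its enable mask sits 16 bits higher.
// DwordBuilder::build(2, true)        -> 0x00040004 (bit 2 set, mask bit 18 set)
// DwordBuilder::build(2, true, false) -> 0x00040000 (mask bit only, value bit cleared)
uint32_t maskedWrite = NEO::DwordBuilder::build(2, true);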

View File

@@ -0,0 +1,320 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "execution_environment/execution_environment.h"
#include "gmm_helper/gmm.h"
#include "gmm_helper/gmm_helper.h"
#include "helpers/aligned_memory.h"
#include "helpers/hw_helper.h"
#include "helpers/hw_info.h"
#include "helpers/preamble.h"
#include "memory_manager/graphics_allocation.h"
#include "memory_manager/memory_constants.h"
#include "os_interface/os_interface.h"
#include "opencl/source/aub_mem_dump/aub_mem_dump.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "instrumentation.h"
namespace NEO {
template <typename Family>
const aub_stream::EngineType HwHelperHw<Family>::lowPriorityEngineType = aub_stream::EngineType::ENGINE_RCS;
template <typename Family>
const AuxTranslationMode HwHelperHw<Family>::defaultAuxTranslationMode = AuxTranslationMode::Builtin;
template <typename Family>
bool HwHelperHw<Family>::obtainRenderBufferCompressionPreference(const HardwareInfo &hwInfo, const size_t size) const {
return size > KB;
}
template <typename Family>
void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) {
caps->image3DMaxHeight = 16384;
caps->image3DMaxWidth = 16384;
//With stateful messages we have an allocation cap of 4GB
//The 8KB is subtracted because the driver may pad the buffer with additional pages for overfetching.
caps->maxMemAllocSize = (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte);
caps->isStatelesToStatefullWithOffsetSupported = true;
}
template <typename Family>
bool HwHelperHw<Family>::isL3Configurable(const HardwareInfo &hwInfo) {
return PreambleHelper<Family>::isL3Configurable(hwInfo);
}
template <typename Family>
SipKernelType HwHelperHw<Family>::getSipKernelType(bool debuggingActive) {
if (!debuggingActive) {
return SipKernelType::Csr;
}
return SipKernelType::DbgCsr;
}
template <typename Family>
size_t HwHelperHw<Family>::getMaxBarrierRegisterPerSlice() const {
return 32;
}
template <typename Family>
uint32_t HwHelperHw<Family>::getPitchAlignmentForImage(const HardwareInfo *hwInfo) {
return 4u;
}
template <typename Family>
const AubMemDump::LrcaHelper &HwHelperHw<Family>::getCsTraits(aub_stream::EngineType engineType) const {
return *AUBFamilyMapper<Family>::csTraits[engineType];
}
template <typename Family>
bool HwHelperHw<Family>::isPageTableManagerSupported(const HardwareInfo &hwInfo) const {
return false;
}
template <typename Family>
bool HwHelperHw<Family>::isFenceAllocationRequired(const HardwareInfo &hwInfo) const {
return false;
}
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) {
return true;
}
template <typename Family>
void HwHelperHw<Family>::setRenderSurfaceStateForBuffer(ExecutionEnvironment &executionEnvironment,
void *surfaceStateBuffer,
size_t bufferSize,
uint64_t gpuVa,
size_t offset,
uint32_t pitch,
GraphicsAllocation *gfxAlloc,
bool isReadOnly,
uint32_t surfaceType,
bool forceNonAuxMode) {
using RENDER_SURFACE_STATE = typename Family::RENDER_SURFACE_STATE;
using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
auto gmmHelper = executionEnvironment.getGmmHelper();
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuffer);
*surfaceState = Family::cmdInitRenderSurfaceState;
auto surfaceSize = alignUp(bufferSize, 4);
SURFACE_STATE_BUFFER_LENGTH Length = {0};
Length.Length = static_cast<uint32_t>(surfaceSize - 1);
surfaceState->setWidth(Length.SurfaceState.Width + 1);
surfaceState->setHeight(Length.SurfaceState.Height + 1);
surfaceState->setDepth(Length.SurfaceState.Depth + 1);
if (pitch) {
surfaceState->setSurfacePitch(pitch);
}
// The graphics allocation for a host-pointer surface is created in the makeResident call; its GPU address is expected to match the CPU address
auto bufferStateAddress = (gfxAlloc != nullptr) ? gfxAlloc->getGpuAddress() : gpuVa;
bufferStateAddress += offset;
auto bufferStateSize = (gfxAlloc != nullptr) ? gfxAlloc->getUnderlyingBufferSize() : bufferSize;
surfaceState->setSurfaceType(static_cast<typename RENDER_SURFACE_STATE::SURFACE_TYPE>(surfaceType));
surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
surfaceState->setSurfaceVerticalAlignment(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
surfaceState->setSurfaceHorizontalAlignment(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4);
surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR);
surfaceState->setVerticalLineStride(0);
surfaceState->setVerticalLineStrideOffset(0);
if ((isAligned<MemoryConstants::cacheLineSize>(bufferStateAddress) && isAligned<MemoryConstants::cacheLineSize>(bufferStateSize)) ||
isReadOnly) {
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
} else {
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
}
surfaceState->setSurfaceBaseAddress(bufferStateAddress);
Gmm *gmm = gfxAlloc ? gfxAlloc->getDefaultGmm() : nullptr;
if (gmm && gmm->isRenderCompressed && !forceNonAuxMode &&
GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == gfxAlloc->getAllocationType()) {
// It is expected not to program pitch/qpitch/baseAddress for the Aux surface in CCS scenarios
surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E);
} else {
surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT);
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
}
}
template <typename Family>
bool HwHelperHw<Family>::getEnableLocalMemory(const HardwareInfo &hwInfo) const {
if (DebugManager.flags.EnableLocalMemory.get() != -1) {
return DebugManager.flags.EnableLocalMemory.get();
} else if (DebugManager.flags.AUBDumpForceAllToLocalMemory.get()) {
return true;
}
return OSInterface::osEnableLocalMemory && isLocalMemoryEnabled(hwInfo);
}
template <typename Family>
AuxTranslationMode HwHelperHw<Family>::getAuxTranslationMode() {
if (DebugManager.flags.ForceAuxTranslationMode.get() != -1) {
return static_cast<AuxTranslationMode>(DebugManager.flags.ForceAuxTranslationMode.get());
}
return HwHelperHw<Family>::defaultAuxTranslationMode;
}
template <typename Family>
bool HwHelperHw<Family>::isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo) {
return (HwHelperHw<Family>::getAuxTranslationMode() == AuxTranslationMode::Blit) &&
hwInfo.capabilityTable.blitterOperationsSupported &&
multiDispatchInfo.getMemObjsForAuxTranslation() &&
(multiDispatchInfo.getMemObjsForAuxTranslation()->size() > 0);
}
template <typename Family>
typename Family::PIPE_CONTROL *MemorySynchronizationCommands<Family>::obtainPipeControlAndProgramPostSyncOperation(
LinearStream &commandStream, POST_SYNC_OPERATION operation, uint64_t gpuAddress, uint64_t immediateData, bool dcFlush, const HardwareInfo &hwInfo) {
addPipeControlWA(commandStream, gpuAddress, hwInfo);
auto pipeControl = obtainPipeControl(commandStream, dcFlush);
pipeControl->setPostSyncOperation(operation);
pipeControl->setAddress(static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL));
pipeControl->setAddressHigh(static_cast<uint32_t>(gpuAddress >> 32));
pipeControl->setDcFlushEnable(dcFlush);
if (operation == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
pipeControl->setImmediateData(immediateData);
}
setExtraPipeControlProperties(*pipeControl, hwInfo);
MemorySynchronizationCommands<Family>::addAdditionalSynchronization(commandStream, gpuAddress, hwInfo);
return pipeControl;
}
template <typename GfxFamily>
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(LinearStream &commandStream, bool dcFlush) {
auto pCmd = reinterpret_cast<PIPE_CONTROL *>(commandStream.getSpace(sizeof(PIPE_CONTROL)));
*pCmd = GfxFamily::cmdInitPipeControl;
pCmd->setCommandStreamerStallEnable(true);
pCmd->setDcFlushEnable(dcFlush);
if (DebugManager.flags.FlushAllCaches.get()) {
pCmd->setDcFlushEnable(true);
pCmd->setRenderTargetCacheFlushEnable(true);
pCmd->setInstructionCacheInvalidateEnable(true);
pCmd->setTextureCacheInvalidationEnable(true);
pCmd->setPipeControlFlushEnable(true);
pCmd->setVfCacheInvalidationEnable(true);
pCmd->setConstantCacheInvalidationEnable(true);
pCmd->setStateCacheInvalidationEnable(true);
}
return pCmd;
}
template <typename GfxFamily>
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::addPipeControl(LinearStream &commandStream, bool dcFlush) {
return MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(commandStream, dcFlush);
}
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl() {
return sizeof(typename GfxFamily::PIPE_CONTROL);
}
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo) {
const auto pipeControlCount = HardwareCommandsHelper<GfxFamily>::isPipeControlWArequired(hwInfo) ? 2u : 1u;
return pipeControlCount * getSizeForSinglePipeControl() + getSizeForAdditonalSynchronization(hwInfo);
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getMetricsLibraryGenId() const {
return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen9);
}
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::requiresAuxResolves() const {
return true;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) {
if (DebugManager.flags.ForceLinearImages.get() || forceLinearStorage || isSharedContext) {
return false;
}
return !isImage1d;
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::alignSlmSize(uint32_t slmSize) {
return HardwareCommandsHelper<GfxFamily>::alignSlmSize(slmSize);
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) {
return hasBarriers;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
return false;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo) {
return false;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) {
return false;
}
template <typename GfxFamily>
inline uint32_t HwHelperHw<GfxFamily>::getMinimalSIMDSize() {
return 8u;
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
}
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() {
return sizeof(typename GfxFamily::PIPE_CONTROL);
}
template <typename GfxFamily>
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::addFullCacheFlush(LinearStream &commandStream) {
auto pipeControl = MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(commandStream, true);
pipeControl->setRenderTargetCacheFlushEnable(true);
pipeControl->setInstructionCacheInvalidateEnable(true);
pipeControl->setTextureCacheInvalidationEnable(true);
pipeControl->setPipeControlFlushEnable(true);
pipeControl->setConstantCacheInvalidationEnable(true);
pipeControl->setStateCacheInvalidationEnable(true);
MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(pipeControl);
return pipeControl;
}
template <typename GfxFamily>
const StackVec<size_t, 3> HwHelperHw<GfxFamily>::getDeviceSubGroupSizes() const {
return {8, 16, 32};
}
} // namespace NEO

View File

@@ -0,0 +1,96 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "gmm_helper/gmm_helper.h"
#include "helpers/hw_helper_base.inl"
namespace NEO {
template <typename GfxFamily>
void HwHelperHw<GfxFamily>::adjustDefaultEngineType(HardwareInfo *pHwInfo) {
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const {
return pHwInfo->gtSystemInfo.MaxSubSlicesSupported * pHwInfo->gtSystemInfo.MaxEuPerSubSlice *
pHwInfo->gtSystemInfo.ThreadCount / pHwInfo->gtSystemInfo.EUCount;
}
template <typename GfxFamily>
void HwHelperHw<GfxFamily>::setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) {
coherencyFlag = true;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isLocalMemoryEnabled(const HardwareInfo &hwInfo) const {
return false;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::hvAlign4Required() const {
return true;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::timestampPacketWriteSupported() const {
return false;
}
template <typename GfxFamily>
const std::vector<aub_stream::EngineType> HwHelperHw<GfxFamily>::getGpgpuEngineInstances() const {
constexpr std::array<aub_stream::EngineType, 3> gpgpuEngineInstances = {{aub_stream::ENGINE_RCS,
aub_stream::ENGINE_RCS, // low priority
aub_stream::ENGINE_RCS}}; // internal usage
return std::vector<aub_stream::EngineType>(gpgpuEngineInstances.begin(), gpgpuEngineInstances.end());
}
template <typename GfxFamily>
std::string HwHelperHw<GfxFamily>::getExtensions() const {
return "";
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const {
if (l3enabled) {
return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1;
}
return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1;
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
uint32_t threadsPerEu) {
return threadsPerEu * euCount;
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
}
template <typename GfxFamily>
inline size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSingleSynchronization(const HardwareInfo &hwInfo) {
return 0u;
}
template <typename GfxFamily>
inline size_t MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo) {
return 0u;
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setExtraPipeControlProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo) {
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(PIPE_CONTROL *pipeControl) {
}
} // namespace NEO

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "debug_settings/debug_settings_manager.h"
#include "helpers/hw_helper.h"
namespace NEO {
uint32_t HwHelper::getSubDevicesCount(const HardwareInfo *pHwInfo) {
return DebugManager.flags.CreateMultipleSubDevices.get() > 0 ? DebugManager.flags.CreateMultipleSubDevices.get() : 1u;
}
uint32_t HwHelper::getEnginesCount(const HardwareInfo &hwInfo) {
return 1u;
}
} // namespace NEO

View File

@@ -0,0 +1,122 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/hw_info.h"
#include "debug_settings/debug_settings_manager.h"
#include "helpers/hw_cmds.h"
#include <algorithm>
namespace NEO {
HardwareInfo::HardwareInfo(const PLATFORM *platform, const FeatureTable *featureTable, const WorkaroundTable *workaroundTable,
const GT_SYSTEM_INFO *gtSystemInfo, const RuntimeCapabilityTable &capabilityTable)
: platform(*platform), featureTable(*featureTable), workaroundTable(*workaroundTable), gtSystemInfo(*gtSystemInfo), capabilityTable(capabilityTable) {
}
// Global table of hardware prefixes
const char *hardwarePrefix[IGFX_MAX_PRODUCT] = {
nullptr,
};
// Global table of default hardware info configs
uint64_t defaultHardwareInfoConfigTable[IGFX_MAX_PRODUCT] = {
0x0,
};
// Global table of family names
const char *familyName[IGFX_MAX_CORE] = {
nullptr,
};
// Global table of family enabled flags
bool familyEnabled[IGFX_MAX_CORE] = {
false,
};
const HardwareInfo *hardwareInfoTable[IGFX_MAX_PRODUCT] = {};
void (*hardwareInfoSetup[IGFX_MAX_PRODUCT])(HardwareInfo *, bool, uint64_t) = {
0x0,
};
bool getHwInfoForPlatformString(std::string &platform, const HardwareInfo *&hwInfoIn) {
std::transform(platform.begin(), platform.end(), platform.begin(), ::tolower);
bool ret = false;
for (int j = 0; j < IGFX_MAX_PRODUCT; j++) {
if (hardwarePrefix[j] == nullptr)
continue;
if (hardwarePrefix[j] == platform) {
hwInfoIn = hardwareInfoTable[j];
ret = true;
break;
}
}
return ret;
}
void setHwInfoValuesFromConfig(const uint64_t hwInfoConfig, HardwareInfo &hwInfoIn) {
uint32_t sliceCount = static_cast<uint16_t>(hwInfoConfig >> 32);
uint32_t subSlicePerSliceCount = static_cast<uint16_t>(hwInfoConfig >> 16);
uint32_t euPerSubSliceCount = static_cast<uint16_t>(hwInfoConfig);
hwInfoIn.gtSystemInfo.SliceCount = sliceCount;
hwInfoIn.gtSystemInfo.SubSliceCount = subSlicePerSliceCount * sliceCount;
hwInfoIn.gtSystemInfo.EUCount = euPerSubSliceCount * subSlicePerSliceCount * sliceCount;
}
bool parseHwInfoConfigString(const std::string &hwInfoConfigStr, uint64_t &hwInfoConfig) {
hwInfoConfig = 0u;
size_t currPos = hwInfoConfigStr.find('x', 0);
if (currPos == std::string::npos) {
return false;
}
uint32_t sliceCount = static_cast<uint32_t>(std::stoul(hwInfoConfigStr.substr(0, currPos)));
if (sliceCount > std::numeric_limits<uint16_t>::max()) {
return false;
}
size_t prevPos = currPos + 1;
currPos = hwInfoConfigStr.find('x', prevPos);
if (currPos == std::string::npos) {
return false;
}
uint32_t subSlicePerSliceCount = static_cast<uint32_t>(std::stoul(hwInfoConfigStr.substr(prevPos, currPos)));
if (subSlicePerSliceCount > std::numeric_limits<uint16_t>::max()) {
return false;
}
uint32_t subSliceCount = subSlicePerSliceCount * sliceCount;
if (subSliceCount > std::numeric_limits<uint16_t>::max()) {
return false;
}
prevPos = currPos + 1;
uint32_t euPerSubSliceCount = static_cast<uint32_t>(std::stoul(hwInfoConfigStr.substr(prevPos, std::string::npos)));
if (euPerSubSliceCount > std::numeric_limits<uint16_t>::max()) {
return false;
}
uint32_t euCount = euPerSubSliceCount * subSliceCount;
if (euCount > std::numeric_limits<uint16_t>::max()) {
return false;
}
hwInfoConfig = static_cast<uint64_t>(sliceCount & 0xffff) << 32 | static_cast<uint64_t>(subSlicePerSliceCount & 0xffff) << 16 | static_cast<uint64_t>(euPerSubSliceCount & 0xffff);
return true;
}
aub_stream::EngineType getChosenEngineType(const HardwareInfo &hwInfo) {
return DebugManager.flags.NodeOrdinal.get() == -1
? hwInfo.capabilityTable.defaultEngineType
: static_cast<aub_stream::EngineType>(DebugManager.flags.NodeOrdinal.get());
}
const std::string getFamilyNameWithType(const HardwareInfo &hwInfo) {
std::string platformName = familyName[hwInfo.platform.eRenderCoreFamily];
platformName.append(hwInfo.capabilityTable.platformType);
return platformName;
}
} // namespace NEO
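An illustrative round trip (not part of the committed file) through the config-string helpers above: "1x2x8" means 1 slice, 2 sub-slices per slice and 8 EUs per sub-slice, packed as 16-bit fields into a uint64_t.
uint64_t config = 0;
if (NEO::parseHwInfoConfigString("1x2x8", config)) {
// config == (1ull << 32) | (2ull << 16) | 8ull
NEO::HardwareInfo hwInfo;
NEO::setHwInfoValuesFromConfig(config, hwInfo);
// hwInfo.gtSystemInfo now reports SliceCount = 1, SubSliceCount = 2, EUCount = 16
}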

View File

@@ -0,0 +1,115 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_stream/preemption_mode.h"
#include "direct_submission/direct_submission_properties.h"
#include "helpers/kmd_notify_properties.h"
#include "engine_node.h"
#include "gtsysinfo.h"
#include "igfxfmid.h"
#include "sku_info.h"
#include <cstddef>
#include <string>
namespace NEO {
struct RuntimeCapabilityTable {
DirectSubmissionProperyEngines directSubmissionEngines;
KmdNotifyProperties kmdNotifyProperties;
uint64_t gpuAddressSpace;
double defaultProfilingTimerResolution;
size_t requiredPreemptionSurfaceSize;
bool (*isSimulation)(unsigned short);
PreemptionMode defaultPreemptionMode;
aub_stream::EngineType defaultEngineType;
uint32_t maxRenderFrequency;
unsigned int clVersionSupport;
uint32_t aubDeviceId;
uint32_t extraQuantityThreadsPerEU;
uint32_t slmSize;
uint32_t grfSize;
bool blitterOperationsSupported;
bool ftrSupportsInteger64BitAtomics;
bool ftrSupportsFP64;
bool ftrSupports64BitMath;
bool ftrSvm;
bool ftrSupportsCoherency;
bool ftrSupportsVmeAvcTextureSampler;
bool ftrSupportsVmeAvcPreemption;
bool ftrRenderCompressedBuffers;
bool ftrRenderCompressedImages;
bool ftr64KBpages;
bool instrumentationEnabled;
bool forceStatelessCompilationFor32Bit;
const char *platformType;
bool debuggerSupported;
bool supportsVme;
bool supportCacheFlushAfterWalker;
bool supportsImages;
bool supportsDeviceEnqueue;
bool hostPtrTrackingEnabled;
};
struct HardwareCapabilities {
size_t image3DMaxWidth;
size_t image3DMaxHeight;
uint64_t maxMemAllocSize;
bool isStatelesToStatefullWithOffsetSupported;
};
struct HardwareInfo {
HardwareInfo() = default;
HardwareInfo(const PLATFORM *platform, const FeatureTable *featureTable, const WorkaroundTable *workaroundTable,
const GT_SYSTEM_INFO *gtSystemInfo, const RuntimeCapabilityTable &capabilityTable);
PLATFORM platform = {};
FeatureTable featureTable = {};
WorkaroundTable workaroundTable = {};
alignas(4) GT_SYSTEM_INFO gtSystemInfo = {};
RuntimeCapabilityTable capabilityTable = {};
};
template <PRODUCT_FAMILY product>
struct HwMapper {};
template <GFXCORE_FAMILY gfxFamily>
struct GfxFamilyMapper {};
// Global tables of hardware prefixes, family names and hardware info
extern bool familyEnabled[IGFX_MAX_CORE];
extern const char *familyName[IGFX_MAX_CORE];
extern const char *hardwarePrefix[IGFX_MAX_PRODUCT];
extern uint64_t defaultHardwareInfoConfigTable[IGFX_MAX_PRODUCT];
extern const HardwareInfo *hardwareInfoTable[IGFX_MAX_PRODUCT];
extern void (*hardwareInfoSetup[IGFX_MAX_PRODUCT])(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig);
template <GFXCORE_FAMILY gfxFamily>
struct EnableGfxFamilyHw {
EnableGfxFamilyHw() {
familyEnabled[gfxFamily] = true;
familyName[gfxFamily] = GfxFamilyMapper<gfxFamily>::name;
}
};
bool getHwInfoForPlatformString(std::string &platform, const HardwareInfo *&hwInfoIn);
void setHwInfoValuesFromConfig(const uint64_t hwInfoConfig, HardwareInfo &hwInfoIn);
bool parseHwInfoConfigString(const std::string &hwInfoConfigStr, uint64_t &hwInfoConfig);
aub_stream::EngineType getChosenEngineType(const HardwareInfo &hwInfo);
const std::string getFamilyNameWithType(const HardwareInfo &hwInfo);
// Utility conversion
template <PRODUCT_FAMILY productFamily>
struct ToGfxCoreFamily {
static const GFXCORE_FAMILY gfxCoreFamily =
static_cast<GFXCORE_FAMILY>(NEO::HwMapper<productFamily>::gfxFamily);
static constexpr GFXCORE_FAMILY get() { return gfxCoreFamily; }
};
} // namespace NEO

View File

@@ -0,0 +1,19 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <atomic>
template <typename Type>
void interlockedMax(std::atomic<Type> &dest, Type newVal) {
Type oldVal = dest;
Type maxVal = oldVal < newVal ? newVal : oldVal;
while (!std::atomic_compare_exchange_weak(&dest, &oldVal, maxVal)) {
oldVal = dest;
maxVal = oldVal < newVal ? newVal : oldVal;
}
}
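A minimal usage sketch (illustrative only): interlockedMax lets multiple threads publish a running maximum without locks; the compare-exchange loop above retries until the stored value is at least newVal.
std::atomic<uint64_t> highWaterMark{0};
void recordUsage(uint64_t bytesUsed) {
interlockedMax(highWaterMark, bytesUsed); // safe to call concurrently
}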

View File

@@ -0,0 +1,44 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/kernel_helpers.h"
#include "helpers/basic_math.h"
#include "helpers/debug_helpers.h"
#include <algorithm>
namespace NEO {
uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
const size_t *localWorkSize) {
UNRECOVERABLE_IF((workDim == 0) || (workDim > 3));
UNRECOVERABLE_IF(localWorkSize == nullptr);
size_t workGroupSize = localWorkSize[0];
for (uint32_t i = 1; i < workDim; i++) {
workGroupSize *= localWorkSize[i];
}
auto threadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, simd));
auto maxWorkGroupsCount = availableThreadCount / threadsPerThreadGroup;
if (numberOfBarriers > 0) {
auto maxWorkGroupsCountDueToBarrierUsage = dssCount * (maxBarrierCount / numberOfBarriers);
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToBarrierUsage);
}
if (usedSlmSize > 0) {
auto maxWorkGroupsCountDueToSlm = availableSlmSize / usedSlmSize;
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToSlm);
}
return maxWorkGroupsCount;
}
} // namespace NEO
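An illustrative call (all values hypothetical) showing the three caps computed above:
size_t lws[3] = {8, 8, 1};
uint32_t maxGroups = NEO::KernelHelper::getMaxWorkGroupCount(
16,    // simd
448,   // availableThreadCount -> thread cap: 448 / ceil(64 / 16) = 112
8,     // dssCount
65536, // availableSlmSize
4096,  // usedSlmSize -> SLM cap: 65536 / 4096 = 16
32,    // maxBarrierCount
1,     // numberOfBarriers -> barrier cap: 8 * (32 / 1) = 256
3, lws);
// result = min(112, 256, 16) = 16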

View File

@@ -0,0 +1,19 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include <cstddef>
#include <cstdint>
namespace NEO {
struct KernelHelper {
static uint32_t getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
const size_t *localWorkSize);
};
} // namespace NEO

View File

@@ -0,0 +1,78 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/kmd_notify_properties.h"
#include "debug_settings/debug_settings_manager.h"
#include <cstdint>
using namespace NEO;
bool KmdNotifyHelper::obtainTimeoutParams(int64_t &timeoutValueOutput,
bool quickKmdSleepRequest,
uint32_t currentHwTag,
uint32_t taskCountToWait,
FlushStamp flushStampToWait,
bool forcePowerSavingMode) {
if (flushStampToWait == 0) {
return false;
}
if (DebugManager.flags.PowerSavingMode.get() || forcePowerSavingMode) {
timeoutValueOutput = 1;
return true;
}
int64_t multiplier = (currentHwTag < taskCountToWait) ? static_cast<int64_t>(taskCountToWait - currentHwTag) : 1;
if (!properties->enableKmdNotify && multiplier > KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine) {
updateAcLineStatus();
}
quickKmdSleepRequest |= applyQuickKmdSleepForSporadicWait();
if (!properties->enableKmdNotify && !acLineConnected) {
timeoutValueOutput = KmdNotifyConstants::timeoutInMicrosecondsForDisconnectedAcLine;
} else if (quickKmdSleepRequest && properties->enableQuickKmdSleep) {
timeoutValueOutput = properties->delayQuickKmdSleepMicroseconds;
} else {
timeoutValueOutput = getBaseTimeout(multiplier);
}
return (properties->enableKmdNotify || !acLineConnected);
}
bool KmdNotifyHelper::applyQuickKmdSleepForSporadicWait() const {
if (properties->enableQuickKmdSleepForSporadicWaits) {
auto timeDiff = getMicrosecondsSinceEpoch() - lastWaitForCompletionTimestampUs.load();
if (timeDiff > properties->delayQuickKmdSleepForSporadicWaitsMicroseconds) {
return true;
}
}
return false;
}
void KmdNotifyHelper::updateLastWaitForCompletionTimestamp() {
lastWaitForCompletionTimestampUs = getMicrosecondsSinceEpoch();
}
int64_t KmdNotifyHelper::getMicrosecondsSinceEpoch() const {
auto now = std::chrono::high_resolution_clock::now().time_since_epoch();
return std::chrono::duration_cast<std::chrono::microseconds>(now).count();
}
void KmdNotifyHelper::overrideFromDebugVariable(int32_t debugVariableValue, int64_t &destination) {
if (debugVariableValue >= 0) {
destination = static_cast<int64_t>(debugVariableValue);
}
}
void KmdNotifyHelper::overrideFromDebugVariable(int32_t debugVariableValue, bool &destination) {
if (debugVariableValue >= 0) {
destination = !!(debugVariableValue);
}
}

View File

@@ -0,0 +1,62 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/completion_stamp.h"
#include <atomic>
#include <chrono>
#include <cstdint>
namespace NEO {
struct KmdNotifyProperties {
int64_t delayKmdNotifyMicroseconds;
int64_t delayQuickKmdSleepMicroseconds;
int64_t delayQuickKmdSleepForSporadicWaitsMicroseconds;
// Main switch for the KMD Notify optimization - if it is disabled, all switches below are disabled too
bool enableKmdNotify;
// Use a smaller delay in specific situations (e.g. from AsyncEventsHandler)
bool enableQuickKmdSleep;
// If waits are called sporadically use QuickKmdSleep mode, otherwise use standard delay
bool enableQuickKmdSleepForSporadicWaits;
};
namespace KmdNotifyConstants {
constexpr int64_t timeoutInMicrosecondsForDisconnectedAcLine = 10000;
constexpr uint32_t minimumTaskCountDiffToCheckAcLine = 10;
} // namespace KmdNotifyConstants
class KmdNotifyHelper {
public:
KmdNotifyHelper() = delete;
KmdNotifyHelper(const KmdNotifyProperties *properties) : properties(properties){};
MOCKABLE_VIRTUAL ~KmdNotifyHelper() = default;
bool obtainTimeoutParams(int64_t &timeoutValueOutput,
bool quickKmdSleepRequest,
uint32_t currentHwTag,
uint32_t taskCountToWait,
FlushStamp flushStampToWait,
bool forcePowerSavingMode);
bool quickKmdSleepForSporadicWaitsEnabled() const { return properties->enableQuickKmdSleepForSporadicWaits; }
MOCKABLE_VIRTUAL void updateLastWaitForCompletionTimestamp();
MOCKABLE_VIRTUAL void updateAcLineStatus();
static void overrideFromDebugVariable(int32_t debugVariableValue, int64_t &destination);
static void overrideFromDebugVariable(int32_t debugVariableValue, bool &destination);
protected:
bool applyQuickKmdSleepForSporadicWait() const;
int64_t getBaseTimeout(const int64_t &multiplier) const;
int64_t getMicrosecondsSinceEpoch() const;
const KmdNotifyProperties *properties = nullptr;
std::atomic<int64_t> lastWaitForCompletionTimestampUs{0};
std::atomic<bool> acLineConnected{true};
};
} // namespace NEO

View File

@@ -0,0 +1,29 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
namespace NEO {
class NonCopyableOrMovableClass {
public:
NonCopyableOrMovableClass() = default;
NonCopyableOrMovableClass(const NonCopyableOrMovableClass &) = delete;
NonCopyableOrMovableClass &operator=(const NonCopyableOrMovableClass &) = delete;
NonCopyableOrMovableClass(NonCopyableOrMovableClass &&) = delete;
NonCopyableOrMovableClass &operator=(NonCopyableOrMovableClass &&) = delete;
};
class NonCopyableClass {
public:
NonCopyableClass() = default;
NonCopyableClass(const NonCopyableClass &) = delete;
NonCopyableClass &operator=(const NonCopyableClass &) = delete;
NonCopyableClass(NonCopyableClass &&) = default;
NonCopyableClass &operator=(NonCopyableClass &&) = default;
};
} // namespace NEO

View File

@@ -0,0 +1,37 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
#ifndef KMD_PROFILING
#define KMD_PROFILING 0
#endif
namespace NEO {
enum CommandStreamReceiverType {
// Use receiver for real HW
CSR_HW = 0,
// Capture an AUB file automatically for all traffic going through Device -> CommandStreamReceiver
CSR_AUB,
// Capture an AUB and tunnel all commands going through Device -> CommandStreamReceiver to a TBX server
CSR_TBX,
// Use receiver for real HW and capture AUB file
CSR_HW_WITH_AUB,
// Use TBX server and capture AUB file
CSR_TBX_WITH_AUB,
// Number of CSR types
CSR_TYPES_NUM
};
// AUB file folder location
extern const char *folderAUB;
// Initial value for HW tag
// Set to 0 when using HW or the simulator, otherwise 0xFFFFFF00; it needs to be lower than CompletionStamp::levelNotReady.
extern uint32_t initialHardwareTag;
} // namespace NEO

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
namespace NEO {
struct PipelineSelectArgs {
bool specialPipelineSelectMode = false;
bool mediaSamplerRequired = false;
};
} // namespace NEO

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
const uint32_t pipelineSelectEnablePipelineSelectMaskBits = 0x3;
const uint32_t pipelineSelectMediaSamplerDopClockGateMaskBits = 0x10;
const uint32_t pipelineSelectMediaSamplerPowerClockGateMaskBits = 0x40;
} // namespace NEO

View File

@@ -0,0 +1,98 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/pipeline_select_helper.h"
#include "engine_node.h"
#include "igfxfmid.h"
#include <cstddef>
#include <cstdint>
namespace NEO {
struct HardwareInfo;
class Device;
struct DispatchFlags;
class GraphicsAllocation;
class LinearStream;
struct PipelineSelectArgs;
template <typename GfxFamily>
struct PreambleHelper {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
static void programL3(LinearStream *pCommandStream, uint32_t l3Config);
static void programPipelineSelect(LinearStream *pCommandStream,
const PipelineSelectArgs &pipelineSelectArgs,
const HardwareInfo &hwInfo);
static uint32_t getDefaultThreadArbitrationPolicy();
static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy);
static void programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr);
static void addPipeControlBeforeVfeCmd(LinearStream *pCommandStream, const HardwareInfo *hwInfo, aub_stream::EngineType engineType);
static uint64_t programVFEState(LinearStream *pCommandStream,
const HardwareInfo &hwInfo,
int scratchSize,
uint64_t scratchAddress,
uint32_t maxFrontEndThreads,
aub_stream::EngineType engineType);
static void programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo);
static void programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer);
static void programKernelDebugging(LinearStream *pCommandStream);
static void programPerDssBackedBuffer(LinearStream *pCommandStream, const HardwareInfo &hwInfo, GraphicsAllocation *perDssBackBufferOffset);
static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM);
static bool isL3Configurable(const HardwareInfo &hwInfo);
static size_t getAdditionalCommandsSize(const Device &device);
static size_t getThreadArbitrationCommandsSize();
static size_t getVFECommandsSize();
static size_t getKernelDebuggingCommandsSize(bool debuggingActive);
static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo);
static uint32_t getUrbEntryAllocationSize();
static size_t getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo);
static size_t getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo);
};
template <PRODUCT_FAMILY ProductFamily>
static uint32_t getL3ConfigHelper(bool useSLM);
template <PRODUCT_FAMILY ProductFamily>
struct L3CNTLREGConfig {
static const uint32_t valueForSLM;
static const uint32_t valueForNoSLM;
};
template <PRODUCT_FAMILY ProductFamily>
uint32_t getL3ConfigHelper(bool useSLM) {
if (!useSLM) {
return L3CNTLREGConfig<ProductFamily>::valueForNoSLM;
}
return L3CNTLREGConfig<ProductFamily>::valueForSLM;
}
template <typename GfxFamily>
struct L3CNTLRegisterOffset {
static const uint32_t registerOffset;
};
template <typename GfxFamily>
struct DebugModeRegisterOffset {
enum {
registerOffset = 0x20ec,
debugEnabledValue = (1 << 6) | (1 << 22)
};
};
namespace TdDebugControlRegisterOffset {
static constexpr uint32_t registerOffset = 0xe400;
static constexpr uint32_t debugEnabledValue = (1 << 4) | (1 << 7);
}; // namespace TdDebugControlRegisterOffset
} // namespace NEO

View File

@@ -0,0 +1,118 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "command_stream/linear_stream.h"
#include "command_stream/preemption.h"
#include "device/device.h"
#include "helpers/aligned_memory.h"
#include "helpers/hw_cmds.h"
#include "helpers/preamble.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/kernel/kernel.h"
#include "reg_configs_common.h"
#include <cstddef>
namespace NEO {
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) {
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getThreadArbitrationCommandsSize() {
return 0;
}
template <typename GfxFamily>
uint32_t PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy() {
return 0;
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo) {
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPerDssBackedBuffer(LinearStream *pCommandStream, const HardwareInfo &hwInfo, GraphicsAllocation *perDssBackBufferOffset) {
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo) {
return 0;
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getAdditionalCommandsSize(const Device &device) {
size_t totalSize = PreemptionHelper::getRequiredPreambleSize<GfxFamily>(device);
totalSize += getKernelDebuggingCommandsSize(device.isDebuggerActive());
return totalSize;
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo) {
size_t size = 0;
using PIPELINE_SELECT = typename GfxFamily::PIPELINE_SELECT;
size += sizeof(PIPELINE_SELECT);
if (HardwareCommandsHelper<GfxFamily>::isPipeControlPriorToPipelineSelectWArequired(hwInfo)) {
size += sizeof(PIPE_CONTROL);
}
return size;
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer) {
programL3(pCommandStream, l3Config);
programThreadArbitration(pCommandStream, requiredThreadArbitrationPolicy);
programPreemption(pCommandStream, device, preemptionCsr);
if (device.isDebuggerActive()) {
programKernelDebugging(pCommandStream);
}
programGenSpecificPreambleWorkArounds(pCommandStream, device.getHardwareInfo());
if (perDssBackedBuffer != nullptr) {
programPerDssBackedBuffer(pCommandStream, device.getHardwareInfo(), perDssBackedBuffer);
}
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr) {
PreemptionHelper::programCsrBaseAddress<GfxFamily>(*pCommandStream, device, preemptionCsr);
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programKernelDebugging(LinearStream *pCommandStream) {
auto pCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM)));
*pCmd = GfxFamily::cmdInitLoadRegisterImm;
pCmd->setRegisterOffset(DebugModeRegisterOffset<GfxFamily>::registerOffset);
pCmd->setDataDword(DebugModeRegisterOffset<GfxFamily>::debugEnabledValue);
auto pCmd2 = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM)));
*pCmd2 = GfxFamily::cmdInitLoadRegisterImm;
pCmd2->setRegisterOffset(TdDebugControlRegisterOffset::registerOffset);
pCmd2->setDataDword(TdDebugControlRegisterOffset::debugEnabledValue);
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getKernelDebuggingCommandsSize(bool debuggingActive) {
if (debuggingActive) {
return 2 * sizeof(MI_LOAD_REGISTER_IMM);
}
return 0;
}
template <typename GfxFamily>
bool PreambleHelper<GfxFamily>::isL3Configurable(const HardwareInfo &hwInfo) {
return false;
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo) {
}
} // namespace NEO

View File

@@ -0,0 +1,63 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/flat_batch_buffer_helper.h"
#include "helpers/hw_helper.h"
#include "helpers/preamble_base.inl"
namespace NEO {
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programL3(LinearStream *pCommandStream, uint32_t l3Config) {
auto pCmd = (MI_LOAD_REGISTER_IMM *)pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
*pCmd = GfxFamily::cmdInitLoadRegisterImm;
pCmd->setRegisterOffset(L3CNTLRegisterOffset<GfxFamily>::registerOffset);
pCmd->setDataDword(l3Config);
}
template <typename GfxFamily>
uint32_t PreambleHelper<GfxFamily>::getUrbEntryAllocationSize() {
return 0x782;
}
template <typename GfxFamily>
uint64_t PreambleHelper<GfxFamily>::programVFEState(LinearStream *pCommandStream,
const HardwareInfo &hwInfo,
int scratchSize,
uint64_t scratchAddress,
uint32_t maxFrontEndThreads,
aub_stream::EngineType engineType) {
using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE;
addPipeControlBeforeVfeCmd(pCommandStream, &hwInfo, engineType);
auto scratchSpaceAddressOffset = static_cast<uint64_t>(pCommandStream->getUsed() + MEDIA_VFE_STATE::PATCH_CONSTANTS::SCRATCHSPACEBASEPOINTER_BYTEOFFSET);
auto pMediaVfeState = reinterpret_cast<MEDIA_VFE_STATE *>(pCommandStream->getSpace(sizeof(MEDIA_VFE_STATE)));
*pMediaVfeState = GfxFamily::cmdInitMediaVfeState;
pMediaVfeState->setMaximumNumberOfThreads(maxFrontEndThreads);
pMediaVfeState->setNumberOfUrbEntries(1);
pMediaVfeState->setUrbEntryAllocationSize(PreambleHelper<GfxFamily>::getUrbEntryAllocationSize());
pMediaVfeState->setPerThreadScratchSpace(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
pMediaVfeState->setStackSize(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
uint32_t lowAddress = static_cast<uint32_t>(0xFFFFFFFF & scratchAddress);
uint32_t highAddress = static_cast<uint32_t>(0xFFFFFFFF & (scratchAddress >> 32));
pMediaVfeState->setScratchSpaceBasePointer(lowAddress);
pMediaVfeState->setScratchSpaceBasePointerHigh(highAddress);
programAdditionalFieldsInVfeState(pMediaVfeState, hwInfo);
return scratchSpaceAddressOffset;
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getVFECommandsSize() {
    using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE;
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
    return sizeof(MEDIA_VFE_STATE) + sizeof(PIPE_CONTROL);
}
} // namespace NEO


@@ -0,0 +1,81 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstddef>
#include <cstdint>
static const int ptrGarbageContent[16] = {
0x0131, 0x133, 0xA, 0xEF,
0x0131, 0x133, 0xA, 0xEF,
0x0131, 0x133, 0xA, 0xEF,
0x0131, 0x133, 0xA, 0xEF};
static const auto ptrGarbage = (void *)ptrGarbageContent;
template <typename T>
inline T ptrOffset(T ptrBefore, size_t offset) {
auto addrBefore = (uintptr_t)ptrBefore;
auto addrAfter = addrBefore + offset;
return (T)addrAfter;
}
template <>
inline uint64_t ptrOffset(uint64_t ptrBefore, size_t offset) {
return ptrBefore + offset;
}
template <typename TA, typename TB>
inline size_t ptrDiff(TA ptrAfter, TB ptrBefore) {
auto addrBefore = (uintptr_t)ptrBefore;
auto addrAfter = (uintptr_t)ptrAfter;
return addrAfter - addrBefore;
}
template <typename T>
inline uint64_t ptrDiff(uint64_t ptrAfter, T ptrBefore) {
return ptrAfter - ptrBefore;
}
template <typename IntegerAddressType>
inline void *addrToPtr(IntegerAddressType addr) {
uintptr_t correctBitnessAddress = static_cast<uintptr_t>(addr);
void *ptrReturn = reinterpret_cast<void *>(correctBitnessAddress);
return ptrReturn;
}
struct PatchStoreOperation {
template <typename T>
void operator()(T *memory, T value) {
*memory = value;
}
};
struct PatchIncrementOperation {
template <typename T>
void operator()(T *memory, T value) {
*memory += value;
}
};
template <typename PatchOperationT = PatchStoreOperation>
inline void patchWithRequiredSize(void *memoryToBePatched, uint32_t patchSize, uint64_t patchValue) {
if (patchSize == sizeof(uint64_t)) {
uint64_t *curbeAddress = reinterpret_cast<uint64_t *>(memoryToBePatched);
PatchOperationT{}(curbeAddress, patchValue);
} else {
uint32_t *curbeAddress = reinterpret_cast<uint32_t *>(memoryToBePatched);
PatchOperationT{}(curbeAddress, static_cast<uint32_t>(patchValue));
}
}
inline void patchIncrement(void *memoryToBePatched, uint32_t patchSize, uint64_t patchIncrementValue) {
patchWithRequiredSize<PatchIncrementOperation>(memoryToBePatched, patchSize, patchIncrementValue);
}
inline uint64_t castToUint64(const void *address) {
return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(const_cast<void *>(address)));
}


@@ -0,0 +1,74 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <stdint.h>
constexpr uint32_t L3SQC_BIT_LQSC_RO_PERF_DIS = 0x08000000;
constexpr uint32_t L3SQC_REG4 = 0xB118;
constexpr uint32_t GPGPU_WALKER_COOKIE_VALUE_BEFORE_WALKER = 0xFFFFFFFF;
constexpr uint32_t GPGPU_WALKER_COOKIE_VALUE_AFTER_WALKER = 0x00000000;
//Threads Dimension X/Y/Z
constexpr uint32_t GPUGPU_DISPATCHDIMX = 0x2500;
constexpr uint32_t GPUGPU_DISPATCHDIMY = 0x2504;
constexpr uint32_t GPUGPU_DISPATCHDIMZ = 0x2508;
constexpr uint32_t CS_GPR_R0 = 0x2600;
constexpr uint32_t CS_GPR_R1 = 0x2608;
constexpr uint32_t CS_GPR_R2 = 0x2610;
constexpr uint32_t CS_GPR_R3 = 0x2618;
constexpr uint32_t CS_GPR_R4 = 0x2620;
constexpr uint32_t CS_GPR_R5 = 0x2628;
constexpr uint32_t CS_GPR_R6 = 0x2630;
constexpr uint32_t CS_GPR_R7 = 0x2638;
constexpr uint32_t CS_GPR_R8 = 0x2640;
constexpr uint32_t CS_GPR_R9 = 0x2648;
constexpr uint32_t CS_GPR_R10 = 0x2650;
constexpr uint32_t CS_GPR_R11 = 0x2658;
constexpr uint32_t CS_GPR_R12 = 0x2660;
constexpr uint32_t CS_GPR_R13 = 0x2668;
constexpr uint32_t CS_GPR_R14 = 0x2670;
constexpr uint32_t CS_GPR_R15 = 0x2678;
constexpr uint32_t CS_PREDICATE_RESULT = 0x2418;
//Alu opcodes
constexpr uint32_t NUM_ALU_INST_FOR_READ_MODIFY_WRITE = 4;
constexpr uint32_t ALU_OPCODE_LOAD = 0x080;
constexpr uint32_t ALU_OPCODE_STORE = 0x180;
constexpr uint32_t ALU_OPCODE_ADD = 0x100;
constexpr uint32_t ALU_OPCODE_SUB = 0x101;
constexpr uint32_t ALU_OPCODE_AND = 0x102;
constexpr uint32_t ALU_OPCODE_OR = 0x103;
constexpr uint32_t ALU_REGISTER_R_0 = 0x0;
constexpr uint32_t ALU_REGISTER_R_1 = 0x1;
constexpr uint32_t ALU_REGISTER_R_2 = 0x2;
constexpr uint32_t ALU_REGISTER_R_3 = 0x3;
constexpr uint32_t ALU_REGISTER_R_4 = 0x4;
constexpr uint32_t ALU_REGISTER_R_5 = 0x5;
constexpr uint32_t ALU_REGISTER_R_6 = 0x6;
constexpr uint32_t ALU_REGISTER_R_7 = 0x7;
constexpr uint32_t ALU_REGISTER_R_8 = 0x8;
constexpr uint32_t ALU_REGISTER_R_9 = 0x9;
constexpr uint32_t ALU_REGISTER_R_10 = 0xA;
constexpr uint32_t ALU_REGISTER_R_11 = 0xB;
constexpr uint32_t ALU_REGISTER_R_12 = 0xC;
constexpr uint32_t ALU_REGISTER_R_13 = 0xD;
constexpr uint32_t ALU_REGISTER_R_14 = 0xE;
constexpr uint32_t ALU_REGISTER_R_15 = 0xF;
constexpr uint32_t ALU_REGISTER_R_SRCA = 0x20;
constexpr uint32_t ALU_REGISTER_R_SRCB = 0x21;
constexpr uint32_t ALU_REGISTER_R_ACCU = 0x31;
constexpr uint32_t ALU_REGISTER_R_ZF = 0x32;
constexpr uint32_t ALU_REGISTER_R_CF = 0x33;
constexpr uint32_t GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW = 0x23A8;
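A small sanity sketch derived from the table above: consecutive CS general-purpose register offsets are spaced 8 bytes apart; treating each GPR as a 64-bit register whose upper dword lives at +4 is an assumption of this sketch.

// Illustrative helpers only; they restate the spacing visible in the constants above.
constexpr uint32_t csGprLowDwordOffset(uint32_t gprIndex) { return CS_GPR_R0 + 8u * gprIndex; }
constexpr uint32_t csGprHighDwordOffset(uint32_t gprIndex) { return csGprLowDwordOffset(gprIndex) + 4u; }

static_assert(csGprLowDwordOffset(1) == CS_GPR_R1, "GPR offsets are spaced 8 bytes apart");
static_assert(csGprLowDwordOffset(15) == CS_GPR_R15, "the last GPR matches the table");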


@@ -0,0 +1,49 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
#include <functional>
namespace NEO {
template <typename MethodArgsT, typename EstimateMethodArgsT>
class RegisteredMethodDispatcher {
public:
using CommandsSizeEstimationMethodT = std::function<EstimateMethodArgsT>;
using RegisteredMethodT = std::function<MethodArgsT>;
void registerMethod(RegisteredMethodT method) {
this->method = method;
}
void registerCommandsSizeEstimationMethod(CommandsSizeEstimationMethodT method) {
this->commandsEstimationMethod = method;
}
template <typename... Args>
void operator()(Args &&... args) const {
if (method) {
method(std::forward<Args>(args)...);
}
}
template <typename... Args>
size_t estimateCommandsSize(Args &&... args) const {
if (commandsEstimationMethod) {
return commandsEstimationMethod(std::forward<Args>(args)...);
}
return 0;
}
protected:
CommandsSizeEstimationMethodT commandsEstimationMethod;
RegisteredMethodT method;
};
} // namespace NEO
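A minimal sketch of how the dispatcher might be wired up; the void(int) and size_t() signatures, the lambda bodies, and the 64-byte estimate are illustrative assumptions made for this sketch, not signatures used elsewhere in the driver.

#include <cstddef>
#include <cstdio>

inline void registeredMethodDispatcherSketch() {
    NEO::RegisteredMethodDispatcher<void(int), size_t()> dispatcher;

    dispatcher.registerMethod([](int value) { std::printf("dispatched with %d\n", value); });
    dispatcher.registerCommandsSizeEstimationMethod([]() -> size_t { return 64u; });

    dispatcher(42);                                      // forwards to the registered method, if any
    size_t estimate = dispatcher.estimateCommandsSize(); // 64 here; 0 when no estimator is registered
    (void)estimate;
}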


@@ -0,0 +1,14 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <stdint.h>
template <typename WALKER_TYPE>
constexpr typename WALKER_TYPE::SIMD_SIZE getSimdConfig(uint32_t simdSize) {
return static_cast<typename WALKER_TYPE::SIMD_SIZE>((simdSize == 1) ? (32 >> 4) : (simdSize >> 4));
}
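An illustrative compile-time check of getSimdConfig using a stand-in walker type; MockWalker and its SIMD_SIZE values are assumptions made for this sketch, not a real GfxFamily definition.

// Stand-in for a GfxFamily walker command, used only to exercise getSimdConfig.
struct MockWalker {
    enum SIMD_SIZE : uint32_t {
        SIMD_SIZE_SIMD8 = 0,
        SIMD_SIZE_SIMD16 = 1,
        SIMD_SIZE_SIMD32 = 2
    };
};

static_assert(getSimdConfig<MockWalker>(8) == MockWalker::SIMD_SIZE_SIMD8, "SIMD8 encodes as simdSize >> 4");
static_assert(getSimdConfig<MockWalker>(16) == MockWalker::SIMD_SIZE_SIMD16, "SIMD16 encodes as simdSize >> 4");
static_assert(getSimdConfig<MockWalker>(32) == MockWalker::SIMD_SIZE_SIMD32, "SIMD32 encodes as simdSize >> 4");
static_assert(getSimdConfig<MockWalker>(1) == MockWalker::SIMD_SIZE_SIMD32, "SIMD1 falls back to the SIMD32 encoding");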


@@ -0,0 +1,46 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstddef>
#include <cstdint>
namespace NEO {
class GmmHelper;
class IndirectHeap;
class LinearStream;
struct DispatchFlags;
template <typename GfxFamily>
struct StateBaseAddressHelper {
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
static void programStateBaseAddress(
LinearStream &commandStream,
const IndirectHeap *dsh,
const IndirectHeap *ioh,
const IndirectHeap *ssh,
uint64_t generalStateBase,
bool setGeneralStateBaseAddress,
uint32_t statelessMocsIndex,
uint64_t internalHeapBase,
bool setInstructionStateBaseAddress,
GmmHelper *gmmHelper,
bool isMultiOsContextCapable);
static void appendStateBaseAddressParameters(
STATE_BASE_ADDRESS *stateBaseAddress,
const IndirectHeap *ssh,
bool setGeneralStateBaseAddress,
uint64_t internalHeapBase,
GmmHelper *gmmHelper,
bool isMultiOsContextCapable);
static void programBindingTableBaseAddress(LinearStream &commandStream, const IndirectHeap &ssh, GmmHelper *gmmHelper);
};
} // namespace NEO


@@ -0,0 +1,79 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "gmm_helper/gmm_helper.h"
#include "helpers/cache_policy.h"
#include "helpers/hw_cmds.h"
#include "helpers/state_base_address.h"
#include "indirect_heap/indirect_heap.h"
#include "memory_manager/memory_constants.h"
namespace NEO {
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
LinearStream &commandStream,
const IndirectHeap *dsh,
const IndirectHeap *ioh,
const IndirectHeap *ssh,
uint64_t generalStateBase,
bool setGeneralStateBaseAddress,
uint32_t statelessMocsIndex,
uint64_t internalHeapBase,
bool setInstructionStateBaseAddress,
GmmHelper *gmmHelper,
bool isMultiOsContextCapable) {
auto pCmd = static_cast<STATE_BASE_ADDRESS *>(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS)));
*pCmd = GfxFamily::cmdInitStateBaseAddress;
if (dsh) {
pCmd->setDynamicStateBaseAddressModifyEnable(true);
pCmd->setDynamicStateBufferSizeModifyEnable(true);
pCmd->setDynamicStateBaseAddress(dsh->getHeapGpuBase());
pCmd->setDynamicStateBufferSize(dsh->getHeapSizeInPages());
}
if (ioh) {
pCmd->setIndirectObjectBaseAddressModifyEnable(true);
pCmd->setIndirectObjectBufferSizeModifyEnable(true);
pCmd->setIndirectObjectBaseAddress(ioh->getHeapGpuBase());
pCmd->setIndirectObjectBufferSize(ioh->getHeapSizeInPages());
}
if (ssh) {
pCmd->setSurfaceStateBaseAddressModifyEnable(true);
pCmd->setSurfaceStateBaseAddress(ssh->getHeapGpuBase());
}
if (setInstructionStateBaseAddress) {
pCmd->setInstructionBaseAddressModifyEnable(true);
pCmd->setInstructionBaseAddress(internalHeapBase);
pCmd->setInstructionBufferSizeModifyEnable(true);
pCmd->setInstructionBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
pCmd->setInstructionMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER));
}
if (setGeneralStateBaseAddress) {
pCmd->setGeneralStateBaseAddressModifyEnable(true);
pCmd->setGeneralStateBufferSizeModifyEnable(true);
// GSH must be set to 0 for stateless
pCmd->setGeneralStateBaseAddress(GmmHelper::decanonize(generalStateBase));
pCmd->setGeneralStateBufferSize(0xfffff);
}
if (DebugManager.flags.OverrideStatelessMocsIndex.get() != -1) {
statelessMocsIndex = DebugManager.flags.OverrideStatelessMocsIndex.get();
}
statelessMocsIndex = statelessMocsIndex << 1;
pCmd->setStatelessDataPortAccessMemoryObjectControlState(statelessMocsIndex);
appendStateBaseAddressParameters(pCmd, ssh, setGeneralStateBaseAddress, internalHeapBase, gmmHelper, isMultiOsContextCapable);
}
} // namespace NEO


@@ -0,0 +1,26 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/state_base_address_base.inl"
namespace NEO {
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
STATE_BASE_ADDRESS *stateBaseAddress,
const IndirectHeap *ssh,
bool setGeneralStateBaseAddress,
uint64_t internalHeapBase,
GmmHelper *gmmHelper,
bool isMultiOsContextCapable) {
}
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::programBindingTableBaseAddress(LinearStream &commandStream, const IndirectHeap &ssh, GmmHelper *gmmHelper) {
}
} // namespace NEO


@@ -0,0 +1,17 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_stream/csr_definitions.h"
#include "helpers/hw_cmds.h"
namespace NEO {
template <typename GfxFamily>
struct StateComputeModeHelper {
static bool isStateComputeModeRequired(CsrSizeRequestFlags &csrSizeRequestFlags, bool isThreadArbitionPolicyProgrammed);
};
} // namespace NEO


@@ -0,0 +1,31 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#ifndef _WIN32
#ifndef __STDC_LIB_EXT1__
#if __STDC_WANT_LIB_EXT1__ != 1
#include <cstdio>
#include <errno.h>
inline int fopen_s(FILE **pFile, const char *filename, const char *mode) {
if ((pFile == nullptr) || (filename == nullptr) || (mode == nullptr)) {
return -EINVAL;
}
*pFile = fopen(filename, mode);
if (*pFile == nullptr) {
return -errno;
}
return 0;
}
#endif
#endif
#endif
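A brief usage sketch of the fopen_s fallback above; "example.txt" is a placeholder path.

#include <cstdio>

inline void fopenSUsageSketch() {
    FILE *file = nullptr;
    // Returns 0 on success and a negative errno-style code otherwise (mirroring the fallback above).
    if (fopen_s(&file, "example.txt", "rb") == 0) {
        fclose(file);
    }
}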


@@ -0,0 +1,106 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <memory>
#include <type_traits>
#if defined(__linux__)
#include <cstring>
#include <errno.h>
#include <string>
inline int strcpy_s(char *dst, size_t dstSize, const char *src) {
if ((dst == nullptr) || (src == nullptr)) {
return -EINVAL;
}
size_t length = strlen(src);
if (dstSize <= length) {
return -ERANGE;
}
memcpy(dst, src, length);
dst[length] = '\0';
return 0;
}
inline int strncpy_s(char *dst, size_t numberOfElements, const char *src, size_t count) {
if ((dst == nullptr) || (src == nullptr)) {
return -EINVAL;
}
if (numberOfElements < count) {
return -ERANGE;
}
size_t length = strlen(src);
if (length > count) {
length = count;
}
memcpy(dst, src, length);
if (length < numberOfElements) {
numberOfElements = length;
}
dst[numberOfElements] = '\0';
return 0;
}
inline size_t strnlen_s(const char *str, size_t count) {
if (str == nullptr) {
return 0;
}
for (size_t i = 0; i < count; ++i) {
if (str[i] == '\0')
return i;
}
return count;
}
inline int memcpy_s(void *dst, size_t destSize, const void *src, size_t count) {
if ((dst == nullptr) || (src == nullptr)) {
return -EINVAL;
}
if (destSize < count) {
return -ERANGE;
}
memcpy(dst, src, count);
return 0;
}
inline int memmove_s(void *dst, size_t numberOfElements, const void *src, size_t count) {
if ((dst == nullptr) || (src == nullptr)) {
return -EINVAL;
}
if (numberOfElements < count) {
return -ERANGE;
}
memmove(dst, src, count);
return 0;
}
#endif
template <typename T = char>
inline std::unique_ptr<T[]> makeCopy(const void *src, size_t size) {
if (size == 0) {
return nullptr;
}
using ElT = typename std::remove_all_extents<T>::type;
std::unique_ptr<T[]> copiedData(new ElT[size]);
memcpy_s(copiedData.get(), size, src, size);
return copiedData;
}
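A brief usage sketch of the safe-string fallbacks, assuming the Linux implementations above (the Windows CRT versions report errors differently); the strings and sizes are illustrative.

#include <cassert>
#include <cerrno>
#include <cstdint>

inline void safeStringHelpersSketch() {
    char destination[8] = {};
    assert(strcpy_s(destination, sizeof(destination), "neo") == 0); // fits, copied and null-terminated
    assert(strcpy_s(destination, 3, "neo") == -ERANGE);             // no room left for the terminator
    assert(strnlen_s("runtime", 3) == 3u);                          // bounded length scan

    uint32_t value = 0;
    assert(memcpy_s(&value, sizeof(value), "\x01\x00\x00\x00", 4) == 0);

    auto copy = makeCopy("abc", 4); // unique_ptr<char[]> holding "abc" plus the terminator
    assert(copy[2] == 'c');
}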


@@ -0,0 +1,254 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "gmm_helper/gmm_lib.h"
namespace NEO {
enum GFX3DSTATE_SURFACEFORMAT : unsigned short {
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_FLOAT = 0x000,
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SINT = 0x001,
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_UINT = 0x002,
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_UNORM = 0x003,
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SNORM = 0x004,
GFX3DSTATE_SURFACEFORMAT_R64G64_FLOAT = 0x005,
GFX3DSTATE_SURFACEFORMAT_R32G32B32X32_FLOAT = 0x006,
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SSCALED = 0x007,
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_USCALED = 0x008,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_FLOAT = 0x040,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_SINT = 0x041,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_UINT = 0x042,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_UNORM = 0x043,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_SNORM = 0x044,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_SSCALED = 0x045,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_USCALED = 0x046,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM = 0x080,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SNORM = 0x081,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SINT = 0x082,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UINT = 0x083,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_FLOAT = 0x084,
GFX3DSTATE_SURFACEFORMAT_R32G32_FLOAT = 0x085,
GFX3DSTATE_SURFACEFORMAT_R32G32_SINT = 0x086,
GFX3DSTATE_SURFACEFORMAT_R32G32_UINT = 0x087,
GFX3DSTATE_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS = 0x088,
GFX3DSTATE_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT = 0x089,
GFX3DSTATE_SURFACEFORMAT_L32A32_FLOAT = 0x08A,
GFX3DSTATE_SURFACEFORMAT_R32G32_UNORM = 0x08B,
GFX3DSTATE_SURFACEFORMAT_R32G32_SNORM = 0x08C,
GFX3DSTATE_SURFACEFORMAT_R64_FLOAT = 0x08D,
GFX3DSTATE_SURFACEFORMAT_R16G16B16X16_UNORM = 0x08E,
GFX3DSTATE_SURFACEFORMAT_R16G16B16X16_FLOAT = 0x08F,
GFX3DSTATE_SURFACEFORMAT_A32X32_FLOAT = 0x090,
GFX3DSTATE_SURFACEFORMAT_L32X32_FLOAT = 0x091,
GFX3DSTATE_SURFACEFORMAT_I32X32_FLOAT = 0x092,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SSCALED = 0x093,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_USCALED = 0x094,
GFX3DSTATE_SURFACEFORMAT_R32G32_SSCALED = 0x095,
GFX3DSTATE_SURFACEFORMAT_R32G32_USCALED = 0x096,
GFX3DSTATE_SURFACEFORMAT_B8G8R8A8_UNORM = 0x0C0,
GFX3DSTATE_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB = 0x0C1,
GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM = 0x0C2,
GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB = 0x0C3,
GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UINT = 0x0C4,
GFX3DSTATE_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM = 0x0C5,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM = 0x0C7,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB = 0x0C8,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SNORM = 0x0C9,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SINT = 0x0CA,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UINT = 0x0CB,
GFX3DSTATE_SURFACEFORMAT_R16G16_UNORM = 0x0CC,
GFX3DSTATE_SURFACEFORMAT_R16G16_SNORM = 0x0CD,
GFX3DSTATE_SURFACEFORMAT_R16G16_SINT = 0x0CE,
GFX3DSTATE_SURFACEFORMAT_R16G16_UINT = 0x0CF,
GFX3DSTATE_SURFACEFORMAT_R16G16_FLOAT = 0x0D0,
GFX3DSTATE_SURFACEFORMAT_B10G10R10A2_UNORM = 0x0D1,
GFX3DSTATE_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB = 0x0D2,
GFX3DSTATE_SURFACEFORMAT_R11G11B10_FLOAT = 0x0D3,
GFX3DSTATE_SURFACEFORMAT_R32_SINT = 0x0D6,
GFX3DSTATE_SURFACEFORMAT_R32_UINT = 0x0D7,
GFX3DSTATE_SURFACEFORMAT_R32_FLOAT = 0x0D8,
GFX3DSTATE_SURFACEFORMAT_R24_UNORM_X8_TYPELESS = 0x0D9,
GFX3DSTATE_SURFACEFORMAT_X24_TYPELESS_G8_UINT = 0x0DA,
GFX3DSTATE_SURFACEFORMAT_L16A16_UNORM = 0x0DF,
GFX3DSTATE_SURFACEFORMAT_I24X8_UNORM = 0x0E0,
GFX3DSTATE_SURFACEFORMAT_L24X8_UNORM = 0x0E1,
GFX3DSTATE_SURFACEFORMAT_A24X8_UNORM = 0x0E2,
GFX3DSTATE_SURFACEFORMAT_I32_FLOAT = 0x0E3,
GFX3DSTATE_SURFACEFORMAT_L32_FLOAT = 0x0E4,
GFX3DSTATE_SURFACEFORMAT_A32_FLOAT = 0x0E5,
GFX3DSTATE_SURFACEFORMAT_B8G8R8X8_UNORM = 0x0E9,
GFX3DSTATE_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB = 0x0EA,
GFX3DSTATE_SURFACEFORMAT_R8G8B8X8_UNORM = 0x0EB,
GFX3DSTATE_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB = 0x0EC,
GFX3DSTATE_SURFACEFORMAT_R9G9B9E5_SHAREDEXP = 0x0ED,
GFX3DSTATE_SURFACEFORMAT_B10G10R10X2_UNORM = 0x0EE,
GFX3DSTATE_SURFACEFORMAT_L16A16_FLOAT = 0x0F0,
GFX3DSTATE_SURFACEFORMAT_R32_UNORM = 0x0F1,
GFX3DSTATE_SURFACEFORMAT_R32_SNORM = 0x0F2,
GFX3DSTATE_SURFACEFORMAT_R10G10B10X2_USCALED = 0x0F3,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SSCALED = 0x0F4,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_USCALED = 0x0F5,
GFX3DSTATE_SURFACEFORMAT_R16G16_SSCALED = 0x0F6,
GFX3DSTATE_SURFACEFORMAT_R16G16_USCALED = 0x0F7,
GFX3DSTATE_SURFACEFORMAT_R32_SSCALED = 0x0F8,
GFX3DSTATE_SURFACEFORMAT_R32_USCALED = 0x0F9,
GFX3DSTATE_SURFACEFORMAT_B5G6R5_UNORM = 0x100,
GFX3DSTATE_SURFACEFORMAT_B5G6R5_UNORM_SRGB = 0x101,
GFX3DSTATE_SURFACEFORMAT_B5G5R5A1_UNORM = 0x102,
GFX3DSTATE_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB = 0x103,
GFX3DSTATE_SURFACEFORMAT_B4G4R4A4_UNORM = 0x104,
GFX3DSTATE_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB = 0x105,
GFX3DSTATE_SURFACEFORMAT_R8G8_UNORM = 0x106,
GFX3DSTATE_SURFACEFORMAT_R8G8_SNORM = 0x107,
GFX3DSTATE_SURFACEFORMAT_R8G8_SINT = 0x108,
GFX3DSTATE_SURFACEFORMAT_R8G8_UINT = 0x109,
GFX3DSTATE_SURFACEFORMAT_R16_UNORM = 0x10A,
GFX3DSTATE_SURFACEFORMAT_R16_SNORM = 0x10B,
GFX3DSTATE_SURFACEFORMAT_R16_SINT = 0x10C,
GFX3DSTATE_SURFACEFORMAT_R16_UINT = 0x10D,
GFX3DSTATE_SURFACEFORMAT_R16_FLOAT = 0x10E,
GFX3DSTATE_SURFACEFORMAT_I16_UNORM = 0x111,
GFX3DSTATE_SURFACEFORMAT_L16_UNORM = 0x112,
GFX3DSTATE_SURFACEFORMAT_A16_UNORM = 0x113,
GFX3DSTATE_SURFACEFORMAT_L8A8_UNORM = 0x114,
GFX3DSTATE_SURFACEFORMAT_I16_FLOAT = 0x115,
GFX3DSTATE_SURFACEFORMAT_L16_FLOAT = 0x116,
GFX3DSTATE_SURFACEFORMAT_A16_FLOAT = 0x117,
GFX3DSTATE_SURFACEFORMAT_L8A8_UNORM_SRGB = 0x118,
GFX3DSTATE_SURFACEFORMAT_R5G5_SNORM_B6_UNORM = 0x119,
GFX3DSTATE_SURFACEFORMAT_B5G5R5X1_UNORM = 0x11A,
GFX3DSTATE_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB = 0x11B,
GFX3DSTATE_SURFACEFORMAT_R8G8_SSCALED = 0x11C,
GFX3DSTATE_SURFACEFORMAT_R8G8_USCALED = 0x11D,
GFX3DSTATE_SURFACEFORMAT_R16_SSCALED = 0x11E,
GFX3DSTATE_SURFACEFORMAT_R16_USCALED = 0x11F,
GFX3DSTATE_SURFACEFORMAT_R8_UNORM = 0x140,
GFX3DSTATE_SURFACEFORMAT_R8_SNORM = 0x141,
GFX3DSTATE_SURFACEFORMAT_R8_SINT = 0x142,
GFX3DSTATE_SURFACEFORMAT_R8_UINT = 0x143,
GFX3DSTATE_SURFACEFORMAT_A8_UNORM = 0x144,
GFX3DSTATE_SURFACEFORMAT_I8_UNORM = 0x145,
GFX3DSTATE_SURFACEFORMAT_L8_UNORM = 0x146,
GFX3DSTATE_SURFACEFORMAT_P4A4_UNORM = 0x147,
GFX3DSTATE_SURFACEFORMAT_A4P4_UNORM = 0x148,
GFX3DSTATE_SURFACEFORMAT_R8_SSCALED = 0x149,
GFX3DSTATE_SURFACEFORMAT_R8_USCALED = 0x14A,
GFX3DSTATE_SURFACEFORMAT_P8_UNORM = 0x14B,
GFX3DSTATE_SURFACEFORMAT_L8_UNORM_SRGB = 0x14C,
GFX3DSTATE_SURFACEFORMAT_DXT1_RGB_SRGB = 0x180,
GFX3DSTATE_SURFACEFORMAT_R1_UINT = 0x181,
GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL = 0x182,
GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY = 0x183,
GFX3DSTATE_SURFACEFORMAT_P2_UNORM = 0x184,
GFX3DSTATE_SURFACEFORMAT_BC1_UNORM = 0x186,
GFX3DSTATE_SURFACEFORMAT_BC2_UNORM = 0x187,
GFX3DSTATE_SURFACEFORMAT_BC3_UNORM = 0x188,
GFX3DSTATE_SURFACEFORMAT_BC4_UNORM = 0x189,
GFX3DSTATE_SURFACEFORMAT_BC5_UNORM = 0x18A,
GFX3DSTATE_SURFACEFORMAT_BC1_UNORM_SRGB = 0x18B,
GFX3DSTATE_SURFACEFORMAT_BC2_UNORM_SRGB = 0x18C,
GFX3DSTATE_SURFACEFORMAT_BC3_UNORM_SRGB = 0x18D,
GFX3DSTATE_SURFACEFORMAT_MONO8 = 0x18E,
GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUV = 0x18F,
GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY = 0x190,
GFX3DSTATE_SURFACEFORMAT_DXT1_RGB = 0x191,
GFX3DSTATE_SURFACEFORMAT_FXT1 = 0x192,
GFX3DSTATE_SURFACEFORMAT_R8G8B8_UNORM = 0x193,
GFX3DSTATE_SURFACEFORMAT_R8G8B8_SNORM = 0x194,
GFX3DSTATE_SURFACEFORMAT_R8G8B8_SSCALED = 0x195,
GFX3DSTATE_SURFACEFORMAT_R8G8B8_USCALED = 0x196,
GFX3DSTATE_SURFACEFORMAT_R64G64B64A64_FLOAT = 0x197,
GFX3DSTATE_SURFACEFORMAT_R64G64B64_FLOAT = 0x198,
GFX3DSTATE_SURFACEFORMAT_BC4_SNORM = 0x199,
GFX3DSTATE_SURFACEFORMAT_BC5_SNORM = 0x19A,
GFX3DSTATE_SURFACEFORMAT_R16G16B16_FLOAT = 0x19B,
GFX3DSTATE_SURFACEFORMAT_R16G16B16_UNORM = 0x19C,
GFX3DSTATE_SURFACEFORMAT_R16G16B16_SNORM = 0x19D,
GFX3DSTATE_SURFACEFORMAT_R16G16B16_SSCALED = 0x19E,
GFX3DSTATE_SURFACEFORMAT_R16G16B16_USCALED = 0x19F,
GFX3DSTATE_SURFACEFORMAT_BC6H_SF16 = 0x1A1,
GFX3DSTATE_SURFACEFORMAT_BC7_UNORM = 0x1A2,
GFX3DSTATE_SURFACEFORMAT_BC7_UNORM_SRGB = 0x1A3,
GFX3DSTATE_SURFACEFORMAT_BC6H_UF16 = 0x1A4,
GFX3DSTATE_SURFACEFORMAT_NV12 = 0x1A5,
GFX3DSTATE_SURFACEFORMAT_RAW = 0x1FF,
NUM_GFX3DSTATE_SURFACEFORMATS
};
enum class ImagePlane {
NO_PLANE = 0,
PLANE_Y,
PLANE_U,
PLANE_V,
PLANE_UV
};
struct SurfaceFormatInfo {
GMM_RESOURCE_FORMAT GMMSurfaceFormat;
GFX3DSTATE_SURFACEFORMAT GenxSurfaceFormat;
uint32_t GMMTileWalk;
uint32_t NumChannels;
uint32_t PerChannelSizeInBytes;
size_t ImageElementSizeInBytes;
};
enum class ImageType {
Invalid,
Image1D,
Image2D,
Image3D,
Image1DArray,
Image2DArray,
Image1DBuffer
};
struct ImageDescriptor {
ImageType imageType;
size_t imageWidth;
size_t imageHeight;
size_t imageDepth;
size_t imageArraySize;
size_t imageRowPitch;
size_t imageSlicePitch;
uint32_t numMipLevels;
uint32_t numSamples;
bool fromParent;
};
struct ImageInfo {
ImageDescriptor imgDesc;
const SurfaceFormatInfo *surfaceFormat;
size_t size;
size_t rowPitch;
size_t slicePitch;
uint32_t qPitch;
size_t offset;
uint32_t xOffset;
uint32_t yOffset;
uint32_t yOffsetForUVPlane;
GMM_YUV_PLANE_ENUM plane;
uint32_t baseMipLevel;
uint32_t mipCount;
bool linearStorage;
bool preferRenderCompression;
bool useLocalMemory;
};
struct McsSurfaceInfo {
uint32_t pitch;
uint32_t qPitch;
uint32_t multisampleCount;
};
struct SurfaceOffsets {
uint64_t offset;
uint32_t xOffset;
uint32_t yOffset;
uint32_t yOffsetForUVplane;
};
} // namespace NEO
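A small illustrative helper showing how an ImageDescriptor might be filled for a plain 2D image; the dimensions and the zeroed sample/mip settings are placeholder assumptions.

// Illustrative only: describes a hypothetical 256x128 2D image with no mip chain or multisampling.
inline NEO::ImageDescriptor makeExample2dImageDescriptor() {
    NEO::ImageDescriptor desc = {};
    desc.imageType = NEO::ImageType::Image2D;
    desc.imageWidth = 256;
    desc.imageHeight = 128;
    desc.imageDepth = 1;
    desc.imageArraySize = 1;
    desc.numMipLevels = 1;
    desc.numSamples = 0;
    return desc;
}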


@@ -0,0 +1,66 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/timestamp_packet.h"
#include "command_stream/command_stream_receiver.h"
#include "command_stream/linear_stream.h"
#include "utilities/tag_allocator.h"
using namespace NEO;
void TimestampPacketContainer::add(Node *timestampPacketNode) {
timestampPacketNodes.push_back(timestampPacketNode);
}
TimestampPacketContainer::~TimestampPacketContainer() {
for (auto node : timestampPacketNodes) {
node->returnTag();
}
}
void TimestampPacketContainer::swapNodes(TimestampPacketContainer &timestampPacketContainer) {
timestampPacketNodes.swap(timestampPacketContainer.timestampPacketNodes);
}
void TimestampPacketContainer::resolveDependencies(bool clearAllDependencies) {
std::vector<Node *> pendingNodes;
for (auto node : timestampPacketNodes) {
if (node->canBeReleased() || clearAllDependencies) {
node->returnTag();
} else {
pendingNodes.push_back(node);
}
}
std::swap(timestampPacketNodes, pendingNodes);
}
void TimestampPacketContainer::assignAndIncrementNodesRefCounts(const TimestampPacketContainer &inputTimestampPacketContainer) {
auto &inputNodes = inputTimestampPacketContainer.peekNodes();
std::copy(inputNodes.begin(), inputNodes.end(), std::back_inserter(timestampPacketNodes));
for (auto node : inputNodes) {
node->incRefCount();
}
}
void TimestampPacketContainer::makeResident(CommandStreamReceiver &commandStreamReceiver) {
for (auto node : timestampPacketNodes) {
commandStreamReceiver.makeResident(*node->getBaseGraphicsAllocation());
}
}
bool TimestampPacketContainer::isCompleted() const {
for (auto node : timestampPacketNodes) {
if (!node->tagForCpuAccess->isCompleted()) {
return false;
}
}
return true;
}


@@ -0,0 +1,179 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_container/command_encoder.h"
#include "command_stream/csr_deps.h"
#include "helpers/aux_translation.h"
#include "helpers/non_copyable_or_moveable.h"
#include "utilities/tag_allocator.h"
#include <atomic>
#include <cstdint>
#include <vector>
namespace NEO {
class CommandStreamReceiver;
class LinearStream;
namespace TimestampPacketSizeControl {
constexpr uint32_t preferredPacketCount = 16u;
}
#pragma pack(1)
struct TimestampPacketStorage {
struct Packet {
uint32_t contextStart = 1u;
uint32_t globalStart = 1u;
uint32_t contextEnd = 1u;
uint32_t globalEnd = 1u;
};
enum class WriteOperationType : uint32_t {
BeforeWalker,
AfterWalker
};
static GraphicsAllocation::AllocationType getAllocationType() {
return GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER;
}
bool isCompleted() const {
for (uint32_t i = 0; i < packetsUsed; i++) {
if ((packets[i].contextEnd & 1) || (packets[i].globalEnd & 1)) {
return false;
}
}
return implicitDependenciesCount.load() == 0;
}
void initialize() {
for (auto &packet : packets) {
packet.contextStart = 1u;
packet.globalStart = 1u;
packet.contextEnd = 1u;
packet.globalEnd = 1u;
}
implicitDependenciesCount.store(0);
packetsUsed = 1;
}
void incImplicitDependenciesCount() { implicitDependenciesCount++; }
Packet packets[TimestampPacketSizeControl::preferredPacketCount];
std::atomic<uint32_t> implicitDependenciesCount{0u};
uint32_t packetsUsed = 1;
};
#pragma pack()
static_assert(((4 * TimestampPacketSizeControl::preferredPacketCount + 2) * sizeof(uint32_t)) == sizeof(TimestampPacketStorage),
"This structure is consumed by GPU and has to follow specific restrictions for padding and size");
class TimestampPacketContainer : public NonCopyableClass {
public:
using Node = TagNode<TimestampPacketStorage>;
TimestampPacketContainer() = default;
TimestampPacketContainer(TimestampPacketContainer &&) = default;
TimestampPacketContainer &operator=(TimestampPacketContainer &&) = default;
MOCKABLE_VIRTUAL ~TimestampPacketContainer();
const std::vector<Node *> &peekNodes() const { return timestampPacketNodes; }
void add(Node *timestampPacketNode);
void swapNodes(TimestampPacketContainer &timestampPacketContainer);
void assignAndIncrementNodesRefCounts(const TimestampPacketContainer &inputTimestampPacketContainer);
void resolveDependencies(bool clearAllDependencies);
void makeResident(CommandStreamReceiver &commandStreamReceiver);
bool isCompleted() const;
protected:
std::vector<Node *> timestampPacketNodes;
};
struct TimestampPacketDependencies : public NonCopyableClass {
TimestampPacketContainer previousEnqueueNodes;
TimestampPacketContainer barrierNodes;
TimestampPacketContainer auxToNonAuxNodes;
TimestampPacketContainer nonAuxToAuxNodes;
};
struct TimestampPacketHelper {
template <typename GfxFamily>
static void programSemaphoreWithImplicitDependency(LinearStream &cmdStream, TagNode<TimestampPacketStorage> &timestampPacketNode) {
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
auto compareAddress = timestampPacketNode.getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
auto dependenciesCountAddress = timestampPacketNode.getGpuAddress() + offsetof(TimestampPacketStorage, implicitDependenciesCount);
for (uint32_t packetId = 0; packetId < timestampPacketNode.tagForCpuAccess->packetsUsed; packetId++) {
uint64_t compareOffset = packetId * sizeof(TimestampPacketStorage::Packet);
auto miSemaphoreCmd = cmdStream.getSpaceForCmd<MI_SEMAPHORE_WAIT>();
EncodeSempahore<GfxFamily>::programMiSemaphoreWait(miSemaphoreCmd, compareAddress + compareOffset, 1, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
}
timestampPacketNode.tagForCpuAccess->incImplicitDependenciesCount();
auto miAtomic = cmdStream.getSpaceForCmd<MI_ATOMIC>();
EncodeAtomic<GfxFamily>::programMiAtomic(miAtomic, dependenciesCountAddress,
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_DECREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD);
}
template <typename GfxFamily>
static void programCsrDependencies(LinearStream &cmdStream, const CsrDependencies &csrDependencies) {
for (auto timestampPacketContainer : csrDependencies) {
for (auto &node : timestampPacketContainer->peekNodes()) {
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(cmdStream, *node);
}
}
}
template <typename GfxFamily, AuxTranslationDirection auxTranslationDirection>
static void programSemaphoreWithImplicitDependencyForAuxTranslation(LinearStream &cmdStream,
const TimestampPacketDependencies *timestampPacketDependencies) {
auto &container = (auxTranslationDirection == AuxTranslationDirection::AuxToNonAux)
? timestampPacketDependencies->auxToNonAuxNodes
: timestampPacketDependencies->nonAuxToAuxNodes;
for (auto &node : container.peekNodes()) {
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(cmdStream, *node);
}
}
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForAuxTranslationNodeDependency(size_t count) {
return count * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>();
}
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue() {
return sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) + sizeof(typename GfxFamily::MI_ATOMIC);
}
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForNodeDependency(TagNode<TimestampPacketStorage> &timestampPacketNode) {
size_t totalMiSemaphoreWaitSize = timestampPacketNode.tagForCpuAccess->packetsUsed * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
return totalMiSemaphoreWaitSize + sizeof(typename GfxFamily::MI_ATOMIC);
}
template <typename GfxFamily>
static size_t getRequiredCmdStreamSize(const CsrDependencies &csrDependencies) {
size_t totalCommandsSize = 0;
for (auto timestampPacketContainer : csrDependencies) {
for (auto &node : timestampPacketContainer->peekNodes()) {
totalCommandsSize += getRequiredCmdStreamSizeForNodeDependency<GfxFamily>(*node);
}
}
return totalCommandsSize;
}
};
} // namespace NEO


@@ -0,0 +1,62 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
template <typename T>
struct Vec3 {
Vec3(T x, T y, T z) : x(x), y(y), z(z) {}
Vec3(const Vec3 &v) : x(v.x), y(v.y), z(v.z) {}
Vec3(const T *arr) {
if (arr == nullptr) {
x = y = z = 0;
} else {
x = arr[0];
y = arr[1];
z = arr[2];
}
}
Vec3 &operator=(const Vec3 &arr) {
x = arr.x;
y = arr.y;
z = arr.z;
return *this;
}
Vec3<T> &operator=(const T arr[3]) {
x = arr[0];
y = arr[1];
z = arr[2];
return *this;
}
bool operator==(const Vec3<T> &vec) const {
return ((x == vec.x) && (y == vec.y) && (z == vec.z));
}
bool operator!=(const Vec3<T> &vec) const {
return !operator==(vec);
}
unsigned int getSimplifiedDim() const {
if (z > 1) {
return 3;
}
if (y > 1) {
return 2;
}
if (x >= 1) {
return 1;
}
return 0;
}
T x;
T y;
T z;
};
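A small usage sketch of Vec3; the work-size values are illustrative only.

#include <cassert>
#include <cstddef>

inline void vec3UsageSketch() {
    const size_t globalWorkSize[3] = {64, 1, 1};
    Vec3<size_t> gws(globalWorkSize);     // construct from a 3-element array
    assert(gws.getSimplifiedDim() == 1u); // only x exceeds 1, so this is a 1D dispatch

    gws = Vec3<size_t>(64, 8, 1);
    assert(gws.getSimplifiedDim() == 2u); // y > 1 promotes it to 2D
    assert(gws != Vec3<size_t>(nullptr)); // a null source array yields (0, 0, 0)
}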


@@ -0,0 +1,13 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
long(__stdcall *notifyAubCaptureImpl)(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate) = nullptr;
} // namespace NEO


@@ -0,0 +1,26 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
extern long(__stdcall *notifyAubCaptureImpl)(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate);
template <typename GfxFamily>
struct DeviceCallbacks {
static long __stdcall notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate);
};
template <typename GfxFamily>
struct TTCallbacks {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
static int __stdcall writeL3Address(void *queueHandle, uint64_t l3GfxAddress, uint64_t regOffset);
};
} // namespace NEO


@@ -0,0 +1,26 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "command_stream/linear_stream.h"
#include "helpers/hw_helper.h"
#include "helpers/windows/gmm_callbacks.h"
#include <cstdint>
namespace NEO {
template <typename GfxFamily>
long __stdcall DeviceCallbacks<GfxFamily>::notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate) {
return 0;
}
template <typename GfxFamily>
int __stdcall TTCallbacks<GfxFamily>::writeL3Address(void *queueHandle, uint64_t l3GfxAddress, uint64_t regOffset) {
return 0;
}
} // namespace NEO