Reorganization directory structure [3/n]

Change-Id: If3dfa3f6007f8810a6a1ae1a4f0c7da38544648d
Author: kamdiedrich
Date: 2020-02-23 21:00:51 +01:00
Committed by: sys_ocldev
Parent: e177b4fc0f
Commit: e072275ae6
711 changed files with 94 additions and 94 deletions

View File

@@ -0,0 +1,91 @@
#
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/abort.h
${CMAKE_CURRENT_SOURCE_DIR}/address_patch.h
${CMAKE_CURRENT_SOURCE_DIR}/aligned_memory.h
${CMAKE_CURRENT_SOURCE_DIR}/array_count.h
${CMAKE_CURRENT_SOURCE_DIR}/aux_translation.h
${CMAKE_CURRENT_SOURCE_DIR}/basic_math.h
${CMAKE_CURRENT_SOURCE_DIR}/bit_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper_bdw_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/cache_policy.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cache_policy.h
${CMAKE_CURRENT_SOURCE_DIR}/common_types.h
${CMAKE_CURRENT_SOURCE_DIR}/completion_stamp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/completion_stamp.h
${CMAKE_CURRENT_SOURCE_DIR}/debug_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/dirty_state_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/dirty_state_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/engine_control.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/engine_node_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/engine_node_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/extendable_enum.h
${CMAKE_CURRENT_SOURCE_DIR}/file_io.cpp
${CMAKE_CURRENT_SOURCE_DIR}/file_io.h
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/flat_batch_buffer_helper_hw.inl
${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp.h
${CMAKE_CURRENT_SOURCE_DIR}/get_info.h
${CMAKE_CURRENT_SOURCE_DIR}/hash.h
${CMAKE_CURRENT_SOURCE_DIR}/heap_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/heap_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/hw_cmds.h
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_bdw_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/hw_helper_extended.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hw_info.h
${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.h
${CMAKE_CURRENT_SOURCE_DIR}/non_copyable_or_moveable.h
${CMAKE_CURRENT_SOURCE_DIR}/options.h
${CMAKE_CURRENT_SOURCE_DIR}/pipeline_select_args.h
${CMAKE_CURRENT_SOURCE_DIR}/pipeline_select_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/preamble.h
${CMAKE_CURRENT_SOURCE_DIR}/preamble_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/preamble_bdw_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}/ptr_math.h
${CMAKE_CURRENT_SOURCE_DIR}/register_offsets.h
${CMAKE_CURRENT_SOURCE_DIR}/registered_method_dispatcher.h
${CMAKE_CURRENT_SOURCE_DIR}/simd_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address.h
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_bdw_plus.inl
${CMAKE_CURRENT_SOURCE_DIR}/state_compute_mode_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/stdio.h
${CMAKE_CURRENT_SOURCE_DIR}/string.h
${CMAKE_CURRENT_SOURCE_DIR}/surface_format_info.h
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.cpp
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.h
${CMAKE_CURRENT_SOURCE_DIR}/vec.h
)
set_property(GLOBAL PROPERTY NEO_CORE_HELPERS ${NEO_CORE_HELPERS})
if(WIN32)
set(NEO_CORE_SRCS_HELPERS_WINDOWS
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.cpp
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.h
${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.inl
)
set_property(GLOBAL PROPERTY NEO_CORE_SRCS_HELPERS_WINDOWS ${NEO_CORE_SRCS_HELPERS_WINDOWS})
endif()
add_subdirectories()

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/abort.h"
#include <cstdlib>
namespace NEO {
void abortExecution() {
abort();
}
} // namespace NEO

View File

@@ -0,0 +1,12 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
namespace NEO {
[[noreturn]] void abortExecution();
}

View File

@@ -0,0 +1,80 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <stdint.h>
namespace NEO {
enum PatchInfoAllocationType {
Default = 0,
KernelArg,
GeneralStateHeap,
DynamicStateHeap,
IndirectObjectHeap,
SurfaceStateHeap,
InstructionHeap,
TagAddress,
TagValue,
GUCStartMessage,
ScratchSpace
};
struct PatchInfoData {
uint64_t sourceAllocation;
uint64_t sourceAllocationOffset;
PatchInfoAllocationType sourceType;
uint64_t targetAllocation;
uint64_t targetAllocationOffset;
PatchInfoAllocationType targetType;
uint32_t patchAddressSize;
PatchInfoData(uint64_t sourceAllocation,
uint64_t sourceAllocationOffset,
PatchInfoAllocationType sourceType,
uint64_t targetAllocation,
uint64_t targetAllocationOffset,
PatchInfoAllocationType targetType,
uint32_t patchAddressSize)
: sourceAllocation(sourceAllocation),
sourceAllocationOffset(sourceAllocationOffset),
sourceType(sourceType),
targetAllocation(targetAllocation),
targetAllocationOffset(targetAllocationOffset),
targetType(targetType),
patchAddressSize(patchAddressSize) {
}
PatchInfoData(uint64_t sourceAllocation,
uint64_t sourceAllocationOffset,
PatchInfoAllocationType sourceType,
uint64_t targetAllocation,
uint64_t targetAllocationOffset,
PatchInfoAllocationType targetType)
: sourceAllocation(sourceAllocation),
sourceAllocationOffset(sourceAllocationOffset),
sourceType(sourceType),
targetAllocation(targetAllocation),
targetAllocationOffset(targetAllocationOffset),
targetType(targetType),
patchAddressSize(sizeof(void *)) {
}
bool requiresIndirectPatching() {
return (targetType != PatchInfoAllocationType::Default && targetType != PatchInfoAllocationType::GUCStartMessage);
}
};
struct CommandChunk {
uint64_t baseAddressCpu = 0;
uint64_t baseAddressGpu = 0;
uint64_t startOffset = 0;
uint64_t endOffset = 0;
uint64_t batchBufferStartLocation = 0;
uint64_t batchBufferStartAddress = 0;
};
} // namespace NEO
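Illustrative sketch of constructing a patch entry with the six-argument constructor (addresses, offsets, and the wrapper function are invented for the example; not part of the changed files):

    void patchInfoExample() {
        uint64_t sourceGpuVa = 0x1000; // hypothetical GPU addresses
        uint64_t targetGpuVa = 0x2000;
        // patchAddressSize defaults to sizeof(void *) with this constructor.
        NEO::PatchInfoData patch(sourceGpuVa, 0u, NEO::PatchInfoAllocationType::KernelArg,
                                 targetGpuVa, 64u, NEO::PatchInfoAllocationType::IndirectObjectHeap);
        // True here: the target is neither Default nor GUCStartMessage.
        bool indirect = patch.requiresIndirectPatching();
        (void)indirect;
    }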

View File

@@ -0,0 +1,112 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/debug_helpers.h"
#include "memory_manager/memory_constants.h"
#include "opencl/source/utilities/logger.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <new>
#ifdef _MSC_VER
#define ALIGNAS(x) __declspec(align(x))
#else
#define ALIGNAS(x) alignas(x)
#endif
template <typename T, typename TNoRef = typename std::remove_reference<T>::type>
constexpr inline TNoRef alignUp(T before, size_t alignment) {
TNoRef mask = static_cast<TNoRef>(alignment - 1);
return (before + mask) & ~mask;
}
template <typename T>
constexpr inline T *alignUp(T *ptrBefore, size_t alignment) {
return reinterpret_cast<T *>(alignUp(reinterpret_cast<uintptr_t>(ptrBefore), alignment));
}
template <typename T, typename TNoRef = typename std::remove_reference<T>::type>
constexpr inline TNoRef alignDown(T before, size_t alignment) {
TNoRef mask = static_cast<TNoRef>(alignment - 1);
return before & ~mask;
}
template <typename T>
constexpr inline T *alignDown(T *ptrBefore, size_t alignment) {
return reinterpret_cast<T *>(alignDown(reinterpret_cast<uintptr_t>(ptrBefore), alignment));
}
inline void *alignedMalloc(size_t bytes, size_t alignment) {
DEBUG_BREAK_IF(alignment <= 0);
if (bytes == 0) {
bytes = sizeof(void *);
}
// Make sure our alignment is at least the size of a pointer
alignment = std::max(alignment, sizeof(void *));
// Allocate _bytes + _alignment
size_t sizeToAlloc = bytes + alignment;
auto pOriginalMemory = new (std::nothrow) char[sizeToAlloc];
// Add in the alignment
auto pAlignedMemory = reinterpret_cast<uintptr_t>(pOriginalMemory);
if (pAlignedMemory) {
pAlignedMemory += alignment;
pAlignedMemory -= pAlignedMemory % alignment;
// Store the original pointer to facilitate deallocation
reinterpret_cast<void **>(pAlignedMemory)[-1] = pOriginalMemory;
}
DBG_LOG(LogAlignedAllocations, __FUNCTION__, "Pointer:", reinterpret_cast<void *>(pOriginalMemory), "size:", sizeToAlloc);
// Return result
return reinterpret_cast<void *>(pAlignedMemory); // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks)
}
inline void alignedFree(void *ptr) {
if (ptr) {
auto originalPtr = reinterpret_cast<char **>(ptr)[-1];
DBG_LOG(LogAlignedAllocations, __FUNCTION__, "Pointer:", reinterpret_cast<void *>(originalPtr));
delete[] originalPtr;
}
}
inline size_t alignSizeWholePage(const void *ptr, size_t size) {
uintptr_t startPageMisalignedAddressOffset = reinterpret_cast<uintptr_t>(ptr) & MemoryConstants::pageMask;
size_t alignedSizeToPage = alignUp(startPageMisalignedAddressOffset + size, MemoryConstants::pageSize);
return alignedSizeToPage;
}
template <size_t alignment, typename T>
inline constexpr bool isAligned(T val) {
return (static_cast<size_t>(val) % alignment) == 0;
}
template <size_t alignment, typename T>
inline bool isAligned(T *ptr) {
return ((reinterpret_cast<uintptr_t>(ptr)) % alignment) == 0;
}
template <typename T1, typename T2>
inline bool isAligned(T1 ptr, T2 alignment) {
return ((static_cast<size_t>(ptr)) & (static_cast<size_t>(alignment) - 1u)) == 0;
}
template <typename T>
inline bool isAligned(T *ptr) {
return (reinterpret_cast<uintptr_t>(ptr) & (alignof(T) - 1)) == 0;
}
inline auto allocateAlignedMemory(size_t bytes, size_t alignment) {
return std::unique_ptr<void, std::function<decltype(alignedFree)>>(alignedMalloc(bytes, alignment), alignedFree);
}
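A minimal usage sketch of the aligned-memory helpers above (sizes and alignments are arbitrary example values; not part of the changed files):

    void alignedMemoryExample() {
        // 256 bytes aligned to a 64-byte cache line; freed with the matching helper.
        void *raw = alignedMalloc(256, 64);
        bool cacheLineAligned = isAligned<64>(raw); // true by construction
        alignedFree(raw);
        (void)cacheLineAligned;

        // RAII variant: the unique_ptr calls alignedFree automatically.
        auto managed = allocateAlignedMemory(256, 64);

        // Integer rounding helpers.
        size_t rounded = alignUp<size_t>(100, 64);   // 128
        size_t lowered = alignDown<size_t>(100, 64); // 64
        (void)rounded;
        (void)lowered;
    }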

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "debug_settings/debug_settings_manager.h"
#include "helpers/deferred_deleter_helper.h"
namespace NEO {
bool isDeferredDeleterEnabled() {
return DebugManager.flags.EnableDeferredDeleter.get();
}
} // namespace NEO

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <stddef.h>
template <typename T, size_t N>
constexpr size_t arrayCount(const T (&)[N]) {
return N;
}
template <typename T, size_t N>
constexpr bool isInRange(size_t idx, const T (&)[N]) {
return (idx < N);
}
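A tiny example of the two helpers above (array contents are irrelevant; values chosen for illustration):

    int lookupTable[8] = {};
    static_assert(arrayCount(lookupTable) == 8, "element count deduced at compile time");
    static_assert(isInRange(3, lookupTable), "index 3 lies within [0, 8)");
    static_assert(!isInRange(8, lookupTable), "index 8 is one past the end");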

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
enum class AuxTranslationDirection {
None,
AuxToNonAux,
NonAuxToAux
};
enum class AuxTranslationMode : int32_t {
Builtin = 0,
Blit = 1
};
} // namespace NEO

View File

@@ -0,0 +1,182 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/vec.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <stdio.h>
#define KB 1024uLL
#define MB (KB * KB)
#define GB (KB * MB)
namespace Math {
constexpr uint32_t nextPowerOfTwo(uint32_t value) {
--value;
value |= value >> 1;
value |= value >> 2;
value |= value >> 4;
value |= value >> 8;
value |= value >> 16;
++value;
return value;
}
constexpr uint64_t nextPowerOfTwo(uint64_t value) {
--value;
value |= value >> 1;
value |= value >> 2;
value |= value >> 4;
value |= value >> 8;
value |= value >> 16;
value |= value >> 32;
++value;
return value;
}
constexpr uint32_t prevPowerOfTwo(uint32_t value) {
value |= value >> 1;
value |= value >> 2;
value |= value >> 4;
value |= value >> 8;
value |= value >> 16;
return (value - (value >> 1));
}
constexpr uint64_t prevPowerOfTwo(uint64_t value) {
value |= value >> 1;
value |= value >> 2;
value |= value >> 4;
value |= value >> 8;
value |= value >> 16;
value |= value >> 32;
return (value - (value >> 1));
}
inline uint32_t getMinLsbSet(uint32_t value) {
static const uint8_t multiplyDeBruijnBitPosition[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
auto invert = -static_cast<int64_t>(value);
value &= static_cast<uint32_t>(invert);
return multiplyDeBruijnBitPosition[static_cast<uint32_t>(value * 0x077CB531U) >> 27];
}
constexpr uint32_t log2(uint32_t value) {
if (value == 0) {
return 32;
}
uint32_t exponent = 0u;
while (value >>= 1) {
exponent++;
}
return exponent;
}
constexpr uint32_t log2(uint64_t value) {
if (value == 0) {
return 64;
}
uint32_t exponent = 0;
while (value >>= 1) {
exponent++;
}
return exponent;
}
union FloatConversion {
uint32_t u;
float f;
};
// clang-format off
static const FloatConversion PosInfinity = {0x7f800000};
static const FloatConversion NegInfinity = {0xff800000};
static const FloatConversion Nan = {0x7fc00000};
// clang-format on
inline uint16_t float2Half(float f) {
FloatConversion u;
u.f = f;
uint32_t fsign = (u.u >> 16) & 0x8000;
float x = std::fabs(f);
// NaN
if (x != x) {
u.u >>= (24 - 11);
u.u &= 0x7fff;
u.u |= 0x0200; //silence the NaN
return u.u | fsign;
}
// overflow
if (x >= std::ldexp(1.0f, 16)) {
if (x == PosInfinity.f)
return 0x7c00 | fsign;
return 0x7bff | fsign;
}
// underflow
if (x < std::ldexp(1.0f, -24))
return fsign; // The halfway case can return 0x0001 or 0. 0 is even.
// half denormal
if (x < std::ldexp(1.0f, -14)) {
x *= std::ldexp(1.0f, 24);
return (uint16_t)((int)x | fsign);
}
u.u &= 0xFFFFE000U;
u.u -= 0x38000000U;
return (u.u >> (24 - 11)) | fsign;
}
constexpr bool isDivisibleByPowerOfTwoDivisor(uint32_t number, uint32_t divisor) {
return (number & (divisor - 1)) == 0;
}
constexpr size_t computeTotalElementsCount(const Vec3<size_t> &inputVector) {
size_t minElementCount = 1;
auto xDim = std::max(minElementCount, inputVector.x);
auto yDim = std::max(minElementCount, inputVector.y);
auto zDim = std::max(minElementCount, inputVector.z);
return xDim * yDim * zDim;
}
template <typename T>
constexpr bool isPow2(T val) {
return val != 0 && (val & (val - 1)) == 0;
}
template <typename T>
constexpr T ffs(T v) {
if (v == 0) {
return std::numeric_limits<T>::max();
}
for (T i = 0; i < sizeof(T) * 8; ++i) {
if (0 != (v & (1ULL << i))) {
return i;
}
}
std::abort();
}
constexpr size_t divideAndRoundUp(size_t dividend, size_t divisor) {
return (dividend + divisor - 1) / divisor;
}
} // namespace Math
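A few hand-checked values for the Math helpers above (illustrative only):

    static_assert(Math::nextPowerOfTwo(33u) == 64u, "rounds up to the next power of two");
    static_assert(Math::prevPowerOfTwo(33u) == 32u, "rounds down to the previous power of two");
    static_assert(Math::log2(64u) == 6u, "integer base-2 logarithm");
    static_assert(Math::isPow2(4096), "4096 is a power of two");
    static_assert(Math::divideAndRoundUp(10, 4) == 3, "ceiling division: 10 / 4 rounds up to 3");
    static_assert(Math::isDivisibleByPowerOfTwoDivisor(96, 32), "96 is a multiple of 32");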

View File

@@ -0,0 +1,40 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cassert>
#include <cstdint>
#include <limits>
namespace NEO {
constexpr bool isBitSet(uint64_t field, uint64_t bitPosition) {
assert(bitPosition < std::numeric_limits<uint64_t>::digits); // undefined behavior
return (field & (1ull << bitPosition));
}
constexpr bool isAnyBitSet(uint64_t field, uint64_t checkedBits) {
return ((field & checkedBits) != 0);
}
constexpr bool isValueSet(uint64_t field, uint64_t value) {
assert(value != 0);
return ((field & value) == value);
}
constexpr bool isFieldValid(uint64_t field, uint64_t acceptedBits) {
return ((field & (~acceptedBits)) == 0);
}
constexpr uint64_t setBits(uint64_t field, bool newValue, uint64_t bitsToModify) {
if (newValue) {
return (field | bitsToModify);
}
return (field & (~bitsToModify));
}
} // namespace NEO
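Hand-checked examples for the bit helpers above (masks are arbitrary illustration values):

    static_assert(NEO::isBitSet(0b1010, 1), "bit 1 of 0b1010 is set");
    static_assert(!NEO::isBitSet(0b1010, 2), "bit 2 of 0b1010 is clear");
    static_assert(NEO::isAnyBitSet(0b1010, 0b0110), "the two masks share bit 1");
    static_assert(NEO::isValueSet(0xF0, 0x30), "all bits of 0x30 are present in 0xF0");
    static_assert(NEO::isFieldValid(0x05, 0x0F), "no bits outside the accepted mask");
    static_assert(NEO::setBits(0x01, true, 0x04) == 0x05, "sets bit 2");
    static_assert(NEO::setBits(0x05, false, 0x04) == 0x01, "clears bit 2");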

View File

@@ -0,0 +1,125 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/blit_commands_helper.h"
#include "helpers/timestamp_packet.h"
#include "memory_manager/surface.h"
namespace NEO {
BlitProperties BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection blitDirection,
CommandStreamReceiver &commandStreamReceiver,
GraphicsAllocation *memObjAllocation,
GraphicsAllocation *preallocatedHostAllocation,
void *hostPtr, uint64_t memObjGpuVa,
uint64_t hostAllocGpuVa, size_t hostPtrOffset,
size_t copyOffset, uint64_t copySize) {
GraphicsAllocation *hostAllocation = nullptr;
if (preallocatedHostAllocation) {
hostAllocation = preallocatedHostAllocation;
UNRECOVERABLE_IF(hostAllocGpuVa == 0);
} else {
HostPtrSurface hostPtrSurface(hostPtr, static_cast<size_t>(copySize), true);
bool success = commandStreamReceiver.createAllocationForHostSurface(hostPtrSurface, false);
UNRECOVERABLE_IF(!success);
hostAllocation = hostPtrSurface.getAllocation();
hostAllocGpuVa = hostAllocation->getGpuAddress();
}
if (BlitterConstants::BlitDirection::HostPtrToBuffer == blitDirection) {
return {
nullptr, // outputTimestampPacket
blitDirection, // blitDirection
{}, // csrDependencies
AuxTranslationDirection::None, // auxTranslationDirection
memObjAllocation, // dstAllocation
hostAllocation, // srcAllocation
memObjGpuVa, // dstGpuAddress
hostAllocGpuVa, // srcGpuAddress
copySize, // copySize
copyOffset, // dstOffset
hostPtrOffset}; // srcOffset
} else {
return {
nullptr, // outputTimestampPacket
blitDirection, // blitDirection
{}, // csrDependencies
AuxTranslationDirection::None, // auxTranslationDirection
hostAllocation, // dstAllocation
memObjAllocation, // srcAllocation
hostAllocGpuVa, // dstGpuAddress
memObjGpuVa, // srcGpuAddress
copySize, // copySize
hostPtrOffset, // dstOffset
copyOffset}; // srcOffset
}
}
BlitProperties BlitProperties::constructPropertiesForCopyBuffer(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
size_t dstOffset, size_t srcOffset, uint64_t copySize) {
return {
nullptr, // outputTimestampPacket
BlitterConstants::BlitDirection::BufferToBuffer, // blitDirection
{}, // csrDependencies
AuxTranslationDirection::None, // auxTranslationDirection
dstAllocation, // dstAllocation
srcAllocation, // srcAllocation
dstAllocation->getGpuAddress(), // dstGpuAddress
srcAllocation->getGpuAddress(), // srcGpuAddress
copySize, // copySize
dstOffset, // dstOffset
srcOffset}; // srcOffset
}
BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection,
GraphicsAllocation *allocation) {
auto allocationSize = allocation->getUnderlyingBufferSize();
return {
nullptr, // outputTimestampPacket
BlitterConstants::BlitDirection::BufferToBuffer, // blitDirection
{}, // csrDependencies
auxTranslationDirection, // auxTranslationDirection
allocation, // dstAllocation
allocation, // srcAllocation
allocation->getGpuAddress(), // dstGpuAddress
allocation->getGpuAddress(), // srcGpuAddress
allocationSize, // copySize
0, // dstOffset
0 // srcOffset
};
}
void BlitProperties::setupDependenciesForAuxTranslation(BlitPropertiesContainer &blitPropertiesContainer, TimestampPacketDependencies &timestampPacketDependencies,
TimestampPacketContainer &kernelTimestamps, const CsrDependencies &depsFromEvents,
CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr) {
auto numObjects = blitPropertiesContainer.size() / 2;
for (size_t i = 0; i < numObjects; i++) {
blitPropertiesContainer[i].outputTimestampPacket = timestampPacketDependencies.auxToNonAuxNodes.peekNodes()[i];
blitPropertiesContainer[i + numObjects].outputTimestampPacket = timestampPacketDependencies.nonAuxToAuxNodes.peekNodes()[i];
}
gpguCsr.requestStallingPipeControlOnNextFlush();
auto nodesAllocator = gpguCsr.getTimestampPacketAllocator();
timestampPacketDependencies.barrierNodes.add(nodesAllocator->getTag());
// wait for barrier and events before AuxToNonAux
blitPropertiesContainer[0].csrDependencies.push_back(&timestampPacketDependencies.barrierNodes);
for (auto dep : depsFromEvents) {
blitPropertiesContainer[0].csrDependencies.push_back(dep);
}
// wait for NDR before NonAuxToAux
blitPropertiesContainer[numObjects].csrDependencies.push_back(&kernelTimestamps);
}
} // namespace NEO

View File

@@ -0,0 +1,73 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_stream/csr_deps.h"
#include "helpers/aux_translation.h"
#include "memory_manager/memory_constants.h"
#include "utilities/stackvec.h"
#include <cstdint>
namespace NEO {
class CommandStreamReceiver;
class GraphicsAllocation;
class LinearStream;
struct TimestampPacketStorage;
struct RootDeviceEnvironment;
template <typename TagType>
struct TagNode;
struct BlitProperties;
struct HardwareInfo;
struct TimestampPacketDependencies;
using BlitPropertiesContainer = StackVec<BlitProperties, 16>;
struct BlitProperties {
static BlitProperties constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection blitDirection,
CommandStreamReceiver &commandStreamReceiver,
GraphicsAllocation *memObjAllocation,
GraphicsAllocation *preallocatedHostAllocation,
void *hostPtr, uint64_t memObjGpuVa,
uint64_t hostAllocGpuVa, size_t hostPtrOffset,
size_t copyOffset, uint64_t copySize);
static BlitProperties constructPropertiesForCopyBuffer(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
size_t dstOffset, size_t srcOffset, uint64_t copySize);
static BlitProperties constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection,
GraphicsAllocation *allocation);
static void setupDependenciesForAuxTranslation(BlitPropertiesContainer &blitPropertiesContainer, TimestampPacketDependencies &timestampPacketDependencies,
TimestampPacketContainer &kernelTimestamps, const CsrDependencies &depsFromEvents,
CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr);
static BlitterConstants::BlitDirection obtainBlitDirection(uint32_t commandType);
TagNode<TimestampPacketStorage> *outputTimestampPacket = nullptr;
BlitterConstants::BlitDirection blitDirection;
CsrDependencies csrDependencies;
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;
GraphicsAllocation *dstAllocation = nullptr;
GraphicsAllocation *srcAllocation = nullptr;
uint64_t dstGpuAddress = 0;
uint64_t srcGpuAddress = 0;
uint64_t copySize = 0;
size_t dstOffset = 0;
size_t srcOffset = 0;
};
template <typename GfxFamily>
struct BlitCommandsHelper {
static size_t estimateBlitCommandsSize(uint64_t copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket);
static size_t estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, const HardwareInfo &hwInfo);
static void dispatchBlitCommandsForBuffer(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment);
static void appendBlitCommandsForBuffer(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment);
};
} // namespace NEO

View File

@@ -0,0 +1,91 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/blit_commands_helper.h"
#include "helpers/hw_helper.h"
#include "helpers/timestamp_packet.h"
namespace NEO {
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(uint64_t copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket) {
size_t numberOfBlits = 0;
uint64_t sizeToBlit = copySize;
uint64_t width = 1;
uint64_t height = 1;
while (sizeToBlit != 0) {
if (sizeToBlit > BlitterConstants::maxBlitWidth) {
// 2D: maxBlitWidth x (1 .. maxBlitHeight)
width = BlitterConstants::maxBlitWidth;
height = std::min((sizeToBlit / width), BlitterConstants::maxBlitHeight);
} else {
// 1D: (1 .. maxBlitWidth) x 1
width = sizeToBlit;
height = 1;
}
sizeToBlit -= (width * height);
numberOfBlits++;
}
return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) +
(sizeof(typename GfxFamily::XY_COPY_BLT) * numberOfBlits) +
(sizeof(typename GfxFamily::MI_FLUSH_DW) * static_cast<size_t>(updateTimestampPacket));
}
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, const HardwareInfo &hwInfo) {
size_t size = 0;
for (auto &blitProperties : blitPropertiesContainer) {
size += BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitProperties.copySize, blitProperties.csrDependencies,
blitProperties.outputTimestampPacket != nullptr);
}
size += MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(hwInfo);
size += sizeof(typename GfxFamily::MI_FLUSH_DW) + sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);
return alignUp(size, MemoryConstants::cacheLineSize);
}
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
uint64_t sizeToBlit = blitProperties.copySize;
uint64_t width = 1;
uint64_t height = 1;
uint64_t offset = 0;
while (sizeToBlit != 0) {
if (sizeToBlit > BlitterConstants::maxBlitWidth) {
// dispatch 2D blit: maxBlitWidth x (1 .. maxBlitHeight)
width = BlitterConstants::maxBlitWidth;
height = std::min((sizeToBlit / width), BlitterConstants::maxBlitHeight);
} else {
// dispatch 1D blit: (1 .. maxBlitWidth) x 1
width = sizeToBlit;
height = 1;
}
auto bltCmd = linearStream.getSpaceForCmd<typename GfxFamily::XY_COPY_BLT>();
*bltCmd = GfxFamily::cmdInitXyCopyBlt;
bltCmd->setTransferWidth(static_cast<uint32_t>(width));
bltCmd->setTransferHeight(static_cast<uint32_t>(height));
bltCmd->setDestinationPitch(static_cast<uint32_t>(width));
bltCmd->setSourcePitch(static_cast<uint32_t>(width));
bltCmd->setDestinationBaseAddress(blitProperties.dstGpuAddress + blitProperties.dstOffset + offset);
bltCmd->setSourceBaseAddress(blitProperties.srcGpuAddress + blitProperties.srcOffset + offset);
appendBlitCommandsForBuffer(blitProperties, *bltCmd, rootDeviceEnvironment);
auto blitSize = width * height;
sizeToBlit -= blitSize;
offset += blitSize;
}
}
} // namespace NEO
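To make the splitting loop above concrete, a standalone sketch with assumed limits (the real BlitterConstants::maxBlitWidth/maxBlitHeight values are hardware specific; 16384 x 16384 is used purely for illustration):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint64_t maxBlitWidth = 16384;  // assumed limit, illustration only
        const uint64_t maxBlitHeight = 16384; // assumed limit, illustration only
        uint64_t sizeToBlit = 300ull * 1024 * 1024; // a 300 MB copy
        size_t numberOfBlits = 0;
        while (sizeToBlit != 0) {
            uint64_t width, height;
            if (sizeToBlit > maxBlitWidth) {
                // full-width 2D blit, as tall as allowed
                width = maxBlitWidth;
                height = std::min(sizeToBlit / width, maxBlitHeight);
            } else {
                // 1D remainder
                width = sizeToBlit;
                height = 1;
            }
            sizeToBlit -= width * height;
            numberOfBlits++;
        }
        // Prints 2: one 16384 x 16384 blit (256 MB) plus one 16384 x 2816 blit (44 MB).
        printf("blits required: %zu\n", numberOfBlits);
        return 0;
    }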

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/blit_commands_helper_base.inl"
namespace NEO {
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::appendBlitCommandsForBuffer(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment) {}
} // namespace NEO

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/cache_policy.h"
#include "helpers/aligned_memory.h"
#include "memory_manager/graphics_allocation.h"
namespace NEO {
bool isL3Capable(void *ptr, size_t size) {
return isAligned<MemoryConstants::cacheLineSize>(ptr) &&
isAligned<MemoryConstants::cacheLineSize>(size);
}
bool isL3Capable(const NEO::GraphicsAllocation &graphicsAllocation) {
return isL3Capable(graphicsAllocation.getUnderlyingBuffer(), graphicsAllocation.getUnderlyingBufferSize());
}
} // namespace NEO

View File

@@ -0,0 +1,19 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "gmm_helper/gmm_lib.h"
namespace CacheSettings {
constexpr uint32_t unknownMocs = GMM_RESOURCE_USAGE_UNKNOWN;
} // namespace CacheSettings
namespace NEO {
class GraphicsAllocation;
bool isL3Capable(void *ptr, size_t size);
bool isL3Capable(const GraphicsAllocation &graphicsAllocation);
} // namespace NEO

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <bitset>
#include <memory>
#include <vector>
namespace NEO {
struct EngineControl;
using EngineControlContainer = std::vector<EngineControl>;
using DeviceBitfield = std::bitset<32>;
} // namespace NEO

View File

@@ -0,0 +1,14 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/completion_stamp.h"
namespace NEO {
const uint32_t CompletionStamp::levelNotReady = 0xFFFFFFF0;
} // namespace NEO

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
typedef uint64_t FlushStamp;
struct CompletionStamp {
uint32_t taskCount;
uint32_t taskLevel;
FlushStamp flushStamp;
static const uint32_t levelNotReady;
};
} // namespace NEO

View File

@@ -0,0 +1,26 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/debug_helpers.h"
#include "debug_settings/debug_settings_manager.h"
#include <assert.h>
#include <cstdio>
namespace NEO {
void debugBreak(int line, const char *file) {
if (DebugManager.flags.EnableDebugBreak.get()) {
printf("Assert was called at %d line in file:\n%s\n", line, file);
assert(false);
}
}
void abortUnrecoverable(int line, const char *file) {
printf("Abort was called at %d line in file:\n%s\n", line, file);
abortExecution();
}
} // namespace NEO

View File

@@ -0,0 +1,36 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/abort.h"
#define UNRECOVERABLE_IF(expression) \
\
if (expression) { \
NEO::abortUnrecoverable(__LINE__, __FILE__); \
}
#define UNREACHABLE(...) std::abort()
#ifndef DEBUG_BREAK_IF
#ifdef _DEBUG
#define DEBUG_BREAK_IF(expression) \
\
if (expression) { \
NEO::debugBreak(__LINE__, __FILE__); \
}
#else
#define DEBUG_BREAK_IF(expression) (void)0
#endif // _DEBUG
#endif // !DEBUG_BREAK_IF
#define UNUSED_VARIABLE(x) ((void)(x))
namespace NEO {
void debugBreak(int line, const char *file);
[[noreturn]] void abortUnrecoverable(int line, const char *file);
} // namespace NEO
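Typical use of the macros above (the validating function is hypothetical, shown only to illustrate the intended pattern):

    void *validateHandle(void *handle, size_t size) {
        UNRECOVERABLE_IF(handle == nullptr); // fatal invariant: aborts in every build
        DEBUG_BREAK_IF(size == 0);           // soft check: asserts only in _DEBUG builds
        UNUSED_VARIABLE(size);               // keeps release builds warning-free
        return handle;
    }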

View File

@@ -0,0 +1,10 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
namespace NEO {
bool isDeferredDeleterEnabled();
} // namespace NEO

View File

@@ -0,0 +1,25 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/dirty_state_helpers.h"
#include "indirect_heap/indirect_heap.h"
using namespace NEO;
bool HeapDirtyState::updateAndCheck(const IndirectHeap *heap) {
if (!heap->getGraphicsAllocation()) {
sizeInPages = 0llu;
return true;
}
bool dirty = gpuBaseAddress != heap->getHeapGpuBase() || sizeInPages != heap->getHeapSizeInPages();
if (dirty) {
gpuBaseAddress = heap->getHeapGpuBase();
sizeInPages = heap->getHeapSizeInPages();
}
return dirty;
}

View File

@@ -0,0 +1,23 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
#include <cstdlib>
namespace NEO {
class IndirectHeap;
class HeapDirtyState {
public:
bool updateAndCheck(const IndirectHeap *heap);
protected:
uint64_t gpuBaseAddress = 0llu;
size_t sizeInPages = 0u;
};
} // namespace NEO

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
namespace NEO {
class CommandStreamReceiver;
class OsContext;
struct EngineControl {
EngineControl() = default;
EngineControl(CommandStreamReceiver *commandStreamReceiver, OsContext *osContext)
: commandStreamReceiver(commandStreamReceiver), osContext(osContext){};
CommandStreamReceiver *commandStreamReceiver = nullptr;
OsContext *osContext = nullptr;
};
} // namespace NEO

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/engine_node_helper.h"
namespace NEO {
namespace EngineHelpers {
bool isCcs(aub_stream::EngineType engineType) {
return engineType == aub_stream::ENGINE_CCS;
}
bool isBcs(aub_stream::EngineType engineType) {
return engineType == aub_stream::ENGINE_BCS;
}
aub_stream::EngineType getBcsEngineType(const HardwareInfo &hwInfo) {
return aub_stream::EngineType::ENGINE_BCS;
}
} // namespace EngineHelpers
} // namespace NEO

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "engine_node.h"
namespace NEO {
struct HardwareInfo;
namespace EngineHelpers {
bool isCcs(aub_stream::EngineType engineType);
bool isBcs(aub_stream::EngineType engineType);
aub_stream::EngineType getBcsEngineType(const HardwareInfo &hwInfo);
}; // namespace EngineHelpers
} // namespace NEO

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
struct ExtendableEnum {
constexpr operator uint32_t() const {
return value;
}
constexpr ExtendableEnum(uint32_t val) : value(val) {}
protected:
uint32_t value;
};
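The pattern this type enables, sketched with invented names: a base set of enumerator objects that other components can extend without editing the defining header.

    // Hypothetical enumeration built on ExtendableEnum (all names are illustration only).
    struct CachePolicy : ExtendableEnum {
        constexpr CachePolicy(uint32_t val) : ExtendableEnum(val) {}
    };
    namespace CachePolicies {
    constexpr CachePolicy Uncached{0};
    constexpr CachePolicy WriteBack{1};
    } // namespace CachePolicies

    // Another component can introduce additional values without touching the code above.
    namespace ExtraCachePolicies {
    constexpr CachePolicy WriteCombined{2};
    } // namespace ExtraCachePolicies

    uint32_t toHardwareSetting(CachePolicy policy) {
        return policy; // implicit conversion through operator uint32_t()
    }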

View File

@@ -0,0 +1,98 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "file_io.h"
#include "helpers/debug_helpers.h"
#include "helpers/stdio.h"
#include <cstring>
#include <new>
std::unique_ptr<char[]> loadDataFromFile(
const char *filename,
size_t &retSize) {
FILE *fp = nullptr;
size_t nsize = 0;
std::unique_ptr<char[]> ret;
DEBUG_BREAK_IF(nullptr == filename);
// Open the file
fopen_s(&fp, filename, "rb");
if (fp) {
// Allocate a buffer for the file contents
fseek(fp, 0, SEEK_END);
nsize = (size_t)ftell(fp);
fseek(fp, 0, SEEK_SET);
ret.reset(new (std::nothrow) char[nsize + 1]);
if (ret) {
// we initialize to all zeroes before reading in data
memset(ret.get(), 0x00, nsize + 1);
auto read = fread(ret.get(), sizeof(unsigned char), nsize, fp);
DEBUG_BREAK_IF(read != nsize);
UNUSED_VARIABLE(read);
} else {
nsize = 0;
}
fclose(fp);
}
retSize = nsize;
return ret;
}
size_t writeDataToFile(
const char *filename,
const void *pData,
size_t dataSize) {
FILE *fp = nullptr;
size_t nsize = 0;
DEBUG_BREAK_IF(nullptr == pData);
DEBUG_BREAK_IF(nullptr == filename);
fopen_s(&fp, filename, "wb");
if (fp) {
nsize = fwrite(pData, sizeof(unsigned char), dataSize, fp);
fclose(fp);
}
return nsize;
}
bool fileExists(const std::string &fileName) {
FILE *pFile = nullptr;
DEBUG_BREAK_IF(fileName.empty());
DEBUG_BREAK_IF(fileName == "");
fopen_s(&pFile, fileName.c_str(), "rb");
if (pFile) {
fclose(pFile);
}
return pFile != nullptr;
}
bool fileExistsHasSize(const std::string &fileName) {
FILE *pFile = nullptr;
size_t nsize = 0;
DEBUG_BREAK_IF(fileName.empty());
DEBUG_BREAK_IF(fileName == "");
fopen_s(&pFile, fileName.c_str(), "rb");
if (pFile) {
fseek(pFile, 0, SEEK_END);
nsize = (size_t)ftell(pFile);
fclose(pFile);
}
return pFile != nullptr && nsize > 0;
}

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
#include <memory>
#include <string>
std::unique_ptr<char[]> loadDataFromFile(
const char *filename,
size_t &retSize);
size_t writeDataToFile(
const char *filename,
const void *pData,
size_t dataSize);
bool fileExists(const std::string &fileName);
bool fileExistsHasSize(const std::string &fileName);
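A usage sketch for the declarations above (file name and payload are examples; assumes this header is included):

    void fileIoExample() {
        const char payload[] = "example payload";
        size_t written = writeDataToFile("example.bin", payload, sizeof(payload));
        if (written == sizeof(payload) && fileExistsHasSize("example.bin")) {
            size_t readSize = 0;
            std::unique_ptr<char[]> data = loadDataFromFile("example.bin", readSize);
            // data now holds readSize bytes; the loader appends one extra zero byte.
            (void)data;
        }
    }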

View File

@@ -0,0 +1,70 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/flat_batch_buffer_helper.h"
#include "execution_environment/execution_environment.h"
#include "memory_manager/graphics_allocation.h"
namespace NEO {
bool FlatBatchBufferHelper::setPatchInfoData(const PatchInfoData &data) {
patchInfoCollection.push_back(data);
return true;
}
bool FlatBatchBufferHelper::removePatchInfoData(uint64_t targetLocation) {
for (auto it = patchInfoCollection.begin(); it != patchInfoCollection.end(); ++it) {
if (it->targetAllocation + it->targetAllocationOffset == targetLocation) {
patchInfoCollection.erase(it);
break;
}
}
return true;
}
bool FlatBatchBufferHelper::registerCommandChunk(uint64_t baseCpu, uint64_t baseGpu, uint64_t startOffset, uint64_t endOffset) {
CommandChunk commandChunk;
commandChunk.baseAddressGpu = baseGpu;
commandChunk.baseAddressCpu = baseCpu;
commandChunk.startOffset = startOffset;
commandChunk.endOffset = endOffset;
return registerCommandChunk(commandChunk);
}
bool FlatBatchBufferHelper::registerCommandChunk(BatchBuffer &batchBuffer, size_t batchBufferStartCommandSize) {
CommandChunk commandChunk;
commandChunk.baseAddressGpu = batchBuffer.stream->getGraphicsAllocation()->getGpuAddress();
commandChunk.baseAddressCpu = reinterpret_cast<uint64_t>(batchBuffer.stream->getCpuBase());
commandChunk.startOffset = batchBuffer.startOffset;
commandChunk.endOffset = batchBuffer.chainedBatchBufferStartOffset + batchBufferStartCommandSize;
return registerCommandChunk(commandChunk);
}
bool FlatBatchBufferHelper::registerCommandChunk(CommandChunk &commandChunk) {
commandChunkList.push_back(commandChunk);
return true;
}
bool FlatBatchBufferHelper::registerBatchBufferStartAddress(uint64_t commandAddress, uint64_t startAddress) {
batchBufferStartAddressSequence.insert(std::pair<uint64_t, uint64_t>(commandAddress, startAddress));
return true;
}
void FlatBatchBufferHelper::fixCrossThreadDataInfo(std::vector<PatchInfoData> &data, size_t offsetCrossThreadData, uint64_t gpuAddress) {
for (auto &patchInfoData : data) {
if (patchInfoData.sourceType == PatchInfoAllocationType::KernelArg) {
patchInfoData.targetAllocation = gpuAddress;
patchInfoData.targetAllocationOffset += offsetCrossThreadData;
}
}
}
MemoryManager *FlatBatchBufferHelper::getMemoryManager() const {
return executionEnvironemnt.memoryManager.get();
}
}; // namespace NEO

View File

@@ -0,0 +1,51 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_stream/submissions_aggregator.h"
#include "helpers/address_patch.h"
#include <map>
#include <vector>
namespace NEO {
enum class DispatchMode;
class MemoryManager;
class ExecutionEnvironment;
class FlatBatchBufferHelper {
public:
FlatBatchBufferHelper(ExecutionEnvironment &executionEnvironemnt) : executionEnvironemnt(executionEnvironemnt) {}
virtual ~FlatBatchBufferHelper(){};
MOCKABLE_VIRTUAL bool setPatchInfoData(const PatchInfoData &data);
MOCKABLE_VIRTUAL bool removePatchInfoData(uint64_t targetLocation);
MOCKABLE_VIRTUAL bool registerCommandChunk(uint64_t baseCpu, uint64_t baseGpu, uint64_t startOffset, uint64_t endOffset);
MOCKABLE_VIRTUAL bool registerCommandChunk(CommandChunk &commandChunk);
MOCKABLE_VIRTUAL bool registerCommandChunk(BatchBuffer &batchBuffer, size_t batchBufferStartCommandSize);
MOCKABLE_VIRTUAL bool registerBatchBufferStartAddress(uint64_t commandAddress, uint64_t startAddress);
virtual GraphicsAllocation *flattenBatchBuffer(uint32_t rootDeviceIndex, BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) = 0;
virtual char *getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) = 0;
virtual void removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping, const HardwareInfo &hwInfo) = 0;
virtual void collectScratchSpacePatchInfo(uint64_t scratchAddress, uint64_t commandOffset, const LinearStream &csr) = 0;
static void fixCrossThreadDataInfo(std::vector<PatchInfoData> &data, size_t offsetCrossThreadData, uint64_t gpuAddress);
std::vector<CommandChunk> &getCommandChunkList() { return commandChunkList; }
std::vector<PatchInfoData> &getPatchInfoCollection() { return patchInfoCollection; }
std::map<uint64_t, uint64_t> &getBatchBufferStartAddressSequence() { return batchBufferStartAddressSequence; }
protected:
MemoryManager *getMemoryManager() const;
ExecutionEnvironment &executionEnvironemnt;
std::vector<PatchInfoData> patchInfoCollection;
std::vector<CommandChunk> commandChunkList;
std::map<uint64_t, uint64_t> batchBufferStartAddressSequence;
};
} // namespace NEO

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/flat_batch_buffer_helper.h"
namespace NEO {
template <typename GfxFamily>
class FlatBatchBufferHelperHw : public FlatBatchBufferHelper {
public:
using FlatBatchBufferHelper::FlatBatchBufferHelper;
GraphicsAllocation *flattenBatchBuffer(uint32_t rootDeviceIndex, BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode) override;
char *getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) override;
void removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping, const HardwareInfo &hwInfo) override;
void collectScratchSpacePatchInfo(uint64_t scratchAddress, uint64_t commandOffset, const LinearStream &csr) override;
};
} // namespace NEO

View File

@@ -0,0 +1,203 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "command_stream/command_stream_receiver.h"
#include "helpers/flat_batch_buffer_helper_hw.h"
#include "helpers/hw_helper.h"
#include "helpers/string.h"
#include "memory_manager/graphics_allocation.h"
#include "memory_manager/memory_manager.h"
namespace NEO {
template <typename GfxFamily>
GraphicsAllocation *FlatBatchBufferHelperHw<GfxFamily>::flattenBatchBuffer(uint32_t rootDeviceIndex, BatchBuffer &batchBuffer, size_t &sizeBatchBuffer,
DispatchMode dispatchMode) {
typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
typedef typename GfxFamily::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END;
typedef typename GfxFamily::MI_USER_INTERRUPT MI_USER_INTERRUPT;
GraphicsAllocation *flatBatchBuffer = nullptr;
size_t indirectPatchCommandsSize = 0u;
std::vector<PatchInfoData> indirectPatchInfo;
std::unique_ptr<char[]> indirectPatchCommands(getIndirectPatchCommands(indirectPatchCommandsSize, indirectPatchInfo));
if (dispatchMode == DispatchMode::ImmediateDispatch) {
if (batchBuffer.chainedBatchBuffer) {
batchBuffer.chainedBatchBuffer->setAubWritable(false, GraphicsAllocation::defaultBank);
auto sizeMainBatchBuffer = batchBuffer.chainedBatchBufferStartOffset - batchBuffer.startOffset;
auto alignedMainBatchBufferSize = alignUp(sizeMainBatchBuffer + indirectPatchCommandsSize + batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize(), MemoryConstants::pageSize);
AllocationProperties flatBatchBufferProperties(rootDeviceIndex, alignedMainBatchBufferSize, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY);
flatBatchBufferProperties.alignment = MemoryConstants::pageSize;
flatBatchBuffer =
getMemoryManager()->allocateGraphicsMemoryWithProperties(flatBatchBufferProperties);
UNRECOVERABLE_IF(flatBatchBuffer == nullptr);
// Copy main batchbuffer
memcpy_s(flatBatchBuffer->getUnderlyingBuffer(), sizeMainBatchBuffer,
ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset),
sizeMainBatchBuffer);
// Copy indirect patch commands
memcpy_s(ptrOffset(flatBatchBuffer->getUnderlyingBuffer(), sizeMainBatchBuffer), indirectPatchCommandsSize,
indirectPatchCommands.get(), indirectPatchCommandsSize);
// Copy chained batchbuffer
memcpy_s(ptrOffset(flatBatchBuffer->getUnderlyingBuffer(), sizeMainBatchBuffer + indirectPatchCommandsSize),
batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize(), batchBuffer.chainedBatchBuffer->getUnderlyingBuffer(),
batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize());
sizeBatchBuffer = flatBatchBufferProperties.size;
patchInfoCollection.insert(std::end(patchInfoCollection), std::begin(indirectPatchInfo), std::end(indirectPatchInfo));
}
} else if (dispatchMode == DispatchMode::BatchedDispatch) {
CommandChunk firstChunk;
for (auto &chunk : commandChunkList) {
bool found = false;
for (auto &batchBuffer : batchBufferStartAddressSequence) {
if ((batchBuffer.first <= chunk.baseAddressGpu + chunk.endOffset) && (batchBuffer.first >= chunk.baseAddressGpu + chunk.startOffset)) {
chunk.batchBufferStartLocation = batchBuffer.first;
chunk.batchBufferStartAddress = batchBuffer.second;
chunk.endOffset = chunk.batchBufferStartLocation - chunk.baseAddressGpu;
}
if (batchBuffer.second == chunk.baseAddressGpu + chunk.startOffset) {
found = true;
}
}
if (!found) {
firstChunk = chunk;
}
}
std::vector<CommandChunk> orderedChunks;
CommandChunk &nextChunk = firstChunk;
while (true) {
bool hasNextChunk = false;
for (auto &chunk : commandChunkList) {
if (nextChunk.batchBufferStartAddress == chunk.baseAddressGpu + chunk.startOffset) {
hasNextChunk = true;
orderedChunks.push_back(nextChunk);
nextChunk = chunk;
break;
}
}
if (!hasNextChunk) {
nextChunk.endOffset -= sizeof(MI_BATCH_BUFFER_START);
orderedChunks.push_back(nextChunk);
break;
}
}
uint64_t flatBatchBufferSize = 0u;
std::vector<PatchInfoData> patchInfoCopy = patchInfoCollection;
patchInfoCollection.clear();
for (auto &chunk : orderedChunks) {
for (auto &patch : patchInfoCopy) {
if (patch.targetAllocation + patch.targetAllocationOffset >= chunk.baseAddressGpu + chunk.startOffset && patch.targetAllocation + patch.targetAllocationOffset <= chunk.baseAddressGpu + chunk.endOffset) {
patch.targetAllocationOffset = patch.targetAllocationOffset - chunk.startOffset + flatBatchBufferSize + indirectPatchCommandsSize;
patchInfoCollection.push_back(patch);
}
}
flatBatchBufferSize += chunk.endOffset - chunk.startOffset;
}
patchInfoCollection.insert(std::end(patchInfoCollection), std::begin(indirectPatchInfo), std::end(indirectPatchInfo));
flatBatchBufferSize += sizeof(MI_USER_INTERRUPT);
flatBatchBufferSize += sizeof(MI_BATCH_BUFFER_END);
flatBatchBufferSize += indirectPatchCommandsSize;
flatBatchBufferSize = alignUp(flatBatchBufferSize, MemoryConstants::pageSize);
flatBatchBufferSize += CSRequirements::csOverfetchSize;
AllocationProperties flatBatchBufferProperties(rootDeviceIndex, static_cast<size_t>(flatBatchBufferSize), GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY);
flatBatchBufferProperties.alignment = MemoryConstants::pageSize;
flatBatchBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(flatBatchBufferProperties);
UNRECOVERABLE_IF(flatBatchBuffer == nullptr);
char *ptr = static_cast<char *>(flatBatchBuffer->getUnderlyingBuffer());
memcpy_s(ptr, indirectPatchCommandsSize, indirectPatchCommands.get(), indirectPatchCommandsSize);
ptr += indirectPatchCommandsSize;
for (auto &chunk : orderedChunks) {
size_t chunkSize = static_cast<size_t>(chunk.endOffset - chunk.startOffset);
memcpy_s(ptr,
chunkSize,
reinterpret_cast<char *>(ptrOffset(chunk.baseAddressCpu, static_cast<size_t>(chunk.startOffset))),
chunkSize);
ptr += chunkSize;
}
auto pCmdMui = reinterpret_cast<MI_USER_INTERRUPT *>(ptr);
*pCmdMui = GfxFamily::cmdInitUserInterrupt;
ptr += sizeof(MI_USER_INTERRUPT);
auto pCmdBBend = reinterpret_cast<MI_BATCH_BUFFER_END *>(ptr);
*pCmdBBend = GfxFamily::cmdInitBatchBufferEnd;
ptr += sizeof(MI_BATCH_BUFFER_END);
sizeBatchBuffer = static_cast<size_t>(flatBatchBufferSize);
commandChunkList.clear();
batchBufferStartAddressSequence.clear();
}
return flatBatchBuffer;
}
template <typename GfxFamily>
char *FlatBatchBufferHelperHw<GfxFamily>::getIndirectPatchCommands(size_t &indirectPatchCommandsSize, std::vector<PatchInfoData> &indirectPatchInfo) {
typedef typename GfxFamily::MI_STORE_DATA_IMM MI_STORE_DATA_IMM;
indirectPatchCommandsSize = 0;
for (auto &patchInfoData : patchInfoCollection) {
if (patchInfoData.requiresIndirectPatching()) {
indirectPatchCommandsSize += sizeof(MI_STORE_DATA_IMM);
}
}
uint64_t stiCommandOffset = 0;
std::vector<PatchInfoData> patchInfoCopy = patchInfoCollection;
std::unique_ptr<char[]> buffer(new char[indirectPatchCommandsSize]);
LinearStream indirectPatchCommandStream(buffer.get(), indirectPatchCommandsSize);
patchInfoCollection.clear();
for (auto &patchInfoData : patchInfoCopy) {
if (patchInfoData.requiresIndirectPatching()) {
auto storeDataImmediate = indirectPatchCommandStream.getSpaceForCmd<MI_STORE_DATA_IMM>();
*storeDataImmediate = GfxFamily::cmdInitStoreDataImm;
storeDataImmediate->setAddress(patchInfoData.targetAllocation + patchInfoData.targetAllocationOffset);
storeDataImmediate->setStoreQword(patchInfoData.patchAddressSize != sizeof(uint32_t));
storeDataImmediate->setDataDword0(static_cast<uint32_t>((patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset) & 0x0000FFFFFFFFULL));
storeDataImmediate->setDataDword1(static_cast<uint32_t>((patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset) >> 32));
PatchInfoData patchInfoForAddress(patchInfoData.targetAllocation, patchInfoData.targetAllocationOffset, patchInfoData.targetType, 0u, stiCommandOffset + sizeof(MI_STORE_DATA_IMM) - 2 * sizeof(uint64_t), PatchInfoAllocationType::Default);
PatchInfoData patchInfoForValue(patchInfoData.sourceAllocation, patchInfoData.sourceAllocationOffset, patchInfoData.sourceType, 0u, stiCommandOffset + sizeof(MI_STORE_DATA_IMM) - sizeof(uint64_t), PatchInfoAllocationType::Default);
indirectPatchInfo.push_back(patchInfoForAddress);
indirectPatchInfo.push_back(patchInfoForValue);
stiCommandOffset += sizeof(MI_STORE_DATA_IMM);
} else {
patchInfoCollection.push_back(patchInfoData);
}
}
return buffer.release();
}
template <typename GfxFamily>
void FlatBatchBufferHelperHw<GfxFamily>::removePipeControlData(size_t pipeControlLocationSize, void *pipeControlForNooping, const HardwareInfo &hwInfo) {
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
size_t numPipeControls = (pipeControlLocationSize - MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(hwInfo)) / (sizeof(PIPE_CONTROL));
for (size_t i = 0; i < numPipeControls; i++) {
PIPE_CONTROL *erasedPipeControl = reinterpret_cast<PIPE_CONTROL *>(pipeControlForNooping);
removePatchInfoData(reinterpret_cast<uint64_t>(erasedPipeControl) + (i + 1) * sizeof(PIPE_CONTROL) - 2 * sizeof(uint64_t));
removePatchInfoData(reinterpret_cast<uint64_t>(erasedPipeControl) + (i + 1) * sizeof(PIPE_CONTROL) - sizeof(uint64_t));
}
}
template <typename GfxFamily>
void FlatBatchBufferHelperHw<GfxFamily>::collectScratchSpacePatchInfo(uint64_t scratchAddress, uint64_t commandOffset, const LinearStream &csr) {
if (scratchAddress) {
auto scratchOffset = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(csr.getCpuBase()) + commandOffset)[0] & 0x3FF;
PatchInfoData patchInfoData(scratchAddress, scratchOffset, PatchInfoAllocationType::ScratchSpace, csr.getGraphicsAllocation()->getGpuAddress(), commandOffset, PatchInfoAllocationType::Default);
patchInfoCollection.push_back(patchInfoData);
}
}
}; // namespace NEO

View File

@@ -0,0 +1,65 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/flush_stamp.h"
using namespace NEO;
FlushStampTracker::FlushStampTracker(bool allocateStamp) {
if (allocateStamp) {
flushStampSharedHandle = new FlushStampTrackingObj();
flushStampSharedHandle->incRefInternal();
}
}
FlushStampTracker::~FlushStampTracker() {
if (flushStampSharedHandle) {
flushStampSharedHandle->decRefInternal();
}
}
FlushStamp FlushStampTracker::peekStamp() const {
if (flushStampSharedHandle->initialized) {
return flushStampSharedHandle->flushStamp;
} else {
return 0;
}
}
void FlushStampTracker::setStamp(FlushStamp stamp) {
if (stamp != 0) {
flushStampSharedHandle->flushStamp = stamp;
flushStampSharedHandle->initialized = true;
}
}
void FlushStampTracker::replaceStampObject(FlushStampTrackingObj *stampObj) {
if (stampObj) {
stampObj->incRefInternal();
if (flushStampSharedHandle) {
flushStampSharedHandle->decRefInternal();
}
flushStampSharedHandle = stampObj;
}
}
void FlushStampUpdateHelper::insert(FlushStampTrackingObj *stampObj) {
if (stampObj) {
flushStampsToUpdate.push_back(stampObj);
}
}
void FlushStampUpdateHelper::updateAll(const FlushStamp &flushStamp) {
for (const auto &stamp : flushStampsToUpdate) {
stamp->flushStamp = flushStamp;
stamp->initialized = true;
}
}
size_t FlushStampUpdateHelper::size() const {
return flushStampsToUpdate.size();
}

View File

@@ -0,0 +1,48 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/completion_stamp.h"
#include "utilities/reference_tracked_object.h"
#include "utilities/stackvec.h"
namespace NEO {
struct FlushStampTrackingObj : public ReferenceTrackedObject<FlushStampTrackingObj> {
FlushStamp flushStamp = 0;
std::atomic<bool> initialized{false};
};
class FlushStampTracker {
public:
FlushStampTracker() = delete;
FlushStampTracker(bool allocateStamp);
~FlushStampTracker();
FlushStamp peekStamp() const;
void setStamp(FlushStamp stamp);
void replaceStampObject(FlushStampTrackingObj *stampObj);
// Temporary. Method will be removed
FlushStampTrackingObj *getStampReference() {
return flushStampSharedHandle;
}
protected:
FlushStampTrackingObj *flushStampSharedHandle = nullptr;
};
class FlushStampUpdateHelper {
public:
void insert(FlushStampTrackingObj *stampObj);
void updateAll(const FlushStamp &flushStamp);
size_t size() const;
private:
StackVec<FlushStampTrackingObj *, 64> flushStampsToUpdate;
};
} // namespace NEO
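A minimal round trip through the flush-stamp API above (stamp values are arbitrary):

    void flushStampExample() {
        NEO::FlushStampTracker tracker(true); // allocates and owns a tracking object
        tracker.setStamp(42);
        NEO::FlushStamp last = tracker.peekStamp(); // 42 once a stamp has been set
        (void)last;

        NEO::FlushStampUpdateHelper updater;
        updater.insert(tracker.getStampReference());
        updater.updateAll(43); // propagates the new stamp to every registered object
    }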

View File

@@ -0,0 +1,96 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "get_info_status.h"
#include <cstring>
// Needed for Linux compatibility with memcpy_s
#include "helpers/string.h"
inline GetInfoStatus getInfo(void *destParamValue, size_t destParamValueSize,
const void *srcParamValue, size_t srcParamValueSize) {
auto retVal = GetInfoStatus::INVALID_VALUE;
if (srcParamValue && srcParamValueSize) {
if (!destParamValue && !destParamValueSize) {
// Report ok if they're looking for size.
retVal = GetInfoStatus::SUCCESS;
} else if (destParamValue && destParamValueSize >= srcParamValueSize) {
// Report ok if we can copy safely
retVal = GetInfoStatus::SUCCESS;
memcpy_s(destParamValue, destParamValueSize, srcParamValue, srcParamValueSize);
} else if (!destParamValue) {
// Report ok if destParamValue == nullptr and destParamValueSize > 0
retVal = GetInfoStatus::SUCCESS;
}
}
return retVal;
}
struct GetInfoHelper {
GetInfoHelper(void *dst, size_t dstSize, size_t *retSize, GetInfoStatus *retVal = nullptr)
: dst(dst), dstSize(dstSize), retSize(retSize), retVal(retVal) {
}
template <typename DataType>
GetInfoStatus set(const DataType &val) {
auto errCode = GetInfoStatus::SUCCESS;
if (retSize != nullptr) {
*retSize = sizeof(val);
}
if (dst != nullptr) {
if (dstSize >= sizeof(val)) {
*reinterpret_cast<DataType *>(dst) = val;
} else {
errCode = GetInfoStatus::INVALID_VALUE;
}
}
if (retVal)
*retVal = errCode;
return errCode;
}
template <typename DataType>
static void set(DataType *dst, DataType val) {
if (dst) {
*dst = val;
}
}
void *dst;
size_t dstSize;
size_t *retSize;
GetInfoStatus *retVal;
};
struct ErrorCodeHelper {
ErrorCodeHelper(int *errcodeRet, int defaultCode)
: errcodeRet(errcodeRet) {
set(defaultCode);
}
void set(int code) {
if (errcodeRet != nullptr) {
*errcodeRet = code;
}
localErrcode = code;
}
int *errcodeRet;
int localErrcode;
};
template <typename T>
T getValidParam(T param, T defaultVal = 1, T invalidVal = 0) {
if (param == invalidVal) {
return defaultVal;
}
return param;
}
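How these helpers are typically driven from a getInfo-style query (the wrapper function and the value it reports are invented for the example):

    GetInfoStatus queryExampleValue(void *paramValue, size_t paramValueSize, size_t *paramValueSizeRet) {
        const uint32_t value = 12; // hypothetical value being reported
        GetInfoStatus status = getInfo(paramValue, paramValueSize, &value, sizeof(value));
        GetInfoHelper::set(paramValueSizeRet, sizeof(value)); // report the required size if requested
        return status;
    }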

View File

@@ -0,0 +1,14 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
enum class GetInfoStatus {
INVALID_CONTEXT = -2,
INVALID_VALUE = -1,
SUCCESS = 0
};

View File

@@ -0,0 +1,117 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/aligned_memory.h"
#include "utilities/compiler_support.h"
#include <cstdint>
namespace NEO {
// clang-format off
#define HASH_JENKINS_MIX(a,b,c) \
{ \
a -= b; a -= c; a ^= (c>>13); \
b -= c; b -= a; b ^= (a<<8); \
c -= a; c -= b; c ^= (b>>13); \
a -= b; a -= c; a ^= (c>>12); \
b -= c; b -= a; b ^= (a<<16); \
c -= a; c -= b; c ^= (b>>5); \
a -= b; a -= c; a ^= (c>>3); \
b -= c; b -= a; b ^= (a<<10); \
c -= a; c -= b; c ^= (b>>15); \
}
// clang-format on
class Hash {
public:
Hash() {
reset();
};
uint32_t getValue(const char *data, size_t size) {
uint32_t value = 0;
switch (size) {
case 3:
value = static_cast<uint32_t>(*reinterpret_cast<const unsigned char *>(data++));
value <<= 8;
CPP_ATTRIBUTE_FALLTHROUGH;
case 2:
value |= static_cast<uint32_t>(*reinterpret_cast<const unsigned char *>(data++));
value <<= 8;
CPP_ATTRIBUTE_FALLTHROUGH;
case 1:
value |= static_cast<uint32_t>(*reinterpret_cast<const unsigned char *>(data++));
value <<= 8;
}
return value;
}
void update(const char *buff, size_t size) {
if (buff == nullptr)
return;
if ((reinterpret_cast<uintptr_t>(buff) & 0x3) != 0) {
const unsigned char *tmp = (const unsigned char *)buff;
while (size >= sizeof(uint32_t)) {
uint32_t value = (uint32_t)tmp[0] + (((uint32_t)tmp[1]) << 8) + ((uint32_t)tmp[2] << 16) + ((uint32_t)tmp[3] << 24);
a ^= value;
HASH_JENKINS_MIX(a, hi, lo);
size -= sizeof(uint32_t);
tmp += sizeof(uint32_t);
}
if (size > 0) {
uint32_t value = getValue((char *)tmp, size);
a ^= value;
HASH_JENKINS_MIX(a, hi, lo);
}
} else {
const uint32_t *tmp = reinterpret_cast<const uint32_t *>(buff);
while (size >= sizeof(*tmp)) {
a ^= *(tmp++);
HASH_JENKINS_MIX(a, hi, lo);
size -= sizeof(*tmp);
}
if (size > 0) {
uint32_t value = getValue((char *)tmp, size);
a ^= value;
HASH_JENKINS_MIX(a, hi, lo);
}
}
}
uint64_t finish() {
return (((uint64_t)hi) << 32) | lo;
}
void reset() {
a = 0x428a2f98;
hi = 0x71374491;
lo = 0xb5c0fbcf;
}
static uint64_t hash(const char *buff, size_t size) {
Hash hash;
hash.update(buff, size);
return hash.finish();
}
protected:
uint32_t a, hi, lo;
};
template <typename T>
uint32_t hashPtrToU32(const T *src) {
auto asInt = reinterpret_cast<uintptr_t>(src);
constexpr auto m = sizeof(uintptr_t) / 8;
asInt = asInt ^ ((asInt & ~(m - 1)) >> (m * 32));
return static_cast<uint32_t>(asInt);
}
} // namespace NEO
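An illustrative usage sketch (not part of the committed file), assuming the declarations from hash.h above.
// One-shot hashing via the static convenience wrapper:
const char payload[] = "neo-runtime";
uint64_t digest = NEO::Hash::hash(payload, sizeof(payload));
// Folding a pointer into 32 bits (on 64-bit builds the upper half is XOR-ed into the lower half):
int dummy = 0;
uint32_t key = NEO::hashPtrToU32(&dummy);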

View File

@@ -0,0 +1,35 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/heap_helper.h"
#include "indirect_heap/indirect_heap.h"
#include "memory_manager/graphics_allocation.h"
#include "memory_manager/internal_allocation_storage.h"
#include "memory_manager/memory_manager.h"
namespace NEO {
GraphicsAllocation *HeapHelper::getHeapAllocation(uint32_t heapType, size_t heapSize, size_t alignment, uint32_t rootDeviceIndex) {
auto allocationType = GraphicsAllocation::AllocationType::LINEAR_STREAM;
if (IndirectHeap::Type::INDIRECT_OBJECT == heapType) {
allocationType = GraphicsAllocation::AllocationType::INTERNAL_HEAP;
}
auto allocation = this->storageForReuse->obtainReusableAllocation(heapSize, allocationType);
if (allocation) {
return allocation.release();
}
NEO::AllocationProperties properties{rootDeviceIndex, true, heapSize, allocationType, isMultiOsContextCapable, false, {}};
properties.alignment = alignment;
return this->memManager->allocateGraphicsMemoryWithProperties(properties);
}
void HeapHelper::storeHeapAllocation(GraphicsAllocation *heapAllocation) {
this->storageForReuse->storeAllocation(std::unique_ptr<NEO::GraphicsAllocation>(heapAllocation), NEO::AllocationUsage::REUSABLE_ALLOCATION);
}
} // namespace NEO

View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <stddef.h>
#include <stdint.h>
namespace NEO {
class MemoryManager;
class GraphicsAllocation;
class InternalAllocationStorage;
class HeapHelper {
public:
HeapHelper(MemoryManager *memManager, InternalAllocationStorage *storageForReuse, bool isMultiOsContextCapable) : storageForReuse(storageForReuse),
memManager(memManager),
isMultiOsContextCapable(isMultiOsContextCapable) {}
GraphicsAllocation *getHeapAllocation(uint32_t heapType, size_t heapSize, size_t alignment, uint32_t rootDeviceIndex);
void storeHeapAllocation(GraphicsAllocation *heapAllocation);
protected:
InternalAllocationStorage *storageForReuse = nullptr;
MemoryManager *memManager = nullptr;
bool isMultiOsContextCapable = false;
};
} // namespace NEO

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#ifdef SUPPORT_GEN8
#include "gen8/hw_cmds.h"
#endif
#ifdef SUPPORT_GEN9
#include "gen9/hw_cmds.h"
#endif
#ifdef SUPPORT_GEN11
#include "gen11/hw_cmds.h"
#endif
#ifdef SUPPORT_GEN12LP
#include "gen12lp/hw_cmds.h"
#endif

View File

@@ -0,0 +1,52 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/hw_helper.h"
#include "debug_settings/debug_settings_manager.h"
namespace NEO {
HwHelper *hwHelperFactory[IGFX_MAX_CORE] = {};
HwHelper &HwHelper::get(GFXCORE_FAMILY gfxCore) {
return *hwHelperFactory[gfxCore];
}
bool HwHelper::renderCompressedBuffersSupported(const HardwareInfo &hwInfo) {
if (DebugManager.flags.RenderCompressedBuffersEnabled.get() != -1) {
return !!DebugManager.flags.RenderCompressedBuffersEnabled.get();
}
return hwInfo.capabilityTable.ftrRenderCompressedBuffers;
}
bool HwHelper::renderCompressedImagesSupported(const HardwareInfo &hwInfo) {
if (DebugManager.flags.RenderCompressedImagesEnabled.get() != -1) {
return !!DebugManager.flags.RenderCompressedImagesEnabled.get();
}
return hwInfo.capabilityTable.ftrRenderCompressedImages;
}
bool HwHelper::cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo) {
int32_t dbgFlag = DebugManager.flags.EnableCacheFlushAfterWalker.get();
if (dbgFlag == 1) {
return true;
} else if (dbgFlag == 0) {
return false;
}
return hwInfo.capabilityTable.supportCacheFlushAfterWalker;
}
uint32_t HwHelper::getMaxThreadsForVfe(const HardwareInfo &hwInfo) {
uint32_t threadsPerEU = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) + hwInfo.capabilityTable.extraQuantityThreadsPerEU;
return hwInfo.gtSystemInfo.EUCount * threadsPerEU;
}
uint32_t HwHelper::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
uint32_t numThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount;
return maxNumEUsPerSubSlice * numThreadsPerEU;
}
} // namespace NEO
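A worked example of the thread-count arithmetic above, using illustrative numbers rather than any real product configuration:
// ThreadCount = 224, EUCount = 32, extraQuantityThreadsPerEU = 0
// threadsPerEU = 224 / 32 + 0 = 7
// getMaxThreadsForVfe()                -> 32 * 7 = 224
// getMaxThreadsForWorkgroup(hwInfo, 8) -> 8 * 7  = 56 threads per sub-slice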

View File

@@ -0,0 +1,273 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_stream/linear_stream.h"
#include "helpers/aux_translation.h"
#include "helpers/hw_cmds.h"
#include "opencl/source/built_ins/sip.h"
#include "opencl/source/gen_common/aub_mapper.h"
#include "opencl/source/mem_obj/buffer.h"
#include <cstdint>
#include <string>
#include <type_traits>
namespace NEO {
class ExecutionEnvironment;
class GraphicsAllocation;
struct HardwareCapabilities;
class GmmHelper;
class HwHelper {
public:
static HwHelper &get(GFXCORE_FAMILY gfxCore);
virtual uint32_t getBindingTableStateSurfaceStatePointer(const void *pBindingTable, uint32_t index) = 0;
virtual size_t getBindingTableStateSize() const = 0;
virtual uint32_t getBindingTableStateAlignement() const = 0;
virtual size_t getInterfaceDescriptorDataSize() const = 0;
virtual size_t getMaxBarrierRegisterPerSlice() const = 0;
virtual uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const = 0;
virtual uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) = 0;
virtual void setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) = 0;
virtual void adjustDefaultEngineType(HardwareInfo *pHwInfo) = 0;
virtual void setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) = 0;
virtual bool isL3Configurable(const HardwareInfo &hwInfo) = 0;
virtual SipKernelType getSipKernelType(bool debuggingActive) = 0;
virtual bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const = 0;
virtual bool isPageTableManagerSupported(const HardwareInfo &hwInfo) const = 0;
virtual bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const = 0;
virtual const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const = 0;
virtual bool hvAlign4Required() const = 0;
virtual bool obtainRenderBufferCompressionPreference(const HardwareInfo &hwInfo, const size_t size) const = 0;
virtual bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) = 0;
static bool renderCompressedBuffersSupported(const HardwareInfo &hwInfo);
static bool renderCompressedImagesSupported(const HardwareInfo &hwInfo);
static bool cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo);
virtual bool timestampPacketWriteSupported() const = 0;
virtual size_t getRenderSurfaceStateSize() const = 0;
virtual void setRenderSurfaceStateForBuffer(ExecutionEnvironment &executionEnvironment,
void *surfaceStateBuffer,
size_t bufferSize,
uint64_t gpuVa,
size_t offset,
uint32_t pitch,
GraphicsAllocation *gfxAlloc,
bool isReadOnly,
uint32_t surfaceType,
bool forceNonAuxMode) = 0;
virtual const std::vector<aub_stream::EngineType> getGpgpuEngineInstances() const = 0;
virtual const StackVec<size_t, 3> getDeviceSubGroupSizes() const = 0;
virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0;
virtual std::string getExtensions() const = 0;
static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const;
virtual uint32_t getMetricsLibraryGenId() const = 0;
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
virtual bool requiresAuxResolves() const = 0;
virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0;
virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
uint32_t threadsPerEu) = 0;
virtual uint32_t alignSlmSize(uint32_t slmSize) = 0;
virtual bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) = 0;
virtual uint32_t getMinimalSIMDSize() = 0;
virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0;
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
static uint32_t getEnginesCount(const HardwareInfo &hwInfo);
static constexpr uint32_t lowPriorityGpgpuEngineIndex = 1;
static constexpr uint32_t internalUsageEngineIndex = 2;
protected:
HwHelper() = default;
};
template <typename GfxFamily>
class HwHelperHw : public HwHelper {
public:
static HwHelper &get() {
static HwHelperHw<GfxFamily> hwHelper;
return hwHelper;
}
static const aub_stream::EngineType lowPriorityEngineType;
uint32_t getBindingTableStateSurfaceStatePointer(const void *pBindingTable, uint32_t index) override {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
const BINDING_TABLE_STATE *bindingTableState = static_cast<const BINDING_TABLE_STATE *>(pBindingTable);
return bindingTableState[index].getRawData(0);
}
size_t getBindingTableStateSize() const override {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
return sizeof(BINDING_TABLE_STATE);
}
uint32_t getBindingTableStateAlignement() const override {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
return BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE;
}
size_t getInterfaceDescriptorDataSize() const override {
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
return sizeof(INTERFACE_DESCRIPTOR_DATA);
}
size_t getRenderSurfaceStateSize() const override {
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
return sizeof(RENDER_SURFACE_STATE);
}
const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const override;
size_t getMaxBarrierRegisterPerSlice() const override;
uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override;
uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override;
uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) override;
void setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) override;
void adjustDefaultEngineType(HardwareInfo *pHwInfo) override;
void setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) override;
bool isL3Configurable(const HardwareInfo &hwInfo) override;
SipKernelType getSipKernelType(bool debuggingActive) override;
bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const override;
bool hvAlign4Required() const override;
bool obtainRenderBufferCompressionPreference(const HardwareInfo &hwInfo, const size_t size) const override;
bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) override;
bool timestampPacketWriteSupported() const override;
bool isPageTableManagerSupported(const HardwareInfo &hwInfo) const override;
bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const override;
void setRenderSurfaceStateForBuffer(ExecutionEnvironment &executionEnvironment,
void *surfaceStateBuffer,
size_t bufferSize,
uint64_t gpuVa,
size_t offset,
uint32_t pitch,
GraphicsAllocation *gfxAlloc,
bool isReadOnly,
uint32_t surfaceType,
bool forceNonAuxMode) override;
const std::vector<aub_stream::EngineType> getGpgpuEngineInstances() const override;
const StackVec<size_t, 3> getDeviceSubGroupSizes() const override;
bool getEnableLocalMemory(const HardwareInfo &hwInfo) const override;
std::string getExtensions() const override;
uint32_t getMetricsLibraryGenId() const override;
uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const override;
bool requiresAuxResolves() const override;
bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) override;
uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) override;
uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) override;
uint32_t alignSlmSize(uint32_t slmSize) override;
static AuxTranslationMode getAuxTranslationMode();
static bool isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo);
bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override;
static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo);
bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) override;
uint32_t getMinimalSIMDSize() override;
protected:
static const AuxTranslationMode defaultAuxTranslationMode;
HwHelperHw() = default;
};
struct DwordBuilder {
static uint32_t build(uint32_t bitNumberToSet, bool masked, bool set = true, uint32_t initValue = 0) {
uint32_t dword = initValue;
if (set) {
dword |= (1 << bitNumberToSet);
}
if (masked) {
dword |= (1 << (bitNumberToSet + 16));
}
return dword;
};
};
template <typename GfxFamily>
struct LriHelper {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
static MI_LOAD_REGISTER_IMM *program(LinearStream *cmdStream, uint32_t address, uint32_t value) {
auto lri = (MI_LOAD_REGISTER_IMM *)cmdStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
*lri = GfxFamily::cmdInitLoadRegisterImm;
lri->setRegisterOffset(address);
lri->setDataDword(value);
return lri;
}
};
template <typename GfxFamily>
struct MemorySynchronizationCommands {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
static PIPE_CONTROL *obtainPipeControlAndProgramPostSyncOperation(LinearStream &commandStream,
POST_SYNC_OPERATION operation,
uint64_t gpuAddress,
uint64_t immediateData,
bool dcFlush, const HardwareInfo &hwInfo);
static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
static void addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
static void setExtraPipeControlProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo);
static PIPE_CONTROL *addPipeControl(LinearStream &commandStream, bool dcFlush);
static size_t getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo);
static size_t getSizeForSinglePipeControl();
static size_t getSizeForSingleSynchronization(const HardwareInfo &hwInfo);
static size_t getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo);
static PIPE_CONTROL *addFullCacheFlush(LinearStream &commandStream);
static size_t getSizeForFullCacheFlush();
static void setExtraCacheFlushFields(PIPE_CONTROL *pipeControl);
protected:
static PIPE_CONTROL *obtainPipeControl(LinearStream &commandStream, bool dcFlush);
};
union SURFACE_STATE_BUFFER_LENGTH {
uint32_t Length;
struct SurfaceState {
uint32_t Width : BITFIELD_RANGE(0, 6);
uint32_t Height : BITFIELD_RANGE(7, 20);
uint32_t Depth : BITFIELD_RANGE(21, 31);
} SurfaceState;
};
} // namespace NEO
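An illustrative sketch (not part of the committed header) of the DwordBuilder defined above, which encodes masked register writes: the value bit lives in the low word and its enable mask sits 16 bits higher.
// DwordBuilder::build(2, true)        -> 0x00040004 (bit 2 set, mask bit 18 set)
// DwordBuilder::build(2, true, false) -> 0x00040000 (mask bit only, value bit cleared)
uint32_t maskedWrite = NEO::DwordBuilder::build(2, true);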

View File

@@ -0,0 +1,320 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "execution_environment/execution_environment.h"
#include "gmm_helper/gmm.h"
#include "gmm_helper/gmm_helper.h"
#include "helpers/aligned_memory.h"
#include "helpers/hw_helper.h"
#include "helpers/hw_info.h"
#include "helpers/preamble.h"
#include "memory_manager/graphics_allocation.h"
#include "memory_manager/memory_constants.h"
#include "os_interface/os_interface.h"
#include "opencl/source/aub_mem_dump/aub_mem_dump.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "instrumentation.h"
namespace NEO {
template <typename Family>
const aub_stream::EngineType HwHelperHw<Family>::lowPriorityEngineType = aub_stream::EngineType::ENGINE_RCS;
template <typename Family>
const AuxTranslationMode HwHelperHw<Family>::defaultAuxTranslationMode = AuxTranslationMode::Builtin;
template <typename Family>
bool HwHelperHw<Family>::obtainRenderBufferCompressionPreference(const HardwareInfo &hwInfo, const size_t size) const {
return size > KB;
}
template <typename Family>
void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) {
caps->image3DMaxHeight = 16384;
caps->image3DMaxWidth = 16384;
//With stateful messages we have an allocation cap of 4GB
//The 8KB is subtracted because the driver may pad the buffer with additional pages for overfetching.
caps->maxMemAllocSize = (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte);
caps->isStatelesToStatefullWithOffsetSupported = true;
}
template <typename Family>
bool HwHelperHw<Family>::isL3Configurable(const HardwareInfo &hwInfo) {
return PreambleHelper<Family>::isL3Configurable(hwInfo);
}
template <typename Family>
SipKernelType HwHelperHw<Family>::getSipKernelType(bool debuggingActive) {
if (!debuggingActive) {
return SipKernelType::Csr;
}
return SipKernelType::DbgCsr;
}
template <typename Family>
size_t HwHelperHw<Family>::getMaxBarrierRegisterPerSlice() const {
return 32;
}
template <typename Family>
uint32_t HwHelperHw<Family>::getPitchAlignmentForImage(const HardwareInfo *hwInfo) {
return 4u;
}
template <typename Family>
const AubMemDump::LrcaHelper &HwHelperHw<Family>::getCsTraits(aub_stream::EngineType engineType) const {
return *AUBFamilyMapper<Family>::csTraits[engineType];
}
template <typename Family>
bool HwHelperHw<Family>::isPageTableManagerSupported(const HardwareInfo &hwInfo) const {
return false;
}
template <typename Family>
bool HwHelperHw<Family>::isFenceAllocationRequired(const HardwareInfo &hwInfo) const {
return false;
}
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) {
return true;
}
template <typename Family>
void HwHelperHw<Family>::setRenderSurfaceStateForBuffer(ExecutionEnvironment &executionEnvironment,
void *surfaceStateBuffer,
size_t bufferSize,
uint64_t gpuVa,
size_t offset,
uint32_t pitch,
GraphicsAllocation *gfxAlloc,
bool isReadOnly,
uint32_t surfaceType,
bool forceNonAuxMode) {
using RENDER_SURFACE_STATE = typename Family::RENDER_SURFACE_STATE;
using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
auto gmmHelper = executionEnvironment.getGmmHelper();
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuffer);
*surfaceState = Family::cmdInitRenderSurfaceState;
auto surfaceSize = alignUp(bufferSize, 4);
SURFACE_STATE_BUFFER_LENGTH Length = {0};
Length.Length = static_cast<uint32_t>(surfaceSize - 1);
surfaceState->setWidth(Length.SurfaceState.Width + 1);
surfaceState->setHeight(Length.SurfaceState.Height + 1);
surfaceState->setDepth(Length.SurfaceState.Depth + 1);
if (pitch) {
surfaceState->setSurfacePitch(pitch);
}
// The graphics allocation for a host-pointer surface is created in the makeResident call; its GPU address is expected to match the CPU address
auto bufferStateAddress = (gfxAlloc != nullptr) ? gfxAlloc->getGpuAddress() : gpuVa;
bufferStateAddress += offset;
auto bufferStateSize = (gfxAlloc != nullptr) ? gfxAlloc->getUnderlyingBufferSize() : bufferSize;
surfaceState->setSurfaceType(static_cast<typename RENDER_SURFACE_STATE::SURFACE_TYPE>(surfaceType));
surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
surfaceState->setSurfaceVerticalAlignment(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
surfaceState->setSurfaceHorizontalAlignment(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4);
surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR);
surfaceState->setVerticalLineStride(0);
surfaceState->setVerticalLineStrideOffset(0);
if ((isAligned<MemoryConstants::cacheLineSize>(bufferStateAddress) && isAligned<MemoryConstants::cacheLineSize>(bufferStateSize)) ||
isReadOnly) {
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
} else {
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
}
surfaceState->setSurfaceBaseAddress(bufferStateAddress);
Gmm *gmm = gfxAlloc ? gfxAlloc->getDefaultGmm() : nullptr;
if (gmm && gmm->isRenderCompressed && !forceNonAuxMode &&
GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == gfxAlloc->getAllocationType()) {
// It is expected not to program pitch/qpitch/baseAddress for the Aux surface in CCS scenarios
surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E);
} else {
surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT);
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
}
}
template <typename Family>
bool HwHelperHw<Family>::getEnableLocalMemory(const HardwareInfo &hwInfo) const {
if (DebugManager.flags.EnableLocalMemory.get() != -1) {
return DebugManager.flags.EnableLocalMemory.get();
} else if (DebugManager.flags.AUBDumpForceAllToLocalMemory.get()) {
return true;
}
return OSInterface::osEnableLocalMemory && isLocalMemoryEnabled(hwInfo);
}
template <typename Family>
AuxTranslationMode HwHelperHw<Family>::getAuxTranslationMode() {
if (DebugManager.flags.ForceAuxTranslationMode.get() != -1) {
return static_cast<AuxTranslationMode>(DebugManager.flags.ForceAuxTranslationMode.get());
}
return HwHelperHw<Family>::defaultAuxTranslationMode;
}
template <typename Family>
bool HwHelperHw<Family>::isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo) {
return (HwHelperHw<Family>::getAuxTranslationMode() == AuxTranslationMode::Blit) &&
hwInfo.capabilityTable.blitterOperationsSupported &&
multiDispatchInfo.getMemObjsForAuxTranslation() &&
(multiDispatchInfo.getMemObjsForAuxTranslation()->size() > 0);
}
template <typename Family>
typename Family::PIPE_CONTROL *MemorySynchronizationCommands<Family>::obtainPipeControlAndProgramPostSyncOperation(
LinearStream &commandStream, POST_SYNC_OPERATION operation, uint64_t gpuAddress, uint64_t immediateData, bool dcFlush, const HardwareInfo &hwInfo) {
addPipeControlWA(commandStream, gpuAddress, hwInfo);
auto pipeControl = obtainPipeControl(commandStream, dcFlush);
pipeControl->setPostSyncOperation(operation);
pipeControl->setAddress(static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL));
pipeControl->setAddressHigh(static_cast<uint32_t>(gpuAddress >> 32));
pipeControl->setDcFlushEnable(dcFlush);
if (operation == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
pipeControl->setImmediateData(immediateData);
}
setExtraPipeControlProperties(*pipeControl, hwInfo);
MemorySynchronizationCommands<Family>::addAdditionalSynchronization(commandStream, gpuAddress, hwInfo);
return pipeControl;
}
template <typename GfxFamily>
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(LinearStream &commandStream, bool dcFlush) {
auto pCmd = reinterpret_cast<PIPE_CONTROL *>(commandStream.getSpace(sizeof(PIPE_CONTROL)));
*pCmd = GfxFamily::cmdInitPipeControl;
pCmd->setCommandStreamerStallEnable(true);
pCmd->setDcFlushEnable(dcFlush);
if (DebugManager.flags.FlushAllCaches.get()) {
pCmd->setDcFlushEnable(true);
pCmd->setRenderTargetCacheFlushEnable(true);
pCmd->setInstructionCacheInvalidateEnable(true);
pCmd->setTextureCacheInvalidationEnable(true);
pCmd->setPipeControlFlushEnable(true);
pCmd->setVfCacheInvalidationEnable(true);
pCmd->setConstantCacheInvalidationEnable(true);
pCmd->setStateCacheInvalidationEnable(true);
}
return pCmd;
}
template <typename GfxFamily>
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::addPipeControl(LinearStream &commandStream, bool dcFlush) {
return MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(commandStream, dcFlush);
}
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl() {
return sizeof(typename GfxFamily::PIPE_CONTROL);
}
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo) {
const auto pipeControlCount = HardwareCommandsHelper<GfxFamily>::isPipeControlWArequired(hwInfo) ? 2u : 1u;
return pipeControlCount * getSizeForSinglePipeControl() + getSizeForAdditonalSynchronization(hwInfo);
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getMetricsLibraryGenId() const {
return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen9);
}
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::requiresAuxResolves() const {
return true;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) {
if (DebugManager.flags.ForceLinearImages.get() || forceLinearStorage || isSharedContext) {
return false;
}
return !isImage1d;
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::alignSlmSize(uint32_t slmSize) {
return HardwareCommandsHelper<GfxFamily>::alignSlmSize(slmSize);
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) {
return hasBarriers;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
return false;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo) {
return false;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) {
return false;
}
template <typename GfxFamily>
inline uint32_t HwHelperHw<GfxFamily>::getMinimalSIMDSize() {
return 8u;
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
}
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() {
return sizeof(typename GfxFamily::PIPE_CONTROL);
}
template <typename GfxFamily>
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::addFullCacheFlush(LinearStream &commandStream) {
auto pipeControl = MemorySynchronizationCommands<GfxFamily>::obtainPipeControl(commandStream, true);
pipeControl->setRenderTargetCacheFlushEnable(true);
pipeControl->setInstructionCacheInvalidateEnable(true);
pipeControl->setTextureCacheInvalidationEnable(true);
pipeControl->setPipeControlFlushEnable(true);
pipeControl->setConstantCacheInvalidationEnable(true);
pipeControl->setStateCacheInvalidationEnable(true);
MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(pipeControl);
return pipeControl;
}
template <typename GfxFamily>
const StackVec<size_t, 3> HwHelperHw<GfxFamily>::getDeviceSubGroupSizes() const {
return {8, 16, 32};
}
} // namespace NEO

View File

@@ -0,0 +1,96 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "gmm_helper/gmm_helper.h"
#include "helpers/hw_helper_base.inl"
namespace NEO {
template <typename GfxFamily>
void HwHelperHw<GfxFamily>::adjustDefaultEngineType(HardwareInfo *pHwInfo) {
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const {
return pHwInfo->gtSystemInfo.MaxSubSlicesSupported * pHwInfo->gtSystemInfo.MaxEuPerSubSlice *
pHwInfo->gtSystemInfo.ThreadCount / pHwInfo->gtSystemInfo.EUCount;
}
template <typename GfxFamily>
void HwHelperHw<GfxFamily>::setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) {
coherencyFlag = true;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isLocalMemoryEnabled(const HardwareInfo &hwInfo) const {
return false;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::hvAlign4Required() const {
return true;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::timestampPacketWriteSupported() const {
return false;
}
template <typename GfxFamily>
const std::vector<aub_stream::EngineType> HwHelperHw<GfxFamily>::getGpgpuEngineInstances() const {
constexpr std::array<aub_stream::EngineType, 3> gpgpuEngineInstances = {{aub_stream::ENGINE_RCS,
aub_stream::ENGINE_RCS, // low priority
aub_stream::ENGINE_RCS}}; // internal usage
return std::vector<aub_stream::EngineType>(gpgpuEngineInstances.begin(), gpgpuEngineInstances.end());
}
template <typename GfxFamily>
std::string HwHelperHw<GfxFamily>::getExtensions() const {
return "";
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const {
if (l3enabled) {
return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1;
}
return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1;
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
uint32_t threadsPerEu) {
return threadsPerEu * euCount;
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
}
template <typename GfxFamily>
inline size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSingleSynchronization(const HardwareInfo &hwInfo) {
return 0u;
}
template <typename GfxFamily>
inline size_t MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo) {
return 0u;
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setExtraPipeControlProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo) {
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(PIPE_CONTROL *pipeControl) {
}
} // namespace NEO

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "debug_settings/debug_settings_manager.h"
#include "helpers/hw_helper.h"
namespace NEO {
uint32_t HwHelper::getSubDevicesCount(const HardwareInfo *pHwInfo) {
return DebugManager.flags.CreateMultipleSubDevices.get() > 0 ? DebugManager.flags.CreateMultipleSubDevices.get() : 1u;
}
uint32_t HwHelper::getEnginesCount(const HardwareInfo &hwInfo) {
return 1u;
}
} // namespace NEO

View File

@@ -0,0 +1,122 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/hw_info.h"
#include "debug_settings/debug_settings_manager.h"
#include "helpers/hw_cmds.h"
#include <algorithm>
namespace NEO {
HardwareInfo::HardwareInfo(const PLATFORM *platform, const FeatureTable *featureTable, const WorkaroundTable *workaroundTable,
const GT_SYSTEM_INFO *gtSystemInfo, const RuntimeCapabilityTable &capabilityTable)
: platform(*platform), featureTable(*featureTable), workaroundTable(*workaroundTable), gtSystemInfo(*gtSystemInfo), capabilityTable(capabilityTable) {
}
// Global table of hardware prefixes
const char *hardwarePrefix[IGFX_MAX_PRODUCT] = {
nullptr,
};
// Global table of default hardware info configs
uint64_t defaultHardwareInfoConfigTable[IGFX_MAX_PRODUCT] = {
0x0,
};
// Global table of family names
const char *familyName[IGFX_MAX_CORE] = {
nullptr,
};
// Global table of family enabled flags
bool familyEnabled[IGFX_MAX_CORE] = {
false,
};
const HardwareInfo *hardwareInfoTable[IGFX_MAX_PRODUCT] = {};
void (*hardwareInfoSetup[IGFX_MAX_PRODUCT])(HardwareInfo *, bool, uint64_t) = {
0x0,
};
bool getHwInfoForPlatformString(std::string &platform, const HardwareInfo *&hwInfoIn) {
std::transform(platform.begin(), platform.end(), platform.begin(), ::tolower);
bool ret = false;
for (int j = 0; j < IGFX_MAX_PRODUCT; j++) {
if (hardwarePrefix[j] == nullptr)
continue;
if (hardwarePrefix[j] == platform) {
hwInfoIn = hardwareInfoTable[j];
ret = true;
break;
}
}
return ret;
}
void setHwInfoValuesFromConfig(const uint64_t hwInfoConfig, HardwareInfo &hwInfoIn) {
uint32_t sliceCount = static_cast<uint16_t>(hwInfoConfig >> 32);
uint32_t subSlicePerSliceCount = static_cast<uint16_t>(hwInfoConfig >> 16);
uint32_t euPerSubSliceCount = static_cast<uint16_t>(hwInfoConfig);
hwInfoIn.gtSystemInfo.SliceCount = sliceCount;
hwInfoIn.gtSystemInfo.SubSliceCount = subSlicePerSliceCount * sliceCount;
hwInfoIn.gtSystemInfo.EUCount = euPerSubSliceCount * subSlicePerSliceCount * sliceCount;
}
bool parseHwInfoConfigString(const std::string &hwInfoConfigStr, uint64_t &hwInfoConfig) {
hwInfoConfig = 0u;
size_t currPos = hwInfoConfigStr.find('x', 0);
if (currPos == std::string::npos) {
return false;
}
uint32_t sliceCount = static_cast<uint32_t>(std::stoul(hwInfoConfigStr.substr(0, currPos)));
if (sliceCount > std::numeric_limits<uint16_t>::max()) {
return false;
}
size_t prevPos = currPos + 1;
currPos = hwInfoConfigStr.find('x', prevPos);
if (currPos == std::string::npos) {
return false;
}
uint32_t subSlicePerSliceCount = static_cast<uint32_t>(std::stoul(hwInfoConfigStr.substr(prevPos, currPos)));
if (subSlicePerSliceCount > std::numeric_limits<uint16_t>::max()) {
return false;
}
uint32_t subSliceCount = subSlicePerSliceCount * sliceCount;
if (subSliceCount > std::numeric_limits<uint16_t>::max()) {
return false;
}
prevPos = currPos + 1;
uint32_t euPerSubSliceCount = static_cast<uint32_t>(std::stoul(hwInfoConfigStr.substr(prevPos, std::string::npos)));
if (euPerSubSliceCount > std::numeric_limits<uint16_t>::max()) {
return false;
}
uint32_t euCount = euPerSubSliceCount * subSliceCount;
if (euCount > std::numeric_limits<uint16_t>::max()) {
return false;
}
hwInfoConfig = static_cast<uint64_t>(sliceCount & 0xffff) << 32 | static_cast<uint64_t>(subSlicePerSliceCount & 0xffff) << 16 | static_cast<uint64_t>(euPerSubSliceCount & 0xffff);
return true;
}
aub_stream::EngineType getChosenEngineType(const HardwareInfo &hwInfo) {
return DebugManager.flags.NodeOrdinal.get() == -1
? hwInfo.capabilityTable.defaultEngineType
: static_cast<aub_stream::EngineType>(DebugManager.flags.NodeOrdinal.get());
}
const std::string getFamilyNameWithType(const HardwareInfo &hwInfo) {
std::string platformName = familyName[hwInfo.platform.eRenderCoreFamily];
platformName.append(hwInfo.capabilityTable.platformType);
return platformName;
}
} // namespace NEO
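An illustrative round trip (not part of the committed file) through the config-string helpers above: "1x2x8" means 1 slice, 2 sub-slices per slice and 8 EUs per sub-slice, packed as 16-bit fields into a uint64_t.
uint64_t config = 0;
if (NEO::parseHwInfoConfigString("1x2x8", config)) {
// config == (1ull << 32) | (2ull << 16) | 8ull
NEO::HardwareInfo hwInfo;
NEO::setHwInfoValuesFromConfig(config, hwInfo);
// hwInfo.gtSystemInfo now reports SliceCount = 1, SubSliceCount = 2, EUCount = 16
}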

View File

@@ -0,0 +1,115 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_stream/preemption_mode.h"
#include "direct_submission/direct_submission_properties.h"
#include "helpers/kmd_notify_properties.h"
#include "engine_node.h"
#include "gtsysinfo.h"
#include "igfxfmid.h"
#include "sku_info.h"
#include <cstddef>
#include <string>
namespace NEO {
struct RuntimeCapabilityTable {
DirectSubmissionProperyEngines directSubmissionEngines;
KmdNotifyProperties kmdNotifyProperties;
uint64_t gpuAddressSpace;
double defaultProfilingTimerResolution;
size_t requiredPreemptionSurfaceSize;
bool (*isSimulation)(unsigned short);
PreemptionMode defaultPreemptionMode;
aub_stream::EngineType defaultEngineType;
uint32_t maxRenderFrequency;
unsigned int clVersionSupport;
uint32_t aubDeviceId;
uint32_t extraQuantityThreadsPerEU;
uint32_t slmSize;
uint32_t grfSize;
bool blitterOperationsSupported;
bool ftrSupportsInteger64BitAtomics;
bool ftrSupportsFP64;
bool ftrSupports64BitMath;
bool ftrSvm;
bool ftrSupportsCoherency;
bool ftrSupportsVmeAvcTextureSampler;
bool ftrSupportsVmeAvcPreemption;
bool ftrRenderCompressedBuffers;
bool ftrRenderCompressedImages;
bool ftr64KBpages;
bool instrumentationEnabled;
bool forceStatelessCompilationFor32Bit;
const char *platformType;
bool debuggerSupported;
bool supportsVme;
bool supportCacheFlushAfterWalker;
bool supportsImages;
bool supportsDeviceEnqueue;
bool hostPtrTrackingEnabled;
};
struct HardwareCapabilities {
size_t image3DMaxWidth;
size_t image3DMaxHeight;
uint64_t maxMemAllocSize;
bool isStatelesToStatefullWithOffsetSupported;
};
struct HardwareInfo {
HardwareInfo() = default;
HardwareInfo(const PLATFORM *platform, const FeatureTable *featureTable, const WorkaroundTable *workaroundTable,
const GT_SYSTEM_INFO *gtSystemInfo, const RuntimeCapabilityTable &capabilityTable);
PLATFORM platform = {};
FeatureTable featureTable = {};
WorkaroundTable workaroundTable = {};
alignas(4) GT_SYSTEM_INFO gtSystemInfo = {};
RuntimeCapabilityTable capabilityTable = {};
};
template <PRODUCT_FAMILY product>
struct HwMapper {};
template <GFXCORE_FAMILY gfxFamily>
struct GfxFamilyMapper {};
// Global tables of hardware prefixes, family names and hardware info
extern bool familyEnabled[IGFX_MAX_CORE];
extern const char *familyName[IGFX_MAX_CORE];
extern const char *hardwarePrefix[IGFX_MAX_PRODUCT];
extern uint64_t defaultHardwareInfoConfigTable[IGFX_MAX_PRODUCT];
extern const HardwareInfo *hardwareInfoTable[IGFX_MAX_PRODUCT];
extern void (*hardwareInfoSetup[IGFX_MAX_PRODUCT])(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig);
template <GFXCORE_FAMILY gfxFamily>
struct EnableGfxFamilyHw {
EnableGfxFamilyHw() {
familyEnabled[gfxFamily] = true;
familyName[gfxFamily] = GfxFamilyMapper<gfxFamily>::name;
}
};
bool getHwInfoForPlatformString(std::string &platform, const HardwareInfo *&hwInfoIn);
void setHwInfoValuesFromConfig(const uint64_t hwInfoConfig, HardwareInfo &hwInfoIn);
bool parseHwInfoConfigString(const std::string &hwInfoConfigStr, uint64_t &hwInfoConfig);
aub_stream::EngineType getChosenEngineType(const HardwareInfo &hwInfo);
const std::string getFamilyNameWithType(const HardwareInfo &hwInfo);
// Utility conversion
template <PRODUCT_FAMILY productFamily>
struct ToGfxCoreFamily {
static const GFXCORE_FAMILY gfxCoreFamily =
static_cast<GFXCORE_FAMILY>(NEO::HwMapper<productFamily>::gfxFamily);
static constexpr GFXCORE_FAMILY get() { return gfxCoreFamily; }
};
} // namespace NEO

View File

@@ -0,0 +1,19 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <atomic>
template <typename Type>
void interlockedMax(std::atomic<Type> &dest, Type newVal) {
Type oldVal = dest;
Type maxVal = oldVal < newVal ? newVal : oldVal;
while (!std::atomic_compare_exchange_weak(&dest, &oldVal, maxVal)) {
oldVal = dest;
maxVal = oldVal < newVal ? newVal : oldVal;
}
}
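A minimal usage sketch (illustrative only): interlockedMax lets multiple threads publish a running maximum without locks; the compare-exchange loop above retries until the stored value is at least newVal.
std::atomic<uint64_t> highWaterMark{0};
void recordUsage(uint64_t bytesUsed) {
interlockedMax(highWaterMark, bytesUsed); // safe to call concurrently
}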

View File

@@ -0,0 +1,44 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/kernel_helpers.h"
#include "helpers/basic_math.h"
#include "helpers/debug_helpers.h"
#include <algorithm>
namespace NEO {
uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
const size_t *localWorkSize) {
UNRECOVERABLE_IF((workDim == 0) || (workDim > 3));
UNRECOVERABLE_IF(localWorkSize == nullptr);
size_t workGroupSize = localWorkSize[0];
for (uint32_t i = 1; i < workDim; i++) {
workGroupSize *= localWorkSize[i];
}
auto threadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, simd));
auto maxWorkGroupsCount = availableThreadCount / threadsPerThreadGroup;
if (numberOfBarriers > 0) {
auto maxWorkGroupsCountDueToBarrierUsage = dssCount * (maxBarrierCount / numberOfBarriers);
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToBarrierUsage);
}
if (usedSlmSize > 0) {
auto maxWorkGroupsCountDueToSlm = availableSlmSize / usedSlmSize;
maxWorkGroupsCount = std::min(maxWorkGroupsCount, maxWorkGroupsCountDueToSlm);
}
return maxWorkGroupsCount;
}
} // namespace NEO
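An illustrative call (all values hypothetical) showing the three caps computed above:
size_t lws[3] = {8, 8, 1};
uint32_t maxGroups = NEO::KernelHelper::getMaxWorkGroupCount(
16,    // simd
448,   // availableThreadCount -> thread cap: 448 / ceil(64 / 16) = 112
8,     // dssCount
65536, // availableSlmSize
4096,  // usedSlmSize -> SLM cap: 65536 / 4096 = 16
32,    // maxBarrierCount
1,     // numberOfBarriers -> barrier cap: 8 * (32 / 1) = 256
3, lws);
// result = min(112, 256, 16) = 16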

View File

@@ -0,0 +1,19 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include <cstddef>
#include <cstdint>
namespace NEO {
struct KernelHelper {
static uint32_t getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
const size_t *localWorkSize);
};
} // namespace NEO

View File

@@ -0,0 +1,78 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/kmd_notify_properties.h"
#include "debug_settings/debug_settings_manager.h"
#include <cstdint>
using namespace NEO;
bool KmdNotifyHelper::obtainTimeoutParams(int64_t &timeoutValueOutput,
bool quickKmdSleepRequest,
uint32_t currentHwTag,
uint32_t taskCountToWait,
FlushStamp flushStampToWait,
bool forcePowerSavingMode) {
if (flushStampToWait == 0) {
return false;
}
if (DebugManager.flags.PowerSavingMode.get() || forcePowerSavingMode) {
timeoutValueOutput = 1;
return true;
}
int64_t multiplier = (currentHwTag < taskCountToWait) ? static_cast<int64_t>(taskCountToWait - currentHwTag) : 1;
if (!properties->enableKmdNotify && multiplier > KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine) {
updateAcLineStatus();
}
quickKmdSleepRequest |= applyQuickKmdSleepForSporadicWait();
if (!properties->enableKmdNotify && !acLineConnected) {
timeoutValueOutput = KmdNotifyConstants::timeoutInMicrosecondsForDisconnectedAcLine;
} else if (quickKmdSleepRequest && properties->enableQuickKmdSleep) {
timeoutValueOutput = properties->delayQuickKmdSleepMicroseconds;
} else {
timeoutValueOutput = getBaseTimeout(multiplier);
}
return (properties->enableKmdNotify || !acLineConnected);
}
bool KmdNotifyHelper::applyQuickKmdSleepForSporadicWait() const {
if (properties->enableQuickKmdSleepForSporadicWaits) {
auto timeDiff = getMicrosecondsSinceEpoch() - lastWaitForCompletionTimestampUs.load();
if (timeDiff > properties->delayQuickKmdSleepForSporadicWaitsMicroseconds) {
return true;
}
}
return false;
}
void KmdNotifyHelper::updateLastWaitForCompletionTimestamp() {
lastWaitForCompletionTimestampUs = getMicrosecondsSinceEpoch();
}
int64_t KmdNotifyHelper::getMicrosecondsSinceEpoch() const {
auto now = std::chrono::high_resolution_clock::now().time_since_epoch();
return std::chrono::duration_cast<std::chrono::microseconds>(now).count();
}
void KmdNotifyHelper::overrideFromDebugVariable(int32_t debugVariableValue, int64_t &destination) {
if (debugVariableValue >= 0) {
destination = static_cast<int64_t>(debugVariableValue);
}
}
void KmdNotifyHelper::overrideFromDebugVariable(int32_t debugVariableValue, bool &destination) {
if (debugVariableValue >= 0) {
destination = !!(debugVariableValue);
}
}

View File

@@ -0,0 +1,62 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/completion_stamp.h"
#include <atomic>
#include <chrono>
#include <cstdint>
namespace NEO {
struct KmdNotifyProperties {
int64_t delayKmdNotifyMicroseconds;
int64_t delayQuickKmdSleepMicroseconds;
int64_t delayQuickKmdSleepForSporadicWaitsMicroseconds;
// Main switch for the KMD Notify optimization - if it is disabled, all switches below are disabled too
bool enableKmdNotify;
// Use a smaller delay in specific situations (e.g. from AsyncEventsHandler)
bool enableQuickKmdSleep;
// If waits are called sporadically use QuickKmdSleep mode, otherwise use standard delay
bool enableQuickKmdSleepForSporadicWaits;
};
namespace KmdNotifyConstants {
constexpr int64_t timeoutInMicrosecondsForDisconnectedAcLine = 10000;
constexpr uint32_t minimumTaskCountDiffToCheckAcLine = 10;
} // namespace KmdNotifyConstants
class KmdNotifyHelper {
public:
KmdNotifyHelper() = delete;
KmdNotifyHelper(const KmdNotifyProperties *properties) : properties(properties){};
MOCKABLE_VIRTUAL ~KmdNotifyHelper() = default;
bool obtainTimeoutParams(int64_t &timeoutValueOutput,
bool quickKmdSleepRequest,
uint32_t currentHwTag,
uint32_t taskCountToWait,
FlushStamp flushStampToWait,
bool forcePowerSavingMode);
bool quickKmdSleepForSporadicWaitsEnabled() const { return properties->enableQuickKmdSleepForSporadicWaits; }
MOCKABLE_VIRTUAL void updateLastWaitForCompletionTimestamp();
MOCKABLE_VIRTUAL void updateAcLineStatus();
static void overrideFromDebugVariable(int32_t debugVariableValue, int64_t &destination);
static void overrideFromDebugVariable(int32_t debugVariableValue, bool &destination);
protected:
bool applyQuickKmdSleepForSporadicWait() const;
int64_t getBaseTimeout(const int64_t &multiplier) const;
int64_t getMicrosecondsSinceEpoch() const;
const KmdNotifyProperties *properties = nullptr;
std::atomic<int64_t> lastWaitForCompletionTimestampUs{0};
std::atomic<bool> acLineConnected{true};
};
} // namespace NEO

View File

@@ -0,0 +1,29 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
namespace NEO {
class NonCopyableOrMovableClass {
public:
NonCopyableOrMovableClass() = default;
NonCopyableOrMovableClass(const NonCopyableOrMovableClass &) = delete;
NonCopyableOrMovableClass &operator=(const NonCopyableOrMovableClass &) = delete;
NonCopyableOrMovableClass(NonCopyableOrMovableClass &&) = delete;
NonCopyableOrMovableClass &operator=(NonCopyableOrMovableClass &&) = delete;
};
class NonCopyableClass {
public:
NonCopyableClass() = default;
NonCopyableClass(const NonCopyableClass &) = delete;
NonCopyableClass &operator=(const NonCopyableClass &) = delete;
NonCopyableClass(NonCopyableClass &&) = default;
NonCopyableClass &operator=(NonCopyableClass &&) = default;
};
} // namespace NEO

View File

@@ -0,0 +1,37 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
#ifndef KMD_PROFILING
#define KMD_PROFILING 0
#endif
namespace NEO {
enum CommandStreamReceiverType {
// Use receiver for real HW
CSR_HW = 0,
// Capture an AUB file automatically for all traffic going through Device -> CommandStreamReceiver
CSR_AUB,
// Capture an AUB and tunnel all commands going through Device -> CommandStreamReceiver to a TBX server
CSR_TBX,
// Use receiver for real HW and capture AUB file
CSR_HW_WITH_AUB,
// Use TBX server and capture AUB file
CSR_TBX_WITH_AUB,
// Number of CSR types
CSR_TYPES_NUM
};
// AUB file folder location
extern const char *folderAUB;
// Initial value for HW tag
// Set to 0 when using HW or the simulator, otherwise 0xFFFFFF00; it needs to be lower than CompletionStamp::levelNotReady.
extern uint32_t initialHardwareTag;
} // namespace NEO

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
namespace NEO {
struct PipelineSelectArgs {
bool specialPipelineSelectMode = false;
bool mediaSamplerRequired = false;
};
} // namespace NEO

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
const uint32_t pipelineSelectEnablePipelineSelectMaskBits = 0x3;
const uint32_t pipelineSelectMediaSamplerDopClockGateMaskBits = 0x10;
const uint32_t pipelineSelectMediaSamplerPowerClockGateMaskBits = 0x40;
} // namespace NEO

View File

@@ -0,0 +1,98 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/pipeline_select_helper.h"
#include "engine_node.h"
#include "igfxfmid.h"
#include <cstddef>
#include <cstdint>
namespace NEO {
struct HardwareInfo;
class Device;
struct DispatchFlags;
class GraphicsAllocation;
class LinearStream;
struct PipelineSelectArgs;
template <typename GfxFamily>
struct PreambleHelper {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
static void programL3(LinearStream *pCommandStream, uint32_t l3Config);
static void programPipelineSelect(LinearStream *pCommandStream,
const PipelineSelectArgs &pipelineSelectArgs,
const HardwareInfo &hwInfo);
static uint32_t getDefaultThreadArbitrationPolicy();
static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy);
static void programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr);
static void addPipeControlBeforeVfeCmd(LinearStream *pCommandStream, const HardwareInfo *hwInfo, aub_stream::EngineType engineType);
static uint64_t programVFEState(LinearStream *pCommandStream,
const HardwareInfo &hwInfo,
int scratchSize,
uint64_t scratchAddress,
uint32_t maxFrontEndThreads,
aub_stream::EngineType engineType);
static void programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo);
static void programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer);
static void programKernelDebugging(LinearStream *pCommandStream);
static void programPerDssBackedBuffer(LinearStream *pCommandStream, const HardwareInfo &hwInfo, GraphicsAllocation *perDssBackBufferOffset);
static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM);
static bool isL3Configurable(const HardwareInfo &hwInfo);
static size_t getAdditionalCommandsSize(const Device &device);
static size_t getThreadArbitrationCommandsSize();
static size_t getVFECommandsSize();
static size_t getKernelDebuggingCommandsSize(bool debuggingActive);
static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo);
static uint32_t getUrbEntryAllocationSize();
static size_t getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo);
static size_t getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo);
};
template <PRODUCT_FAMILY ProductFamily>
static uint32_t getL3ConfigHelper(bool useSLM);
template <PRODUCT_FAMILY ProductFamily>
struct L3CNTLREGConfig {
static const uint32_t valueForSLM;
static const uint32_t valueForNoSLM;
};
template <PRODUCT_FAMILY ProductFamily>
uint32_t getL3ConfigHelper(bool useSLM) {
if (!useSLM) {
return L3CNTLREGConfig<ProductFamily>::valueForNoSLM;
}
return L3CNTLREGConfig<ProductFamily>::valueForSLM;
}
template <typename GfxFamily>
struct L3CNTLRegisterOffset {
static const uint32_t registerOffset;
};
template <typename GfxFamily>
struct DebugModeRegisterOffset {
enum {
registerOffset = 0x20ec,
debugEnabledValue = (1 << 6) | (1 << 22)
};
};
namespace TdDebugControlRegisterOffset {
static constexpr uint32_t registerOffset = 0xe400;
static constexpr uint32_t debugEnabledValue = (1 << 4) | (1 << 7);
}; // namespace TdDebugControlRegisterOffset
} // namespace NEO

View File

@@ -0,0 +1,118 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "command_stream/linear_stream.h"
#include "command_stream/preemption.h"
#include "device/device.h"
#include "helpers/aligned_memory.h"
#include "helpers/hw_cmds.h"
#include "helpers/preamble.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/kernel/kernel.h"
#include "reg_configs_common.h"
#include <cstddef>
namespace NEO {
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy) {
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getThreadArbitrationCommandsSize() {
return 0;
}
template <typename GfxFamily>
uint32_t PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy() {
return 0;
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo) {
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPerDssBackedBuffer(LinearStream *pCommandStream, const HardwareInfo &hwInfo, GraphicsAllocation *perDssBackBufferOffset) {
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo) {
return 0;
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getAdditionalCommandsSize(const Device &device) {
size_t totalSize = PreemptionHelper::getRequiredPreambleSize<GfxFamily>(device);
totalSize += getKernelDebuggingCommandsSize(device.isDebuggerActive());
return totalSize;
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo) {
size_t size = 0;
using PIPELINE_SELECT = typename GfxFamily::PIPELINE_SELECT;
size += sizeof(PIPELINE_SELECT);
if (HardwareCommandsHelper<GfxFamily>::isPipeControlPriorToPipelineSelectWArequired(hwInfo)) {
size += sizeof(PIPE_CONTROL);
}
return size;
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer) {
programL3(pCommandStream, l3Config);
programThreadArbitration(pCommandStream, requiredThreadArbitrationPolicy);
programPreemption(pCommandStream, device, preemptionCsr);
if (device.isDebuggerActive()) {
programKernelDebugging(pCommandStream);
}
programGenSpecificPreambleWorkArounds(pCommandStream, device.getHardwareInfo());
if (perDssBackedBuffer != nullptr) {
programPerDssBackedBuffer(pCommandStream, device.getHardwareInfo(), perDssBackedBuffer);
}
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr) {
PreemptionHelper::programCsrBaseAddress<GfxFamily>(*pCommandStream, device, preemptionCsr);
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programKernelDebugging(LinearStream *pCommandStream) {
auto pCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM)));
*pCmd = GfxFamily::cmdInitLoadRegisterImm;
pCmd->setRegisterOffset(DebugModeRegisterOffset<GfxFamily>::registerOffset);
pCmd->setDataDword(DebugModeRegisterOffset<GfxFamily>::debugEnabledValue);
auto pCmd2 = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM)));
*pCmd2 = GfxFamily::cmdInitLoadRegisterImm;
pCmd2->setRegisterOffset(TdDebugControlRegisterOffset::registerOffset);
pCmd2->setDataDword(TdDebugControlRegisterOffset::debugEnabledValue);
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getKernelDebuggingCommandsSize(bool debuggingActive) {
if (debuggingActive) {
return 2 * sizeof(MI_LOAD_REGISTER_IMM);
}
return 0;
}
template <typename GfxFamily>
bool PreambleHelper<GfxFamily>::isL3Configurable(const HardwareInfo &hwInfo) {
return false;
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo) {
}
} // namespace NEO

View File

@@ -0,0 +1,63 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/flat_batch_buffer_helper.h"
#include "helpers/hw_helper.h"
#include "helpers/preamble_base.inl"
namespace NEO {
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programL3(LinearStream *pCommandStream, uint32_t l3Config) {
auto pCmd = (MI_LOAD_REGISTER_IMM *)pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
*pCmd = GfxFamily::cmdInitLoadRegisterImm;
pCmd->setRegisterOffset(L3CNTLRegisterOffset<GfxFamily>::registerOffset);
pCmd->setDataDword(l3Config);
}
template <typename GfxFamily>
uint32_t PreambleHelper<GfxFamily>::getUrbEntryAllocationSize() {
return 0x782;
}
template <typename GfxFamily>
uint64_t PreambleHelper<GfxFamily>::programVFEState(LinearStream *pCommandStream,
const HardwareInfo &hwInfo,
int scratchSize,
uint64_t scratchAddress,
uint32_t maxFrontEndThreads,
aub_stream::EngineType engineType) {
using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE;
addPipeControlBeforeVfeCmd(pCommandStream, &hwInfo, engineType);
auto scratchSpaceAddressOffset = static_cast<uint64_t>(pCommandStream->getUsed() + MEDIA_VFE_STATE::PATCH_CONSTANTS::SCRATCHSPACEBASEPOINTER_BYTEOFFSET);
auto pMediaVfeState = reinterpret_cast<MEDIA_VFE_STATE *>(pCommandStream->getSpace(sizeof(MEDIA_VFE_STATE)));
*pMediaVfeState = GfxFamily::cmdInitMediaVfeState;
pMediaVfeState->setMaximumNumberOfThreads(maxFrontEndThreads);
pMediaVfeState->setNumberOfUrbEntries(1);
pMediaVfeState->setUrbEntryAllocationSize(PreambleHelper<GfxFamily>::getUrbEntryAllocationSize());
pMediaVfeState->setPerThreadScratchSpace(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
pMediaVfeState->setStackSize(Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize));
uint32_t lowAddress = static_cast<uint32_t>(0xFFFFFFFF & scratchAddress);
uint32_t highAddress = static_cast<uint32_t>(0xFFFFFFFF & (scratchAddress >> 32));
pMediaVfeState->setScratchSpaceBasePointer(lowAddress);
pMediaVfeState->setScratchSpaceBasePointerHigh(highAddress);
programAdditionalFieldsInVfeState(pMediaVfeState, hwInfo);
return scratchSpaceAddressOffset;
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getVFECommandsSize() {
    using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE;
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
    return sizeof(MEDIA_VFE_STATE) + sizeof(PIPE_CONTROL);
}
} // namespace NEO


@@ -0,0 +1,81 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstddef>
#include <cstdint>
static const int ptrGarbageContent[16] = {
0x0131, 0x133, 0xA, 0xEF,
0x0131, 0x133, 0xA, 0xEF,
0x0131, 0x133, 0xA, 0xEF,
0x0131, 0x133, 0xA, 0xEF};
static const auto ptrGarbage = (void *)ptrGarbageContent;
template <typename T>
inline T ptrOffset(T ptrBefore, size_t offset) {
auto addrBefore = (uintptr_t)ptrBefore;
auto addrAfter = addrBefore + offset;
return (T)addrAfter;
}
template <>
inline uint64_t ptrOffset(uint64_t ptrBefore, size_t offset) {
return ptrBefore + offset;
}
template <typename TA, typename TB>
inline size_t ptrDiff(TA ptrAfter, TB ptrBefore) {
auto addrBefore = (uintptr_t)ptrBefore;
auto addrAfter = (uintptr_t)ptrAfter;
return addrAfter - addrBefore;
}
template <typename T>
inline uint64_t ptrDiff(uint64_t ptrAfter, T ptrBefore) {
return ptrAfter - ptrBefore;
}
template <typename IntegerAddressType>
inline void *addrToPtr(IntegerAddressType addr) {
uintptr_t correctBitnessAddress = static_cast<uintptr_t>(addr);
void *ptrReturn = reinterpret_cast<void *>(correctBitnessAddress);
return ptrReturn;
}
struct PatchStoreOperation {
template <typename T>
void operator()(T *memory, T value) {
*memory = value;
}
};
struct PatchIncrementOperation {
template <typename T>
void operator()(T *memory, T value) {
*memory += value;
}
};
template <typename PatchOperationT = PatchStoreOperation>
inline void patchWithRequiredSize(void *memoryToBePatched, uint32_t patchSize, uint64_t patchValue) {
if (patchSize == sizeof(uint64_t)) {
uint64_t *curbeAddress = reinterpret_cast<uint64_t *>(memoryToBePatched);
PatchOperationT{}(curbeAddress, patchValue);
} else {
uint32_t *curbeAddress = reinterpret_cast<uint32_t *>(memoryToBePatched);
PatchOperationT{}(curbeAddress, static_cast<uint32_t>(patchValue));
}
}
inline void patchIncrement(void *memoryToBePatched, uint32_t patchSize, uint64_t patchIncrementValue) {
patchWithRequiredSize<PatchIncrementOperation>(memoryToBePatched, patchSize, patchIncrementValue);
}
inline uint64_t castToUint64(const void *address) {
return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(const_cast<void *>(address)));
}


@@ -0,0 +1,74 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <stdint.h>
constexpr uint32_t L3SQC_BIT_LQSC_RO_PERF_DIS = 0x08000000;
constexpr uint32_t L3SQC_REG4 = 0xB118;
constexpr uint32_t GPGPU_WALKER_COOKIE_VALUE_BEFORE_WALKER = 0xFFFFFFFF;
constexpr uint32_t GPGPU_WALKER_COOKIE_VALUE_AFTER_WALKER = 0x00000000;
//Threads Dimension X/Y/Z
constexpr uint32_t GPUGPU_DISPATCHDIMX = 0x2500;
constexpr uint32_t GPUGPU_DISPATCHDIMY = 0x2504;
constexpr uint32_t GPUGPU_DISPATCHDIMZ = 0x2508;
constexpr uint32_t CS_GPR_R0 = 0x2600;
constexpr uint32_t CS_GPR_R1 = 0x2608;
constexpr uint32_t CS_GPR_R2 = 0x2610;
constexpr uint32_t CS_GPR_R3 = 0x2618;
constexpr uint32_t CS_GPR_R4 = 0x2620;
constexpr uint32_t CS_GPR_R5 = 0x2628;
constexpr uint32_t CS_GPR_R6 = 0x2630;
constexpr uint32_t CS_GPR_R7 = 0x2638;
constexpr uint32_t CS_GPR_R8 = 0x2640;
constexpr uint32_t CS_GPR_R9 = 0x2648;
constexpr uint32_t CS_GPR_R10 = 0x2650;
constexpr uint32_t CS_GPR_R11 = 0x2658;
constexpr uint32_t CS_GPR_R12 = 0x2660;
constexpr uint32_t CS_GPR_R13 = 0x2668;
constexpr uint32_t CS_GPR_R14 = 0x2670;
constexpr uint32_t CS_GPR_R15 = 0x2678;
constexpr uint32_t CS_PREDICATE_RESULT = 0x2418;
//Alu opcodes
constexpr uint32_t NUM_ALU_INST_FOR_READ_MODIFY_WRITE = 4;
constexpr uint32_t ALU_OPCODE_LOAD = 0x080;
constexpr uint32_t ALU_OPCODE_STORE = 0x180;
constexpr uint32_t ALU_OPCODE_ADD = 0x100;
constexpr uint32_t ALU_OPCODE_SUB = 0x101;
constexpr uint32_t ALU_OPCODE_AND = 0x102;
constexpr uint32_t ALU_OPCODE_OR = 0x103;
constexpr uint32_t ALU_REGISTER_R_0 = 0x0;
constexpr uint32_t ALU_REGISTER_R_1 = 0x1;
constexpr uint32_t ALU_REGISTER_R_2 = 0x2;
constexpr uint32_t ALU_REGISTER_R_3 = 0x3;
constexpr uint32_t ALU_REGISTER_R_4 = 0x4;
constexpr uint32_t ALU_REGISTER_R_5 = 0x5;
constexpr uint32_t ALU_REGISTER_R_6 = 0x6;
constexpr uint32_t ALU_REGISTER_R_7 = 0x7;
constexpr uint32_t ALU_REGISTER_R_8 = 0x8;
constexpr uint32_t ALU_REGISTER_R_9 = 0x9;
constexpr uint32_t ALU_REGISTER_R_10 = 0xA;
constexpr uint32_t ALU_REGISTER_R_11 = 0xB;
constexpr uint32_t ALU_REGISTER_R_12 = 0xC;
constexpr uint32_t ALU_REGISTER_R_13 = 0xD;
constexpr uint32_t ALU_REGISTER_R_14 = 0xE;
constexpr uint32_t ALU_REGISTER_R_15 = 0xF;
constexpr uint32_t ALU_REGISTER_R_SRCA = 0x20;
constexpr uint32_t ALU_REGISTER_R_SRCB = 0x21;
constexpr uint32_t ALU_REGISTER_R_ACCU = 0x31;
constexpr uint32_t ALU_REGISTER_R_ZF = 0x32;
constexpr uint32_t ALU_REGISTER_R_CF = 0x33;
constexpr uint32_t GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW = 0x23A8;
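A small sanity sketch derived from the table above: consecutive CS general-purpose register offsets are spaced 8 bytes apart; treating each GPR as a 64-bit register whose upper dword lives at +4 is an assumption of this sketch.

// Illustrative helpers only; they restate the spacing visible in the constants above.
constexpr uint32_t csGprLowDwordOffset(uint32_t gprIndex) { return CS_GPR_R0 + 8u * gprIndex; }
constexpr uint32_t csGprHighDwordOffset(uint32_t gprIndex) { return csGprLowDwordOffset(gprIndex) + 4u; }

static_assert(csGprLowDwordOffset(1) == CS_GPR_R1, "GPR offsets are spaced 8 bytes apart");
static_assert(csGprLowDwordOffset(15) == CS_GPR_R15, "the last GPR matches the table");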


@@ -0,0 +1,49 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
#include <functional>
namespace NEO {
template <typename MethodArgsT, typename EstimateMethodArgsT>
class RegisteredMethodDispatcher {
public:
using CommandsSizeEstimationMethodT = std::function<EstimateMethodArgsT>;
using RegisteredMethodT = std::function<MethodArgsT>;
void registerMethod(RegisteredMethodT method) {
this->method = method;
}
void registerCommandsSizeEstimationMethod(CommandsSizeEstimationMethodT method) {
this->commandsEstimationMethod = method;
}
template <typename... Args>
void operator()(Args &&... args) const {
if (method) {
method(std::forward<Args>(args)...);
}
}
template <typename... Args>
size_t estimateCommandsSize(Args &&... args) const {
if (commandsEstimationMethod) {
return commandsEstimationMethod(std::forward<Args>(args)...);
}
return 0;
}
protected:
CommandsSizeEstimationMethodT commandsEstimationMethod;
RegisteredMethodT method;
};
} // namespace NEO
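A minimal sketch of how the dispatcher might be wired up; the void(int) and size_t() signatures, the lambda bodies, and the 64-byte estimate are illustrative assumptions made for this sketch, not signatures used elsewhere in the driver.

#include <cstddef>
#include <cstdio>

inline void registeredMethodDispatcherSketch() {
    NEO::RegisteredMethodDispatcher<void(int), size_t()> dispatcher;

    dispatcher.registerMethod([](int value) { std::printf("dispatched with %d\n", value); });
    dispatcher.registerCommandsSizeEstimationMethod([]() -> size_t { return 64u; });

    dispatcher(42);                                      // forwards to the registered method, if any
    size_t estimate = dispatcher.estimateCommandsSize(); // 64 here; 0 when no estimator is registered
    (void)estimate;
}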


@@ -0,0 +1,14 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <stdint.h>
template <typename WALKER_TYPE>
constexpr typename WALKER_TYPE::SIMD_SIZE getSimdConfig(uint32_t simdSize) {
return static_cast<typename WALKER_TYPE::SIMD_SIZE>((simdSize == 1) ? (32 >> 4) : (simdSize >> 4));
}
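An illustrative compile-time check of getSimdConfig using a stand-in walker type; MockWalker and its SIMD_SIZE values are assumptions made for this sketch, not a real GfxFamily definition.

// Stand-in for a GfxFamily walker command, used only to exercise getSimdConfig.
struct MockWalker {
    enum SIMD_SIZE : uint32_t {
        SIMD_SIZE_SIMD8 = 0,
        SIMD_SIZE_SIMD16 = 1,
        SIMD_SIZE_SIMD32 = 2
    };
};

static_assert(getSimdConfig<MockWalker>(8) == MockWalker::SIMD_SIZE_SIMD8, "SIMD8 encodes as simdSize >> 4");
static_assert(getSimdConfig<MockWalker>(16) == MockWalker::SIMD_SIZE_SIMD16, "SIMD16 encodes as simdSize >> 4");
static_assert(getSimdConfig<MockWalker>(32) == MockWalker::SIMD_SIZE_SIMD32, "SIMD32 encodes as simdSize >> 4");
static_assert(getSimdConfig<MockWalker>(1) == MockWalker::SIMD_SIZE_SIMD32, "SIMD1 falls back to the SIMD32 encoding");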


@@ -0,0 +1,46 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstddef>
#include <cstdint>
namespace NEO {
class GmmHelper;
class IndirectHeap;
class LinearStream;
struct DispatchFlags;
template <typename GfxFamily>
struct StateBaseAddressHelper {
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
static void programStateBaseAddress(
LinearStream &commandStream,
const IndirectHeap *dsh,
const IndirectHeap *ioh,
const IndirectHeap *ssh,
uint64_t generalStateBase,
bool setGeneralStateBaseAddress,
uint32_t statelessMocsIndex,
uint64_t internalHeapBase,
bool setInstructionStateBaseAddress,
GmmHelper *gmmHelper,
bool isMultiOsContextCapable);
static void appendStateBaseAddressParameters(
STATE_BASE_ADDRESS *stateBaseAddress,
const IndirectHeap *ssh,
bool setGeneralStateBaseAddress,
uint64_t internalHeapBase,
GmmHelper *gmmHelper,
bool isMultiOsContextCapable);
static void programBindingTableBaseAddress(LinearStream &commandStream, const IndirectHeap &ssh, GmmHelper *gmmHelper);
};
} // namespace NEO


@@ -0,0 +1,79 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "gmm_helper/gmm_helper.h"
#include "helpers/cache_policy.h"
#include "helpers/hw_cmds.h"
#include "helpers/state_base_address.h"
#include "indirect_heap/indirect_heap.h"
#include "memory_manager/memory_constants.h"
namespace NEO {
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
LinearStream &commandStream,
const IndirectHeap *dsh,
const IndirectHeap *ioh,
const IndirectHeap *ssh,
uint64_t generalStateBase,
bool setGeneralStateBaseAddress,
uint32_t statelessMocsIndex,
uint64_t internalHeapBase,
bool setInstructionStateBaseAddress,
GmmHelper *gmmHelper,
bool isMultiOsContextCapable) {
auto pCmd = static_cast<STATE_BASE_ADDRESS *>(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS)));
*pCmd = GfxFamily::cmdInitStateBaseAddress;
if (dsh) {
pCmd->setDynamicStateBaseAddressModifyEnable(true);
pCmd->setDynamicStateBufferSizeModifyEnable(true);
pCmd->setDynamicStateBaseAddress(dsh->getHeapGpuBase());
pCmd->setDynamicStateBufferSize(dsh->getHeapSizeInPages());
}
if (ioh) {
pCmd->setIndirectObjectBaseAddressModifyEnable(true);
pCmd->setIndirectObjectBufferSizeModifyEnable(true);
pCmd->setIndirectObjectBaseAddress(ioh->getHeapGpuBase());
pCmd->setIndirectObjectBufferSize(ioh->getHeapSizeInPages());
}
if (ssh) {
pCmd->setSurfaceStateBaseAddressModifyEnable(true);
pCmd->setSurfaceStateBaseAddress(ssh->getHeapGpuBase());
}
if (setInstructionStateBaseAddress) {
pCmd->setInstructionBaseAddressModifyEnable(true);
pCmd->setInstructionBaseAddress(internalHeapBase);
pCmd->setInstructionBufferSizeModifyEnable(true);
pCmd->setInstructionBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
pCmd->setInstructionMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER));
}
if (setGeneralStateBaseAddress) {
pCmd->setGeneralStateBaseAddressModifyEnable(true);
pCmd->setGeneralStateBufferSizeModifyEnable(true);
// GSH must be set to 0 for stateless
pCmd->setGeneralStateBaseAddress(GmmHelper::decanonize(generalStateBase));
pCmd->setGeneralStateBufferSize(0xfffff);
}
if (DebugManager.flags.OverrideStatelessMocsIndex.get() != -1) {
statelessMocsIndex = DebugManager.flags.OverrideStatelessMocsIndex.get();
}
statelessMocsIndex = statelessMocsIndex << 1;
pCmd->setStatelessDataPortAccessMemoryObjectControlState(statelessMocsIndex);
appendStateBaseAddressParameters(pCmd, ssh, setGeneralStateBaseAddress, internalHeapBase, gmmHelper, isMultiOsContextCapable);
}
} // namespace NEO


@@ -0,0 +1,26 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/state_base_address_base.inl"
namespace NEO {
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
STATE_BASE_ADDRESS *stateBaseAddress,
const IndirectHeap *ssh,
bool setGeneralStateBaseAddress,
uint64_t internalHeapBase,
GmmHelper *gmmHelper,
bool isMultiOsContextCapable) {
}
template <typename GfxFamily>
void StateBaseAddressHelper<GfxFamily>::programBindingTableBaseAddress(LinearStream &commandStream, const IndirectHeap &ssh, GmmHelper *gmmHelper) {
}
} // namespace NEO


@@ -0,0 +1,17 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_stream/csr_definitions.h"
#include "helpers/hw_cmds.h"
namespace NEO {
template <typename GfxFamily>
struct StateComputeModeHelper {
static bool isStateComputeModeRequired(CsrSizeRequestFlags &csrSizeRequestFlags, bool isThreadArbitionPolicyProgrammed);
};
} // namespace NEO


@@ -0,0 +1,31 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#ifndef _WIN32
#ifndef __STDC_LIB_EXT1__
#if __STDC_WANT_LIB_EXT1__ != 1
#include <cstdio>
#include <errno.h>
inline int fopen_s(FILE **pFile, const char *filename, const char *mode) {
if ((pFile == nullptr) || (filename == nullptr) || (mode == nullptr)) {
return -EINVAL;
}
*pFile = fopen(filename, mode);
if (*pFile == nullptr) {
return -errno;
}
return 0;
}
#endif
#endif
#endif
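A brief usage sketch of the fopen_s fallback above; "example.txt" is a placeholder path.

#include <cstdio>

inline void fopenSUsageSketch() {
    FILE *file = nullptr;
    // Returns 0 on success and a negative errno-style code otherwise (mirroring the fallback above).
    if (fopen_s(&file, "example.txt", "rb") == 0) {
        fclose(file);
    }
}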


@@ -0,0 +1,106 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <memory>
#include <type_traits>
#if defined(__linux__)
#include <cstring>
#include <errno.h>
#include <string>
inline int strcpy_s(char *dst, size_t dstSize, const char *src) {
if ((dst == nullptr) || (src == nullptr)) {
return -EINVAL;
}
size_t length = strlen(src);
if (dstSize <= length) {
return -ERANGE;
}
memcpy(dst, src, length);
dst[length] = '\0';
return 0;
}
inline int strncpy_s(char *dst, size_t numberOfElements, const char *src, size_t count) {
if ((dst == nullptr) || (src == nullptr)) {
return -EINVAL;
}
if (numberOfElements < count) {
return -ERANGE;
}
size_t length = strlen(src);
if (length > count) {
length = count;
}
memcpy(dst, src, length);
if (length < numberOfElements) {
numberOfElements = length;
}
dst[numberOfElements] = '\0';
return 0;
}
inline size_t strnlen_s(const char *str, size_t count) {
if (str == nullptr) {
return 0;
}
for (size_t i = 0; i < count; ++i) {
if (str[i] == '\0')
return i;
}
return count;
}
inline int memcpy_s(void *dst, size_t destSize, const void *src, size_t count) {
if ((dst == nullptr) || (src == nullptr)) {
return -EINVAL;
}
if (destSize < count) {
return -ERANGE;
}
memcpy(dst, src, count);
return 0;
}
inline int memmove_s(void *dst, size_t numberOfElements, const void *src, size_t count) {
if ((dst == nullptr) || (src == nullptr)) {
return -EINVAL;
}
if (numberOfElements < count) {
return -ERANGE;
}
memmove(dst, src, count);
return 0;
}
#endif
template <typename T = char>
inline std::unique_ptr<T[]> makeCopy(const void *src, size_t size) {
if (size == 0) {
return nullptr;
}
using ElT = typename std::remove_all_extents<T>::type;
std::unique_ptr<T[]> copiedData(new ElT[size]);
memcpy_s(copiedData.get(), size, src, size);
return copiedData;
}
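A brief usage sketch of the safe-string fallbacks, assuming the Linux implementations above (the Windows CRT versions report errors differently); the strings and sizes are illustrative.

#include <cassert>
#include <cerrno>
#include <cstdint>

inline void safeStringHelpersSketch() {
    char destination[8] = {};
    assert(strcpy_s(destination, sizeof(destination), "neo") == 0); // fits, copied and null-terminated
    assert(strcpy_s(destination, 3, "neo") == -ERANGE);             // no room left for the terminator
    assert(strnlen_s("runtime", 3) == 3u);                          // bounded length scan

    uint32_t value = 0;
    assert(memcpy_s(&value, sizeof(value), "\x01\x00\x00\x00", 4) == 0);

    auto copy = makeCopy("abc", 4); // unique_ptr<char[]> holding "abc" plus the terminator
    assert(copy[2] == 'c');
}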


@@ -0,0 +1,254 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "gmm_helper/gmm_lib.h"
namespace NEO {
enum GFX3DSTATE_SURFACEFORMAT : unsigned short {
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_FLOAT = 0x000,
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SINT = 0x001,
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_UINT = 0x002,
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_UNORM = 0x003,
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SNORM = 0x004,
GFX3DSTATE_SURFACEFORMAT_R64G64_FLOAT = 0x005,
GFX3DSTATE_SURFACEFORMAT_R32G32B32X32_FLOAT = 0x006,
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SSCALED = 0x007,
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_USCALED = 0x008,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_FLOAT = 0x040,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_SINT = 0x041,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_UINT = 0x042,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_UNORM = 0x043,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_SNORM = 0x044,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_SSCALED = 0x045,
GFX3DSTATE_SURFACEFORMAT_R32G32B32_USCALED = 0x046,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM = 0x080,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SNORM = 0x081,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SINT = 0x082,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UINT = 0x083,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_FLOAT = 0x084,
GFX3DSTATE_SURFACEFORMAT_R32G32_FLOAT = 0x085,
GFX3DSTATE_SURFACEFORMAT_R32G32_SINT = 0x086,
GFX3DSTATE_SURFACEFORMAT_R32G32_UINT = 0x087,
GFX3DSTATE_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS = 0x088,
GFX3DSTATE_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT = 0x089,
GFX3DSTATE_SURFACEFORMAT_L32A32_FLOAT = 0x08A,
GFX3DSTATE_SURFACEFORMAT_R32G32_UNORM = 0x08B,
GFX3DSTATE_SURFACEFORMAT_R32G32_SNORM = 0x08C,
GFX3DSTATE_SURFACEFORMAT_R64_FLOAT = 0x08D,
GFX3DSTATE_SURFACEFORMAT_R16G16B16X16_UNORM = 0x08E,
GFX3DSTATE_SURFACEFORMAT_R16G16B16X16_FLOAT = 0x08F,
GFX3DSTATE_SURFACEFORMAT_A32X32_FLOAT = 0x090,
GFX3DSTATE_SURFACEFORMAT_L32X32_FLOAT = 0x091,
GFX3DSTATE_SURFACEFORMAT_I32X32_FLOAT = 0x092,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SSCALED = 0x093,
GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_USCALED = 0x094,
GFX3DSTATE_SURFACEFORMAT_R32G32_SSCALED = 0x095,
GFX3DSTATE_SURFACEFORMAT_R32G32_USCALED = 0x096,
GFX3DSTATE_SURFACEFORMAT_B8G8R8A8_UNORM = 0x0C0,
GFX3DSTATE_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB = 0x0C1,
GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM = 0x0C2,
GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB = 0x0C3,
GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UINT = 0x0C4,
GFX3DSTATE_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM = 0x0C5,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM = 0x0C7,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB = 0x0C8,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SNORM = 0x0C9,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SINT = 0x0CA,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UINT = 0x0CB,
GFX3DSTATE_SURFACEFORMAT_R16G16_UNORM = 0x0CC,
GFX3DSTATE_SURFACEFORMAT_R16G16_SNORM = 0x0CD,
GFX3DSTATE_SURFACEFORMAT_R16G16_SINT = 0x0CE,
GFX3DSTATE_SURFACEFORMAT_R16G16_UINT = 0x0CF,
GFX3DSTATE_SURFACEFORMAT_R16G16_FLOAT = 0x0D0,
GFX3DSTATE_SURFACEFORMAT_B10G10R10A2_UNORM = 0x0D1,
GFX3DSTATE_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB = 0x0D2,
GFX3DSTATE_SURFACEFORMAT_R11G11B10_FLOAT = 0x0D3,
GFX3DSTATE_SURFACEFORMAT_R32_SINT = 0x0D6,
GFX3DSTATE_SURFACEFORMAT_R32_UINT = 0x0D7,
GFX3DSTATE_SURFACEFORMAT_R32_FLOAT = 0x0D8,
GFX3DSTATE_SURFACEFORMAT_R24_UNORM_X8_TYPELESS = 0x0D9,
GFX3DSTATE_SURFACEFORMAT_X24_TYPELESS_G8_UINT = 0x0DA,
GFX3DSTATE_SURFACEFORMAT_L16A16_UNORM = 0x0DF,
GFX3DSTATE_SURFACEFORMAT_I24X8_UNORM = 0x0E0,
GFX3DSTATE_SURFACEFORMAT_L24X8_UNORM = 0x0E1,
GFX3DSTATE_SURFACEFORMAT_A24X8_UNORM = 0x0E2,
GFX3DSTATE_SURFACEFORMAT_I32_FLOAT = 0x0E3,
GFX3DSTATE_SURFACEFORMAT_L32_FLOAT = 0x0E4,
GFX3DSTATE_SURFACEFORMAT_A32_FLOAT = 0x0E5,
GFX3DSTATE_SURFACEFORMAT_B8G8R8X8_UNORM = 0x0E9,
GFX3DSTATE_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB = 0x0EA,
GFX3DSTATE_SURFACEFORMAT_R8G8B8X8_UNORM = 0x0EB,
GFX3DSTATE_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB = 0x0EC,
GFX3DSTATE_SURFACEFORMAT_R9G9B9E5_SHAREDEXP = 0x0ED,
GFX3DSTATE_SURFACEFORMAT_B10G10R10X2_UNORM = 0x0EE,
GFX3DSTATE_SURFACEFORMAT_L16A16_FLOAT = 0x0F0,
GFX3DSTATE_SURFACEFORMAT_R32_UNORM = 0x0F1,
GFX3DSTATE_SURFACEFORMAT_R32_SNORM = 0x0F2,
GFX3DSTATE_SURFACEFORMAT_R10G10B10X2_USCALED = 0x0F3,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SSCALED = 0x0F4,
GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_USCALED = 0x0F5,
GFX3DSTATE_SURFACEFORMAT_R16G16_SSCALED = 0x0F6,
GFX3DSTATE_SURFACEFORMAT_R16G16_USCALED = 0x0F7,
GFX3DSTATE_SURFACEFORMAT_R32_SSCALED = 0x0F8,
GFX3DSTATE_SURFACEFORMAT_R32_USCALED = 0x0F9,
GFX3DSTATE_SURFACEFORMAT_B5G6R5_UNORM = 0x100,
GFX3DSTATE_SURFACEFORMAT_B5G6R5_UNORM_SRGB = 0x101,
GFX3DSTATE_SURFACEFORMAT_B5G5R5A1_UNORM = 0x102,
GFX3DSTATE_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB = 0x103,
GFX3DSTATE_SURFACEFORMAT_B4G4R4A4_UNORM = 0x104,
GFX3DSTATE_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB = 0x105,
GFX3DSTATE_SURFACEFORMAT_R8G8_UNORM = 0x106,
GFX3DSTATE_SURFACEFORMAT_R8G8_SNORM = 0x107,
GFX3DSTATE_SURFACEFORMAT_R8G8_SINT = 0x108,
GFX3DSTATE_SURFACEFORMAT_R8G8_UINT = 0x109,
GFX3DSTATE_SURFACEFORMAT_R16_UNORM = 0x10A,
GFX3DSTATE_SURFACEFORMAT_R16_SNORM = 0x10B,
GFX3DSTATE_SURFACEFORMAT_R16_SINT = 0x10C,
GFX3DSTATE_SURFACEFORMAT_R16_UINT = 0x10D,
GFX3DSTATE_SURFACEFORMAT_R16_FLOAT = 0x10E,
GFX3DSTATE_SURFACEFORMAT_I16_UNORM = 0x111,
GFX3DSTATE_SURFACEFORMAT_L16_UNORM = 0x112,
GFX3DSTATE_SURFACEFORMAT_A16_UNORM = 0x113,
GFX3DSTATE_SURFACEFORMAT_L8A8_UNORM = 0x114,
GFX3DSTATE_SURFACEFORMAT_I16_FLOAT = 0x115,
GFX3DSTATE_SURFACEFORMAT_L16_FLOAT = 0x116,
GFX3DSTATE_SURFACEFORMAT_A16_FLOAT = 0x117,
GFX3DSTATE_SURFACEFORMAT_L8A8_UNORM_SRGB = 0x118,
GFX3DSTATE_SURFACEFORMAT_R5G5_SNORM_B6_UNORM = 0x119,
GFX3DSTATE_SURFACEFORMAT_B5G5R5X1_UNORM = 0x11A,
GFX3DSTATE_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB = 0x11B,
GFX3DSTATE_SURFACEFORMAT_R8G8_SSCALED = 0x11C,
GFX3DSTATE_SURFACEFORMAT_R8G8_USCALED = 0x11D,
GFX3DSTATE_SURFACEFORMAT_R16_SSCALED = 0x11E,
GFX3DSTATE_SURFACEFORMAT_R16_USCALED = 0x11F,
GFX3DSTATE_SURFACEFORMAT_R8_UNORM = 0x140,
GFX3DSTATE_SURFACEFORMAT_R8_SNORM = 0x141,
GFX3DSTATE_SURFACEFORMAT_R8_SINT = 0x142,
GFX3DSTATE_SURFACEFORMAT_R8_UINT = 0x143,
GFX3DSTATE_SURFACEFORMAT_A8_UNORM = 0x144,
GFX3DSTATE_SURFACEFORMAT_I8_UNORM = 0x145,
GFX3DSTATE_SURFACEFORMAT_L8_UNORM = 0x146,
GFX3DSTATE_SURFACEFORMAT_P4A4_UNORM = 0x147,
GFX3DSTATE_SURFACEFORMAT_A4P4_UNORM = 0x148,
GFX3DSTATE_SURFACEFORMAT_R8_SSCALED = 0x149,
GFX3DSTATE_SURFACEFORMAT_R8_USCALED = 0x14A,
GFX3DSTATE_SURFACEFORMAT_P8_UNORM = 0x14B,
GFX3DSTATE_SURFACEFORMAT_L8_UNORM_SRGB = 0x14C,
GFX3DSTATE_SURFACEFORMAT_DXT1_RGB_SRGB = 0x180,
GFX3DSTATE_SURFACEFORMAT_R1_UINT = 0x181,
GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL = 0x182,
GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY = 0x183,
GFX3DSTATE_SURFACEFORMAT_P2_UNORM = 0x184,
GFX3DSTATE_SURFACEFORMAT_BC1_UNORM = 0x186,
GFX3DSTATE_SURFACEFORMAT_BC2_UNORM = 0x187,
GFX3DSTATE_SURFACEFORMAT_BC3_UNORM = 0x188,
GFX3DSTATE_SURFACEFORMAT_BC4_UNORM = 0x189,
GFX3DSTATE_SURFACEFORMAT_BC5_UNORM = 0x18A,
GFX3DSTATE_SURFACEFORMAT_BC1_UNORM_SRGB = 0x18B,
GFX3DSTATE_SURFACEFORMAT_BC2_UNORM_SRGB = 0x18C,
GFX3DSTATE_SURFACEFORMAT_BC3_UNORM_SRGB = 0x18D,
GFX3DSTATE_SURFACEFORMAT_MONO8 = 0x18E,
GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUV = 0x18F,
GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY = 0x190,
GFX3DSTATE_SURFACEFORMAT_DXT1_RGB = 0x191,
GFX3DSTATE_SURFACEFORMAT_FXT1 = 0x192,
GFX3DSTATE_SURFACEFORMAT_R8G8B8_UNORM = 0x193,
GFX3DSTATE_SURFACEFORMAT_R8G8B8_SNORM = 0x194,
GFX3DSTATE_SURFACEFORMAT_R8G8B8_SSCALED = 0x195,
GFX3DSTATE_SURFACEFORMAT_R8G8B8_USCALED = 0x196,
GFX3DSTATE_SURFACEFORMAT_R64G64B64A64_FLOAT = 0x197,
GFX3DSTATE_SURFACEFORMAT_R64G64B64_FLOAT = 0x198,
GFX3DSTATE_SURFACEFORMAT_BC4_SNORM = 0x199,
GFX3DSTATE_SURFACEFORMAT_BC5_SNORM = 0x19A,
GFX3DSTATE_SURFACEFORMAT_R16G16B16_FLOAT = 0x19B,
GFX3DSTATE_SURFACEFORMAT_R16G16B16_UNORM = 0x19C,
GFX3DSTATE_SURFACEFORMAT_R16G16B16_SNORM = 0x19D,
GFX3DSTATE_SURFACEFORMAT_R16G16B16_SSCALED = 0x19E,
GFX3DSTATE_SURFACEFORMAT_R16G16B16_USCALED = 0x19F,
GFX3DSTATE_SURFACEFORMAT_BC6H_SF16 = 0x1A1,
GFX3DSTATE_SURFACEFORMAT_BC7_UNORM = 0x1A2,
GFX3DSTATE_SURFACEFORMAT_BC7_UNORM_SRGB = 0x1A3,
GFX3DSTATE_SURFACEFORMAT_BC6H_UF16 = 0x1A4,
GFX3DSTATE_SURFACEFORMAT_NV12 = 0x1A5,
GFX3DSTATE_SURFACEFORMAT_RAW = 0x1FF,
NUM_GFX3DSTATE_SURFACEFORMATS
};
enum class ImagePlane {
NO_PLANE = 0,
PLANE_Y,
PLANE_U,
PLANE_V,
PLANE_UV
};
struct SurfaceFormatInfo {
GMM_RESOURCE_FORMAT GMMSurfaceFormat;
GFX3DSTATE_SURFACEFORMAT GenxSurfaceFormat;
uint32_t GMMTileWalk;
uint32_t NumChannels;
uint32_t PerChannelSizeInBytes;
size_t ImageElementSizeInBytes;
};
enum class ImageType {
Invalid,
Image1D,
Image2D,
Image3D,
Image1DArray,
Image2DArray,
Image1DBuffer
};
struct ImageDescriptor {
ImageType imageType;
size_t imageWidth;
size_t imageHeight;
size_t imageDepth;
size_t imageArraySize;
size_t imageRowPitch;
size_t imageSlicePitch;
uint32_t numMipLevels;
uint32_t numSamples;
bool fromParent;
};
struct ImageInfo {
ImageDescriptor imgDesc;
const SurfaceFormatInfo *surfaceFormat;
size_t size;
size_t rowPitch;
size_t slicePitch;
uint32_t qPitch;
size_t offset;
uint32_t xOffset;
uint32_t yOffset;
uint32_t yOffsetForUVPlane;
GMM_YUV_PLANE_ENUM plane;
uint32_t baseMipLevel;
uint32_t mipCount;
bool linearStorage;
bool preferRenderCompression;
bool useLocalMemory;
};
struct McsSurfaceInfo {
uint32_t pitch;
uint32_t qPitch;
uint32_t multisampleCount;
};
struct SurfaceOffsets {
uint64_t offset;
uint32_t xOffset;
uint32_t yOffset;
uint32_t yOffsetForUVplane;
};
} // namespace NEO
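A small illustrative helper showing how an ImageDescriptor might be filled for a plain 2D image; the dimensions and the zeroed sample/mip settings are placeholder assumptions.

// Illustrative only: describes a hypothetical 256x128 2D image with no mip chain or multisampling.
inline NEO::ImageDescriptor makeExample2dImageDescriptor() {
    NEO::ImageDescriptor desc = {};
    desc.imageType = NEO::ImageType::Image2D;
    desc.imageWidth = 256;
    desc.imageHeight = 128;
    desc.imageDepth = 1;
    desc.imageArraySize = 1;
    desc.numMipLevels = 1;
    desc.numSamples = 0;
    return desc;
}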


@@ -0,0 +1,66 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "helpers/timestamp_packet.h"
#include "command_stream/command_stream_receiver.h"
#include "command_stream/linear_stream.h"
#include "utilities/tag_allocator.h"
using namespace NEO;
void TimestampPacketContainer::add(Node *timestampPacketNode) {
timestampPacketNodes.push_back(timestampPacketNode);
}
TimestampPacketContainer::~TimestampPacketContainer() {
for (auto node : timestampPacketNodes) {
node->returnTag();
}
}
void TimestampPacketContainer::swapNodes(TimestampPacketContainer &timestampPacketContainer) {
timestampPacketNodes.swap(timestampPacketContainer.timestampPacketNodes);
}
void TimestampPacketContainer::resolveDependencies(bool clearAllDependencies) {
std::vector<Node *> pendingNodes;
for (auto node : timestampPacketNodes) {
if (node->canBeReleased() || clearAllDependencies) {
node->returnTag();
} else {
pendingNodes.push_back(node);
}
}
std::swap(timestampPacketNodes, pendingNodes);
}
void TimestampPacketContainer::assignAndIncrementNodesRefCounts(const TimestampPacketContainer &inputTimestampPacketContainer) {
auto &inputNodes = inputTimestampPacketContainer.peekNodes();
std::copy(inputNodes.begin(), inputNodes.end(), std::back_inserter(timestampPacketNodes));
for (auto node : inputNodes) {
node->incRefCount();
}
}
void TimestampPacketContainer::makeResident(CommandStreamReceiver &commandStreamReceiver) {
for (auto node : timestampPacketNodes) {
commandStreamReceiver.makeResident(*node->getBaseGraphicsAllocation());
}
}
bool TimestampPacketContainer::isCompleted() const {
for (auto node : timestampPacketNodes) {
if (!node->tagForCpuAccess->isCompleted()) {
return false;
}
}
return true;
}


@@ -0,0 +1,179 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_container/command_encoder.h"
#include "command_stream/csr_deps.h"
#include "helpers/aux_translation.h"
#include "helpers/non_copyable_or_moveable.h"
#include "utilities/tag_allocator.h"
#include <atomic>
#include <cstdint>
#include <vector>
namespace NEO {
class CommandStreamReceiver;
class LinearStream;
namespace TimestampPacketSizeControl {
constexpr uint32_t preferredPacketCount = 16u;
}
#pragma pack(1)
struct TimestampPacketStorage {
struct Packet {
uint32_t contextStart = 1u;
uint32_t globalStart = 1u;
uint32_t contextEnd = 1u;
uint32_t globalEnd = 1u;
};
enum class WriteOperationType : uint32_t {
BeforeWalker,
AfterWalker
};
static GraphicsAllocation::AllocationType getAllocationType() {
return GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER;
}
bool isCompleted() const {
for (uint32_t i = 0; i < packetsUsed; i++) {
if ((packets[i].contextEnd & 1) || (packets[i].globalEnd & 1)) {
return false;
}
}
return implicitDependenciesCount.load() == 0;
}
void initialize() {
for (auto &packet : packets) {
packet.contextStart = 1u;
packet.globalStart = 1u;
packet.contextEnd = 1u;
packet.globalEnd = 1u;
}
implicitDependenciesCount.store(0);
packetsUsed = 1;
}
void incImplicitDependenciesCount() { implicitDependenciesCount++; }
Packet packets[TimestampPacketSizeControl::preferredPacketCount];
std::atomic<uint32_t> implicitDependenciesCount{0u};
uint32_t packetsUsed = 1;
};
#pragma pack()
static_assert(((4 * TimestampPacketSizeControl::preferredPacketCount + 2) * sizeof(uint32_t)) == sizeof(TimestampPacketStorage),
"This structure is consumed by GPU and has to follow specific restrictions for padding and size");
class TimestampPacketContainer : public NonCopyableClass {
public:
using Node = TagNode<TimestampPacketStorage>;
TimestampPacketContainer() = default;
TimestampPacketContainer(TimestampPacketContainer &&) = default;
TimestampPacketContainer &operator=(TimestampPacketContainer &&) = default;
MOCKABLE_VIRTUAL ~TimestampPacketContainer();
const std::vector<Node *> &peekNodes() const { return timestampPacketNodes; }
void add(Node *timestampPacketNode);
void swapNodes(TimestampPacketContainer &timestampPacketContainer);
void assignAndIncrementNodesRefCounts(const TimestampPacketContainer &inputTimestampPacketContainer);
void resolveDependencies(bool clearAllDependencies);
void makeResident(CommandStreamReceiver &commandStreamReceiver);
bool isCompleted() const;
protected:
std::vector<Node *> timestampPacketNodes;
};
struct TimestampPacketDependencies : public NonCopyableClass {
TimestampPacketContainer previousEnqueueNodes;
TimestampPacketContainer barrierNodes;
TimestampPacketContainer auxToNonAuxNodes;
TimestampPacketContainer nonAuxToAuxNodes;
};
struct TimestampPacketHelper {
template <typename GfxFamily>
static void programSemaphoreWithImplicitDependency(LinearStream &cmdStream, TagNode<TimestampPacketStorage> &timestampPacketNode) {
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
auto compareAddress = timestampPacketNode.getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
auto dependenciesCountAddress = timestampPacketNode.getGpuAddress() + offsetof(TimestampPacketStorage, implicitDependenciesCount);
for (uint32_t packetId = 0; packetId < timestampPacketNode.tagForCpuAccess->packetsUsed; packetId++) {
uint64_t compareOffset = packetId * sizeof(TimestampPacketStorage::Packet);
auto miSemaphoreCmd = cmdStream.getSpaceForCmd<MI_SEMAPHORE_WAIT>();
EncodeSempahore<GfxFamily>::programMiSemaphoreWait(miSemaphoreCmd, compareAddress + compareOffset, 1, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
}
timestampPacketNode.tagForCpuAccess->incImplicitDependenciesCount();
auto miAtomic = cmdStream.getSpaceForCmd<MI_ATOMIC>();
EncodeAtomic<GfxFamily>::programMiAtomic(miAtomic, dependenciesCountAddress,
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_DECREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD);
}
template <typename GfxFamily>
static void programCsrDependencies(LinearStream &cmdStream, const CsrDependencies &csrDependencies) {
for (auto timestampPacketContainer : csrDependencies) {
for (auto &node : timestampPacketContainer->peekNodes()) {
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(cmdStream, *node);
}
}
}
template <typename GfxFamily, AuxTranslationDirection auxTranslationDirection>
static void programSemaphoreWithImplicitDependencyForAuxTranslation(LinearStream &cmdStream,
const TimestampPacketDependencies *timestampPacketDependencies) {
auto &container = (auxTranslationDirection == AuxTranslationDirection::AuxToNonAux)
? timestampPacketDependencies->auxToNonAuxNodes
: timestampPacketDependencies->nonAuxToAuxNodes;
for (auto &node : container.peekNodes()) {
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(cmdStream, *node);
}
}
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForAuxTranslationNodeDependency(size_t count) {
return count * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>();
}
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue() {
return sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) + sizeof(typename GfxFamily::MI_ATOMIC);
}
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForNodeDependency(TagNode<TimestampPacketStorage> &timestampPacketNode) {
size_t totalMiSemaphoreWaitSize = timestampPacketNode.tagForCpuAccess->packetsUsed * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
return totalMiSemaphoreWaitSize + sizeof(typename GfxFamily::MI_ATOMIC);
}
template <typename GfxFamily>
static size_t getRequiredCmdStreamSize(const CsrDependencies &csrDependencies) {
size_t totalCommandsSize = 0;
for (auto timestampPacketContainer : csrDependencies) {
for (auto &node : timestampPacketContainer->peekNodes()) {
totalCommandsSize += getRequiredCmdStreamSizeForNodeDependency<GfxFamily>(*node);
}
}
return totalCommandsSize;
}
};
} // namespace NEO


@@ -0,0 +1,62 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
template <typename T>
struct Vec3 {
Vec3(T x, T y, T z) : x(x), y(y), z(z) {}
Vec3(const Vec3 &v) : x(v.x), y(v.y), z(v.z) {}
Vec3(const T *arr) {
if (arr == nullptr) {
x = y = z = 0;
} else {
x = arr[0];
y = arr[1];
z = arr[2];
}
}
Vec3 &operator=(const Vec3 &arr) {
x = arr.x;
y = arr.y;
z = arr.z;
return *this;
}
Vec3<T> &operator=(const T arr[3]) {
x = arr[0];
y = arr[1];
z = arr[2];
return *this;
}
bool operator==(const Vec3<T> &vec) const {
return ((x == vec.x) && (y == vec.y) && (z == vec.z));
}
bool operator!=(const Vec3<T> &vec) const {
return !operator==(vec);
}
unsigned int getSimplifiedDim() const {
if (z > 1) {
return 3;
}
if (y > 1) {
return 2;
}
if (x >= 1) {
return 1;
}
return 0;
}
T x;
T y;
T z;
};
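A small usage sketch of Vec3; the work-size values are illustrative only.

#include <cassert>
#include <cstddef>

inline void vec3UsageSketch() {
    const size_t globalWorkSize[3] = {64, 1, 1};
    Vec3<size_t> gws(globalWorkSize);     // construct from a 3-element array
    assert(gws.getSimplifiedDim() == 1u); // only x exceeds 1, so this is a 1D dispatch

    gws = Vec3<size_t>(64, 8, 1);
    assert(gws.getSimplifiedDim() == 2u); // y > 1 promotes it to 2D
    assert(gws != Vec3<size_t>(nullptr)); // a null source array yields (0, 0, 0)
}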


@@ -0,0 +1,13 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
long(__stdcall *notifyAubCaptureImpl)(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate) = nullptr;
} // namespace NEO


@@ -0,0 +1,26 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
extern long(__stdcall *notifyAubCaptureImpl)(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate);
template <typename GfxFamily>
struct DeviceCallbacks {
static long __stdcall notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate);
};
template <typename GfxFamily>
struct TTCallbacks {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
static int __stdcall writeL3Address(void *queueHandle, uint64_t l3GfxAddress, uint64_t regOffset);
};
} // namespace NEO


@@ -0,0 +1,26 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "command_stream/linear_stream.h"
#include "helpers/hw_helper.h"
#include "helpers/windows/gmm_callbacks.h"
#include <cstdint>
namespace NEO {
template <typename GfxFamily>
long __stdcall DeviceCallbacks<GfxFamily>::notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate) {
return 0;
}
template <typename GfxFamily>
int __stdcall TTCallbacks<GfxFamily>::writeL3Address(void *queueHandle, uint64_t l3GfxAddress, uint64_t regOffset) {
return 0;
}
} // namespace NEO