From 835a1da175df4bfcad3c89c3f187d1175fc9ebe4 Mon Sep 17 00:00:00 2001
From: Mateusz Jablonski
Date: Wed, 4 Apr 2018 19:02:07 +0200
Subject: [PATCH] Update kernel allocation when substitute kernel heap

Kernel::substituteKernelHeap now refreshes KernelInfo::kernelAllocation:
the new heap is copied into the existing allocation when it fits,
otherwise the allocation is handed back to the memory manager and
recreated. Allocation creation is factored out of Program::parsePatchList
into KernelInfo::createKernelAllocation so both paths share it.

Change-Id: Iee02a93d4e10c7b32fae56ffa61c90d8617d6ec9
---
 runtime/kernel/kernel.cpp                     |  16 ++
 runtime/program/kernel_info.cpp               |  16 ++
 runtime/program/kernel_info.h                 |   3 +
 runtime/program/process_gen_binary.cpp        |  12 +-
 unit_tests/gtpin/gtpin_tests.cpp              |   6 +-
 unit_tests/kernel/CMakeLists.txt              |   1 +
 .../kernel/substitute_kernel_heap_tests.cpp   | 147 ++++++++++++++++++
 unit_tests/program/kernel_info_tests.cpp      |  44 +++++-
 8 files changed, 229 insertions(+), 16 deletions(-)
 create mode 100644 unit_tests/kernel/substitute_kernel_heap_tests.cpp

diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp
index bf5155fdfc..1a081942ad 100644
--- a/runtime/kernel/kernel.cpp
+++ b/runtime/kernel/kernel.cpp
@@ -697,6 +697,22 @@ void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize)
     SKernelBinaryHeaderCommon *pHeader = const_cast<SKernelBinaryHeaderCommon *>(pKernelInfo->heapInfo.pKernelHeader);
     pHeader->KernelHeapSize = static_cast<uint32_t>(newKernelHeapSize);
     pKernelInfo->isKernelHeapSubstituted = true;
+
+    // Keep kernelAllocation in sync with the substituted heap.
+    // NOTE(review): assumes kernelAllocation is non-null here; parsePatchList only creates it
+    // when a device is set and KernelHeapSize is non-zero - confirm for all callers.
+    auto currentAllocationSize = pKernelInfo->kernelAllocation->getUnderlyingBufferSize();
+    if (currentAllocationSize >= newKernelHeapSize) {
+        // The new heap fits: overwrite the existing allocation in place.
+        memcpy_s(pKernelInfo->kernelAllocation->getUnderlyingBuffer(), currentAllocationSize, newKernelHeap, newKernelHeapSize);
+    } else {
+        // The new heap is larger: hand the old allocation to the memory manager and let
+        // createKernelAllocation() build a new one from the header size updated above.
+        auto memoryManager = device.getMemoryManager();
+        memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->kernelAllocation);
+        pKernelInfo->kernelAllocation = nullptr;
+        pKernelInfo->createKernelAllocation(memoryManager);
+    }
 }
 
 bool Kernel::isKernelHeapSubstituted() const {
diff --git a/runtime/program/kernel_info.cpp b/runtime/program/kernel_info.cpp
index e619509c21..ab0ee2d373 100644
--- a/runtime/program/kernel_info.cpp
+++ b/runtime/program/kernel_info.cpp
@@ -27,6 +27,7 @@
 #include "runtime/helpers/ptr_math.h"
 #include "runtime/mem_obj/buffer.h"
 #include "runtime/mem_obj/image.h"
+#include "runtime/memory_manager/memory_manager.h"
 #include "runtime/kernel/kernel.h"
 #include "runtime/sampler/sampler.h"
 #include "runtime/helpers/string.h"
@@ -499,4 +500,19 @@ size_t KernelInfo::getBorderColorOffset() const {
 uint32_t KernelInfo::getConstantBufferSize() const {
     return patchInfo.dataParameterStream ? patchInfo.dataParameterStream->DataParameterStreamSize : 0;
 }
+
+// Allocates kernelAllocation through memoryManager, sized by the kernel header's
+// KernelHeapSize, and copies the kernel heap into it. Must not be called while
+// kernelAllocation is already set. Returns false when the allocation fails.
+bool KernelInfo::createKernelAllocation(MemoryManager *memoryManager) {
+    UNRECOVERABLE_IF(kernelAllocation);
+    auto kernelIsaSize = heapInfo.pKernelHeader->KernelHeapSize;
+    kernelAllocation = memoryManager->createInternalGraphicsAllocation(nullptr, kernelIsaSize);
+    if (!kernelAllocation) {
+        return false;
+    }
+    memcpy_s(kernelAllocation->getUnderlyingBuffer(), kernelIsaSize, heapInfo.pKernelHeap, kernelIsaSize);
+    return true;
+}
+
 } // namespace OCLRT
diff --git a/runtime/program/kernel_info.h b/runtime/program/kernel_info.h
index 4674f74b09..1487b683b7 100644
--- a/runtime/program/kernel_info.h
+++ b/runtime/program/kernel_info.h
@@ -43,6 +43,7 @@ struct KernelInfo;
 class DispatchInfo;
 struct KernelArgumentType;
 class GraphicsAllocation;
+class MemoryManager;
 
 extern std::unordered_map<std::string, uint32_t> accessQualifierMap;
 extern std::unordered_map<std::string, uint32_t> addressQualifierMap;
@@ -219,6 +220,8 @@ struct KernelInfo {
         return -1;
     }
 
+    bool createKernelAllocation(MemoryManager *memoryManager);
+
     std::string name;
     std::string attributes;
     HeapInfo heapInfo;
diff --git a/runtime/program/process_gen_binary.cpp b/runtime/program/process_gen_binary.cpp
index 6cebf142b5..6a72a617af 100644
--- a/runtime/program/process_gen_binary.cpp
+++ b/runtime/program/process_gen_binary.cpp
@@ -807,15 +807,9 @@ cl_int Program::parsePatchList(KernelInfo &kernelInfo) {
     }
 
     if (kernelInfo.heapInfo.pKernelHeader->KernelHeapSize && this->pDevice) {
-        auto memoryManager = this->pDevice->getMemoryManager();
-        auto kernelIsaSize = kernelInfo.heapInfo.pKernelHeader->KernelHeapSize;
-        auto kernelAllocation = memoryManager->createInternalGraphicsAllocation(nullptr, kernelIsaSize);
-        if (kernelAllocation) {
-            memcpy_s(kernelAllocation->getUnderlyingBuffer(), kernelIsaSize, kernelInfo.heapInfo.pKernelHeap, kernelIsaSize);
-            kernelInfo.kernelAllocation = kernelAllocation;
-        } else {
-            retVal = CL_OUT_OF_HOST_MEMORY;
-        }
+        if (!kernelInfo.createKernelAllocation(this->pDevice->getMemoryManager())) {
+            retVal = CL_OUT_OF_HOST_MEMORY;
+        }
     }
 
     DEBUG_BREAK_IF(kernelInfo.heapInfo.pKernelHeader->KernelHeapSize && !this->pDevice);
diff --git a/unit_tests/gtpin/gtpin_tests.cpp b/unit_tests/gtpin/gtpin_tests.cpp
index 2ef561c90b..8c81aedd53 100644
--- a/unit_tests/gtpin/gtpin_tests.cpp
+++ b/unit_tests/gtpin/gtpin_tests.cpp
@@ -2044,7 +2044,7 @@ TEST_F(GTPinTests, givenKernelThenVerifyThatKernelCodeSubstitutionWorksWell) {
 
     // Substitute new kernel code
     constexpr size_t newCodeSize = 64;
-    uint8_t newCode[newCodeSize];
+    uint8_t newCode[newCodeSize] = {0x0, 0x1, 0x2, 0x3, 0x4};
     pKernel->substituteKernelHeap(&newCode[0], newCodeSize);
 
     // Verify that substitution went properly
@@ -2053,6 +2053,10 @@ TEST_F(GTPinTests, givenKernelThenVerifyThatKernelCodeSubstitutionWorksWell) {
     uint8_t *pBin2 = reinterpret_cast<uint8_t *>(const_cast<void *>(pKernel->getKernelHeap()));
     EXPECT_EQ(pBin2, &newCode[0]);
 
+    auto kernelIsa = pKernel->getKernelInfo().kernelAllocation->getUnderlyingBuffer();
+
+    EXPECT_EQ(0, memcmp(kernelIsa, newCode, newCodeSize));
+
     // Cleanup
     retVal = clReleaseKernel(kernel);
     EXPECT_EQ(CL_SUCCESS, retVal);
diff --git a/unit_tests/kernel/CMakeLists.txt b/unit_tests/kernel/CMakeLists.txt
index d100e15e13..560ad04ba4 100644
--- a/unit_tests/kernel/CMakeLists.txt
+++ b/unit_tests/kernel/CMakeLists.txt
@@ -40,5 +40,6 @@ set(IGDRCL_SRCS_tests_kernel
   ${CMAKE_CURRENT_SOURCE_DIR}/kernel_transformable_tests.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/debug_kernel_tests.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/parent_kernel_tests.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/substitute_kernel_heap_tests.cpp
 )
 target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_kernel})
diff --git a/unit_tests/kernel/substitute_kernel_heap_tests.cpp b/unit_tests/kernel/substitute_kernel_heap_tests.cpp
new file mode 100644
index 0000000000..a4641a6095
--- /dev/null
+++ b/unit_tests/kernel/substitute_kernel_heap_tests.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018, Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "unit_tests/fixtures/device_fixture.h"
+#include "unit_tests/mocks/mock_kernel.h"
+#include "test.h"
+
+using namespace OCLRT;
+
+typedef Test<DeviceFixture> KernelSubstituteTest;
+
+TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithGreaterSizeThenAllocatesNewKernelAllocation) {
+    MockKernelWithInternals kernel(*pDevice);
+    auto pHeader = const_cast<SKernelBinaryHeaderCommon *>(kernel.kernelInfo.heapInfo.pKernelHeader);
+    const size_t initialHeapSize = 0x40;
+    pHeader->KernelHeapSize = initialHeapSize;
+
+    EXPECT_EQ(nullptr, kernel.kernelInfo.kernelAllocation);
+    kernel.kernelInfo.createKernelAllocation(pDevice->getMemoryManager());
+    auto firstAllocation = kernel.kernelInfo.kernelAllocation;
+    EXPECT_NE(nullptr, firstAllocation);
+    auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize();
+    EXPECT_EQ(initialHeapSize, firstAllocationSize);
+
+    auto firstAllocationId = static_cast<MemoryAllocation *>(firstAllocation)->id;
+
+    const size_t newHeapSize = initialHeapSize + 1;
+    char newHeap[newHeapSize];
+
+    kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
+    auto secondAllocation = kernel.kernelInfo.kernelAllocation;
+    EXPECT_NE(nullptr, secondAllocation);
+    auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
+    EXPECT_NE(initialHeapSize, secondAllocationSize);
+    EXPECT_EQ(newHeapSize, secondAllocationSize);
+    auto secondAllocationId = static_cast<MemoryAllocation *>(secondAllocation)->id;
+
+    EXPECT_NE(firstAllocationId, secondAllocationId);
+
+    pDevice->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation);
+}
+
+TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSameSizeThenDoesNotAllocateNewKernelAllocation) {
+    MockKernelWithInternals kernel(*pDevice);
+    auto pHeader = const_cast<SKernelBinaryHeaderCommon *>(kernel.kernelInfo.heapInfo.pKernelHeader);
+    const size_t initialHeapSize = 0x40;
+    pHeader->KernelHeapSize = initialHeapSize;
+
+    EXPECT_EQ(nullptr, kernel.kernelInfo.kernelAllocation);
+    kernel.kernelInfo.createKernelAllocation(pDevice->getMemoryManager());
+    auto firstAllocation = kernel.kernelInfo.kernelAllocation;
+    EXPECT_NE(nullptr, firstAllocation);
+    auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize();
+    EXPECT_EQ(initialHeapSize, firstAllocationSize);
+
+    auto firstAllocationId = static_cast<MemoryAllocation *>(firstAllocation)->id;
+
+    const size_t newHeapSize = initialHeapSize;
+    char newHeap[newHeapSize];
+
+    kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
+    auto secondAllocation = kernel.kernelInfo.kernelAllocation;
+    EXPECT_NE(nullptr, secondAllocation);
+    auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
+    EXPECT_EQ(initialHeapSize, secondAllocationSize);
+    auto secondAllocationId = static_cast<MemoryAllocation *>(secondAllocation)->id;
+
+    EXPECT_EQ(firstAllocationId, secondAllocationId);
+
+    pDevice->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation);
+}
+
+TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSmallerSizeThenDoesNotAllocateNewKernelAllocation) {
+    MockKernelWithInternals kernel(*pDevice);
+    auto pHeader = const_cast<SKernelBinaryHeaderCommon *>(kernel.kernelInfo.heapInfo.pKernelHeader);
+    const size_t initialHeapSize = 0x40;
+    pHeader->KernelHeapSize = initialHeapSize;
+
+    EXPECT_EQ(nullptr, kernel.kernelInfo.kernelAllocation);
+    kernel.kernelInfo.createKernelAllocation(pDevice->getMemoryManager());
+    auto firstAllocation = kernel.kernelInfo.kernelAllocation;
+    EXPECT_NE(nullptr, firstAllocation);
+    auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize();
+    EXPECT_EQ(initialHeapSize, firstAllocationSize);
+
+    auto firstAllocationId = static_cast<MemoryAllocation *>(firstAllocation)->id;
+
+    const size_t newHeapSize = initialHeapSize - 1;
+    char newHeap[newHeapSize];
+
+    kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
+    auto secondAllocation = kernel.kernelInfo.kernelAllocation;
+    EXPECT_NE(nullptr, secondAllocation);
+    auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
+    EXPECT_EQ(initialHeapSize, secondAllocationSize);
+    auto secondAllocationId = static_cast<MemoryAllocation *>(secondAllocation)->id;
+
+    EXPECT_EQ(firstAllocationId, secondAllocationId);
+
+    pDevice->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation);
+}
+
+TEST_F(KernelSubstituteTest, givenKernelWithUsedKernelAllocationWhenSubstituteKernelHeapAndAllocateNewMemoryThenStoreOldAllocationOnTemporaryList) {
+    MockKernelWithInternals kernel(*pDevice);
+    auto pHeader = const_cast<SKernelBinaryHeaderCommon *>(kernel.kernelInfo.heapInfo.pKernelHeader);
+    auto memoryManager = pDevice->getMemoryManager();
+
+    const size_t initialHeapSize = 0x40;
+    pHeader->KernelHeapSize = initialHeapSize;
+
+    kernel.kernelInfo.createKernelAllocation(memoryManager);
+    auto firstAllocation = kernel.kernelInfo.kernelAllocation;
+
+    firstAllocation->taskCount = ObjectNotUsed - 1;
+
+    const size_t newHeapSize = initialHeapSize + 1;
+    char newHeap[newHeapSize];
+
+    EXPECT_TRUE(memoryManager->graphicsAllocations.peekIsEmpty());
+
+    kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
+    auto secondAllocation = kernel.kernelInfo.kernelAllocation;
+
+    EXPECT_FALSE(memoryManager->graphicsAllocations.peekIsEmpty());
+    EXPECT_EQ(memoryManager->graphicsAllocations.peekHead(), firstAllocation);
+    memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation);
+    memoryManager->cleanAllocationList(firstAllocation->taskCount, TEMPORARY_ALLOCATION);
+}
diff --git a/unit_tests/program/kernel_info_tests.cpp b/unit_tests/program/kernel_info_tests.cpp
index fdbd4666ba..d5e33186e5 100644
--- a/unit_tests/program/kernel_info_tests.cpp
+++ b/unit_tests/program/kernel_info_tests.cpp
@@ -21,16 +21,12 @@
  */
 
 #include "runtime/program/kernel_info.h"
+#include "runtime/memory_manager/os_agnostic_memory_manager.h"
 #include "gtest/gtest.h"
 #include <type_traits>
 #include <memory>
 
-using OCLRT::KernelInfo;
-using OCLRT::SPatchStatelessConstantMemoryObjectKernelArgument;
-using OCLRT::SPatchStatelessGlobalMemoryObjectKernelArgument;
-using OCLRT::SPatchGlobalMemoryObjectKernelArgument;
-using OCLRT::SPatchImageMemoryObjectKernelArgument;
-using OCLRT::SPatchSamplerKernelArgument;
+using namespace OCLRT;
 
 TEST(KernelInfo, NonCopyable) {
     EXPECT_FALSE(std::is_move_constructible<KernelInfo>::value);
@@ -129,6 +125,42 @@ TEST(KernelInfo, decodeImageKernelArgument) {
     delete pKernelInfo;
 }
 
+TEST(KernelInfoTest, givenKernelInfoWhenCreateKernelAllocationThenCopyWholeKernelHeapToKernelAllocation) {
+    KernelInfo kernelInfo;
+    OsAgnosticMemoryManager memoryManager;
+    SKernelBinaryHeaderCommon kernelHeader;
+    const size_t heapSize = 0x40;
+    char heap[heapSize];
+    kernelHeader.KernelHeapSize = heapSize;
+    kernelInfo.heapInfo.pKernelHeader = &kernelHeader;
+    kernelInfo.heapInfo.pKernelHeap = &heap;
+
+    for (size_t i = 0; i < heapSize; i++) {
+        heap[i] = static_cast<char>(i);
+    }
+
+    auto retVal = kernelInfo.createKernelAllocation(&memoryManager);
+    EXPECT_TRUE(retVal);
+    auto allocation = kernelInfo.kernelAllocation;
+    EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), heap, heapSize));
+    EXPECT_EQ(heapSize, allocation->getUnderlyingBufferSize());
+    memoryManager.checkGpuUsageAndDestroyGraphicsAllocations(allocation);
+}
+
+class MyMemoryManager : public OsAgnosticMemoryManager {
+  public:
+    GraphicsAllocation *createInternalGraphicsAllocation(const void *ptr, size_t allocationSize) override { return nullptr; }
+};
+
+TEST(KernelInfoTest, givenKernelInfoWhenCreateKernelAllocationAndCannotAllocateMemoryThenReturnsFalse) {
+    KernelInfo kernelInfo;
+    MyMemoryManager memoryManager;
+    SKernelBinaryHeaderCommon kernelHeader = {};
+    kernelInfo.heapInfo.pKernelHeader = &kernelHeader;
+    auto retVal = kernelInfo.createKernelAllocation(&memoryManager);
+    EXPECT_FALSE(retVal);
+}
+
 TEST(KernelInfo, decodeGlobalMemObjectKernelArgument) {
     uint32_t argumentNumber = 1;
     KernelInfo *pKernelInfo = KernelInfo::create();