HostPtr surface makeResident must be called once

Change-Id: I9cb04e3affdd8b8634466621b50326a088ecdcf9
This commit is contained in:
Zdanowicz, Zbigniew 2018-02-16 09:15:36 +01:00 committed by sys_ocldev
parent f9254c8de6
commit 86bb715b95
7 changed files with 148 additions and 16 deletions

View File

@ -489,10 +489,16 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
}
auto mediaSamplerRequired = false;
Kernel *kernel = nullptr;
for (auto &dispatchInfo : multiDispatchInfo) {
dispatchInfo.getKernel()->makeResident(commandStreamReceiver);
requiresCoherency |= dispatchInfo.getKernel()->requiresCoherency();
mediaSamplerRequired |= dispatchInfo.getKernel()->isVmeKernel();
if (kernel != dispatchInfo.getKernel()) {
kernel = dispatchInfo.getKernel();
} else {
continue;
}
kernel->makeResident(commandStreamReceiver);
requiresCoherency |= kernel->requiresCoherency();
mediaSamplerRequired |= kernel->isVmeKernel();
}
if (mediaSamplerRequired) {
@ -617,11 +623,17 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
} else {
//store task data in event
std::vector<Surface *> allSurfaces;
Kernel *kernel = nullptr;
for (auto &dispatchInfo : multiDispatchInfo) {
dispatchInfo.getKernel()->getResidency(allSurfaces);
for (auto &surface : CreateRange(surfaces, surfaceCount)) {
allSurfaces.push_back(surface->duplicate());
if (kernel != dispatchInfo.getKernel()) {
kernel = dispatchInfo.getKernel();
} else {
continue;
}
kernel->getResidency(allSurfaces);
}
for (auto &surface : CreateRange(surfaces, surfaceCount)) {
allSurfaces.push_back(surface->duplicate());
}
auto kernelOperation = std::unique_ptr<KernelOperation>(blockedCommandsData); // marking ownership

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -274,9 +274,9 @@ class Kernel : public BaseObject<_cl_kernel> {
bool isVmeKernel() { return kernelInfo.isVmeWorkload; };
//residency for kernel surfaces
void makeResident(CommandStreamReceiver &commandStreamReceiver);
MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
void updateWithCompletionStamp(CommandStreamReceiver &commandStreamReceiver, CompletionStamp *completionStamp);
void getResidency(std::vector<Surface *> &dst);
MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
bool requiresCoherency();
void resetSharedObjectsPatchAddresses();
bool isUsingSharedObjArgs() { return usingSharedObjArgs; }

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -30,6 +30,7 @@
#include "runtime/memory_manager/memory_manager.h"
#include "runtime/memory_manager/surface.h"
#include "unit_tests/command_queue/command_queue_fixture.h"
#include "unit_tests/fixtures/buffer_fixture.h"
#include "unit_tests/fixtures/context_fixture.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/fixtures/memory_management_fixture.h"
@ -990,3 +991,85 @@ HWTEST_F(OOQueueHwTest, givenBlockedOutOfOrderCmdQueueAndAsynchronouslyCompleted
EXPECT_EQ(virtualEventTaskLevel + 1, cmdQHw->taskLevel);
EXPECT_EQ(virtualEventTaskLevel + 1, mockCSR->lastTaskLevelToFlushTask);
}
// Enqueues a 1D NDRange with gws = 63 and lws = 16 on a program that allows
// non-uniform work groups (presumably what triggers the walker split named in
// the test title - confirm against the dispatch builder) and checks that:
//  - MockKernel::makeResident was invoked exactly once, and
//  - every allocation recorded by the ULT command stream receiver was made
//    resident exactly once.
HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenNoBlockedThenKernelMakeResidentCalledOnce) {
    MockKernelWithInternals mockKernelWithInternals(*pDevice);
    auto mockKernel = mockKernelWithInternals.mockKernel;
    auto mockProgram = mockKernelWithInternals.mockProgram;
    mockProgram->setAllowNonUniform(true);

    // Ask the ULT CSR to count makeResident calls per allocation.
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.storeMakeResidentAllocations = true;

    size_t offset = 0;
    size_t gws = 63;
    size_t lws = 16;

    cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(1u, mockKernel->makeResidentCalls);

    // Each map entry is (allocation, number of makeResident calls observed).
    for (const auto &residentEntry : csr.makeResidentAllocations) {
        EXPECT_EQ(1u, residentEntry.second);
    }
}
// Enqueues a 1D NDRange with gws = 63 and lws = 16 that waits on a user event,
// on a program that allows non-uniform work groups (presumably what triggers
// the walker split named in the test title - confirm against the dispatch
// builder). Checks that:
//  - MockKernel::getResidency was invoked exactly once while the enqueue was
//    still blocked, and
//  - after the user event is completed, every allocation recorded by the ULT
//    command stream receiver was made resident exactly once.
HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenBlockedThenKernelGetResidencyCalledOnce) {
    UserEvent userEvent(context);
    MockKernelWithInternals mockKernelWithInternals(*pDevice);
    auto mockKernel = mockKernelWithInternals.mockKernel;
    auto mockProgram = mockKernelWithInternals.mockProgram;
    mockProgram->setAllowNonUniform(true);

    // Ask the ULT CSR to count makeResident calls per allocation.
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.storeMakeResidentAllocations = true;

    size_t offset = 0;
    size_t gws = 63;
    size_t lws = 16;

    cl_event blockedEvent = &userEvent;
    cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 1, &blockedEvent, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(1u, mockKernel->getResidencyCalls);

    // Complete the user event before inspecting the residency counters.
    userEvent.setStatus(CL_COMPLETE);

    // Each map entry is (allocation, number of makeResident calls observed).
    for (const auto &residentEntry : csr.makeResidentAllocations) {
        EXPECT_EQ(1u, residentEntry.second);
    }
}
// Issues a non-blocking enqueueReadBuffer that waits on a user event and uses
// a host pointer placed one byte below a cache-line boundary. After the event
// is completed, every allocation recorded by the ULT command stream receiver
// must have been made resident once, except the buffer's own allocation,
// which is expected three times.
HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnqueueSurfacesMakeResidentIsCalledOnce) {
    UserEvent userEvent(context);
    cl_event blockedEvent = &userEvent;

    // Ask the ULT CSR to count makeResident calls per allocation.
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.storeMakeResidentAllocations = true;

    BufferDefaults::context = context;
    std::unique_ptr<Buffer> buffer(BufferHelper<>::create());
    GraphicsAllocation *bufferAllocation = buffer->getGraphicsAllocation();

    // Three cache lines of backing storage leave room to pick an address one
    // byte before a cache-line boundary and still read a full cache line.
    char array[3 * MemoryConstants::cacheLineSize];
    char *hostPtr = &array[MemoryConstants::cacheLineSize];
    hostPtr = alignUp(hostPtr, MemoryConstants::cacheLineSize);
    hostPtr -= 1;

    cl_int status = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, hostPtr, 1, &blockedEvent, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);

    userEvent.setStatus(CL_COMPLETE);

    for (const auto &residentEntry : csr.makeResidentAllocations) {
        //Buffer surface will be added three times (for each kernel from split and as a base range of enqueueReadBuffer call)
        const uint32_t expected = (residentEntry.first == bufferAllocation) ? 3u : 1u;
        EXPECT_EQ(expected, residentEntry.second);
    }
}

View File

@ -43,14 +43,14 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
using BaseClass::CommandStreamReceiver::dispatchMode;
using BaseClass::CommandStreamReceiver::flushStamp;
using BaseClass::CommandStreamReceiver::isPreambleSent;
using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig;
using BaseClass::CommandStreamReceiver::lastPreemptionMode;
using BaseClass::CommandStreamReceiver::lastSentCoherencyRequest;
using BaseClass::CommandStreamReceiver::lastSentL3Config;
using BaseClass::CommandStreamReceiver::lastSentThreadAribtrationPolicy;
using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig;
using BaseClass::CommandStreamReceiver::latestFlushedTaskCount;
using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig;
using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig;
using BaseClass::CommandStreamReceiver::lastPreemptionMode;
using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig;
using BaseClass::CommandStreamReceiver::taskCount;
using BaseClass::CommandStreamReceiver::taskLevel;
@ -65,6 +65,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
tempTagLocation = new GraphicsAllocation(nullptr, 0);
this->tagAllocation = tempTagLocation;
this->tagAddress = reinterpret_cast<uint32_t *>(tempTagLocation->getUnderlyingBuffer());
this->storeMakeResidentAllocations = false;
}
virtual MemoryManager *createMemoryManager(bool enable64kbPages) override {
@ -82,6 +83,23 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
using SamplerCacheFlushState = CommandStreamReceiver::SamplerCacheFlushState;
SamplerCacheFlushState peekSamplerCacheFlushRequired() const { return this->samplerCacheFlushRequired; }
// Test hook around the base makeResident: when storeMakeResidentAllocations is
// set, counts in makeResidentAllocations how many times each allocation
// (keyed by its address) has been passed in. The call is always forwarded to
// BaseClass::makeResident afterwards.
void makeResident(GraphicsAllocation &gfxAllocation) override {
    if (storeMakeResidentAllocations) {
        // operator[] value-initializes the counter to 0 for an allocation seen
        // for the first time, so one lookup handles both insert and increment.
        ++makeResidentAllocations[&gfxAllocation];
    }
    BaseClass::makeResident(gfxAllocation);
}
std::map<GraphicsAllocation *, uint32_t> makeResidentAllocations;
bool storeMakeResidentAllocations;
protected:
using BaseClass::CommandStreamReceiver::memoryManager;
using BaseClass::CommandStreamReceiver::tagAddress;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -53,4 +53,14 @@ bool MockKernel::isPatched() const {
return true;
}
// Records one more makeResident invocation on this mock, then delegates to
// the Kernel implementation.
void MockKernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
    ++makeResidentCalls;
    Kernel::makeResident(commandStreamReceiver);
}
// Records one more getResidency invocation on this mock, then delegates to
// the Kernel implementation, passing dst through unchanged.
void MockKernel::getResidency(std::vector<Surface *> &dst) {
    ++getResidencyCalls;
    Kernel::getResidency(dst);
}
} // namespace OCLRT

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -182,6 +182,12 @@ class MockKernel : public Kernel {
using Kernel::kernelArgHandlers;
void setUsingSharedArgs(bool usingSharedArgValue) { this->usingSharedObjArgs = usingSharedArgValue; }
// Counting overrides of the Kernel residency entry points.
void makeResident(CommandStreamReceiver &commandStreamReceiver) override;
void getResidency(std::vector<Surface *> &dst) override;
// Call counters for the overrides above; both start at zero.
uint32_t makeResidentCalls = 0;
uint32_t getResidencyCalls = 0;
};
// The class below has enough internals to service an enqueue operation.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -99,6 +99,9 @@ class MockProgram : public Program {
void SetLLVMBinarySize(size_t bsz) { llvmBinarySize = bsz; }
uint64_t getHash();
void setAllowNonUniform(bool allow) {
allowNonUniform = allow;
}
bool contextSet = false;
};