Remove loops over devices from Kernel's methods

Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2021-03-22 09:49:27 +00:00
committed by Compute-Runtime-Automation
parent 9ac7f1d370
commit 12458fb183
9 changed files with 365 additions and 417 deletions

View File

@ -830,20 +830,17 @@ TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenConte
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);
uint32_t computeUnitsForScratch[] = {0x10, 0x20};
for (auto &pClDevice : context->getDevices()) {
auto &deviceInfo = const_cast<DeviceInfo &>(pClDevice->getSharedDeviceInfo());
deviceInfo.computeUnitsUsedForScratch = computeUnitsForScratch[pClDevice->getRootDeviceIndex()];
}
auto pClDevice = &mockKernel.mockKernel->getDevice();
auto &deviceInfo = const_cast<DeviceInfo &>(pClDevice->getSharedDeviceInfo());
deviceInfo.computeUnitsUsedForScratch = computeUnitsForScratch[pClDevice->getRootDeviceIndex()];
mockKernel.mockKernel->initialize();
for (auto &pClDevice : context->getDevices()) {
auto rootDeviceIndex = pClDevice->getRootDeviceIndex();
auto expectedSize = size * pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize();
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[REGISTER_PRESSURE_TOO_HIGH],
mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), expectedSize);
EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData));
}
auto rootDeviceIndex = pClDevice->getRootDeviceIndex();
auto expectedSize = size * pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize();
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[REGISTER_PRESSURE_TOO_HIGH],
mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), expectedSize);
EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData));
}
TEST_P(PerformanceHintKernelTest, GivenPrivateSurfaceWhenKernelIsInitializedThenContextProvidesProperHint) {

View File

@ -105,10 +105,8 @@ class CloneKernelTest : public MultiRootDeviceWithSubDevicesFixture {
char pClonedCrossThreadData[64] = {};
clonedKernels[rootDeviceIndex] = pClonedKernel[rootDeviceIndex];
for (auto &rootDeviceIndex2 : this->context->getRootDeviceIndices()) {
pSourceKernel[rootDeviceIndex]->setCrossThreadDataForRootDeviceIndex(rootDeviceIndex2, pSourceCrossThreadData, sizeof(pSourceCrossThreadData));
pClonedKernel[rootDeviceIndex]->setCrossThreadDataForRootDeviceIndex(rootDeviceIndex2, pClonedCrossThreadData, sizeof(pClonedCrossThreadData));
}
pSourceKernel[rootDeviceIndex]->setCrossThreadData(pSourceCrossThreadData, sizeof(pSourceCrossThreadData));
pClonedKernel[rootDeviceIndex]->setCrossThreadData(pClonedCrossThreadData, sizeof(pClonedCrossThreadData));
}
pSourceMultiDeviceKernel = std::make_unique<MultiDeviceKernel>(sourceKernels);

View File

@ -53,7 +53,7 @@ struct KernelArgDevQueueTest : public DeviceHostQueueFixture<DeviceQueue> {
bool crossThreadDataUnchanged() {
for (uint32_t i = 0; i < crossThreadDataSize; i++) {
if (pKernel->mockCrossThreadDatas[testedRootDeviceIndex][i] != crossThreadDataInit) {
if (pKernel->mockCrossThreadData[i] != crossThreadDataInit) {
return false;
}
}
@ -82,7 +82,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GivenDeviceQueueWhenSettingAr
EXPECT_EQ(ret, CL_SUCCESS);
auto gpuAddress = static_cast<uint32_t>(pDeviceQueue->getQueueBuffer()->getGpuAddressToPatch());
auto patchLocation = ptrOffset(pKernel->mockCrossThreadDatas[testedRootDeviceIndex].data(), kernelArgPatchInfo.crossthreadOffset);
auto patchLocation = ptrOffset(pKernel->mockCrossThreadData.data(), kernelArgPatchInfo.crossthreadOffset);
EXPECT_EQ(*(reinterpret_cast<uint32_t *>(patchLocation)), gpuAddress);
}

View File

@ -72,9 +72,7 @@ class KernelArgImmediateTest : public MultiRootDeviceWithSubDevicesFixture {
pMultiDeviceKernel = std::make_unique<MultiDeviceKernel>(kernels);
for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
for (auto &rootDeviceIndex2 : this->context->getRootDeviceIndices()) {
pKernel[rootDeviceIndex]->setCrossThreadDataForRootDeviceIndex(rootDeviceIndex2, &pCrossThreadData[rootDeviceIndex], sizeof(pCrossThreadData[rootDeviceIndex]));
}
pKernel[rootDeviceIndex]->setCrossThreadData(&pCrossThreadData[rootDeviceIndex], sizeof(pCrossThreadData[rootDeviceIndex]));
}
}

View File

@ -63,9 +63,7 @@ class KernelSlmArgTest : public MultiRootDeviceWithSubDevicesFixture {
for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
crossThreadData[rootDeviceIndex][0x20 / sizeof(uint32_t)] = 0x12344321;
for (auto &rootDeviceIndex2 : this->context->getRootDeviceIndices()) {
pKernel[rootDeviceIndex]->setCrossThreadDataForRootDeviceIndex(rootDeviceIndex2, &crossThreadData[rootDeviceIndex], sizeof(crossThreadData[rootDeviceIndex]));
}
pKernel[rootDeviceIndex]->setCrossThreadData(&crossThreadData[rootDeviceIndex], sizeof(crossThreadData[rootDeviceIndex]));
}
}

View File

@ -595,7 +595,6 @@ TEST_F(MemoryAllocatorTest, given32BitDeviceWhenPrintfSurfaceIsCreatedThen32BitA
if (is64bit) {
DebugManager.flags.Force32bitAddressing.set(true);
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
auto rootDeviceIndex = device->getRootDeviceIndex();
MockKernelWithInternals kernel(*device);
kernel.kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
@ -612,12 +611,12 @@ TEST_F(MemoryAllocatorTest, given32BitDeviceWhenPrintfSurfaceIsCreatedThen32BitA
auto printfHandler = MockPrintfHandler::create(multiDispatchInfo, *device.get());
for (int i = 0; i < 8; i++) {
kernel.mockKernel->mockCrossThreadDatas[rootDeviceIndex][i] = 50;
kernel.mockKernel->mockCrossThreadData[i] = 50;
}
printfHandler->prepareDispatch(multiDispatchInfo);
uint32_t *ptr32Bit = (uint32_t *)kernel.mockKernel->mockCrossThreadDatas[rootDeviceIndex].data();
uint32_t *ptr32Bit = (uint32_t *)kernel.mockKernel->mockCrossThreadData.data();
auto printfAllocation = printfHandler->getSurface();
auto allocationAddress = printfAllocation->getGpuAddressToPatch();
uint32_t allocationAddress32bit = (uint32_t)(uintptr_t)allocationAddress;
@ -625,7 +624,7 @@ TEST_F(MemoryAllocatorTest, given32BitDeviceWhenPrintfSurfaceIsCreatedThen32BitA
EXPECT_TRUE(printfAllocation->is32BitAllocation());
EXPECT_EQ(allocationAddress32bit, *ptr32Bit);
for (int i = 4; i < 8; i++) {
EXPECT_EQ(50, kernel.mockKernel->mockCrossThreadDatas[rootDeviceIndex][i]);
EXPECT_EQ(50, kernel.mockKernel->mockCrossThreadData[i]);
}
delete printfHandler;

View File

@ -117,6 +117,7 @@ class MockKernel : public Kernel {
using Kernel::auxTranslationRequired;
using Kernel::containsStatelessWrites;
using Kernel::executionType;
using Kernel::getDevice;
using Kernel::hasDirectStatelessAccessToHostMemory;
using Kernel::hasIndirectStatelessAccessToHostMemory;
using Kernel::isSchedulerKernel;
@ -185,13 +186,12 @@ class MockKernel : public Kernel {
// Constructs the mock by forwarding every argument unchanged to the Kernel base constructor.
// `scheduler` defaults to false and is passed straight through.
MockKernel(Program *programArg, const KernelInfoContainer &kernelInfoArg, ClDevice &clDeviceArg, bool scheduler = false)
: Kernel(programArg, kernelInfoArg, clDeviceArg, scheduler) {
// Sizes mockCrossThreadDatas to one entry per element of kernelInfoArg.
// NOTE(review): this excerpt is a rendered diff without +/- markers; this line looks like it
// belongs to the pre-change side (other lines here use a single mockCrossThreadData vector
// instead) - confirm against the actual header before relying on it.
mockCrossThreadDatas.resize(kernelInfoArg.size());
}
~MockKernel() override {
// prevent double deletion
for (auto rootDeviceIndex = 0u; rootDeviceIndex < kernelDeviceInfos.size(); rootDeviceIndex++) {
if (kernelDeviceInfos[rootDeviceIndex].crossThreadData == mockCrossThreadDatas[rootDeviceIndex].data()) {
if (kernelDeviceInfos[rootDeviceIndex].crossThreadData == mockCrossThreadData.data()) {
kernelDeviceInfos[rootDeviceIndex].crossThreadData = nullptr;
}
}
@ -248,17 +248,18 @@ class MockKernel : public Kernel {
bool canTransformImages() const override;
////////////////////////////////////////////////////////////////////////////////
void setCrossThreadDataForRootDeviceIndex(uint32_t rootDeviceIndex, const void *crossThreadDataPattern, uint32_t newCrossThreadDataSize) {
if ((kernelDeviceInfos[rootDeviceIndex].crossThreadData != nullptr) && (kernelDeviceInfos[rootDeviceIndex].crossThreadData != mockCrossThreadDatas[rootDeviceIndex].data())) {
void setCrossThreadData(const void *crossThreadDataPattern, uint32_t newCrossThreadDataSize) {
auto rootDeviceIndex = defaultRootDeviceIndex;
if ((kernelDeviceInfos[rootDeviceIndex].crossThreadData != nullptr) && (kernelDeviceInfos[rootDeviceIndex].crossThreadData != mockCrossThreadData.data())) {
delete[] kernelDeviceInfos[rootDeviceIndex].crossThreadData;
kernelDeviceInfos[rootDeviceIndex].crossThreadData = nullptr;
kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = 0;
}
if (crossThreadDataPattern && (newCrossThreadDataSize > 0)) {
mockCrossThreadDatas[rootDeviceIndex].clear();
mockCrossThreadDatas[rootDeviceIndex].insert(mockCrossThreadDatas[rootDeviceIndex].begin(), (char *)crossThreadDataPattern, ((char *)crossThreadDataPattern) + newCrossThreadDataSize);
mockCrossThreadData.clear();
mockCrossThreadData.insert(mockCrossThreadData.begin(), (char *)crossThreadDataPattern, ((char *)crossThreadDataPattern) + newCrossThreadDataSize);
} else {
mockCrossThreadDatas[rootDeviceIndex].resize(newCrossThreadDataSize, 0);
mockCrossThreadData.resize(newCrossThreadDataSize, 0);
}
if (newCrossThreadDataSize == 0) {
@ -266,13 +267,8 @@ class MockKernel : public Kernel {
kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = 0;
return;
}
kernelDeviceInfos[rootDeviceIndex].crossThreadData = mockCrossThreadDatas[rootDeviceIndex].data();
kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = static_cast<uint32_t>(mockCrossThreadDatas[rootDeviceIndex].size());
}
void setCrossThreadData(const void *crossThreadDataPattern, uint32_t newCrossThreadDataSize) {
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
setCrossThreadDataForRootDeviceIndex(rootDeviceIndex, crossThreadDataPattern, newCrossThreadDataSize);
kernelDeviceInfos[rootDeviceIndex].crossThreadData = mockCrossThreadData.data();
kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = static_cast<uint32_t>(mockCrossThreadData.size());
}
void setSshLocal(const void *sshPattern, uint32_t newSshSize, uint32_t rootDeviceIndex) {
@ -312,7 +308,7 @@ class MockKernel : public Kernel {
return kernelInfoAllocated;
}
StackVec<std::vector<char>, 3> mockCrossThreadDatas;
std::vector<char> mockCrossThreadData;
std::vector<char> mockSshLocal;
void setUsingSharedArgs(bool usingSharedArgValue) { this->usingSharedObjArgs = usingSharedArgValue; }

View File

@ -81,7 +81,7 @@ class SamplerSetArgFixture : public ClDeviceFixture {
bool crossThreadDataUnchanged() {
for (uint32_t i = 0; i < crossThreadDataSize; i++) {
if (pKernel->mockCrossThreadDatas[rootDeviceIndex][i] != 0u) {
if (pKernel->mockCrossThreadData[i] != 0u) {
return false;
}
}