mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Improve NonUniform aubs. Use Buffer instead of SVM allocation
Change-Id: Ifd9b1e23aa64e631cf7efb1e1924aa7c3d9dcc7b
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
2b0e743ac9
commit
5d1da5740a
@ -370,17 +370,11 @@ INSTANTIATE_TEST_CASE_P(
|
||||
|
||||
struct AUBSimpleArgNonUniformFixture : public KernelAUBFixture<SimpleArgNonUniformKernelFixture> {
|
||||
void SetUp() override {
|
||||
deviceClVersionSupport = NEO::platformDevices[0]->capabilityTable.clVersionSupport;
|
||||
if (deviceClVersionSupport < 20) {
|
||||
return;
|
||||
if (NEO::platformDevices[0]->capabilityTable.clVersionSupport < 20) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
KernelAUBFixture<SimpleArgNonUniformKernelFixture>::SetUp();
|
||||
|
||||
argVal = static_cast<int>(0x22222222);
|
||||
|
||||
sizeWrittenMemory = 0;
|
||||
typeSize = sizeof(int);
|
||||
typeItems = 40 * 40 * 40;
|
||||
sizeUserMemory = alignUp(typeItems * typeSize, 64);
|
||||
|
||||
destMemory = alignedMalloc(sizeUserMemory, 4096);
|
||||
@ -391,12 +385,7 @@ struct AUBSimpleArgNonUniformFixture : public KernelAUBFixture<SimpleArgNonUnifo
|
||||
|
||||
expectedMemory = alignedMalloc(sizeUserMemory, 4096);
|
||||
ASSERT_NE(nullptr, expectedMemory);
|
||||
|
||||
memset(expectedMemory, 0x0, sizeUserMemory);
|
||||
|
||||
kernel->setArgSvm(1, sizeUserMemory, destMemory, nullptr, 0u);
|
||||
|
||||
outBuffer = createHostPtrAllocationFromSvmPtr(destMemory, sizeUserMemory);
|
||||
}
|
||||
|
||||
void initializeExpectedMemory(size_t globalX, size_t globalY, size_t globalZ) {
|
||||
@ -421,6 +410,10 @@ struct AUBSimpleArgNonUniformFixture : public KernelAUBFixture<SimpleArgNonUnifo
|
||||
*(static_cast<int *>(destMemory) + maxId) = 0;
|
||||
*(expectedData + maxId) = maxId;
|
||||
|
||||
outBuffer.reset(Buffer::create(context, CL_MEM_COPY_HOST_PTR, alignUp(sizeUserMemory, 4096), destMemory, retVal));
|
||||
bufferGpuAddress = reinterpret_cast<void *>(outBuffer->getGraphicsAllocation()->getGpuAddress());
|
||||
kernel->setArg(1, outBuffer.get());
|
||||
|
||||
sizeWrittenMemory = maxId * typeSize;
|
||||
//add single int size for atomic sum of all work-items
|
||||
sizeWrittenMemory += typeSize;
|
||||
@ -433,11 +426,11 @@ struct AUBSimpleArgNonUniformFixture : public KernelAUBFixture<SimpleArgNonUnifo
|
||||
for (size_t i = 0; i < reminderElements; i++) {
|
||||
*(expectedReminderData + i) = 0xdeadbeef;
|
||||
}
|
||||
remainderDestMemory = static_cast<char *>(destMemory) + sizeWrittenMemory;
|
||||
remainderBufferGpuAddress = ptrOffset(bufferGpuAddress, sizeWrittenMemory);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
if (deviceClVersionSupport < 20) {
|
||||
if (NEO::platformDevices[0]->capabilityTable.clVersionSupport < 20) {
|
||||
return;
|
||||
}
|
||||
if (destMemory) {
|
||||
@ -456,17 +449,18 @@ struct AUBSimpleArgNonUniformFixture : public KernelAUBFixture<SimpleArgNonUnifo
|
||||
}
|
||||
unsigned int deviceClVersionSupport;
|
||||
|
||||
size_t typeSize;
|
||||
size_t typeItems;
|
||||
size_t sizeWrittenMemory;
|
||||
const size_t typeSize = sizeof(int);
|
||||
const size_t typeItems = 40 * 40 * 40;
|
||||
size_t sizeWrittenMemory = 0;
|
||||
size_t sizeUserMemory;
|
||||
size_t sizeRemainderMemory;
|
||||
int argVal;
|
||||
int argVal = 0x22222222;
|
||||
void *destMemory = nullptr;
|
||||
void *expectedMemory = nullptr;
|
||||
void *expectedRemainderMemory = nullptr;
|
||||
char *remainderDestMemory = nullptr;
|
||||
GraphicsAllocation *outBuffer;
|
||||
void *remainderBufferGpuAddress = nullptr;
|
||||
void *bufferGpuAddress = nullptr;
|
||||
std::unique_ptr<Buffer> outBuffer;
|
||||
|
||||
HardwareParse hwParser;
|
||||
};
|
||||
@ -527,374 +521,352 @@ HWTEST_F(AUBSimpleKernelStatelessTest, givenSimpleKernelWhenStatelessPathIsUsedT
|
||||
using AUBSimpleArgNonUniformTest = Test<AUBSimpleArgNonUniformFixture>;
|
||||
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork1DimNonUniformGroupThenExpectTwoWalkers) {
|
||||
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
|
||||
if (deviceClVersionSupport >= 20) {
|
||||
cl_uint workDim = 1;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 1, 1};
|
||||
size_t localWorkSize[3] = {32, 1, 1};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
cl_uint workDim = 1;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 1, 1};
|
||||
size_t localWorkSize[3] = {32, 1, 1};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(2u, walkerCount);
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(2u, walkerCount);
|
||||
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
|
||||
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork2DimNonUniformGroupInXDimensionThenExpectTwoWalkers) {
|
||||
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
|
||||
if (deviceClVersionSupport >= 20) {
|
||||
cl_uint workDim = 2;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 32, 1};
|
||||
size_t localWorkSize[3] = {16, 16, 1};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
cl_uint workDim = 2;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 32, 1};
|
||||
size_t localWorkSize[3] = {16, 16, 1};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(2u, walkerCount);
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(2u, walkerCount);
|
||||
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
|
||||
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork2DimNonUniformGroupInYDimensionThenExpectTwoWalkers) {
|
||||
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
|
||||
if (deviceClVersionSupport >= 20) {
|
||||
cl_uint workDim = 2;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {32, 39, 1};
|
||||
size_t localWorkSize[3] = {16, 16, 1};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
cl_uint workDim = 2;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {32, 39, 1};
|
||||
size_t localWorkSize[3] = {16, 16, 1};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(2u, walkerCount);
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(2u, walkerCount);
|
||||
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
|
||||
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork2DimNonUniformGroupInXandYDimensionThenExpectFourWalkers) {
|
||||
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
|
||||
if (deviceClVersionSupport >= 20) {
|
||||
cl_uint workDim = 2;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 39, 1};
|
||||
size_t localWorkSize[3] = {16, 16, 1};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
cl_uint workDim = 2;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 39, 1};
|
||||
size_t localWorkSize[3] = {16, 16, 1};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(4u, walkerCount);
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(4u, walkerCount);
|
||||
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
|
||||
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXDimensionThenExpectTwoWalkers) {
|
||||
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
|
||||
if (deviceClVersionSupport >= 20) {
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 32, 32};
|
||||
size_t localWorkSize[3] = {8, 8, 2};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 32, 32};
|
||||
size_t localWorkSize[3] = {8, 8, 2};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(2u, walkerCount);
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(2u, walkerCount);
|
||||
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
|
||||
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInYDimensionThenExpectTwoWalkers) {
|
||||
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
|
||||
if (deviceClVersionSupport >= 20) {
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {32, 39, 32};
|
||||
size_t localWorkSize[3] = {8, 8, 2};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {32, 39, 32};
|
||||
size_t localWorkSize[3] = {8, 8, 2};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(2u, walkerCount);
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(2u, walkerCount);
|
||||
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
|
||||
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInZDimensionThenExpectTwoWalkers) {
|
||||
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
|
||||
if (deviceClVersionSupport >= 20) {
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {32, 32, 39};
|
||||
size_t localWorkSize[3] = {8, 2, 8};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {32, 32, 39};
|
||||
size_t localWorkSize[3] = {8, 2, 8};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(2u, walkerCount);
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(2u, walkerCount);
|
||||
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
|
||||
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXandYDimensionThenExpectFourWalkers) {
|
||||
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
|
||||
if (deviceClVersionSupport >= 20) {
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 39, 32};
|
||||
size_t localWorkSize[3] = {8, 8, 2};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 39, 32};
|
||||
size_t localWorkSize[3] = {8, 8, 2};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(4u, walkerCount);
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(4u, walkerCount);
|
||||
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
|
||||
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXandZDimensionThenExpectFourWalkers) {
|
||||
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
|
||||
if (deviceClVersionSupport >= 20) {
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 32, 39};
|
||||
size_t localWorkSize[3] = {8, 2, 8};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 32, 39};
|
||||
size_t localWorkSize[3] = {8, 2, 8};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(4u, walkerCount);
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(4u, walkerCount);
|
||||
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
|
||||
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInYandZDimensionThenExpectFourWalkers) {
|
||||
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
|
||||
if (deviceClVersionSupport >= 20) {
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {32, 39, 39};
|
||||
size_t localWorkSize[3] = {2, 8, 8};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {32, 39, 39};
|
||||
size_t localWorkSize[3] = {2, 8, 8};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(4u, walkerCount);
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(4u, walkerCount);
|
||||
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
|
||||
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXandYandZDimensionThenExpectEightWalkers) {
|
||||
using WALKER_TYPE = WALKER_TYPE<FamilyType>;
|
||||
if (deviceClVersionSupport >= 20) {
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 39, 39};
|
||||
size_t localWorkSize[3] = {8, 8, 2};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
cl_uint workDim = 3;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {39, 39, 39};
|
||||
size_t localWorkSize[3] = {8, 8, 2};
|
||||
cl_uint numEventsInWaitList = 0;
|
||||
cl_event *eventWaitList = nullptr;
|
||||
cl_event *event = nullptr;
|
||||
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]);
|
||||
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
auto retVal = this->pCmdQ->enqueueKernel(
|
||||
this->kernel,
|
||||
workDim,
|
||||
globalWorkOffset,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(8u, walkerCount);
|
||||
hwParser.parseCommands<FamilyType>(*pCmdQ);
|
||||
uint32_t walkerCount = hwParser.getCommandCount<WALKER_TYPE>();
|
||||
EXPECT_EQ(8u, walkerCount);
|
||||
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(this->destMemory, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(this->remainderDestMemory, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
pCmdQ->flush();
|
||||
expectMemory<FamilyType>(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
|
||||
expectMemory<FamilyType>(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
|
||||
}
|
||||
|
Reference in New Issue
Block a user