mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
Program preemption mode in Interface Descriptor Data
Change-Id: I7fce731d71dd0b6dc8505ebfe45d24c65898a08b
This commit is contained in:
committed by
sys_ocldev
parent
bee295415f
commit
533afe472a
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -433,6 +433,7 @@ void dispatchWalker(
|
||||
KernelOperation **blockedCommandsData,
|
||||
HwTimeStamps *hwTimeStamps,
|
||||
OCLRT::HwPerfCounter *hwPerfCounter,
|
||||
PreemptionMode preemptionMode,
|
||||
bool blockQueue = false,
|
||||
unsigned int commandType = 0) {
|
||||
|
||||
@@ -586,7 +587,8 @@ void dispatchWalker(
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
interfaceDescriptorIndex);
|
||||
interfaceDescriptorIndex,
|
||||
preemptionMode);
|
||||
|
||||
if (&dispatchInfo == &*multiDispatchInfo.begin()) {
|
||||
// If hwTimeStampAlloc is passed (not nullptr), then we know that profiling is enabled
|
||||
@@ -659,17 +661,19 @@ void dispatchWalker(
|
||||
KernelOperation **blockedCommandsData,
|
||||
HwTimeStamps *hwTimeStamps,
|
||||
HwPerfCounter *hwPerfCounter,
|
||||
PreemptionMode preemptionMode,
|
||||
bool blockQueue = false) {
|
||||
|
||||
DispatchInfo dispatchInfo(const_cast<Kernel *>(&kernel), workDim, workItems, localWorkSizesIn, globalOffsets);
|
||||
dispatchWalker<GfxFamily>(commandQueue, dispatchInfo, numEventsInWaitList, eventWaitList,
|
||||
blockedCommandsData, hwTimeStamps, hwPerfCounter, blockQueue);
|
||||
blockedCommandsData, hwTimeStamps, hwPerfCounter, preemptionMode, blockQueue);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void dispatchScheduler(
|
||||
CommandQueue &commandQueue,
|
||||
DeviceQueueHw<GfxFamily> &devQueueHw,
|
||||
PreemptionMode preemptionMode,
|
||||
SchedulerKernel &scheduler) {
|
||||
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
@@ -754,7 +758,8 @@ void dispatchScheduler(
|
||||
simd,
|
||||
localWorkSizes,
|
||||
offsetInterfaceDescriptorTable,
|
||||
interfaceDescriptorIndex);
|
||||
interfaceDescriptorIndex,
|
||||
preemptionMode);
|
||||
|
||||
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
|
||||
applyWADisableLSQCROPERFforOCL<GfxFamily>(commandStream, scheduler, true);
|
||||
|
||||
@@ -183,6 +183,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
std::unique_ptr<PrintfHandler> printfHandler;
|
||||
bool slmUsed = false;
|
||||
EngineType engineType = device->getEngineType();
|
||||
auto preemption = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
|
||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||
|
||||
auto blockQueue = false;
|
||||
@@ -244,6 +245,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
&blockedCommandsData,
|
||||
hwTimeStamps,
|
||||
hwPerfCounter,
|
||||
preemption,
|
||||
blockQueue,
|
||||
commandType);
|
||||
|
||||
@@ -282,6 +284,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
dispatchScheduler<GfxFamily>(
|
||||
*this,
|
||||
*devQueueHw,
|
||||
preemption,
|
||||
scheduler);
|
||||
|
||||
scheduler.makeResident(commandStreamReceiver);
|
||||
@@ -638,7 +641,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
||||
for (auto &surface : CreateRange(surfaces, surfaceCount)) {
|
||||
allSurfaces.push_back(surface->duplicate());
|
||||
}
|
||||
|
||||
PreemptionMode preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
|
||||
auto kernelOperation = std::unique_ptr<KernelOperation>(blockedCommandsData); // marking ownership
|
||||
auto cmd = std::unique_ptr<Command>(new CommandComputeKernel(
|
||||
*this,
|
||||
@@ -649,6 +652,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
||||
slmUsed,
|
||||
commandType == CL_COMMAND_NDRANGE_KERNEL,
|
||||
std::move(printfHandler),
|
||||
preemptionMode,
|
||||
multiDispatchInfo.begin()->getKernel(),
|
||||
(uint32_t)multiDispatchInfo.size()));
|
||||
eventBuilder->getEvent()->setCommand(std::move(cmd));
|
||||
|
||||
@@ -32,6 +32,9 @@ struct MultiDispatchInfo;
|
||||
|
||||
class PreemptionHelper {
|
||||
public:
|
||||
template <typename CmdFamily>
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename CmdFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
static PreemptionMode taskPreemptionMode(Device &device, Kernel *kernel);
|
||||
static PreemptionMode taskPreemptionMode(Device &device, const MultiDispatchInfo &multiDispatchInfo);
|
||||
static bool allowThreadGroupPreemption(Kernel *kernel, const WorkaroundTable *waTable);
|
||||
@@ -65,6 +68,9 @@ class PreemptionHelper {
|
||||
static void applyPreemptionWaCmdsEnd(LinearStream *pCommandStream, const Device &device);
|
||||
|
||||
static PreemptionMode getDefaultPreemptionMode(const HardwareInfo &hwInfo);
|
||||
|
||||
template <typename GfxFamily>
|
||||
static void programInterfaceDescriptorDataPreemption(INTERFACE_DESCRIPTOR_DATA<GfxFamily> *idd, PreemptionMode preemptionMode);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -130,4 +130,8 @@ size_t PreemptionHelper::getRequiredPreambleSize(const Device &device) {
|
||||
return sizeof(typename GfxFamily::GPGPU_CSR_BASE_ADDRESS) + sizeof(typename GfxFamily::STATE_SIP);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void PreemptionHelper::programInterfaceDescriptorDataPreemption(INTERFACE_DESCRIPTOR_DATA<GfxFamily> *idd, PreemptionMode preemptionMode) {
|
||||
}
|
||||
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -173,7 +173,7 @@ void DeviceQueue::resetDeviceQueue() {
|
||||
return;
|
||||
}
|
||||
|
||||
void DeviceQueue::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler) {
|
||||
void DeviceQueue::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -23,6 +23,7 @@
|
||||
#pragma once
|
||||
#include "runtime/api/cl_types.h"
|
||||
#include "runtime/helpers/base_object.h"
|
||||
#include "runtime/helpers/hw_info.h"
|
||||
#include "runtime/indirect_heap/indirect_heap.h"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/execution_model/device_enqueue.h"
|
||||
@@ -92,7 +93,7 @@ class DeviceQueue : public BaseObject<_device_queue> {
|
||||
}
|
||||
|
||||
virtual void resetDeviceQueue();
|
||||
virtual void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler);
|
||||
virtual void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode);
|
||||
virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);
|
||||
|
||||
void acquireEMCriticalSection() {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -76,7 +76,7 @@ class DeviceQueueHw : public DeviceQueue {
|
||||
|
||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override;
|
||||
void resetDeviceQueue() override;
|
||||
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler) override;
|
||||
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) override;
|
||||
|
||||
uint32_t getSchedulerReturnInstance() {
|
||||
return igilQueue->m_controls.m_SchedulerEarlyReturn;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -385,9 +385,10 @@ size_t DeviceQueueHw<GfxFamily>::setSchedulerCrossThreadData(SchedulerKernel &sc
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler) {
|
||||
void DeviceQueueHw<GfxFamily>::dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode) {
|
||||
OCLRT::dispatchScheduler<GfxFamily>(cmdQ,
|
||||
*this,
|
||||
preemptionMode,
|
||||
scheduler);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -70,5 +70,6 @@ template size_t PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(Preemption
|
||||
template size_t PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(const Device &device);
|
||||
template void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pCommandStream, const Device &device);
|
||||
template void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pCommandStream, const Device &device);
|
||||
template void PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(INTERFACE_DESCRIPTOR_DATA<GfxFamily> *idd, PreemptionMode preemptionMode);
|
||||
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include <cstring>
|
||||
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include "runtime/command_stream/csr_definitions.h"
|
||||
#include "runtime/command_stream/preemption.h"
|
||||
#include "runtime/command_stream/preemption.inl"
|
||||
|
||||
@@ -52,4 +53,5 @@ template size_t PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(Preemption
|
||||
template size_t PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(const Device &device);
|
||||
template void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pCommandStream, const Device &device);
|
||||
template void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pCommandStream, const Device &device);
|
||||
template void PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(INTERFACE_DESCRIPTOR_DATA<GfxFamily> *idd, PreemptionMode preemptionMode);
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -60,7 +60,8 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
|
||||
uint32_t numSamplers,
|
||||
uint32_t threadsPerThreadGroup,
|
||||
uint32_t sizeSlm,
|
||||
bool barrierEnable);
|
||||
bool barrierEnable,
|
||||
PreemptionMode preemptionMode);
|
||||
|
||||
static void sendMediaStateFlush(
|
||||
LinearStream &commandStream,
|
||||
@@ -103,7 +104,8 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
|
||||
uint32_t simd,
|
||||
const size_t localWorkSize[3],
|
||||
const uint64_t offsetInterfaceDescriptorTable,
|
||||
const uint32_t interfaceDescriptorIndex);
|
||||
const uint32_t interfaceDescriptorIndex,
|
||||
PreemptionMode preemptionMode);
|
||||
|
||||
static size_t getSizeRequiredCS();
|
||||
static bool isPipeControlWArequired();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -21,6 +21,8 @@
|
||||
*/
|
||||
|
||||
#include "runtime/command_queue/local_id_gen.h"
|
||||
#include "runtime/command_stream/csr_definitions.h"
|
||||
#include "runtime/command_stream/preemption.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/basic_math.h"
|
||||
#include "runtime/helpers/dispatch_info.h"
|
||||
@@ -171,12 +173,13 @@ size_t KernelCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||
uint32_t numSamplers,
|
||||
uint32_t threadsPerThreadGroup,
|
||||
uint32_t sizeSlm,
|
||||
bool barrierEnable) {
|
||||
bool barrierEnable,
|
||||
PreemptionMode preemptionMode) {
|
||||
typedef typename GfxFamily::SAMPLER_STATE SAMPLER_STATE;
|
||||
typedef typename GfxFamily::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
// Allocate some memory for the interface descriptor
|
||||
auto pInterfaceDescriptor = (INTERFACE_DESCRIPTOR_DATA *)ptrOffset(indirectHeap.getBase(), (size_t)offsetInterfaceDescriptor);
|
||||
auto pInterfaceDescriptor = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(indirectHeap.getBase(), (size_t)offsetInterfaceDescriptor));
|
||||
*pInterfaceDescriptor = GfxFamily::cmdInitInterfaceDescriptorData;
|
||||
|
||||
// Program the kernel start pointer
|
||||
@@ -211,6 +214,8 @@ size_t KernelCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||
pInterfaceDescriptor->setSharedLocalMemorySize(programmableIDSLMSize);
|
||||
pInterfaceDescriptor->setBarrierEnable(barrierEnable);
|
||||
|
||||
PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(pInterfaceDescriptor, preemptionMode);
|
||||
|
||||
return (size_t)offsetInterfaceDescriptor;
|
||||
}
|
||||
|
||||
@@ -326,7 +331,8 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
uint32_t simd,
|
||||
const size_t localWorkSize[3],
|
||||
const uint64_t offsetInterfaceDescriptorTable,
|
||||
const uint32_t interfaceDescriptorIndex) {
|
||||
const uint32_t interfaceDescriptorIndex,
|
||||
PreemptionMode preemptionMode) {
|
||||
|
||||
typedef typename GfxFamily::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
|
||||
typedef typename GfxFamily::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
||||
@@ -410,7 +416,8 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
samplerCount,
|
||||
threadsPerThreadGroup,
|
||||
kernel.slmTotalSize,
|
||||
!!patchInfo.executionEnvironment->HasBarriers);
|
||||
!!patchInfo.executionEnvironment->HasBarriers,
|
||||
preemptionMode);
|
||||
|
||||
// Program media state flush to set interface descriptor offset
|
||||
KernelCommandsHelper<GfxFamily>::sendMediaStateFlush(
|
||||
|
||||
@@ -105,7 +105,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
|
||||
|
||||
CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, CommandStreamReceiver &commandStreamReceiver,
|
||||
std::unique_ptr<KernelOperation> kernelOperation, std::vector<Surface *> &surfaces,
|
||||
bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> printfHandler, Kernel *kernel, uint32_t kernelCount)
|
||||
bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> printfHandler,
|
||||
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount)
|
||||
: commandQueue(commandQueue),
|
||||
commandStreamReceiver(commandStreamReceiver),
|
||||
kernelOperation(std::move(kernelOperation)),
|
||||
@@ -123,6 +124,7 @@ CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, CommandSt
|
||||
kernel->incRefInternal();
|
||||
}
|
||||
this->kernelCount = kernelCount;
|
||||
this->preemptionMode = preemptionMode;
|
||||
}
|
||||
|
||||
CommandComputeKernel::~CommandComputeKernel() {
|
||||
@@ -240,7 +242,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
|
||||
devQueue->dispatchScheduler(
|
||||
commandQueue,
|
||||
scheduler);
|
||||
scheduler,
|
||||
preemptionMode);
|
||||
|
||||
scheduler.makeResident(commandStreamReceiver);
|
||||
|
||||
@@ -257,7 +260,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
dispatchFlags.requiresCoherency = requiresCoherency;
|
||||
dispatchFlags.lowPriority = commandQueue.getPriority() == QueuePriority::LOW;
|
||||
dispatchFlags.throttle = commandQueue.getThrottle();
|
||||
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), kernel);
|
||||
dispatchFlags.preemptionMode = preemptionMode;
|
||||
|
||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -24,7 +24,8 @@
|
||||
#include "runtime/command_stream/linear_stream.h"
|
||||
#include "runtime/indirect_heap/indirect_heap.h"
|
||||
#include "runtime/utilities/iflist.h"
|
||||
#include "runtime/helpers//completion_stamp.h"
|
||||
#include "runtime/helpers/completion_stamp.h"
|
||||
#include "runtime/helpers/hw_info.h"
|
||||
#include "runtime/helpers/properties_helper.h"
|
||||
|
||||
#include <memory>
|
||||
@@ -99,7 +100,9 @@ struct KernelOperation {
|
||||
class CommandComputeKernel : public Command {
|
||||
public:
|
||||
CommandComputeKernel(CommandQueue &commandQueue, CommandStreamReceiver &commandStreamReceiver,
|
||||
std::unique_ptr<KernelOperation> kernelResources, std::vector<Surface *> &surfaces, bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> printfHandler, Kernel *kernel = nullptr, uint32_t kernelCount = 0);
|
||||
std::unique_ptr<KernelOperation> kernelResources, std::vector<Surface *> &surfaces,
|
||||
bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> printfHandler,
|
||||
PreemptionMode preemptionMode, Kernel *kernel = nullptr, uint32_t kernelCount = 0);
|
||||
|
||||
~CommandComputeKernel() override;
|
||||
|
||||
@@ -120,6 +123,7 @@ class CommandComputeKernel : public Command {
|
||||
std::unique_ptr<PrintfHandler> printfHandler;
|
||||
Kernel *kernel;
|
||||
uint32_t kernelCount;
|
||||
PreemptionMode preemptionMode;
|
||||
};
|
||||
|
||||
class CommandMarker : public Command {
|
||||
|
||||
Reference in New Issue
Block a user