#include #include #include #include #include namespace at::native { namespace { void createMIOpenHandle(miopenHandle_t *handle) { MIOPEN_CHECK(miopenCreate(handle)); } void destroyMIOpenHandle(miopenHandle_t handle) { // this is because of something dumb in the ordering of // destruction. Sometimes atexit, the cuda context (or something) // would already be destroyed by the time this gets destroyed. It // happens in fbcode setting. @colesbury and I decided to not destroy // the handle as a workaround. // - @soumith // // Further note: this is now disabled globally, because we are seeing // the same issue as mentioned above in CUDA 11 CI. // - @zasdfgbnm // // #ifdef NO_MIOPEN_DESTROY_HANDLE // #else // miopenDestroy(handle); // #endif } using MIOpenPoolType = at::cuda::DeviceThreadHandlePool< miopenHandle_t, createMIOpenHandle, destroyMIOpenHandle>; } // namespace miopenHandle_t getMiopenHandle() { c10::DeviceIndex device = 0; AT_CUDA_CHECK(c10::hip::GetDevice(&device)); // Thread local PoolWindows are lazily-initialized // to avoid initialization issues that caused hangs on Windows. // See: https://github.com/pytorch/pytorch/pull/22405 // This thread local unique_ptrs will be destroyed when the thread terminates, // releasing its reserved handles back to the pool. static auto pool = std::make_shared(); thread_local std::unique_ptr myPoolWindow( pool->newPoolWindow()); auto handle = myPoolWindow->reserve(device); MIOPEN_CHECK(miopenSetStream(handle, c10::hip::getCurrentHIPStream())); return handle; } } // namespace at::native