/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

// NOTE(review): the targets of the bare #include directives below are not
// present in this copy of the file (anything in angle brackets appears to have
// been stripped). Restore them from the upstream header before building.
#include
#include
#include
#include
#include
#include

// Pulled in only when building with HAS_CUPTI.
#ifdef HAS_CUPTI
#include
#endif

// TODO(T90238193)
// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude
#include "ActivityType.h"
#include "CuptiActivityBuffer.h"

#ifdef HAS_CUPTI
#include "CuptiCallbackApi.h"
#endif

namespace KINETO_NAMESPACE {

using namespace libkineto;

// Without HAS_CUPTI, provide a placeholder so the name CUpti_Activity still
// exists for declarations in this header.
#ifndef HAS_CUPTI
using CUpti_Activity = void;
#endif

// Declaration of the CUPTI activity API wrapper: enabling/disabling activity
// kinds, buffer sizing, and handing collected buffers to a caller-supplied
// handler. Only declarations live in this header; all behavior is defined
// elsewhere.
//
// The class is non-copyable (copy constructor and copy assignment are deleted)
// and has a virtual destructor; activityBuffers() and processActivities() are
// virtual and can be overridden by subclasses.
//
// NOTE(review): several standard templates in this class (std::atomic,
// std::set, std::unique_ptr, std::pair, std::function) appear without their
// template argument lists in this copy of the file. Confirm the intended
// arguments against the upstream header; they are deliberately not guessed
// here.
class CuptiActivityApi {
 public:
  // Tag passed to pushCorrelationID() / popCorrelationID().
  enum CorrelationFlowType { Default, User };

  // Control Variables shared with CuptiCallbackApi for teardown
  std::atomic teardownCupti_{0};
  std::mutex finalizeMutex_;
  std::condition_variable finalizeCond_;

  CuptiActivityApi() = default;
  CuptiActivityApi(const CuptiActivityApi&) = delete;
  CuptiActivityApi& operator=(const CuptiActivityApi&) = delete;

  virtual ~CuptiActivityApi() = default;

  // Returns a reference to a CuptiActivityApi instance.
  // NOTE(review): presumably the single process-wide instance - confirm in
  // the implementation file.
  static CuptiActivityApi& singleton();

  // Push / pop a correlation ID for the given flow type. Both are static, so
  // they can be called without an instance.
  static void pushCorrelationID(int id, CorrelationFlowType type);
  static void popCorrelationID(CorrelationFlowType type);

  // Enable the activities listed in selected_activities.
  // enablePerThreadBuffers defaults to false when omitted.
  void enableCuptiActivities(
      const std::set& selected_activities,
      bool enablePerThreadBuffers = false);
  // Disable the activities listed in selected_activities.
  void disableCuptiActivities(
      const std::set& selected_activities);
  void clearActivities();
  void teardownContext();

  // Virtual so subclasses can substitute their own buffer source.
  virtual std::unique_ptr activityBuffers();

  // Runs handler over the supplied buffer map and returns a pair.
  // NOTE(review): the meaning of the two pair elements is not visible in this
  // header - document them once confirmed against the implementation.
  virtual const std::pair processActivities(
      CuptiActivityBufferMap&,
      const std::function& handler);

  void setMaxBufferSize(int size);
  void setDeviceBufferSize(size_t size);
  void setDeviceBufferPoolLimit(size_t limit);

  // Public atomic flag, initially false.
  std::atomic_bool stopCollection{false};
  // Initially 0. NOTE(review): units are not stated in this header.
  int64_t flushOverhead{0};

  static void forceLoadCupti();

  // CUPTI configuration that needs to be set before CUDA context creation
  static void preConfigureCUPTI();

 private:
  int maxGpuBufferCount_{0};
  CuptiActivityBufferMap allocatedGpuTraceBuffers_;
  std::unique_ptr readyGpuTraceBuffers_;
  // NOTE(review): which members this mutex guards is not visible here.
  std::mutex mutex_;
  std::atomic tracingEnabled_{0};
  bool externalCorrelationEnabled_{false};

  // The declarations below use CUPTIAPI and CUcontext, so they are only
  // compiled when HAS_CUPTI is defined.
#ifdef HAS_CUPTI
  int processActivitiesForBuffer(
      uint8_t* buf,
      size_t validSize,
      const std::function& handler);
  // Static counterparts of bufferRequested() / bufferCompleted() with the same
  // parameter lists. NOTE(review): presumably registered with CUPTI as
  // callbacks and forwarding to those member functions - confirm in the
  // implementation file.
  static void CUPTIAPI bufferRequestedTrampoline(
      uint8_t** buffer,
      size_t* size,
      size_t* maxNumRecords);
  static void CUPTIAPI bufferCompletedTrampoline(
      CUcontext ctx,
      uint32_t streamId,
      uint8_t* buffer,
      size_t /* unused */,
      size_t validSize);
#endif // HAS_CUPTI

 protected:
  // Protected (rather than private) so subclasses can reach them.
#ifdef HAS_CUPTI
  void bufferRequested(uint8_t** buffer, size_t* size, size_t* maxNumRecords);
  void bufferCompleted(
      CUcontext ctx,
      uint32_t streamId,
      uint8_t* buffer,
      size_t /* unused */,
      size_t validSize);
#endif
};

} // namespace KINETO_NAMESPACE