LCOV - code coverage report
Current view: top level - src/include/kompute - Sequence.hpp (source / functions) Hit Total Coverage
Test: lcov.info Lines: 26 26 100.0 %
Date: 2024-01-20 13:42:20 Functions: 17 18 94.4 %

          Line data    Source code
       1             : // SPDX-License-Identifier: Apache-2.0
       2             : #pragma once
       3             : 
       4             : #include "kompute/Core.hpp"
       5             : 
       6             : #include "kompute/operations/OpAlgoDispatch.hpp"
       7             : #include "kompute/operations/OpBase.hpp"
       8             : 
       9             : namespace kp {
      10             : 
      11             : /**
      12             :  *  Container of operations that can be sent to GPU as batch
      13             :  */
      14             : class Sequence : public std::enable_shared_from_this<Sequence>
      15             : {
      16             :   public:
      17             :     /**
      18             :      * Main constructor for sequence which requires core vulkan components to
      19             :      * generate all dependent resources.
      20             :      *
      21             :      * @param physicalDevice Vulkan physical device
      22             :      * @param device Vulkan logical device
      23             :      * @param computeQueue Vulkan compute queue
      24             :      * @param queueIndex Vulkan compute queue index in device
      25             :      * @param totalTimestamps Maximum number of timestamps to allocate
      26             :      */
      27             :     Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
      28             :              std::shared_ptr<vk::Device> device,
      29             :              std::shared_ptr<vk::Queue> computeQueue,
      30             :              uint32_t queueIndex,
      31             :              uint32_t totalTimestamps = 0);
      32             :     /**
      33             :      * Destructor for sequence which is responsible for cleaning all subsequent
      34             :      * owned operations.
      35             :      */
      36             :     ~Sequence();
      37             : 
      38             :     /**
      39             :      * Record function for operation to be added to the GPU queue in batch. This
      40             :      * template requires classes to be derived from the OpBase class. This
      41             :      * function also requires the Sequence to be recording, otherwise it will
      42             :      * not be able to add the operation.
      43             :      *
      44             :      * @param op Object derived from kp::OpBase that will be recorded by the
      45             :      * sequence which will be used when the operation is evaluated.
      46             :      * @return shared_ptr<Sequence> of the Sequence class itself
      47             :      */
      48             :     std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
      49             : 
      50             :     /**
      51             :      * Record function for operation to be added to the GPU queue in batch. This
      52             :      * template requires classes to be derived from the OpBase class. This
      53             :      * function also requires the Sequence to be recording, otherwise it will
      54             :      * not be able to add the operation.
      55             :      *
      56             :      * @param tensors Vector of tensors to use for the operation
      57             :      * @param TArgs Template parameters that are used to initialise operation
      58             :      * which allows for extensible configurations on initialisation.
      59             :      * @return shared_ptr<Sequence> of the Sequence class itself
      60             :      */
      61             :     template<typename T, typename... TArgs>
      62          31 :     std::shared_ptr<Sequence> record(
      63             :       std::vector<std::shared_ptr<Tensor>> tensors,
      64             :       TArgs&&... params)
      65             :     {
      66          32 :         std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
      67          62 :         return this->record(op);
      68          31 :     }
      69             :     /**
      70             :      * Record function for operation to be added to the GPU queue in batch. This
      71             :      * template requires classes to be derived from the OpBase class. This
      72             :      * function also requires the Sequence to be recording, otherwise it will
      73             :      * not be able to add the operation.
      74             :      *
      75             :      * @param algorithm Algorithm to use for the record often used for OpAlgo
      76             :      * operations
      77             :      * @param TArgs Template parameters that are used to initialise operation
      78             :      * which allows for extensible configurations on initialisation.
      79             :      * @return shared_ptr<Sequence> of the Sequence class itself
      80             :      */
      81             :     template<typename T, typename... TArgs>
      82          24 :     std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
      83             :                                      TArgs&&... params)
      84             :     {
      85          24 :         std::shared_ptr<T> op{ new T(algorithm,
      86           2 :                                      std::forward<TArgs>(params)...) };
      87          48 :         return this->record(op);
      88          24 :     }
      89             : 
      90             :     /**
      91             :      * Eval sends all the recorded and stored operations in the vector of
      92             :      * operations into the gpu as a submit job synchronously (with a barrier).
      93             :      *
      94             :      * @return shared_ptr<Sequence> of the Sequence class itself
      95             :      */
      96             :     std::shared_ptr<Sequence> eval();
      97             : 
      98             :     /**
      99             :      * Resets all the recorded and stored operations, records the operation
     100             :      * provided and submits into the gpu as a submit job synchronously (with a
     101             :      * barrier).
     102             :      *
     103             :      * @return shared_ptr<Sequence> of the Sequence class itself
     104             :      */
     105             :     std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);
     106             : 
     107             :     /**
     108             :      * Eval creates an operation of type T from the tensors provided and passes
     109             :      * it to eval(op), which submits it into the gpu as a job with a barrier.
     110             :      *
     111             :      * @param tensors Vector of tensors to use for the operation
     112             :      * @param TArgs Template parameters that are used to initialise operation
     113             :      * which allows for extensible configurations on initialisation.
     114             :      * @return shared_ptr<Sequence> of the Sequence class itself
     115             :      */
     116             :     template<typename T, typename... TArgs>
     117          53 :     std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
     118             :                                    TArgs&&... params)
     119             :     {
     120          56 :         std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
     121         104 :         return this->eval(op);
     122          52 :     }
     123             :     /**
     124             :      * Eval creates an operation of type T from the algorithm provided and
     125             :      * passes it to eval(op), which submits it into the gpu with a barrier.
     126             :      *
     127             :      * @param algorithm Algorithm to use for the record often used for OpAlgo
     128             :      * operations
     129             :      * @param TArgs Template parameters that are used to initialise operation
     130             :      * which allows for extensible configurations on initialisation.
     131             :      * @return shared_ptr<Sequence> of the Sequence class itself
     132             :      */
     133             :     template<typename T, typename... TArgs>
     134          14 :     std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
     135             :                                    TArgs&&... params)
     136             :     {
     137          14 :         std::shared_ptr<T> op{ new T(algorithm,
     138           9 :                                      std::forward<TArgs>(params)...) };
     139          28 :         return this->eval(op);
     140          14 :     }
     141             : 
     142             :     /**
     143             :      * Eval Async sends all the recorded and stored operations in the vector of
     144             :      * operations into the gpu as a submit job without a barrier. evalAwait()
     145             :      * must ALWAYS be called after to ensure the sequence is terminated
     146             :      * correctly.
     147             :      *
     148             :      * @return shared_ptr<Sequence> of the Sequence class itself
     149             :      */
     150             :     std::shared_ptr<Sequence> evalAsync();
     151             :     /**
     152             :      * Clears current operations, records the provided one and sends it
     153             :      * into the gpu as a submit job without a barrier. evalAwait()
     154             :      * must ALWAYS be called after to ensure the sequence is terminated
     155             :      * correctly.
     156             :      *
     157             :      * @return shared_ptr<Sequence> of the Sequence class itself
     158             :      */
     159             :     std::shared_ptr<Sequence> evalAsync(std::shared_ptr<OpBase> op);
     160             :     /**
     161             :      * Eval Async creates an operation of type T from the tensors provided and
     162             :      * passes it to evalAsync(op), which submits it without a barrier.
     163             :      *
     164             :      * @param tensors Vector of tensors to use for the operation
     165             :      * @param TArgs Template parameters that are used to initialise operation
     166             :      * which allows for extensible configurations on initialisation.
     167             :      * @return shared_ptr<Sequence> of the Sequence class itself
     168             :      */
     169             :     template<typename T, typename... TArgs>
     170           3 :     std::shared_ptr<Sequence> evalAsync(
     171             :       std::vector<std::shared_ptr<Tensor>> tensors,
     172             :       TArgs&&... params)
     173             :     {
     174           3 :         std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
     175           6 :         return this->evalAsync(op);
     176           3 :     }
     177             :     /**
     178             :      * Eval Async creates an operation of type T from the algorithm provided
     179             :      * and passes it to evalAsync(op), which submits it without a barrier.
     180             :      *
     181             :      * @param algorithm Algorithm to use for the record often used for OpAlgo
     182             :      * operations
     183             :      * @param TArgs Template parameters that are used to initialise operation
     184             :      * which allows for extensible configurations on initialisation.
     185             :      * @return shared_ptr<Sequence> of the Sequence class itself
     186             :      */
     187             :     template<typename T, typename... TArgs>
     188           4 :     std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
     189             :                                         TArgs&&... params)
     190             :     {
     191           4 :         std::shared_ptr<T> op{ new T(algorithm,
     192             :                                      std::forward<TArgs>(params)...) };
     193           8 :         return this->evalAsync(op);
     194           4 :     }
     195             : 
     196             :     /**
     197             :      * Eval Await waits for the fence to finish processing and then once it
     198             :      * finishes, it runs the postEval of all operations.
     199             :      *
     200             :      * @param waitFor Number of milliseconds to wait before timing out.
     201             :      * @return shared_ptr<Sequence> of the Sequence class itself
     202             :      */
     203             :     std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
     204             : 
     205             :     /**
     206             :      * Clear function clears all operations currently recorded and starts
     207             :      * recording again.
     208             :      */
     209             :     void clear();
     210             : 
     211             :     /**
     212             :      * Return the timestamps that were latched at the beginning and
     213             :      * after each operation during the last eval() call.
     214             :      */
     215             :     std::vector<std::uint64_t> getTimestamps();
     216             : 
     217             :     /**
     218             :      * Begins recording commands to be submitted into the command
     219             :      * buffer.
     220             :      */
     221             :     void begin();
     222             : 
     223             :     /**
     224             :      * Ends the recording and stops recording commands when the record command
     225             :      * is sent.
     226             :      */
     227             :     void end();
     228             : 
     229             :     /**
     230             :      * Returns true if the sequence is currently recording.
     231             :      *
     232             :      * @return Boolean stating if recording ongoing.
     233             :      */
     234             :     bool isRecording() const;
     235             : 
     236             :     /**
     237             :      * Returns true if the sequence has been initialised, and it's based on the
     238             :      * GPU resources being referenced.
     239             :      *
     240             :      * @return Boolean stating if is initialized
     241             :      */
     242             :     bool isInit() const;
     243             : 
     244             :     /**
     245             :      * Clears command buffer and triggers re-record of all the current
     246             :      * operations saved, which is useful if the underlying kp::Tensors or
     247             :      * kp::Algorithms are modified and need to be re-recorded.
     248             :      */
     249             :     void rerecord();
     250             : 
     251             :     /**
     252             :      * Returns true if the sequence is currently running - mostly used for async
     253             :      * workloads.
     254             :      *
     255             :      * @return Boolean stating if currently running.
     256             :      */
     257             :     bool isRunning() const;
     258             : 
     259             :     /**
     260             :      * Destroys and frees the GPU resources which include the buffer and memory
     261             :      * and sets the sequence as init=False.
     262             :      */
     263             :     void destroy();
     264             : 
     265             :   private:
     266             :     // -------------- NEVER OWNED RESOURCES
     267             :     std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
     268             :     std::shared_ptr<vk::Device> mDevice = nullptr;
     269             :     std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
     270             :     uint32_t mQueueIndex = -1;
     271             : 
     272             :     // -------------- OPTIONALLY OWNED RESOURCES
     273             :     std::shared_ptr<vk::CommandPool> mCommandPool = nullptr;
     274             :     bool mFreeCommandPool = false;
     275             :     std::shared_ptr<vk::CommandBuffer> mCommandBuffer = nullptr;
     276             :     bool mFreeCommandBuffer = false;
     277             : 
     278             :     // -------------- ALWAYS OWNED RESOURCES
     279             :     vk::Fence mFence;
     280             :     std::vector<std::shared_ptr<OpBase>> mOperations{};
     281             :     std::shared_ptr<vk::QueryPool> timestampQueryPool = nullptr;
     282             : 
     283             :     // State
     284             :     bool mRecording = false;
     285             :     bool mIsRunning = false;
     286             : 
     287             :     // Create functions
     288             :     void createCommandPool();
     289             :     void createCommandBuffer();
     290             :     void createTimestampQueryPool(uint32_t totalTimestamps);
     291             : };
     292             : 
     293             : } // End namespace kp

Generated by: LCOV version 1.14