Context Navigation

← Previous Changeset
Next Changeset →

Changeset 25256

Timestamp:

Apr 14, 2021, 9:23:47 AM (3 years ago)

Author:

wraitii

Message:

Rework the pathfinder path computation setup for threading.

Essentially reverts D1918 / rP22902.
Instead of copying path requests to workers, set up the result vector, then set up an index, and compute 'in-place'.
To send messages, the result vectors are read in order. This makes the order trivially constant no matter how many workers there are, and the architecture overall makes it much easier to efficiently parallelise.

Tested by: Langbart, Stan

Differential Revision: https://code.wildfiregames.com/D3849

Location:

ps/trunk/source/simulation2

Files:

: 4 edited

Simulation2.cpp (modified) (3 diffs)
components/CCmpPathfinder.cpp (modified) (9 diffs)
components/CCmpPathfinder_Common.h (modified) (6 diffs)
components/ICmpPathfinder.h (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

ps/trunk/source/simulation2/Simulation2.cpp

r25250	r25256
521	521	CmpPtr<ICmpPathfinder> cmpPathfinder(simContext, SYSTEM_ENTITY);
522	522	if (cmpPathfinder)
523		cmpPathfinder->~~FetchAsyncResultsAndSendMessage~~s();
	523	cmpPathfinder->SendRequestedPaths();
524	524
525	525	{
…	…
542	542	{
543	543	cmpPathfinder->StartProcessingMoves(true);
544		cmpPathfinder->~~FetchAsyncResultsAndSendMessage~~s();
	544	cmpPathfinder->SendRequestedPaths();
545	545	}
546	546	// Send all the update phases
…	…
560	560	{
561	561	cmpPathfinder->StartProcessingMoves(true);
562		cmpPathfinder->~~FetchAsyncResultsAndSendMessage~~s();
	562	cmpPathfinder->SendRequestedPaths();
563	563	}
564	564

ps/trunk/source/simulation2/components/CCmpPathfinder.cpp

-              r24915
+              r25256
 #include "renderer/Scene.h"
 REGISTER_COMPONENT_TYPE(Pathfinder)
 …
     CParamNode::LoadXML(externalParamNode, L"simulation/data/pathfinder.xml", "pathfinder");
+    // Previously all move commands during a turn were
+    // queued up and processed asynchronously at the start
+    // of the next turn.  Now we are processing queued up
+    // events several times during the turn.  This improves
+    // responsiveness and units move more smoothly especially
+    // when in formation.  There is still a call at the
+    // beginning of a turn to process all outstanding moves -
+    // this will handle any moves above the MaxSameTurnMoves
+    // threshold.
+    //
+    // TODO - The moves processed at the beginning of the
+    // turn do not count against the maximum moves per turn
+    // currently.  The thinking is that this will eventually
+    // happen in another thread.  Either way this probably
+    // will require some adjustment and rethinking.
+    // Paths are computed:
+    //  - Before MT_Update
+    //  - Before MT_MotionUnitFormation
+    //  - 'in-between' turns (effectively at the start until threading is implemented).
+    // The latter of these must compute all outstanding requests, but the former two are capped
+    // to avoid spending too much time there (since the latter are designed to be threaded and thus not block the GUI).
+    // This loads that maximum number (note that it's per computation call, not per turn for now).
     const CParamNode pathingSettings = externalParamNode.GetChild("Pathfinder");
     m_MaxSameTurnMoves = (u16)pathingSettings.GetChild("MaxSameTurnMoves").ToInt();
 …
         m_PassClassMasks[name] = mask;
+    }
-    m_Workers.emplace_back(PathfinderWorker{});
+}
 …
 void CCmpPathfinder::Deinit()
+{
-    m_Workers.clear();
     SetDebugOverlay(false); // cleans up memory
     SAFE_DELETE(m_AtlasOverlay);
 …
 void CCmpPathfinder::SerializeCommon(S& serialize)
+{
     Serializer(serialize, "long requests", m_LongPathRequests);
     Serializer(serialize, "short requests", m_ShortPathRequests);
+    Serializer(serialize, "long requests", m_LongPathRequests.m_Requests);
+    Serializer(serialize, "short requests", m_ShortPathRequests.m_Requests);
     serialize.NumberU32_Unbounded("next ticket", m_NextAsyncTicket);
     serialize.NumberU16_Unbounded("map size", m_MapSize);
 …
         UpdateGrid();
         // In case we were serialised with requests pending, we need to process them.
         if (!m_ShortPathRequests.empty() || !m_LongPathRequests.empty())
+        if (!m_ShortPathRequests.m_Requests.empty() || !m_LongPathRequests.m_Requests.empty())
+        {
             ENSURE(CmpPtr<ICmpObstructionManager>(GetSystemEntity()));
 …
 //////////////////////////////////////////////////////////
-// Async pathfinder workers
-CCmpPathfinder::PathfinderWorker::PathfinderWorker() {}
-template<typename T>
-void CCmpPathfinder::PathfinderWorker::PushRequests(std::vector<T>&, ssize_t)
+{
-    static_assert(sizeof(T) == 0, "Only specializations can be used");
+}
-template<> void CCmpPathfinder::PathfinderWorker::PushRequests(std::vector<LongPathRequest>& from, ssize_t amount)
+{
-    m_LongRequests.insert(m_LongRequests.end(), std::make_move_iterator(from.end() - amount), std::make_move_iterator(from.end()));
+}
-template<> void CCmpPathfinder::PathfinderWorker::PushRequests(std::vector<ShortPathRequest>& from, ssize_t amount)
+{
-    m_ShortRequests.insert(m_ShortRequests.end(), std::make_move_iterator(from.end() - amount), std::make_move_iterator(from.end()));
+}
-void CCmpPathfinder::PathfinderWorker::Work(const CCmpPathfinder& pathfinder)
+{
-    while (!m_LongRequests.empty())
+    {
-        const LongPathRequest& req = m_LongRequests.back();
-        WaypointPath path;
-        pathfinder.m_LongPathfinder->ComputePath(*pathfinder.m_PathfinderHier, req.x0, req.z0, req.goal, req.passClass, path);
-        m_Results.emplace_back(req.ticket, req.notify, path);
-        m_LongRequests.pop_back();
+    }
-    while (!m_ShortRequests.empty())
+    {
-        const ShortPathRequest& req = m_ShortRequests.back();
-        WaypointPath path = pathfinder.m_VertexPathfinder->ComputeShortPath(req, CmpPtr<ICmpObstructionManager>(pathfinder.GetSystemEntity()));
-        m_Results.emplace_back(req.ticket, req.notify, path);
-        m_ShortRequests.pop_back();
+    }
+}
 u32 CCmpPathfinder::ComputePathAsync(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass, entity_id_t notify)
+{
     LongPathRequest req = { m_NextAsyncTicket++, x0, z0, goal, passClass, notify };
     m_LongPathRequests.push_back(req);
+    m_LongPathRequests.m_Requests.push_back(req);
     return req.ticket;
+}
 …
+{
     ShortPathRequest req = { m_NextAsyncTicket++, x0, z0, clearance, range, goal, passClass, avoidMovingUnits, group, notify };
     m_ShortPathRequests.push_back(req);
+    m_ShortPathRequests.m_Requests.push_back(req);
     return req.ticket;
+}
 …
+}
+void CCmpPathfinder::FetchAsyncResultsAndSendMessages()
+{
+    PROFILE2("FetchAsyncResults");
+    // We may now clear existing requests.
+    m_ShortPathRequests.clear();
+    m_LongPathRequests.clear();
+    // WARNING: the order in which moves are pulled must be consistent when using 1 or n workers.
+    // We fetch in the same order we inserted in, but we push moves backwards, so this works.
+    std::vector<PathResult> results;
+    for (PathfinderWorker& worker : m_Workers)
+    {
+        results.insert(results.end(), std::make_move_iterator(worker.m_Results.begin()), std::make_move_iterator(worker.m_Results.end()));
+        worker.m_Results.clear();
+template<typename T>
+template<typename U>
+void CCmpPathfinder::PathRequests<T>::Compute(const CCmpPathfinder& cmpPathfinder, const U& pathfinder)
+{
+    static_assert((std::is_same_v<T, LongPathRequest> && std::is_same_v<U, LongPathfinder>) ||
+                  (std::is_same_v<T, ShortPathRequest> && std::is_same_v<U, VertexPathfinder>));
+    size_t maxN = m_Results.size();
+    size_t startIndex = m_Requests.size() - m_Results.size();
+    do
+    {
+        size_t workIndex = m_NextPathToCompute++;
+        if (workIndex >= maxN)
+            break;
+        const T& req = m_Requests[startIndex + workIndex];
+        PathResult& result = m_Results[workIndex];
+        result.ticket = req.ticket;
+        result.notify = req.notify;
+        if constexpr (std::is_same_v<T, LongPathRequest>)
+            pathfinder.ComputePath(*cmpPathfinder.m_PathfinderHier, req.x0, req.z0, req.goal, req.passClass, result.path);
+        else
+            result.path = pathfinder.ComputeShortPath(req, CmpPtr<ICmpObstructionManager>(cmpPathfinder.GetSystemEntity()));
+        if (workIndex == maxN - 1)
+            m_ComputeDone = true;
+    }
+    while (true);
+}
+void CCmpPathfinder::SendRequestedPaths()
+{
+    PROFILE2("SendRequestedPaths");
+    if (!m_LongPathRequests.m_ComputeDone || !m_ShortPathRequests.m_ComputeDone)
+    {
+        m_ShortPathRequests.Compute(*this, *m_VertexPathfinder);
+        m_LongPathRequests.Compute(*this, *m_LongPathfinder);
+    }
+    {
         PROFILE2("PostMessages");
         for (PathResult& path : results)
+        for (PathResult& path : m_ShortPathRequests.m_Results)
+        {
             CMessagePathResult msg(path.ticket, path.path);
             GetSimContext().GetComponentManager().PostMessage(path.notify, msg);
+        }
+    }
+        for (PathResult& path : m_LongPathRequests.m_Results)
+        {
+            CMessagePathResult msg(path.ticket, path.path);
+            GetSimContext().GetComponentManager().PostMessage(path.notify, msg);
+        }
+    }
+    m_ShortPathRequests.ClearComputed();
+    m_LongPathRequests.ClearComputed();
+}
 void CCmpPathfinder::StartProcessingMoves(bool useMax)
+{
+    std::vector<LongPathRequest> longRequests = GetMovesToProcess(m_LongPathRequests, useMax, m_MaxSameTurnMoves);
+    std::vector<ShortPathRequest> shortRequests = GetMovesToProcess(m_ShortPathRequests, useMax, m_MaxSameTurnMoves - longRequests.size());
+    PushRequestsToWorkers(longRequests);
+    PushRequestsToWorkers(shortRequests);
+    for (PathfinderWorker& worker : m_Workers)
+        worker.Work(*this);
+}
+template <typename T>
+std::vector<T> CCmpPathfinder::GetMovesToProcess(std::vector<T>& requests, bool useMax, size_t maxMoves)
+{
+    // Keep the original requests in which we need to serialize.
+    std::vector<T> copiedRequests;
+    if (useMax)
+    {
+        size_t amount = std::min(requests.size(), maxMoves);
+        if (amount > 0)
+            copiedRequests.insert(copiedRequests.begin(), requests.end() - amount, requests.end());
+    }
+    else
+        copiedRequests = requests;
+    return copiedRequests;
+}
+template <typename T>
+void CCmpPathfinder::PushRequestsToWorkers(std::vector<T>& from)
+{
+    if (from.empty())
+        return;
+    // Trivial load-balancing, / rounds towards zero so add 1 to ensure we do push all requests.
+    size_t amount = from.size() / m_Workers.size() + 1;
+    // WARNING: the order in which moves are pushed must be consistent when using 1 or n workers.
+    // In this instance, work is distributed in a strict LIFO order, effectively reversing tickets.
+    for (PathfinderWorker& worker : m_Workers)
+    {
+        amount = std::min(amount, from.size()); // Since we are rounding up before, ensure we aren't pushing beyond the end.
+        worker.PushRequests(from, amount);
+        from.erase(from.end() - amount, from.end());
+    }
+    m_ShortPathRequests.PrepareForComputation(useMax ? m_MaxSameTurnMoves : 0);
+    m_LongPathRequests.PrepareForComputation(useMax ? m_MaxSameTurnMoves : 0);
+}

ps/trunk/source/simulation2/components/CCmpPathfinder_Common.h

-              r24142
+              r25256
 /* Copyright (C) 2020 Wildfire Games.
+/* Copyright (C) 2021 Wildfire Games.
  * This file is part of 0 A.D.
+ *
 …
 #include "simulation2/helpers/Grid.h"
 class HierarchicalPathfinder;
 …
 class CCmpPathfinder final : public ICmpPathfinder
+{
-protected:
-    class PathfinderWorker
+    {
-        friend CCmpPathfinder;
-    public:
-        PathfinderWorker();
-        // Process path requests, checking if we should stop before each new one.
-        void Work(const CCmpPathfinder& pathfinder);
-    private:
-        // Insert requests in m_[Long/Short]Requests depending on from.
-        // This could be removed when we may use if-constexpr in CCmpPathfinder::PushRequestsToWorkers
-        template<typename T>
-        void PushRequests(std::vector<T>& from, ssize_t amount);
-        // Stores our results, the main thread will fetch this.
-        std::vector<PathResult> m_Results;
-        std::vector<LongPathRequest> m_LongRequests;
-        std::vector<ShortPathRequest> m_ShortRequests;
-    };
-    // Allow the workers to access our private variables
-    friend class PathfinderWorker;
 public:
     static void ClassInit(CComponentManager& componentManager)
 …
     std::map<std::string, pass_class_t> m_PassClassMasks;
     std::vector<PathfinderPassability> m_PassClasses;
     // Dynamic state:
-    std::vector<LongPathRequest> m_LongPathRequests;
-    std::vector<ShortPathRequest> m_ShortPathRequests;
-    u32 m_NextAsyncTicket; // Unique IDs for asynchronous path requests.
-    u16 m_MaxSameTurnMoves; // Compute only this many paths when useMax is true in StartProcessingMoves.
     // Lazily-constructed dynamic state (not serialized):
 …
     std::unique_ptr<LongPathfinder> m_LongPathfinder;
+    // Workers process pathing requests.
+    std::vector<PathfinderWorker> m_Workers;
+    template<typename T>
+    class PathRequests {
+    public:
+        std::vector<T> m_Requests;
+        std::vector<PathResult> m_Results;
+        // This is the array index of the next path to compute.
+        size_t m_NextPathToCompute = 0;
+        // This is false until all scheduled paths have been computed.
+        bool m_ComputeDone = true;
+        void ClearComputed()
+        {
+            if (m_Results.size() == m_Requests.size())
+                m_Requests.clear();
+            else
+                m_Requests.erase(m_Requests.end() - m_Results.size(), m_Requests.end());
+            m_Results.clear();
+        }
+        /**
+         * @param max - if non-zero, how many paths to process.
+         */
+        void PrepareForComputation(u16 max)
+        {
+            size_t n = m_Requests.size();
+            if (max && n > max)
+                n = max;
+            m_NextPathToCompute = 0;
+            m_Results.resize(n);
+            m_ComputeDone = n == 0;
+        }
+        template<typename U>
+        void Compute(const CCmpPathfinder& cmpPathfinder, const U& pathfinder);
+    };
+    PathRequests<LongPathRequest> m_LongPathRequests;
+    PathRequests<ShortPathRequest> m_ShortPathRequests;
+    u32 m_NextAsyncTicket; // Unique IDs for asynchronous path requests.
     AtlasOverlay* m_AtlasOverlay;
 …
     virtual ICmpObstruction::EFoundationCheck CheckBuildingPlacement(const IObstructionTestFilter& filter, entity_pos_t x, entity_pos_t z, entity_pos_t a, entity_pos_t w, entity_pos_t h, entity_id_t id, pass_class_t passClass, bool onlyCenterPoint) const;
     virtual void FetchAsyncResultsAndSendMessages();
+    virtual void SendRequestedPaths();
     virtual void StartProcessingMoves(bool useMax);

ps/trunk/source/simulation2/components/ICmpPathfinder.h

r25004	r25256
186	186	* Finish computing asynchronous path requests and send the CMessagePathResult messages.
187	187	*/
188		virtual void ~~FetchAsyncResultsAndSendMessage~~s() = 0;
	188	virtual void SendRequestedPaths() = 0;
189	189
190	190	/**

Note: See TracChangeset for help on using the changeset viewer.