From 28a54c2f7b25b9fda763c51746701f0cd21c8018 Mon Sep 17 00:00:00 2001
From: Nat Goodspeed <nat@lindenlab.com>
Date: Tue, 22 Oct 2019 16:49:29 -0400
Subject: [PATCH] DRTVWR-476: Infrastructure to help manage long-lived
 coroutines.

Introduce LLCoros::Stop exception, with subclasses Stopping, Stopped and
Shutdown. Add LLCoros::checkStop(), intended to be called periodically by any
coroutine with nontrivial lifespan. It checks the LLApp status and, unless
isRunning(), throws one of these new exceptions.

Make LLCoros::toplevel() catch Stop specially and log forcible coroutine
termination.

Now that LLApp status matters even in a test program, introduce a trivial
LLTestApp subclass whose sole function is to make isRunning() true.
(LLApp::setStatus() is protected: only a subclass can call it.) Add LLTestApp
instances to lleventcoro_test.cpp and lllogin_test.cpp.

Make LLCoros::toplevel() accept parameters by value rather than by const
reference so we can continue using them even after context switches.

Make private LLCoros::get_CoroData() static. Given that we've observed some
coroutines living past LLCoros destruction, making the caller call
LLCoros::instance() is more dangerous than encapsulating it within a static
method -- since the encapsulated call can check LLCoros::wasDeleted() first
and do something reasonable instead. This also eliminates the need for both a
const and non-const overload.

Defend LLCoros::delete_CoroData() (cleanup function for fiber_specific_ptr for
CoroData, implicitly called after coroutine termination) against calls after
~LLCoros().

Add a status string to coroutine-local data, with LLCoro::setStatus(),
getStatus() and RAII class TempStatus.

Add an optional 'when' string argument to LLCoros::printActiveCoroutines().
Make ~LLCoros() print the coroutines still active at destruction.
---
 indra/llcommon/llcoros.cpp                    | 105 ++++++++++++++----
 indra/llcommon/llcoros.h                      |  66 ++++++++++-
 indra/llcommon/tests/lleventcoro_test.cpp     |   2 +
 indra/test/lltestapp.h                        |  34 ++++++
 .../login/tests/lllogin_test.cpp              |   2 +
 5 files changed, 181 insertions(+), 28 deletions(-)
 create mode 100644 indra/test/lltestapp.h

diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp
index 37bcfc32423..78a0c5d2257 100644
--- a/indra/llcommon/llcoros.cpp
+++ b/indra/llcommon/llcoros.cpp
@@ -48,6 +48,7 @@
 #undef BOOST_DISABLE_ASSERTS
 #endif
 // other Linden headers
+#include "llapp.h"
 #include "lltimer.h"
 #include "llevents.h"
 #include "llerror.h"
@@ -58,10 +59,15 @@
 #include <excpt.h>
 #endif
 
-
-const LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) const
+// static
+LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller)
 {
-    CoroData* current = mCurrent.get();
+    CoroData* current{ nullptr };
+    // be careful about attempted accesses in the final throes of app shutdown
+    if (! wasDeleted())
+    {
+        current = instance().mCurrent.get();
+    }
     // For the main() coroutine, the one NOT explicitly launched by launch(),
     // we never explicitly set mCurrent. Use a static CoroData instance with
     // canonical values.
@@ -78,12 +84,6 @@ const LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) const
     return *current;
 }
 
-LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller)
-{
-    // reuse const implementation, just cast away const-ness of result
-    return const_cast<CoroData&>(const_cast<const LLCoros*>(this)->get_CoroData(caller));
-}
-
 //static
 LLCoros::coro::id LLCoros::get_self()
 {
@@ -93,7 +93,7 @@ LLCoros::coro::id LLCoros::get_self()
 //static
 void LLCoros::set_consuming(bool consuming)
 {
-    CoroData& data(LLCoros::instance().get_CoroData("set_consuming()"));
+    CoroData& data(get_CoroData("set_consuming()"));
     // DO NOT call this on the main() coroutine.
     llassert_always(! data.mName.empty());
     data.mConsuming = consuming;
@@ -102,7 +102,19 @@ void LLCoros::set_consuming(bool consuming)
 //static
 bool LLCoros::get_consuming()
 {
-    return LLCoros::instance().get_CoroData("get_consuming()").mConsuming;
+    return get_CoroData("get_consuming()").mConsuming;
+}
+
+// static
+void LLCoros::setStatus(const std::string& status)
+{
+    get_CoroData("setStatus()").mStatus = status;
+}
+
+// static
+std::string LLCoros::getStatus()
+{
+    return get_CoroData("getStatus()").mStatus;
 }
 
 LLCoros::LLCoros():
@@ -118,6 +130,11 @@ LLCoros::LLCoros():
 {
 }
 
+LLCoros::~LLCoros()
+{
+    printActiveCoroutines("at LLCoros destruction");
+}
+
 std::string LLCoros::generateDistinctName(const std::string& prefix) const
 {
     static int unique = 0;
@@ -166,19 +183,19 @@ void LLCoros::setStackSize(S32 stacksize)
     mStackSize = stacksize;
 }
 
-void LLCoros::printActiveCoroutines()
+void LLCoros::printActiveCoroutines(const std::string& when)
 {
-    LL_INFOS("LLCoros") << "Number of active coroutines: " << (S32)mCoros.size() << LL_ENDL;
+    LL_INFOS("LLCoros") << "Number of active coroutines " << when
+                        << ": " << (S32)mCoros.size() << LL_ENDL;
     if (mCoros.size() > 0)
     {
         LL_INFOS("LLCoros") << "-------------- List of active coroutines ------------";
-        CoroMap::iterator iter;
-        CoroMap::iterator end = mCoros.end();
         F64 time = LLTimer::getTotalSeconds();
-        for (iter = mCoros.begin(); iter != end; iter++)
+        for (const auto& pair : mCoros)
         {
-            F64 life_time = time - iter->second->mCreationTime;
-            LL_CONT << LL_NEWLINE << "Name: " << iter->first << " life: " << life_time;
+            F64 life_time = time - pair.second->mCreationTime;
+            LL_CONT << LL_NEWLINE << pair.first << ' ' << pair.second->mStatus
+                    << " life: " << life_time;
         }
         LL_CONT << LL_ENDL;
         LL_INFOS("LLCoros") << "-----------------------------------------------------" << LL_ENDL;
@@ -244,11 +261,19 @@ void LLCoros::winlevel(const callable_t& callable)
 #endif
 
 // Top-level wrapper around caller's coroutine callable.
-void LLCoros::toplevel(const std::string& name, const callable_t& callable)
+// Normally we like to pass strings and such by const reference -- but in this
+// case, we WANT to copy both the name and the callable to our local stack!
+void LLCoros::toplevel(std::string name, callable_t callable)
 {
     CoroData* corodata = new CoroData(name);
-    // Store it in our pointer map. Oddly, must cast away const-ness of key.
-    mCoros.insert(const_cast<std::string&>(name), corodata);
+    if (corodata == NULL)
+    {
+        // Out of memory?
+        printActiveCoroutines();
+        LL_ERRS("LLCoros") << "Failed to start coroutine: " << name << " Stacksize: " << mStackSize << " Total coroutines: " << mCoros.size() << LL_ENDL;
+    }
+    // Store it in our pointer map.
+    mCoros.insert(name, corodata);
     // also set it as current
     mCurrent.reset(corodata);
 
@@ -261,18 +286,39 @@ void LLCoros::toplevel(const std::string& name, const callable_t& callable)
         callable();
 #endif
     }
+    catch (const Stop& exc)
+    {
+        LL_INFOS("LLCoros") << "coroutine " << name << " terminating because "
+                            << exc.what() << LL_ENDL;
+    }
     catch (const LLContinueError&)
     {
         // Any uncaught exception derived from LLContinueError will be caught
         // here and logged. This coroutine will terminate but the rest of the
         // viewer will carry on.
-        LOG_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << corodata->mName));
+        LOG_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << name));
     }
     catch (...)
     {
         // Any OTHER kind of uncaught exception will cause the viewer to
         // crash, hopefully informatively.
-        CRASH_ON_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << corodata->mName));
+        CRASH_ON_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << name));
+    }
+}
+
+void LLCoros::checkStop()
+{
+    if (wasDeleted())
+    {
+        LLTHROW(Shutdown("LLCoros was deleted"));
+    }
+    if (LLApp::isStopped())
+    {
+        LLTHROW(Stopped("viewer is stopped"));
+    }
+    if (! LLApp::isRunning())
+    {
+        LLTHROW(Stopping("viewer is stopping"));
     }
 }
 
@@ -288,6 +334,19 @@ void LLCoros::delete_CoroData(CoroData* cdptr)
 {
     // This custom cleanup function is necessarily static. Find and bind the
     // LLCoros instance.
+    // In case the LLCoros instance has already been deleted, just warn and
+    // scoot. We do NOT want to reinstantiate LLCoros during shutdown!
+    if (wasDeleted())
+    {
+        // The LLSingletons involved in logging may have been deleted too.
+        // This warning may help developers track down coroutines that have
+        // not yet been cleaned up.
+        // But cdptr is very likely a dangling pointer by this time, so don't
+        // try to dereference mName.
+        logwarns("Coroutine terminating after LLCoros instance deleted");
+        return;
+    }
+
     LLCoros& self(LLCoros::instance());
     // We set mCurrent on entry to a new fiber, expecting that the
     // corresponding entry has already been stored in mCoros. It is an
diff --git a/indra/llcommon/llcoros.h b/indra/llcommon/llcoros.h
index dedb6c8ecac..de7b6912841 100644
--- a/indra/llcommon/llcoros.h
+++ b/indra/llcommon/llcoros.h
@@ -29,6 +29,7 @@
 #if ! defined(LL_LLCOROS_H)
 #define LL_LLCOROS_H
 
+#include "llexception.h"
 #include <boost/fiber/fss.hpp>
 #include <boost/fiber/future/promise.hpp>
 #include <boost/fiber/future/future.hpp>
@@ -74,6 +75,7 @@
 class LL_COMMON_API LLCoros: public LLSingleton<LLCoros>
 {
     LLSINGLETON(LLCoros);
+    ~LLCoros();
 public:
     /// The viewer's use of the term "coroutine" became deeply embedded before
     /// the industry term "fiber" emerged to distinguish userland threads from
@@ -147,8 +149,8 @@ class LL_COMMON_API LLCoros: public LLSingleton<LLCoros>
      */
     void setStackSize(S32 stacksize);
 
-    /// for delayed initialization
-    void printActiveCoroutines();
+    /// diagnostic
+    void printActiveCoroutines(const std::string& when=std::string());
 
     /// get the current coro::id for those who really really care
     static coro::id get_self();
@@ -176,6 +178,7 @@ class LL_COMMON_API LLCoros: public LLSingleton<LLCoros>
         {
             set_consuming(consuming);
         }
+        OverrideConsuming(const OverrideConsuming&) = delete;
         ~OverrideConsuming()
         {
             set_consuming(mPrevConsuming);
@@ -185,6 +188,58 @@ class LL_COMMON_API LLCoros: public LLSingleton<LLCoros>
         bool mPrevConsuming;
     };
 
+    /// set string coroutine status for diagnostic purposes
+    static void setStatus(const std::string& status);
+    static std::string getStatus();
+
+    /// RAII control of status
+    class TempStatus
+    {
+    public:
+        TempStatus(const std::string& status):
+            mOldStatus(getStatus())
+        {
+            setStatus(status);
+        }
+        TempStatus(const TempStatus&) = delete;
+        ~TempStatus()
+        {
+            setStatus(mOldStatus);
+        }
+
+    private:
+        std::string mOldStatus;
+    };
+
+    /// thrown by checkStop()
+    struct Stop: public LLContinueError
+    {
+        Stop(const std::string& what): LLContinueError(what) {}
+    };
+
+    /// early stages
+    struct Stopping: public Stop
+    {
+        Stopping(const std::string& what): Stop(what) {}
+    };
+
+    /// cleaning up
+    struct Stopped: public Stop
+    {
+        Stopped(const std::string& what): Stop(what) {}
+    };
+
+    /// cleaned up -- not much survives!
+    struct Shutdown: public Stop
+    {
+        Shutdown(const std::string& what): Stop(what) {}
+    };
+
+    /// Call this intermittently if there's a chance your coroutine might
+    /// continue running into application shutdown. Throws Stop if LLCoros has
+    /// been cleaned up.
+    static void checkStop();
+
     /**
      * Aliases for promise and future. An older underlying future implementation
      * required us to wrap future; that's no longer needed. However -- if it's
@@ -204,13 +259,12 @@ class LL_COMMON_API LLCoros: public LLSingleton<LLCoros>
 
 private:
     std::string generateDistinctName(const std::string& prefix) const;
-    void toplevel(const std::string& name, const callable_t& callable);
+    void toplevel(std::string name, callable_t callable);
     struct CoroData;
 #if LL_WINDOWS
     static void winlevel(const callable_t& callable);
 #endif
-    CoroData& get_CoroData(const std::string& caller);
-    const CoroData& get_CoroData(const std::string& caller) const;
+    static CoroData& get_CoroData(const std::string& caller);
 
     S32 mStackSize;
 
@@ -223,6 +277,8 @@ class LL_COMMON_API LLCoros: public LLSingleton<LLCoros>
         const std::string mName;
         // set_consuming() state
         bool mConsuming;
+        // setStatus() state
+        std::string mStatus;
         F64 mCreationTime; // since epoch
     };
     typedef boost::ptr_map<std::string, CoroData> CoroMap;
diff --git a/indra/llcommon/tests/lleventcoro_test.cpp b/indra/llcommon/tests/lleventcoro_test.cpp
index c13920eefd5..032923a1083 100644
--- a/indra/llcommon/tests/lleventcoro_test.cpp
+++ b/indra/llcommon/tests/lleventcoro_test.cpp
@@ -40,6 +40,7 @@
 #include <typeinfo>
 
 #include "../test/lltut.h"
+#include "../test/lltestapp.h"
 #include "llsd.h"
 #include "llsdutil.h"
 #include "llevents.h"
@@ -98,6 +99,7 @@ namespace tut
         std::string replyName, errorName, threw, stringdata;
         LLSD result, errordata;
         int which;
+        LLTestApp testApp;
 
         void explicit_wait(boost::shared_ptr<LLCoros::Promise<std::string>>& cbp);
         void waitForEventOn1();
diff --git a/indra/test/lltestapp.h b/indra/test/lltestapp.h
new file mode 100644
index 00000000000..382516cd2bc
--- /dev/null
+++ b/indra/test/lltestapp.h
@@ -0,0 +1,34 @@
+/**
+ * @file   lltestapp.h
+ * @author Nat Goodspeed
+ * @date   2019-10-21
+ * @brief  LLApp subclass useful for testing.
+ * 
+ * $LicenseInfo:firstyear=2019&license=viewerlgpl$
+ * Copyright (c) 2019, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_LLTESTAPP_H)
+#define LL_LLTESTAPP_H
+
+#include "llapp.h"
+
+/**
+ * LLTestApp is a dummy LLApp that simply sets LLApp::isRunning() for anyone
+ * who cares.
+ */
+class LLTestApp: public LLApp
+{
+public:
+    LLTestApp()
+    {
+        setStatus(APP_STATUS_RUNNING);
+    }
+
+    bool init()    { return true; }
+    bool cleanup() { return true; }
+    bool frame()   { return true; }
+};
+
+#endif /* ! defined(LL_LLTESTAPP_H) */
diff --git a/indra/viewer_components/login/tests/lllogin_test.cpp b/indra/viewer_components/login/tests/lllogin_test.cpp
index 23db8d0fe36..0255e10e533 100644
--- a/indra/viewer_components/login/tests/lllogin_test.cpp
+++ b/indra/viewer_components/login/tests/lllogin_test.cpp
@@ -42,6 +42,7 @@
 // other Linden headers
 #include "llsd.h"
 #include "../../../test/lltut.h"
+#include "../../../test/lltestapp.h"
 //#define DEBUG_ON
 #include "../../../test/debug.h"
 #include "llevents.h"
@@ -201,6 +202,7 @@ namespace tut
             pumps.clear();
         }
         LLEventPumps& pumps;
+        LLTestApp testApp;
     };
 
     typedef test_group<llviewerlogin_data> llviewerlogin_group;
-- 
GitLab