diff --git a/indra/llcommon/llthreadsafequeue.h b/indra/llcommon/llthreadsafequeue.h
index 5c934791fe796d64fcec31d2c49246ec29b03065..280650655051ac489126314807307e0b3042e4d6 100644
--- a/indra/llcommon/llthreadsafequeue.h
+++ b/indra/llcommon/llthreadsafequeue.h
@@ -275,6 +275,7 @@ template <typename ElementT, typename QueueT>
 template <typename CALLABLE>
 bool LLThreadSafeQueue<ElementT, QueueT>::tryLock(CALLABLE&& callable)
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock1(mLock, std::defer_lock);
     if (!lock1.try_lock())
         return false;
@@ -291,6 +292,7 @@ bool LLThreadSafeQueue<ElementT, QueueT>::tryLockUntil(
     const std::chrono::time_point<Clock, Duration>& until,
     CALLABLE&& callable)
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock1(mLock, std::defer_lock);
     if (!lock1.try_lock_until(until))
         return false;
@@ -304,6 +306,7 @@ template <typename ElementT, typename QueueT>
 template <typename T>
 bool LLThreadSafeQueue<ElementT, QueueT>::push_(lock_t& lock, T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     if (mStorage.size() >= mCapacity)
         return false;
 
@@ -319,6 +322,7 @@ template <typename ElementT, typename QueueT>
 template <typename T>
 bool LLThreadSafeQueue<ElementT, QueueT>::pushIfOpen(T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock1(mLock);
     while (true)
     {
@@ -341,6 +345,7 @@ template <typename ElementT, typename QueueT>
 template<typename T>
 void LLThreadSafeQueue<ElementT, QueueT>::push(T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     if (! pushIfOpen(std::forward<T>(element)))
     {
         LLTHROW(LLThreadSafeQueueInterrupt());
@@ -352,6 +357,7 @@ template<typename ElementT, typename QueueT>
 template<typename T>
 bool LLThreadSafeQueue<ElementT, QueueT>::tryPush(T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     return tryLock(
         [this, element=std::move(element)](lock_t& lock)
         {
@@ -368,6 +374,7 @@ bool LLThreadSafeQueue<ElementT, QueueT>::tryPushFor(
     const std::chrono::duration<Rep, Period>& timeout,
     T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     // Convert duration to time_point: passing the same timeout duration to
     // each of multiple calls is wrong.
     return tryPushUntil(std::chrono::steady_clock::now() + timeout,
@@ -381,6 +388,7 @@ bool LLThreadSafeQueue<ElementT, QueueT>::tryPushUntil(
     const std::chrono::time_point<Clock, Duration>& until,
     T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     return tryLockUntil(
         until,
         [this, until, element=std::move(element)](lock_t& lock)
@@ -413,6 +421,7 @@ template <typename ElementT, typename QueueT>
 typename LLThreadSafeQueue<ElementT, QueueT>::pop_result
 LLThreadSafeQueue<ElementT, QueueT>::pop_(lock_t& lock, ElementT& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     // If mStorage is empty, there's no head element.
     if (mStorage.empty())
         return mClosed? DONE : EMPTY;
@@ -434,6 +443,7 @@ LLThreadSafeQueue<ElementT, QueueT>::pop_(lock_t& lock, ElementT& element)
 template<typename ElementT, typename QueueT>
 ElementT LLThreadSafeQueue<ElementT, QueueT>::pop(void)
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock1(mLock);
     ElementT value;
     while (true)
@@ -462,6 +472,7 @@ ElementT LLThreadSafeQueue<ElementT, QueueT>::pop(void)
 template<typename ElementT, typename QueueT>
 bool LLThreadSafeQueue<ElementT, QueueT>::tryPop(ElementT & element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     return tryLock(
         [this, &element](lock_t& lock)
         {
@@ -479,6 +490,7 @@ bool LLThreadSafeQueue<ElementT, QueueT>::tryPopFor(
     const std::chrono::duration<Rep, Period>& timeout,
     ElementT& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     // Convert duration to time_point: passing the same timeout duration to
     // each of multiple calls is wrong.
     return tryPopUntil(std::chrono::steady_clock::now() + timeout, element);
@@ -491,6 +503,7 @@ bool LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil(
     const std::chrono::time_point<Clock, Duration>& until,
     ElementT& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     return tryLockUntil(
         until,
         [this, until, &element](lock_t& lock)
@@ -510,6 +523,7 @@ LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil_(
     const std::chrono::time_point<Clock, Duration>& until,
     ElementT& element)
 {
+    LL_PROFILE_ZONE_SCOPED;
     while (true)
     {
         pop_result popped = pop_(lock, element);
@@ -536,6 +550,7 @@ LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil_(
 template<typename ElementT, typename QueueT>
 size_t LLThreadSafeQueue<ElementT, QueueT>::size(void)
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock(mLock);
     return mStorage.size();
 }
@@ -544,6 +559,7 @@ size_t LLThreadSafeQueue<ElementT, QueueT>::size(void)
 template<typename ElementT, typename QueueT>
 void LLThreadSafeQueue<ElementT, QueueT>::close()
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock(mLock);
     mClosed = true;
     lock.unlock();
@@ -557,6 +573,7 @@ void LLThreadSafeQueue<ElementT, QueueT>::close()
 template<typename ElementT, typename QueueT>
 bool LLThreadSafeQueue<ElementT, QueueT>::isClosed()
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock(mLock);
     return mClosed;
 }
@@ -565,6 +582,7 @@ bool LLThreadSafeQueue<ElementT, QueueT>::isClosed()
 template<typename ElementT, typename QueueT>
 bool LLThreadSafeQueue<ElementT, QueueT>::done()
 {
+    LL_PROFILE_ZONE_SCOPED;
     lock_t lock(mLock);
     return mClosed && mStorage.empty();
 }
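
Note on the instrumentation above: every lock/push/pop path of LLThreadSafeQueue now opens a scoped profiler zone, so time spent waiting on mLock is attributed to the calling function. The sketch below only illustrates the RAII semantics assumed for LL_PROFILE_ZONE_SCOPED — ProfileZone is a hypothetical type, not the viewer's macro, which presumably maps to the project's profiler backend (a Tracy-style ZoneScoped) when profiling is enabled and to nothing otherwise.

    // Hypothetical RAII zone, only to illustrate the assumed semantics of
    // LL_PROFILE_ZONE_SCOPED: the zone opens at the macro and closes when the
    // enclosing scope ends.
    #include <chrono>
    #include <cstdio>

    struct ProfileZone
    {
        const char* mName;
        std::chrono::steady_clock::time_point mStart = std::chrono::steady_clock::now();
        explicit ProfileZone(const char* name) : mName(name) {}
        ~ProfileZone()   // zone ends when the enclosing scope ends
        {
            auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                std::chrono::steady_clock::now() - mStart).count();
            std::printf("%s: %lld us\n", mName, static_cast<long long>(us));
        }
    };

    // A real implementation would generate a unique variable name per use.
    #define LL_PROFILE_ZONE_SCOPED ProfileZone zone_(__func__)

Because the zone closes with the enclosing scope, a single macro at the top of pop() also covers the blocking wait on the condition variable, which is presumably the contention these changes are meant to expose.
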
diff --git a/indra/llcommon/threadpool.cpp b/indra/llcommon/threadpool.cpp
index cf25cc838e1ce55327cbd81b110473d1e2a1b7ef..06e0dc5bfc29ff478d0d6a076182daac09fbbf24 100644
--- a/indra/llcommon/threadpool.cpp
+++ b/indra/llcommon/threadpool.cpp
@@ -28,7 +28,11 @@ LL::ThreadPool::ThreadPool(const std::string& name, size_t threads, size_t capac
     for (size_t i = 0; i < threads; ++i)
     {
         std::string tname{ STRINGIZE(mName << ':' << (i+1) << '/' << threads) };
-        mThreads.emplace_back(tname, [this, tname](){ run(tname); });
+        mThreads.emplace_back(tname, [this, tname]()
+            {
+                LL_PROFILER_SET_THREAD_NAME(tname.c_str());
+                run(tname);
+            });
     }
     // Listen on "LLApp", and when the app is shutting down, close the queue
     // and join the workers.
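
The reworked lambda names each pool worker for the profiler before entering run(). A minimal sketch of that pattern follows, assuming LL_PROFILER_SET_THREAD_NAME tags the calling thread (which is why it has to run inside the lambda, on the worker itself, rather than in the constructor); startWorkers, run, and the no-op macro definition are hypothetical stand-ins, not ThreadPool's actual code.

    #include <string>
    #include <thread>
    #include <vector>

    // Stand-in no-op so the sketch compiles; the real macro presumably tags
    // the calling thread with a human-readable name in the profiler UI.
    #define LL_PROFILER_SET_THREAD_NAME(name) (void)(name)

    void run(const std::string& name) { (void)name; /* worker loop would live here */ }

    void startWorkers(std::vector<std::thread>& threads, size_t count, const std::string& poolName)
    {
        for (size_t i = 0; i < count; ++i)
        {
            std::string tname = poolName + ":" + std::to_string(i + 1) + "/" + std::to_string(count);
            threads.emplace_back([tname]()
            {
                // First statement on the new thread: name it, then enter the loop.
                LL_PROFILER_SET_THREAD_NAME(tname.c_str());
                run(tname);
            });
        }
    }
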
diff --git a/indra/llcommon/threadsafeschedule.h b/indra/llcommon/threadsafeschedule.h
index c8ad23532b130f5e693faffd7b9bf8b4b78d655d..601681d550ed93b9194ee501b6e51c213ea759bf 100644
--- a/indra/llcommon/threadsafeschedule.h
+++ b/indra/llcommon/threadsafeschedule.h
@@ -98,12 +98,14 @@ namespace LL
         // we could minimize redundancy by breaking out a common base class...
         void push(const DataTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             push(tuple_cons(Clock::now(), tuple));
         }
 
         /// individually pass each component of the TimeTuple
         void push(const TimePoint& time, Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             push(TimeTuple(time, std::forward<Args>(args)...));
         }
 
@@ -114,6 +116,7 @@ namespace LL
         // and call that overload.
         void push(Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             push(Clock::now(), std::forward<Args>(args)...);
         }
 
@@ -124,18 +127,21 @@ namespace LL
         /// DataTuple with implicit now
         bool tryPush(const DataTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPush(tuple_cons(Clock::now(), tuple));
         }
 
         /// individually pass components
         bool tryPush(const TimePoint& time, Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPush(TimeTuple(time, std::forward<Args>(args)...));
         }
 
         /// individually pass components with implicit now
         bool tryPush(Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPush(Clock::now(), std::forward<Args>(args)...);
         }
 
@@ -148,6 +154,7 @@ namespace LL
         bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
                         const DataTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPushFor(timeout, tuple_cons(Clock::now(), tuple));
         }
 
@@ -156,6 +163,7 @@ namespace LL
         bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
                         const TimePoint& time, Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPushFor(TimeTuple(time, std::forward<Args>(args)...));
         }
 
@@ -164,6 +172,7 @@ namespace LL
         bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
                         Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPushFor(Clock::now(), std::forward<Args>(args)...);
         }
 
@@ -176,6 +185,7 @@ namespace LL
         bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
                           const DataTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPushUntil(until, tuple_cons(Clock::now(), tuple));
         }
 
@@ -184,6 +194,7 @@ namespace LL
         bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
                           const TimePoint& time, Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPushUntil(until, TimeTuple(time, std::forward<Args>(args)...));
         }
 
@@ -192,6 +203,7 @@ namespace LL
         bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
                           Args&&... args)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tryPushUntil(until, Clock::now(), std::forward<Args>(args)...);
         }
 
@@ -209,12 +221,14 @@ namespace LL
         // haven't yet jumped through those hoops.
         DataTuple pop()
         {
+            LL_PROFILE_ZONE_SCOPED;
             return tuple_cdr(popWithTime());
         }
 
         /// pop TimeTuple by value
         TimeTuple popWithTime()
         {
+            LL_PROFILE_ZONE_SCOPED;
             lock_t lock(super::mLock);
             // We can't just sit around waiting forever, given that there may
             // be items in the queue that are not yet ready but will *become*
@@ -254,6 +268,7 @@ namespace LL
         /// tryPop(DataTuple&)
         bool tryPop(DataTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             TimeTuple tt;
             if (! super::tryPop(tt))
                 return false;
@@ -264,6 +279,7 @@ namespace LL
         /// for when Args has exactly one type
         bool tryPop(typename std::tuple_element<1, TimeTuple>::type& value)
         {
+            LL_PROFILE_ZONE_SCOPED;
             TimeTuple tt;
             if (! super::tryPop(tt))
                 return false;
@@ -275,6 +291,7 @@ namespace LL
         template <typename Rep, typename Period, typename Tuple>
         bool tryPopFor(const std::chrono::duration<Rep, Period>& timeout, Tuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             // It's important to use OUR tryPopUntil() implementation, rather
             // than delegating immediately to our base class.
             return tryPopUntil(Clock::now() + timeout, tuple);
@@ -285,6 +302,7 @@ namespace LL
         bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
                          TimeTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             // super::tryPopUntil() wakes up when an item becomes available or
             // we hit 'until', whichever comes first. Thing is, the current
             // head of the queue could become ready sooner than either of
@@ -304,20 +322,25 @@ namespace LL
 
         pop_result tryPopUntil_(lock_t& lock, const TimePoint& until, TimeTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             TimePoint adjusted = until;
             if (! super::mStorage.empty())
             {
+                LL_PROFILE_ZONE_NAMED("tpu - adjust");
                 // use whichever is earlier: the head item's timestamp, or
                 // the caller's limit
                 adjusted = min(std::get<0>(super::mStorage.front()), adjusted);
             }
             // now delegate to base-class tryPopUntil_()
             pop_result popped;
-            while ((popped = pop_result(super::tryPopUntil_(lock, adjusted, tuple))) == WAITING)
             {
-                // If super::tryPopUntil_() returns WAITING, it means there's
-                // a head item, but it's not yet time. But it's worth looping
-                // back to recheck.
+                LL_PROFILE_ZONE_NAMED("tpu - super");
+                while ((popped = pop_result(super::tryPopUntil_(lock, adjusted, tuple))) == WAITING)
+                {
+                    // If super::tryPopUntil_() returns WAITING, it means there's
+                    // a head item, but it's not yet time. But it's worth looping
+                    // back to recheck.
+                }
             }
             return popped;
         }
@@ -327,6 +350,7 @@ namespace LL
         bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
                          DataTuple& tuple)
         {
+            LL_PROFILE_ZONE_SCOPED;
             TimeTuple tt;
             if (! tryPopUntil(until, tt))
                 return false;
@@ -339,6 +363,7 @@ namespace LL
         bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
                          typename std::tuple_element<1, TimeTuple>::type& value)
         {
+            LL_PROFILE_ZONE_SCOPED;
             TimeTuple tt;
             if (! tryPopUntil(until, tt))
                 return false;
@@ -362,6 +387,7 @@ namespace LL
         // considering whether to deliver the current head element
         bool canPop(const TimeTuple& head) const override
         {
+            LL_PROFILE_ZONE_SCOPED;
             // an item with a future timestamp isn't yet ready to pop
             // (should we add some slop for overhead?)
             return std::get<0>(head) <= Clock::now();
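
Within tryPopUntil_() the patch nests two named zones ("tpu - adjust" and "tpu - super") under the function-level zone, separating the cheap deadline adjustment from the potentially long wait delegated to the base class. Below is a minimal sketch of that nesting, assuming LL_PROFILE_ZONE_NAMED opens a labelled zone that closes at the end of its scope; drainOne and the no-op macro definitions are hypothetical, included only so the sketch stands alone.

    #include <chrono>
    #include <thread>

    // Stand-in no-op macros so this sketch compiles on its own; the real ones
    // are assumed to open a profiler zone that closes at the end of the scope.
    #define LL_PROFILE_ZONE_SCOPED
    #define LL_PROFILE_ZONE_NAMED(name)

    void drainOne()
    {
        LL_PROFILE_ZONE_SCOPED;                  // covers the whole call

        {
            LL_PROFILE_ZONE_NAMED("adjust");     // short, lock-held deadline adjustment
            // ... pick the earlier of the head item's timestamp and the caller's limit ...
        }
        {
            LL_PROFILE_ZONE_NAMED("wait");       // potentially long blocking call
            std::this_thread::sleep_for(std::chrono::milliseconds(1)); // placeholder for the wait
        }
    }
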
diff --git a/indra/llcommon/workqueue.cpp b/indra/llcommon/workqueue.cpp
index 633594ceea780b763f9e037a4ca34a18b84e2244..fbdbea2051fb5edfcdb3d80ecc5a4c6ce8fb8548 100644
--- a/indra/llcommon/workqueue.cpp
+++ b/indra/llcommon/workqueue.cpp
@@ -60,6 +60,7 @@ void LL::WorkQueue::runUntilClose()
     {
         for (;;)
         {
+            LL_PROFILE_ZONE_SCOPED;
             callWork(mQueue.pop());
         }
     }
@@ -90,6 +91,7 @@ bool LL::WorkQueue::runOne()
 
 bool LL::WorkQueue::runUntil(const TimePoint& until)
 {
+    LL_PROFILE_ZONE_SCOPED;
     // Should we subtract some slop to allow for typical Work execution time?
     // How much slop?
     Work work;
diff --git a/indra/llcommon/workqueue.h b/indra/llcommon/workqueue.h
index c25d78742544980203b357cdd9b19728fc0a5237..96574a18b92f137ef5d3a02b70412928abee6653 100644
--- a/indra/llcommon/workqueue.h
+++ b/indra/llcommon/workqueue.h
@@ -260,6 +260,7 @@ namespace LL
         template <typename Rep, typename Period>
         bool runFor(const std::chrono::duration<Rep, Period>& timeslice)
         {
+            LL_PROFILE_ZONE_SCOPED;
             return runUntil(TimePoint::clock::now() + timeslice);
         }
 
@@ -431,6 +432,7 @@ namespace LL
     bool WorkQueue::postTo(weak_t target,
                            const TimePoint& time, CALLABLE&& callable, FOLLOWUP&& callback)
     {
+        LL_PROFILE_ZONE_SCOPED;
         // We're being asked to post to the WorkQueue at target.
         // target is a weak_ptr: have to lock it to check it.
         auto tptr = target.lock();
@@ -479,6 +481,7 @@ namespace LL
     template <typename CALLABLE>
     bool WorkQueue::postMaybe(weak_t target, const TimePoint& time, CALLABLE&& callable)
     {
+        LL_PROFILE_ZONE_SCOPED;
         // target is a weak_ptr: have to lock it to check it
         auto tptr = target.lock();
         if (tptr)
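
Both postTo() and postMaybe() start with the weak_ptr lock-and-check step described in the comments, so the new zones also capture the cost of that lock() and of the enqueue behind it. The sketch below shows the idiom under standard weak_ptr semantics only; Queue and postMaybe here are hypothetical reductions, not the WorkQueue API.

    #include <functional>
    #include <memory>
    #include <queue>
    #include <utility>

    // Hypothetical minimal queue; only the weak_ptr semantics matter here.
    struct Queue
    {
        std::queue<std::function<void()>> mWork;
        void post(std::function<void()> work) { mWork.push(std::move(work)); }
    };

    // lock() yields an empty shared_ptr if the target queue has already been
    // destroyed, so the post is skipped instead of dereferencing a dead object.
    bool postMaybe(std::weak_ptr<Queue> target, std::function<void()> work)
    {
        if (auto tptr = target.lock())
        {
            tptr->post(std::move(work));
            return true;     // queued
        }
        return false;        // target already gone; report failure to the caller
    }
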
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index 1b6920fe3b6cb1f9eca3cec1a0407a1db1d46ee8..eda61d3c74ea97ac3b1977f14f9bac34b51c1082 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -2257,6 +2257,7 @@ LLImageGLThread::LLImageGLThread(LLWindow* window)
     : ThreadPool("LLImageGL", 1, 1024*1024)
     , mWindow(window)
 {
+    LL_PROFILE_ZONE_SCOPED;
     mFinished = false;
 
     mContext = mWindow->createSharedContext();
@@ -2264,8 +2265,13 @@ LLImageGLThread::LLImageGLThread(LLWindow* window)
 
 void LLImageGLThread::run()
 {
+    LL_PROFILE_ZONE_SCOPED;
     // We must perform setup on this thread before actually servicing our
     // WorkQueue, likewise cleanup afterwards.
+    while (mContext == nullptr)
+    { // HACK -- wait for mContext to be initialized, since the ThreadPool constructor usually starts this thread before LLImageGLThread's constructor can set mContext
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
+    }
     mWindow->makeContextCurrent(mContext);
     gGL.init();
     ThreadPool::run();
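
The polling loop exists because the ThreadPool base constructor starts the worker before LLImageGLThread's constructor body has called createSharedContext(), and the `= nullptr` default added in llimagegl.h guarantees the worker reads a well-defined value while it waits. The sketch below shows that startup ordering with hypothetical names; std::atomic is used so the sketch itself is race-free, whereas the patch polls a plain pointer.

    #include <atomic>
    #include <chrono>
    #include <thread>

    // Hypothetical stand-in for mContext.
    std::atomic<void*> gContext{nullptr};

    void worker()
    {
        // The worker can start before the creating thread has produced the
        // shared context, so it polls until the pointer is published.
        while (gContext.load() == nullptr)
        {
            std::this_thread::sleep_for(std::chrono::milliseconds(1));
        }
        // ... make the context current and run the pool from here ...
    }

    int main()
    {
        std::thread t(worker);        // starts immediately, like ThreadPool's workers
        static int placeholder;
        gContext.store(&placeholder); // later: the constructor body publishes the context
        t.join();
    }
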
diff --git a/indra/llrender/llimagegl.h b/indra/llrender/llimagegl.h
index 27496def1d62b184731a2cd9312ca78e71a9c197..ae773bb36259393981a6d509f2750a8e0d0697b8 100644
--- a/indra/llrender/llimagegl.h
+++ b/indra/llrender/llimagegl.h
@@ -324,7 +324,7 @@ class LLImageGLThread : public LLSimpleton<LLImageGLThread>, LL::ThreadPool
 
 private:
     LLWindow* mWindow;
-    void* mContext;
+    void* mContext = nullptr;
     LLAtomicBool mFinished;
 };
 
diff --git a/indra/llwindow/llwindowwin32.cpp b/indra/llwindow/llwindowwin32.cpp
index 162c38b862bcb17cf3365ab9a2983a9351224ff5..51fd2286193834fef344dc43d14cf43e9d851bc8 100644
--- a/indra/llwindow/llwindowwin32.cpp
+++ b/indra/llwindow/llwindowwin32.cpp
@@ -1762,7 +1762,7 @@ void* LLWindowWin32::createSharedContext()
         0
     };
 
-    HGLRC rc = wglCreateContextAttribsARB(mhDC, mhRC, attribs);
+    HGLRC rc = 0;
 
     bool done = false;
     while (!done)