From 9ffcafb64b4483c315d00e88ffc1438bce1f7915 Mon Sep 17 00:00:00 2001
From: Nat Goodspeed <>
Date: Fri, 14 Dec 2018 10:48:43 -0500
Subject: [PATCH] SL-10153: Introduce ll_convert, windows_message() templates.

Add ll_convert<TO, FROM> template, used as (e.g.):

    ll_convert<std::string>(value_of_some_other_string_type);

There is no generic template implementation -- the template exists solely to
provide generic aliases for a bewildering family of llstring.h string-
conversion functions with highly-specific names. There's a generic
implementation, though, for the degenerate case where FROM and TO are
identical.

Add ll_convert<> specialization aliases for most of the string-conversion
functions declared in llstring.h, including the Windows-specific ones
involving llutf16string and std::wstring.

Add a mini-lecture in llstring.h about appropriate use of string types on
Windows.

Add LL_WCHAR_T_NATIVE llpreprocessor.h macro so we can detect whether to
provide separate conversions for llutf16string and std::wstring, or whether
those would collide because the types are identical.

Add inline ll_convert_wide_to_string(const std::wstring&) overloads so caller
isn't required to call arg.c_str(), which naturally permits an ll_convert
alias.

Add ll_convert_wide_to_wstring(), ll_convert_wstring_to_wide() as placeholders
for converting between Windows std::wstring and Linden LLWString, with
corresponding ll_convert aliases. We don't yet have library code to perform
such conversions officially; for now, just copy characters.

Add LLStringUtil::getenv(key) and getoptenv(key) functions. The latter returns
boost::optional<string_type> in case the caller needs to detect absence of a
given environment variable rather than simply accepting a default value.
Naturally getenv(), which accepts a default, is implemented using getoptenv().
getoptenv(), in turn, is implemented using an underlying llstring_getoptenv().

On Windows, llstring_getoptenv() returns boost::optional<std::wstring> (based
on GetEnvironmentVariableW()), whereas elsewhere, llstring_getoptenv() returns
boost::optional<std::string> (based on classic Posix getenv()).

The beauty of generic ll_convert is that the portable LLStringUtilBase<T>::
getoptenv() template can call the platform-specific llstring_getoptenv() and
transparently perform whatever conversion is necessary to return the desired
string_type.

Add windows_message<T>(error) template, with an overload that implicitly calls
GetLastError(). We provide a single concrete windows_message<std::wstring>()
implementation because that's what we get from Windows FormatMessageW() --
everything else is a generic conversion to the desired target string type.

This obviates llprocess.cpp's previous WindowsErrorString() implementation --
reimplement using windows_message<std::string>().
---
 indra/llcommon/llpreprocessor.h |  21 ++++
 indra/llcommon/llprocess.cpp    |  27 +----
 indra/llcommon/llstring.cpp     | 125 ++++++++++++++++++++++-
 indra/llcommon/llstring.h       | 176 +++++++++++++++++++++++++++++++-
 indra/llcommon/stdtypes.h       |   7 +-
 5 files changed, 323 insertions(+), 33 deletions(-)

diff --git a/indra/llcommon/llpreprocessor.h b/indra/llcommon/llpreprocessor.h
index ef015fdce4b..e8f99814375 100644
--- a/indra/llcommon/llpreprocessor.h
+++ b/indra/llcommon/llpreprocessor.h
@@ -101,6 +101,9 @@
+// Although thread_local is now a standard storage class, we can't just
+// #define LL_THREAD_LOCAL as thread_local because the *usage* is different.
+// We'll have to take the time to change LL_THREAD_LOCAL declarations by hand.
 # define LL_THREAD_LOCAL __declspec(thread)
@@ -177,6 +180,24 @@
 #endif // LL_WINDOWS
+#if ! defined(LL_WINDOWS)
+#define LL_WCHAR_T_NATIVE 1
+#else  // LL_WINDOWS
+// _WCHAR_T_DEFINED is defined if wchar_t is provided at all.
+// Specifically, it has value 1 if wchar_t is an intrinsic type, else empty.
+// _NATIVE_WCHAR_T_DEFINED has value 1 if wchar_t is intrinsic, else undefined.
+// For years we have compiled with /Zc:wchar_t-, meaning that wchar_t is a
+// typedef for unsigned short (in stddef.h). Lore has it that one of our
+// proprietary binary-only libraries has traditionally been built that way and
+// therefore EVERYTHING ELSE requires it. Therefore, in a typical Linden
+// Windows build, _WCHAR_T_DEFINED is defined but empty, while
+// _NATIVE_WCHAR_T_DEFINED is undefined.
+# if defined(_NATIVE_WCHAR_T_DEFINED)
+#  define LL_WCHAR_T_NATIVE 1
+#endif // LL_WINDOWS
 // CMake automagically defines llcommon_EXPORTS only when building llcommon
 // sources, and only when llcommon is a shared library (i.e. when
diff --git a/indra/llcommon/llprocess.cpp b/indra/llcommon/llprocess.cpp
index 5753efdc592..1fa53f322b1 100644
--- a/indra/llcommon/llprocess.cpp
+++ b/indra/llcommon/llprocess.cpp
@@ -1205,30 +1205,9 @@ static LLProcess::Status interpret_status(int status)
 /// GetLastError()/FormatMessage() boilerplate
 static std::string WindowsErrorString(const std::string& operation)
-	int result = GetLastError();
-	LPTSTR error_str = 0;
-					   NULL,
-					   result,
-					   0,
-					   (LPTSTR)&error_str,
-					   0,
-					   NULL)
-		!= 0) 
-	{
-		// convert from wide-char string to multi-byte string
-		char message[256];
-		wcstombs(message, error_str, sizeof(message));
-		message[sizeof(message)-1] = 0;
-		LocalFree(error_str);
-		// convert to std::string to trim trailing whitespace
-		std::string mbsstr(message);
-		mbsstr.erase(mbsstr.find_last_not_of(" \t\r\n"));
-		return STRINGIZE(operation << " failed (" << result << "): " << mbsstr);
-	}
-	return STRINGIZE(operation << " failed (" << result
-					 << "), but FormatMessage() did not explain");
+	auto result = GetLastError();
+	return STRINGIZE(operation << " failed (" << result << "): "
+					 << windows_message<std::string>(result));
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp
index 42390c8a7b1..f931103ba69 100644
--- a/indra/llcommon/llstring.cpp
+++ b/indra/llcommon/llstring.cpp
@@ -53,6 +53,40 @@ std::string ll_safe_string(const char* in, S32 maxlen)
 	return std::string();
+boost::optional<std::wstring> llstring_getoptenv(const std::string& key)
+    auto wkey = ll_convert_string_to_wide(key);
+    // Take a wild guess as to how big the buffer should be.
+    std::vector<wchar_t> buffer(1024);
+    auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size());
+    // If our initial guess was too short, n will indicate the size (in
+    // wchar_t's) that buffer should have been, including the terminating nul.
+    if (n > (buffer.size() - 1))
+    {
+        // make it big enough
+        buffer.resize(n);
+        // and try again
+        n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size());
+    }
+    // did that (ultimately) succeed?
+    if (n)
+    {
+        // great, return populated boost::optional
+        return { &buffer[0] };
+    }
+    // not successful
+    auto last_error = GetLastError();
+    // Don't bother warning for NOT_FOUND; that's an expected case
+    if (last_error != ERROR_ENVVAR_NOT_FOUND)
+    {
+        LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: "
+                   << windows_message<std::string>(last_error) << LL_ENDL;
+    }
+    // return empty boost::optional
+    return {};
 bool is_char_hex(char hex)
 	if((hex >= '0') && (hex <= '9'))
@@ -715,12 +749,12 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
 	return out;
-std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in)
+std::wstring ll_convert_string_to_wide(const std::string& in)
 	return ll_convert_string_to_wide(in, CP_UTF8);
-std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in, unsigned int code_page)
+std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page)
 	// From review:
 	// We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input,
@@ -745,6 +779,24 @@ std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in, unsi
 	return {&w_out[0]};
+LLWString ll_convert_wide_to_wstring(const std::wstring& in)
+    // This function, like its converse, is a placeholder, encapsulating a
+    // guilty little hack: the only "official" way nat has found to convert
+    // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is
+    // by using iconv, which we've avoided so far. It kinda sorta works to
+    // just copy individual characters...
+    // The point is that if/when we DO introduce some more official way to
+    // perform such conversions, we should only have to call it here.
+    return { in.begin(), in.end() };
+std::wstring ll_convert_wstring_to_wide(const LLWString& in)
+    // See comments in ll_convert_wide_to_wstring()
+    return { in.begin(), in.end() };
 std::string ll_convert_string_to_utf8_string(const std::string& in)
 	auto w_mesg = ll_convert_string_to_wide(in, CP_ACP);
@@ -752,7 +804,74 @@ std::string ll_convert_string_to_utf8_string(const std::string& in)
 	return out_utf8;
-#endif // LL_WINDOWS
+void HeapFree_deleter(void* ptr)
+    // instead of LocalFree(), per
+    HeapFree(GetProcessHeap(), NULL, ptr);
+} // anonymous namespace
+std::wstring windows_message<std::wstring>(DWORD error)
+    // derived from
+    wchar_t* rawptr = nullptr;
+    auto okay = FormatMessageW(
+        // use system message tables for GetLastError() codes
+        // internally allocate buffer and return its pointer
+        // you cannot pass insertion parameters (thanks Gandalf)
+        // ignore line breaks in message definition text
+        NULL,                       // lpSource, unused with FORMAT_MESSAGE_FROM_SYSTEM
+        error,                      // dwMessageId
+        (LPWSTR)&rawptr,         // lpBuffer: force-cast wchar_t** to wchar_t*
+        0,                // nSize, unused with FORMAT_MESSAGE_ALLOCATE_BUFFER
+        NULL);            // Arguments, unused
+    // make a unique_ptr from rawptr so it gets cleaned up properly
+    std::unique_ptr<wchar_t, void(*)(void*)> bufferptr(rawptr, HeapFree_deleter);
+    if (okay && bufferptr)
+    {
+        // got the message, return it ('okay' is length in characters)
+        return { bufferptr.get(), okay };
+    }
+    // did not get the message, synthesize one
+    auto format_message_error = GetLastError();
+    std::wostringstream out;
+    out << L"GetLastError() " << error << L" (FormatMessageW() failed with "
+        << format_message_error << L")";
+    return out.str();
+#else  // ! LL_WINDOWS
+boost::optional<std::string> llstring_getoptenv(const std::string& key)
+    auto found = getenv(key.c_str());
+    if (found)
+    {
+        // return populated boost::optional
+        return { found };
+    }
+    else
+    {
+        // return empty boost::optional
+        return {};
+    }
+#endif // ! LL_WINDOWS
 long LLStringOps::sPacificTimeOffset = 0;
 long LLStringOps::sLocalTimeOffset = 0;
diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h
index 7c3e9f952d3..2dccc7cbdf5 100644
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@@ -27,6 +27,7 @@
 #ifndef LL_LLSTRING_H
 #define LL_LLSTRING_H
+#include <boost/optional/optional.hpp>
 #include <string>
 #include <cstdio>
 //#include <locale>
@@ -337,6 +338,19 @@ class LLStringUtilBase
 		const string_type& string,
 		const string_type& substr);
+	/**
+	 * get environment string value with proper Unicode handling
+	 * (key is always UTF-8)
+	 * detect absence by return value == dflt
+	 */
+	static string_type getenv(const std::string& key, const string_type& dflt="");
+	/**
+	 * get optional environment string value with proper Unicode handling
+	 * (key is always UTF-8)
+	 * detect absence by (! return value)
+	 */
+	static boost::optional<string_type> getoptenv(const std::string& key);
 	static void	addCRLF(string_type& string);
 	static void	removeCRLF(string_type& string);
 	static void removeWindowsCR(string_type& string);
@@ -496,6 +510,37 @@ LL_COMMON_API bool iswindividual(llwchar elem);
  * Unicode support
+/// generic conversion aliases
+template<typename TO, typename FROM, typename Enable=void>
+struct ll_convert_impl
+    // Don't even provide a generic implementation. We specialize for every
+    // combination we do support.
+    TO operator()(const FROM& in) const;
+// Use a function template to get the nice ll_convert<TO>(from_value) API.
+template<typename TO, typename FROM>
+TO ll_convert(const FROM& in)
+    return ll_convert_impl<TO, FROM>()(in);
+// degenerate case
+template<typename T>
+struct ll_convert_impl<T, T>
+    T operator()(const T& in) const { return in; }
+// specialize ll_convert_impl<TO, FROM> to return EXPR
+#define ll_convert_alias(TO, FROM, EXPR)                    \
+template<>                                                  \
+struct ll_convert_impl<TO, FROM>                            \
+{                                                           \
+    TO operator()(const FROM& in) const { return EXPR; }    \
 // Make the incoming string a utf8 string. Replaces any unknown glyph
 // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest
 // of the data may not be recovered.
@@ -503,30 +548,83 @@ LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw);
 // We should never use UTF16 except when communicating with Win32!
+// nat 2018-12-14: I consider the whole llutf16string thing a mistake, because
+// the Windows APIs we want to call are all defined in terms of wchar_t*
+// (or worse, LPCTSTR).
+// While there is no point coding for an ASCII-only world (! defined(UNICODE)),
+// use of U16 and llutf16string for Windows APIs locks in /Zc:wchar_t-. Going
+// forward, we should code in terms of wchar_t and std::wstring so as to
+// support either setting of /Zc:wchar_t.
+// The first link above states that char can be used to hold ASCII or any
+// multi-byte character set, and distinguishes wchar_t (UTF-16LE), char16_t
+// (UTF-16) and char32_t (UTF-32). Nonetheless, within this code base:
+// * char and std::string always hold UTF-8 (of which ASCII is a subset). It
+//   is a BUG if they are used to pass strings in any other multi-byte
+//   encoding.
+// * wchar_t and std::wstring should be our interface to Windows wide-string
+//   APIs, and therefore hold UTF-16LE.
+// * U16 and llutf16string are the previous but DEPRECATED UTF-16LE type. Do
+//   not introduce new uses of U16 or llutf16string for string data.
+// * llwchar and LLWString hold UTF-32 strings.
+// * Do not introduce char16_t or std::u16string.
+// * Do not introduce char32_t or std::u32string.
+// This typedef may or may not be identical to std::wstring, depending on LL_WCHAR_T_NATIVE.
 typedef std::basic_string<U16> llutf16string;
+#if ! defined(LL_WCHAR_T_NATIVE)
+// wchar_t is identical to U16, and std::wstring is identical to llutf16string.
+// Defining an ll_convert alias involving llutf16string would collide with the
+// comparable preferred alias involving std::wstring. (In this scenario, if
+// you pass llutf16string, it will engage the std::wstring specialization.)
+#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing
+// wchar_t is a distinct native type, so llutf16string is also a distinct
+// type, and there IS a point to converting separately to/from llutf16string.
+// (But why? Windows APIs are still defined in terms of wchar_t, and
+// in this scenario llutf16string won't work for them!)
+#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+// converting between std::wstring and llutf16string involves copying chars
+// enclose the brace-initialization expression in parens to protect the comma
+// from macro-argument parsing
+ll_convert_alias(llutf16string, std::wstring, ({ in.begin(), in.end() }));
+ll_convert_alias(std::wstring, llutf16string, ({ in.begin(), in.end() }));
 LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len);
 LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str);
+ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in));
 LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len);
 LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str);
+ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in));
 LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len);
 LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str );
+ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in));
 LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len);
 LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str);
 // Same function, better name. JC
 inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); }
+// best name of all
+ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in));
 LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars);
 LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len);
 LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str);
+ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in));
 LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len);
 LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str);
+ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in));
 inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);}
@@ -636,14 +734,36 @@ using snprintf_hack::snprintf;
 LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page);
 LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8
+inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page)
+    return ll_convert_wide_to_string(in.c_str(), code_page);
+inline std::string ll_convert_wide_to_string(const std::wstring& in)
+    return ll_convert_wide_to_string(in.c_str());
+ll_convert_alias(std::string, std::wstring, ll_convert_wide_to_string(in));
  * Converts a string to wide string.
-LL_COMMON_API std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in,
-                                                                   unsigned int code_page);
-LL_COMMON_API std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in);
-                                                                   // default CP_UTF8
+LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in,
+                                                     unsigned int code_page);
+LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in);
+                                                     // default CP_UTF8
+ll_convert_alias(std::wstring, std::string, ll_convert_string_to_wide(in));
+ * Convert a Windows wide string to our LLWString
+ */
+LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in);
+ll_convert_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in));
+ * Convert LLWString to Windows wide string
+ */
+LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in);
+ll_convert_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in));
  * Converts incoming string into utf8 string
@@ -651,8 +771,39 @@ LL_COMMON_API std::basic_string<wchar_t> ll_convert_string_to_wide(const std::st
 LL_COMMON_API std::string ll_convert_string_to_utf8_string(const std::string& in);
+/// Get Windows message string for passed GetLastError() code
+// VS 2013 doesn't let us forward-declare this template, which is what we
+// started with, so the implementation could reference the specialization we
+// haven't yet declared. Somewhat weirdly, just stating the generic
+// implementation in terms of the specialization works, even in this order...
+// the general case is just a conversion from the sole implementation
+// Microsoft says DWORD is a typedef for unsigned long
+// so rather than drag windows.h into everybody's include space...
+template<typename STRING>
+STRING windows_message(unsigned long error)
+    return ll_convert<STRING>(windows_message<std::wstring>(error));
+/// There's only one real implementation
+LL_COMMON_API std::wstring windows_message<std::wstring>(unsigned long error);
+/// Get Windows message string, implicitly calling GetLastError()
+template<typename STRING>
+STRING windows_message() { return windows_message<STRING>(GetLastError()); }
-#endif // LL_WINDOWS
+LL_COMMON_API boost::optional<std::wstring> llstring_getoptenv(const std::string& key);
+#else // ! LL_WINDOWS
+LL_COMMON_API boost::optional<std::string>  llstring_getoptenv(const std::string& key);
+#endif // ! LL_WINDOWS
  * Many of the 'strip' and 'replace' methods of LLStringUtilBase need
@@ -1595,6 +1746,21 @@ bool LLStringUtilBase<T>::endsWith(
 	return (idx == (string.size() - substr.size()));
+// static
+// Look up environment variable `key` via the platform-specific
+// llstring_getoptenv() and convert any value found to string_type.
+// Returns an empty boost::optional when llstring_getoptenv() finds nothing.
+template<class T>
+auto LLStringUtilBase<T>::getoptenv(const std::string& key) -> boost::optional<string_type>
+    // Copy-initialize: 'auto found{expr}' deduces std::initializer_list on
+    // compilers predating N3922 (e.g. VS 2013).
+    auto found = llstring_getoptenv(key);
+    // A braced-init-list is not an expression, so it cannot be an operand of
+    // ?: -- spell out both boost::optional values explicitly.
+    return found? boost::optional<string_type>(ll_convert<string_type>(*found)) : boost::optional<string_type>();
+// static
+// Return the value of environment variable `key` as string_type, or dflt
+// when getoptenv() returns an empty boost::optional.
+template<class T>
+auto LLStringUtilBase<T>::getenv(const std::string& key, const string_type& dflt) -> string_type
+    // Copy-initialize: 'auto found{expr}' deduces std::initializer_list on
+    // compilers predating N3922 (e.g. VS 2013), which would break '*found'.
+    auto found = getoptenv(key);
+    return found? *found : dflt;
 template<class T> 
 BOOL LLStringUtilBase<T>::convertToBOOL(const string_type& string, BOOL& value)
diff --git a/indra/llcommon/stdtypes.h b/indra/llcommon/stdtypes.h
index bf3f3f9ee83..6c9871e76c9 100644
--- a/indra/llcommon/stdtypes.h
+++ b/indra/llcommon/stdtypes.h
@@ -37,7 +37,12 @@ typedef signed int			S32;
 typedef unsigned int			U32;
-// Windows wchar_t is 16-bit
+// Windows wchar_t is 16-bit, whichever way /Zc:wchar_t is set. In effect,
+// Windows wchar_t is always a typedef, either for unsigned short or __wchar_t.
+// (__wchar_t, available either way, is Microsoft's native 2-byte wchar_t type.)
+// In any case, llwchar should be a UTF-32 type.
 typedef U32				llwchar;
 typedef wchar_t				llwchar;