llstring.cpp

/** 
 * @file llstring.cpp
 * @brief String utility functions and the std::string class.
 *
 * $LicenseInfo:firstyear=2001&license=viewerlgpl$
 * Second Life Viewer Source Code
 * Copyright (C) 2010, Linden Research, Inc.
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License only.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * 
 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 * $/LicenseInfo$
 */

#include "linden_common.h"

#include "llstring.h"
#include "llerror.h"
#include "llfasttimer.h"
#include "llsd.h"
#include <vector>

#if LL_WINDOWS
#include "llwin32headerslean.h"
#include <winnls.h> // for WideCharToMultiByte
#endif

#include <absl/strings/str_format.h>

// Block-timer stat handle constructed with the label "String Format".
LLTrace::BlockTimerStatHandle FT_STRING_FORMAT("String Format");


// Builds a std::string from a NUL-terminated C string.
// A null pointer is tolerated and produces an empty string.
std::string ll_safe_string(const char* in)
{
	return in ? std::string(in) : std::string();
}

// Length-limited variant: copies exactly maxlen bytes starting at 'in'
// (the std::string(ptr, count) constructor does not stop at an embedded NUL).
// A null pointer or a non-positive maxlen produces an empty string.
std::string ll_safe_string(const char* in, S32 maxlen)
{
	if (!in || maxlen <= 0)
	{
		return std::string();
	}
	return std::string(in, maxlen);
}

// Returns true when 'hex' is one of 0-9, a-f or A-F.
bool is_char_hex(char hex)
{
	const bool is_digit = ('0' <= hex) && (hex <= '9');
	const bool is_lower = ('a' <= hex) && (hex <= 'f');
	const bool is_upper = ('A' <= hex) && (hex <= 'F');
	return is_digit || is_lower || is_upper;
}

// Converts one hexadecimal digit (0-9, a-f, A-F) to its value 0..15.
// Any other character yields 0, which is indistinguishable from '0'.
U8 hex_as_nybble(char hex)
{
	if (('0' <= hex) && (hex <= '9'))
	{
		return (U8)(hex - '0');
	}
	if (('a' <= hex) && (hex <= 'f'))
	{
		return (U8)(hex - 'a' + 10);
	}
	if (('A' <= hex) && (hex <= 'F'))
	{
		return (U8)(hex - 'A' + 10);
	}
	return 0; // not a hex digit
}

// Returns true when the code point falls in one of three fixed ranges:
// 0x2E80-0x9FFF, 0xAC00-0xD7A0 or 0xF900-0xFA60.
// NOTE(review): these look like the CJK ideograph / Hangul syllable areas,
// i.e. characters that stand as individual words - confirm against callers.
bool iswindividual(llwchar elem)
{
	const U32 code = (U32)elem;
	return (0x2E80 <= code && code <= 0x9FFF)
		|| (0xAC00 <= code && code <= 0xD7A0)
		|| (0xF900 <= code && code <= 0xFA60);
}

bool _read_file_into_string(std::string& str, const std::string& filename)
{
	llifstream ifs(filename.c_str(), llifstream::binary);
	if (!ifs.is_open())
	{
		LL_INFOS() << "Unable to open file " << filename << LL_ENDL;
		return false;
	}

	std::ostringstream oss;

	oss << ifs.rdbuf();
	str = oss.str();
	ifs.close();
	return true;
}


// See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
// for the Unicode implementation. The conversion code in this file does not
// match it because it was written before that implementation was found.


// Streams an LLWString by converting it to UTF-8 first.
std::ostream& operator<<(std::ostream &s, const LLWString &wstr)
{
	return s << wstring_to_utf8str(wstr);
}

std::string rawstr_to_utf8(const std::string& raw)
{
	LLWString wstr(utf8str_to_wstring(raw));
	return wstring_to_utf8str(wstr);
}

// Encodes one code point as UTF-8 into 'outchars' and returns the number of
// bytes written (1 to 6). No NUL terminator is appended, so the caller's
// buffer must hold at least 6 bytes. Values of 0x80000000 and above are
// logged as invalid and encoded as the single byte LL_UNKNOWN_CHAR.
S32 wchar_to_utf8chars(llwchar in_char, char* outchars)
{
	U32 cur_char = (U32)in_char;
	char* base = outchars;	// remembered so the byte count can be computed at the end
	if (cur_char < 0x80)
	{
		// 7 bits: single byte
		*outchars++ = (U8)cur_char;
	}
	else if (cur_char < 0x800)
	{
		// up to 11 bits: lead byte 110xxxxx + 1 continuation byte
		*outchars++ = 0xC0 | (cur_char >> 6);
		*outchars++ = 0x80 | (cur_char & 0x3F);
	}
	else if (cur_char < 0x10000)
	{
		// up to 16 bits: lead byte 1110xxxx + 2 continuation bytes
		*outchars++ = 0xE0 | (cur_char >> 12);
		*outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
		*outchars++ = 0x80 | (cur_char & 0x3F);
	}
	else if (cur_char < 0x200000)
	{
		// up to 21 bits: lead byte 11110xxx + 3 continuation bytes
		*outchars++ = 0xF0 | (cur_char >> 18);
		*outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
		*outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
		*outchars++ = 0x80 | (cur_char & 0x3F);
	}
	else if (cur_char < 0x4000000)
	{
		// up to 26 bits: lead byte 111110xx + 4 continuation bytes
		*outchars++ = 0xF8 | (cur_char >> 24);
		*outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
		*outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
		*outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
		*outchars++ = 0x80 | (cur_char & 0x3F);
	}
	else if (cur_char < 0x80000000)
	{
		// up to 31 bits: lead byte 1111110x + 5 continuation bytes
		*outchars++ = 0xFC | (cur_char >> 30);
		*outchars++ = 0x80 | ((cur_char >> 24) & 0x3F);
		*outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
		*outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
		*outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
		*outchars++ = 0x80 | (cur_char & 0x3F);
	}
	else
	{
		// top bit set: not representable, emit the placeholder instead
		LL_WARNS() << "Invalid Unicode character " << cur_char << "!" << LL_ENDL;
		*outchars++ = LL_UNKNOWN_CHAR;
	}
	return outchars - base;
}	

// Decodes one code point from the UTF-16 sequence at 'inchars' into
// '*outchar' and returns the number of 16-bit units consumed (1 or 2).
//
// A high surrogate (0xD800-0xDBFF) followed by a low surrogate
// (0xDC00-0xDFFF) is combined into a supplementary code point and consumes
// two units. An unpaired surrogate (a high one not followed by a low one, or
// a low one on its own) is malformed: it is reported as LL_UNKNOWN_CHAR and
// only that single unit is consumed, so the following unit is decoded
// normally by the next call instead of being swallowed.
//
// When the first unit is a high surrogate, inchars[1] is read, so the caller
// must guarantee one more readable unit (e.g. the string's terminator).
S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
{
	const U16 lead = inchars[0];

	if ((lead >= 0xD800) && (lead <= 0xDBFF))
	{
		const U16 trail = inchars[1];
		if ((trail >= 0xDC00) && (trail <= 0xDFFF))
		{
			// Well-formed surrogate pair
			llwchar char32 = ((llwchar)(lead - 0xD800)) << 10;
			char32 += (llwchar)(trail - 0xDC00) + 0x0010000UL;
			*outchar = char32;
			return 2;
		}

		// High surrogate without its partner
		*outchar = LL_UNKNOWN_CHAR;
		return 1;
	}

	if ((lead >= 0xDC00) && (lead <= 0xDFFF))
	{
		// Low surrogate that does not follow a high surrogate
		*outchar = LL_UNKNOWN_CHAR;
		return 1;
	}

	// Plain BMP character
	*outchar = (llwchar)lead;
	return 1;
}

// Converts the first 'len' code points of a UTF-32 string to UTF-16.
// Code points above 0xFFFF become a surrogate pair; everything else is
// copied as a single unit. 'len' is not checked against the string length.
llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
{
	llutf16string result;

	for (S32 pos = 0; pos < len; ++pos)
	{
		const U32 code_point = utf32str[pos];
		if (code_point <= 0xFFFF)
		{
			result += code_point;
			continue;
		}

		// 0xD7C0 == 0xD800 - (0x10000 >> 10): folds the 0x10000 bias into the lead unit
		result += (0xD7C0 + (code_point >> 10));
		result += (0xDC00 | (code_point & 0x3FF));
	}
	return result;
}

// Converts an entire UTF-32 string to UTF-16.
llutf16string wstring_to_utf16str(const LLWString &utf32str)
{
	return wstring_to_utf16str(utf32str, (S32)utf32str.length());
}

// Converts UTF-8 to UTF-16 by way of an intermediate UTF-32 string.
llutf16string utf8str_to_utf16str ( const std::string& utf8str )
{
	const LLWString wide = utf8str_to_wstring(utf8str);
	return wstring_to_utf16str(wide);
}


// Decodes the first 'len' UTF-16 units of 'utf16str' into UTF-32.
// Returns an empty string when 'len' is not positive or the input is empty.
// 'len' is not checked against the string length.
LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
{
	LLWString result;
	if ((len > 0) && !utf16str.empty())
	{
		// craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
		const U16* units = &(*(utf16str.begin()));

		S32 consumed = 0;
		while (consumed < len)
		{
			llwchar decoded;
			consumed += utf16chars_to_wchar(units + consumed, &decoded);
			result += decoded;
		}
	}
	return result;
}

// Decodes an entire UTF-16 string into UTF-32.
LLWString utf16str_to_wstring(const llutf16string &utf16str)
{
	return utf16str_to_wstring(utf16str, (S32)utf16str.length());
}

// Number of llwchars (UTF-32) needed for the first utf16_len 16-bit units of
// the given UTF-16 string: every well-formed surrogate pair that lies
// completely inside that range counts once instead of twice.
S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
{
	// ... craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
	const U16 *const units = &(*(utf16str.begin()));

	S32 pairs = 0;
	for (S32 pos = 0; pos < utf16_len; )
	{
		const U16 unit = units[pos++];
		const bool is_high_surrogate = (unit >= 0xD800 && unit <= 0xDBFF);	// See http://en.wikipedia.org/wiki/UTF-16
		if (!is_high_surrogate)
		{
			continue;
		}
		if (pos >= utf16_len)
		{
			// The range ends right after the first half of a pair
			break;
		}
		const U16 next = units[pos];
		if (next >= 0xDC00 && next <= 0xDFFF)
		{
			// Valid second half: the two units form one character
			++pairs;
			++pos;
		}
	}
	return utf16_len - pairs;
}

// Number of UTF-16 units needed for the wlen llwchars of 'wstr' that begin
// at woffset. The range is cut off at the end of the string; code points of
// 0x10000 and above count as two units. Returns 0 when the range is empty
// because woffset lies beyond the (clamped) end.
S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wlen)
{
	const S32 stop = llmin((S32)wstr.length(), woffset + wlen);
	if (stop < woffset)
	{
		return 0;
	}

	// One unit per character, plus one extra for each supplementary character.
	S32 units = stop - woffset;
	for (S32 pos = woffset; pos < stop; ++pos)
	{
		if (wstr[pos] >= 0x10000)
		{
			++units;
		}
	}
	return units;
}

// Given a wstring and an offset in it, returns the length as wstring (i.e.,
// number of llwchars) of the longest substring that starts at the offset
// and whose equivalent utf-16 string does not exceed the given utf16_length.
// If 'unaligned' is non-null it receives TRUE when the scan stopped with the
// index already past the adjusted limit, FALSE otherwise.
// NOTE(review): TRUE presumably means the utf-16 boundary falls inside a
// surrogate pair - confirm against callers.
S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, BOOL *unaligned)
{
	const S32 end = wstr.length();
	BOOL u = FALSE;
	// n is the index limit; it starts as if every character took one unit
	S32 n = woffset + utf16_length;
	S32 i = woffset;
	while (i < end)
	{
		if (wstr[i] >= 0x10000)
		{
			// this character takes two utf-16 units, so one fewer character fits
			--n;
		}
		if (i >= n)
		{
			// budget used up; i > n only if the decrement above overshot
			u = (i > n);
			break;
		}
		i++;
	}
	if (unaligned)
	{
		*unaligned = u;
	}
	return i - woffset;
}

// Number of bytes (1 to 6) that the UTF-8 encoding of 'wc' occupies,
// using the same thresholds as wchar_to_utf8chars().
S32 wchar_utf8_length(const llwchar wc)
{
	// This first case will also catch negative values which are
	// technically invalid.
	if (wc < 0x80) return 1;
	if (wc < 0x800) return 2;
	if (wc < 0x10000) return 3;
	if (wc < 0x200000) return 4;
	if (wc < 0x4000000) return 5;
	return 6;
}


// Total number of UTF-8 bytes needed to encode every character of 'wstr'.
S32 wstring_utf8_length(const LLWString& wstr)
{
	const S32 count = (S32)wstr.length();
	S32 total = 0;
	for (S32 pos = 0; pos < count; ++pos)
	{
		total += wchar_utf8_length(wstr[pos]);
	}
	return total;
}


// Decodes the first 'len' bytes of a UTF-8 string into UTF-32.
// Accepts lead bytes announcing sequences of up to six bytes. A byte that is
// not a valid lead byte, a sequence interrupted by a non-continuation byte,
// and an overlong encoding each produce one LL_UNKNOWN_CHAR in the output.
// NOTE(review): the continuation loop increments i before reading, so for a
// sequence truncated at 'len' it reads utf8str[len]; that is the terminator
// only when len == utf8str.length() - confirm callers never pass a larger len.
LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
{
	LLWString wout;

	S32 i = 0;
	while (i < len)
	{
		llwchar unichar;
		U8 cur_char = utf8str[i];

		if (cur_char < 0x80)
		{
			// Ascii character, just add it
			unichar = cur_char;
		}
		else
		{
			// Classify the lead byte: keep its payload bits and note how many
			// continuation bytes must follow.
			S32 cont_bytes = 0;
			if ((cur_char >> 5) == 0x6)			// Two byte UTF8 -> 1 UTF32
			{
				unichar = (0x1F&cur_char);
				cont_bytes = 1;
			}
			else if ((cur_char >> 4) == 0xe)	// Three byte UTF8 -> 1 UTF32
			{
				unichar = (0x0F&cur_char);
				cont_bytes = 2;
			}
			else if ((cur_char >> 3) == 0x1e)	// Four byte UTF8 -> 1 UTF32
			{
				unichar = (0x07&cur_char);
				cont_bytes = 3;
			}
			else if ((cur_char >> 2) == 0x3e)	// Five byte UTF8 -> 1 UTF32
			{
				unichar = (0x03&cur_char);
				cont_bytes = 4;
			}
			else if ((cur_char >> 1) == 0x7e)	// Six byte UTF8 -> 1 UTF32
			{
				unichar = (0x01&cur_char);
				cont_bytes = 5;
			}
			else
			{
				// Stray continuation byte or 0xFE/0xFF: substitute and move on
				wout += LL_UNKNOWN_CHAR;
				++i;
				continue;
			}

			// Check that this character doesn't go past the end of the string
			S32 end = (len < (i + cont_bytes)) ? len : (i + cont_bytes);
			do
			{
				++i;

				cur_char = utf8str[i];
				if ( (cur_char >> 6) == 0x2 )
				{
					// Continuation byte (10xxxxxx): append its six payload bits
					unichar <<= 6;
					unichar += (0x3F&cur_char);
				}
				else
				{
					// Malformed sequence - roll back to look at this as a new char
					unichar = LL_UNKNOWN_CHAR;
					--i;
					break;
				}
			} while(i < end);

			// Handle overlong characters and NULL characters
			if ( ((cont_bytes == 1) && (unichar < 0x80))
				|| ((cont_bytes == 2) && (unichar < 0x800))
				|| ((cont_bytes == 3) && (unichar < 0x10000))
				|| ((cont_bytes == 4) && (unichar < 0x200000))
				|| ((cont_bytes == 5) && (unichar < 0x4000000)) )
			{
				unichar = LL_UNKNOWN_CHAR;
			}
		}

		wout += unichar;
		++i;
	}
	return wout;
}

// Decodes an entire UTF-8 string into UTF-32.
LLWString utf8str_to_wstring(const std::string& utf8str)
{
	return utf8str_to_wstring(utf8str, (S32)utf8str.length());
}

std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
{
	std::string out;

	S32 i = 0;
	while (i < len)
	{
		char tchars[8];		/* Flawfinder: ignore */
		S32 n = wchar_to_utf8chars(utf32str[i], tchars);
		tchars[n] = 0;
		out += tchars;
		i++;
	}
	return out;
}

std::string wstring_to_utf8str(const LLWString& utf32str)
{
	const S32 len = (S32)utf32str.length();
	return wstring_to_utf8str(utf32str, len);
}

std::string utf16str_to_utf8str(const llutf16string& utf16str)
{
	return wstring_to_utf8str(utf16str_to_wstring(utf16str));
}

// Converts the first 'len' UTF-16 units of 'utf16str' to UTF-8.
//
// 'len' counts 16-bit units. The intermediate UTF-32 string is shorter than
// that whenever the range contains surrogate pairs, so 'len' must not be
// reused as its length: the whole intermediate string is converted instead.
std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len)
{
	return wstring_to_utf8str(utf16str_to_wstring(utf16str, len));
}

std::string utf8str_trim(const std::string& utf8str)
{
	LLWString wstr = utf8str_to_wstring(utf8str);
	LLWStringUtil::trim(wstr);
	return wstring_to_utf8str(wstr);
}


std::string utf8str_tolower(const std::string& utf8str)
{
	LLWString out_str = utf8str_to_wstring(utf8str);
	LLWStringUtil::toLower(out_str);
	return wstring_to_utf8str(out_str);
}


// Compares two UTF-8 strings by decoding both to UTF-32 and delegating to
// LLWStringUtil::compareInsensitive(); its result is returned unchanged.
S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
{
	const LLWString wide_lhs = utf8str_to_wstring(lhs);
	const LLWString wide_rhs = utf8str_to_wstring(rhs);
	return LLWStringUtil::compareInsensitive(wide_lhs, wide_rhs);
}

std::string utf8str_truncate(const std::string& utf8str, const S32 max_len)
{
	if (0 == max_len)
	{
		return std::string();
	}
	if ((S32)utf8str.length() <= max_len)
	{
		return utf8str;
	}
	else
	{
		S32 cur_char = max_len;

		// If we're ASCII, we don't need to do anything
		if ((U8)utf8str[cur_char] > 0x7f)
		{
			// If first two bits are (10), it's the tail end of a multibyte char.  We need to shift back
			// to the first character
			while (0x80 == (0xc0 & utf8str[cur_char]))
			{
				cur_char--;
				// Keep moving forward until we hit the first char;
				if (cur_char == 0)
				{
					// Make sure we don't trash memory if we've got a bogus string.
					break;
				}
			}
		}
		// The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars
		return utf8str.substr(0, cur_char);
	}
}

// [RLVa:KB] - Checked: RLVa-2.1.0
std::string utf8str_substr(const std::string& utf8str, const S32 index, const S32 max_len)
{
	if (0 == max_len)
	{
		return std::string();
	}
	if (utf8str.length() - index  <= max_len)
	{
		return utf8str.substr(index, max_len);
	}
	else
	{
		S32 cur_char = max_len;

		// If we're ASCII, we don't need to do anything
		if ((U8)utf8str[index + cur_char] > 0x7f)
		{
			// If first two bits are (10), it's the tail end of a multibyte char.  We need to shift back
			// to the first character
			while (0x80 == (0xc0 & utf8str[index + cur_char]))
			{
				cur_char--;
				// Keep moving forward until we hit the first char;
				if (cur_char == 0)
				{
					// Make sure we don't trash memory if we've got a bogus string.
					break;
				}
			}
		}
		// The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars
		return utf8str.substr(index, cur_char);
	}
}

// Splits a UTF-8 string into pieces of at most maxlen bytes and stores them
// in split_list (which is cleared first).
//
// Each piece is cut at the last split_token found within the next maxlen
// bytes; that token is dropped from the output. When there is no usable
// token the piece is cut with utf8str_substr(), i.e. on a character boundary.
//
// If not even one character fits into maxlen (maxlen == 0, or smaller than
// the multi-byte character at the current position), the piece consists of
// that single whole character. This exceeds maxlen but guarantees that every
// iteration makes progress; previously such input looped forever.
void utf8str_split(std::list<std::string>& split_list, const std::string& utf8str, size_t maxlen, char split_token)
{
	split_list.clear();

	const std::string::size_type lenMsg = utf8str.length();
	std::string::size_type lenIt = 0;

	std::string strTemp;
	while (lenIt < lenMsg)
	{
		const char* pstrIt = utf8str.c_str() + lenIt;

		// Written as a subtraction so that a huge maxlen cannot overflow
		if (maxlen < lenMsg - lenIt)
		{
			// Find the last split character
			const char* pstrTemp = pstrIt + maxlen;
			while ( (pstrTemp > pstrIt) && (*pstrTemp != split_token) )
				pstrTemp--;

			if (pstrTemp > pstrIt)
				strTemp = utf8str.substr(lenIt, pstrTemp - pstrIt);
			else
				strTemp = utf8str_substr(utf8str, lenIt, maxlen);

			if (strTemp.empty() && (*pstrIt != split_token))
			{
				// Nothing fits: take the lead byte plus its continuation bytes
				std::string::size_type lenChar = 1;
				while ( (lenIt + lenChar < lenMsg) && (0x80 == (0xc0 & utf8str[lenIt + lenChar])) )
					lenChar++;
				strTemp = utf8str.substr(lenIt, lenChar);
			}
		}
		else
		{
			// The remainder fits into one piece
			strTemp = utf8str.substr(lenIt, std::string::npos);
		}

		split_list.push_back(strTemp);

		lenIt += strTemp.length();
		// Skip the token the piece was split on
		if ( (lenIt < lenMsg) && (utf8str[lenIt] == split_token) )
			lenIt++;
	}
}
// [/RLVa:KB]

std::string utf8str_symbol_truncate(const std::string& utf8str, const S32 symbol_len)
{
    if (0 == symbol_len)
    {
        return std::string();
    }
    if ((S32)utf8str.length() <= symbol_len)
    {
        return utf8str;
    }
    else
    {
        int len = 0, byteIndex = 0;
        const char* aStr = utf8str.c_str();
        size_t origSize = utf8str.size();

        for (byteIndex = 0; len < symbol_len && byteIndex < origSize; byteIndex++)
        {
            if ((aStr[byteIndex] & 0xc0) != 0x80)
            {
                len += 1;
            }
        }
        return utf8str.substr(0, byteIndex);
    }
}

std::string utf8str_substChar(
	const std::string& utf8str,
	const llwchar target_char,
	const llwchar replace_char)
{
	LLWString wstr = utf8str_to_wstring(utf8str);
	LLWStringUtil::replaceChar(wstr, target_char, replace_char);
	//wstr = wstring_substChar(wstr, target_char, replace_char);
	return wstring_to_utf8str(wstr);
}

std::string utf8str_makeASCII(const std::string& utf8str)
{
	LLWString wstr = utf8str_to_wstring(utf8str);
	LLWStringUtil::_makeASCII(wstr);
	return wstring_to_utf8str(wstr);
}

std::string mbcsstring_makeASCII(const std::string& wstr)
{
	// Replace non-ASCII chars with replace_char
	std::string out_str = wstr;
	for (S32 i = 0; i < (S32)out_str.length(); i++)
	{
		if ((U8)out_str[i] > 0x7f)
		{
			out_str[i] = LL_UNKNOWN_CHAR;
		}
	}
	return out_str;
}
// Returns a copy of the string with every carriage return (byte 13) removed.
// Line feeds are kept, so "\r\n" becomes "\n".
std::string utf8str_removeCRLF(const std::string& utf8str)
{
	const char CR = 13;

	std::string stripped;
	if (utf8str.empty())
	{
		return stripped;
	}

	stripped.reserve(utf8str.length());
	for (const char ch : utf8str)
	{
		if (ch != CR)
		{
			stripped.push_back(ch);
		}
	}
	return stripped;
}

#if LL_WINDOWS
std::string ll_convert_wide_to_string(const wchar_t* in)
{
	return ll_convert_wide_to_string(in, CP_UTF8);
}

// Converts a NUL-terminated wide string to a multi-byte std::string in the
// given Windows code page.
//
// Returns an empty string for a null pointer, for empty input, and when
// WideCharToMultiByte() reports failure. The conversion writes directly into
// the result string, so no temporary heap buffer is needed.
std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
{
	std::string out;
	if (!in)
	{
		return out;
	}

	// WideCharToMultiByte() fails when asked to convert zero characters
	const int len_in = (int)wcslen(in);
	if (len_in <= 0)
	{
		return out;
	}

	// First pass: ask how many bytes the converted text needs
	const int len_out = WideCharToMultiByte(
		code_page,
		0,
		in,
		len_in,
		NULL,
		0,
		0,
		0);
	if (len_out <= 0)
	{
		return out;
	}

	// Second pass: convert straight into the string's own storage
	out.resize(len_out);
	const int written = WideCharToMultiByte(
		code_page,
		0,
		in,
		len_in,
		&out[0],
		len_out,
		0,
		0);

	// Keep only what was actually produced (nothing if the call failed)
	out.resize((written > 0) ? written : 0);
	return out;
}

std::wstring ll_convert_string_to_wide(const std::string& in)
{
	return ll_convert_string_to_wide(in, CP_UTF8);
}

// Converts a multi-byte std::string in the given Windows code page to a
// std::wstring.
//
// From review:
// We can preallocate a wide char buffer that is the same length (in wchar_t
// elements) as the input, plus one for a null terminator, and be guaranteed
// to not overflow. That saves the extra MultiByteToWideChar() call that would
// otherwise be needed to measure the output; string operations were seen
// taking a bunch of time, especially when constructing widgets.
//
// The result is built from the buffer as a NUL-terminated string, so it ends
// at the first NUL character. If the conversion fails, MultiByteToWideChar()
// returns 0 and the result is empty.
std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page)
{
	// Output buffer that is destroyed on exit, with a place for the NUL
	// terminator. std::vector value-initializes its elements, so the buffer
	// starts out zeroed and needs no memset().
	std::vector<wchar_t> w_out(in.length() + 1);

	const int real_output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), (int)in.length(),
														&w_out[0], (int)(w_out.size() - 1));

	//looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858.
	w_out[real_output_str_len] = 0;

	// construct string<wchar_t> from our temporary output buffer
	return {&w_out[0]};
}

// Converts a std::wstring to an LLWString by copying it element by element.
LLWString ll_convert_wide_to_wstring(const std::wstring& in)
{
    // This function, like its converse, is a placeholder, encapsulating a
    // guilty little hack: the only "official" way nat has found to convert
    // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is
    // by using iconv, which we've avoided so far. It kinda sorta works to
    // just copy individual characters...
    // The point is that if/when we DO introduce some more official way to
    // perform such conversions, we should only have to call it here.
    return LLWString(in.begin(), in.end());
}

// Converts an LLWString to a std::wstring by copying it element by element.
std::wstring ll_convert_wstring_to_wide(const LLWString& in)
{
    // See comments in ll_convert_wide_to_wstring()
    return std::wstring(in.begin(), in.end());
}

// Re-encodes a string from the CP_ACP code page to UTF-8 by going through a
// wide string.
std::string ll_convert_string_to_utf8_string(const std::string& in)
{
	const std::wstring wide = ll_convert_string_to_wide(in, CP_ACP);
	return ll_convert_wide_to_string(wide.c_str(), CP_UTF8);
}

namespace
{

void HeapFree_deleter(void* ptr)
{
    // instead of LocalFree(), per https://stackoverflow.com/a/31541205
    HeapFree(GetProcessHeap(), NULL, ptr);
}

} // anonymous namespace

// Returns the system message text for a GetLastError() code as a wide
// string. If FormatMessageW() cannot supply the text, a synthesized message
// is returned instead that contains both the original code and the error
// code FormatMessageW() failed with.
template<>
std::wstring windows_message<std::wstring>(DWORD error)
{
    // derived from https://stackoverflow.com/a/455533
    wchar_t* rawptr = nullptr;
    auto okay = FormatMessageW(
        // use system message tables for GetLastError() codes
        FORMAT_MESSAGE_FROM_SYSTEM |
        // internally allocate buffer and return its pointer
        FORMAT_MESSAGE_ALLOCATE_BUFFER |
        // you cannot pass insertion parameters (thanks Gandalf)
        FORMAT_MESSAGE_IGNORE_INSERTS |
        // ignore line breaks in message definition text
        FORMAT_MESSAGE_MAX_WIDTH_MASK,
        NULL,                       // lpSource, unused with FORMAT_MESSAGE_FROM_SYSTEM
        error,                      // dwMessageId
        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // dwLanguageId
        (LPWSTR)&rawptr,         // lpBuffer: force-cast wchar_t** to wchar_t*
        0,                // nSize, unused with FORMAT_MESSAGE_ALLOCATE_BUFFER
        NULL);            // Arguments, unused

    // make a unique_ptr from rawptr so it gets cleaned up properly
    // (on every return path below, via HeapFree_deleter)
    std::unique_ptr<wchar_t, void(*)(void*)> bufferptr(rawptr, HeapFree_deleter);

    if (okay && bufferptr)
    {
        // got the message, return it ('okay' is length in characters)
        return { bufferptr.get(), okay };
    }

    // did not get the message, synthesize one
    auto format_message_error = GetLastError();
    std::wostringstream out;
    out << L"GetLastError() " << error << L" (FormatMessageW() failed with "
        << format_message_error << L")";
    return out.str();
}

// Windows implementation: looks up environment variable 'key' (converted to
// a wide string) with GetEnvironmentVariableW(). Returns the value in a
// populated boost::optional when the call reports a non-zero length, and an
// empty boost::optional otherwise. A warning is logged for every failure
// other than ERROR_ENVVAR_NOT_FOUND.
// NOTE(review): a variable that exists but is empty also yields n == 0 and
// is therefore reported as absent - confirm that this is intended.
boost::optional<std::wstring> llstring_getoptenv(const std::string& key)
{
    auto wkey = ll_convert_string_to_wide(key);
    // Take a wild guess as to how big the buffer should be.
    std::vector<wchar_t> buffer(1024);
    auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size());
    // If our initial guess was too short, n will indicate the size (in
    // wchar_t's) that buffer should have been, including the terminating nul.
    if (n > (buffer.size() - 1))
    {
        // make it big enough
        buffer.resize(n);
        // and try again
        n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size());
    }
    // did that (ultimately) succeed?
    if (n)
    {
        // great, return populated boost::optional
        // (built from the nul-terminated contents of the buffer)
        return boost::optional<std::wstring>(&buffer[0]);
    }

    // not successful
    auto last_error = GetLastError();
    // Don't bother warning for NOT_FOUND; that's an expected case
    if (last_error != ERROR_ENVVAR_NOT_FOUND)
    {
        LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: "
                   << windows_message<std::string>(last_error) << LL_ENDL;
    }
    // return empty boost::optional
    return {};
}

#else  // ! LL_WINDOWS

// Non-Windows implementation: looks up environment variable 'key' with
// getenv(). Returns its value in a populated boost::optional, or an empty
// boost::optional when getenv() returns null.
boost::optional<std::string> llstring_getoptenv(const std::string& key)
{
    auto value = getenv(key.c_str());
    if (!value)
    {
        // not set: empty boost::optional
        return {};
    }
    return boost::optional<std::string>(value);
}

#endif // ! LL_WINDOWS

// Definitions of LLStringOps' static data members.

// Time offsets in seconds and the daylight-saving flag; all three are
// assigned by LLStringOps::setupDatetimeInfo().
long LLStringOps::sPacificTimeOffset = 0;
long LLStringOps::sLocalTimeOffset = 0;
bool LLStringOps::sPacificDaylightTime = 0;
// Maps datetime substitution names (e.g. "year4") to strftime-style format
// codes; filled by LLStringOps::setupDatetimeInfo().
absl::flat_hash_map<std::string, std::string> LLStringOps::datetimeToCodes;

// Name lists; nothing in this part of the file fills them.
std::vector<std::string> LLStringOps::sWeekDayList;
std::vector<std::string> LLStringOps::sWeekDayShortList;
std::vector<std::string> LLStringOps::sMonthList;
std::vector<std::string> LLStringOps::sMonthShortList;


// Format and AM/PM strings; nothing in this part of the file fills them.
std::string LLStringOps::sDayFormat;
std::string LLStringOps::sAM;
std::string LLStringOps::sPM;


// Three-way comparison of two NUL-terminated llwchar strings.
// On Windows both strings are converted to UTF-8 and compared with strcmp();
// everywhere else the comparison is delegated to wcscoll().
S32	LLStringOps::collate(const llwchar* a, const llwchar* b)
{
#if LL_WINDOWS
	// in Windows, wide string functions operate on 16-bit strings,
	// not the proper 32 bit wide string
	const std::string utf8_a = wstring_to_utf8str(LLWString(a));
	const std::string utf8_b = wstring_to_utf8str(LLWString(b));
	return strcmp(utf8_a.c_str(), utf8_b.c_str());
#else
	return wcscoll(a, b);
#endif
}

// Initializes the static date/time data of LLStringOps:
//  - sLocalTimeOffset: difference in seconds between the current time
//    interpreted as GMT and as local time, reduced by one hour when the
//    local time has tm_isdst set;
//  - sPacificDaylightTime / sPacificTimeOffset: 'daylight' selects an offset
//    of 7 hours (true) or 8 hours (false), stored in seconds;
//  - datetimeToCodes: the name -> strftime-style code table below.
void LLStringOps::setupDatetimeInfo (bool daylight)
{
	time_t nowT, localT, gmtT;
	struct tm * tmpT;

	nowT = time (NULL);

	// Feed the GMT broken-down time back through mktime(), which interprets
	// it as local time; gmtT - localT below is then the zone offset.
	tmpT = gmtime (&nowT);
	gmtT = mktime (tmpT);

	// tmpT is reassigned here and must stay the local time: tm_isdst is read
	// from it below.
	tmpT = localtime (&nowT);
	localT = mktime (tmpT);
	
	sLocalTimeOffset = (long) (gmtT - localT);
	if (tmpT->tm_isdst)
	{
		sLocalTimeOffset -= 60 * 60;	// 1 hour
	}

	sPacificDaylightTime = daylight;
	sPacificTimeOffset = (sPacificDaylightTime? 7 : 8 ) * 60 * 60;

	// The trailing comments show example output for each code.
	// NOTE(review): "%-d" is not a standard strftime code - presumably it is
	// handled by the formatting code that consumes this table; confirm.
	datetimeToCodes["wkday"]	= "%a";		// Thu
	datetimeToCodes["weekday"]	= "%A";		// Thursday
	datetimeToCodes["year4"]	= "%Y";		// 2009
	datetimeToCodes["year"]		= "%Y";		// 2009
	datetimeToCodes["year2"]	= "%y";		// 09
	datetimeToCodes["mth"]		= "%b";		// Aug
	datetimeToCodes["month"]	= "%B";		// August
	datetimeToCodes["mthnum"]	= "%m";		// 08
	datetimeToCodes["day"]		= "%d";		// 31
	datetimeToCodes["sday"]		= "%-d";	// 9
	datetimeToCodes["hour24"]	= "%H";		// 14
	datetimeToCodes["hour"]		= "%H";		// 14
	datetimeToCodes["hour12"]	= "%I";		// 02
	datetimeToCodes["min"]		= "%M";		// 59
	datetimeToCodes["ampm"]		= "%p";		// AM
	datetimeToCodes["second"]	= "%S";		// 59
	datetimeToCodes["timezone"]	= "%Z";		// PST
}