    /** 
     * @file llstring.h
     * @brief String utility functions and std::string class.
     *
    
     * $LicenseInfo:firstyear=2001&license=viewerlgpl$
    
     * Second Life Viewer Source Code
    
     * Copyright (C) 2010, Linden Research, Inc.
     * 
     * This library is free software; you can redistribute it and/or
     * modify it under the terms of the GNU Lesser General Public
     * License as published by the Free Software Foundation;
     * version 2.1 of the License only.
    
     * This library is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     * Lesser General Public License for more details.
    
     * You should have received a copy of the GNU Lesser General Public
     * License along with this library; if not, write to the Free Software
     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
    
     * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
    
     * $/LicenseInfo$
     */
    
    #ifndef LL_LLSTRING_H
    #define LL_LLSTRING_H
    
    
    #include <boost/optional/optional.hpp>
    
    #include <string>
    #include <cstdio>
    
    //#include <locale>
    
    #include <iomanip>
    
    #include <algorithm>
    
    #include <vector>
    #include <map>
    
    #include "llformat.h"
    
    
    #if LL_LINUX || LL_SOLARIS
    #include <wctype.h>
    #include <wchar.h>
    #endif
    
    #include <string.h>
    
    #include <boost/scoped_ptr.hpp>
    
    
    #if LL_SOLARIS
    // stricmp and strnicmp do not exist on Solaris:
    #define stricmp strcasecmp
    #define strnicmp strncasecmp
    #endif
    
    // Fallback character constant ('?').
    // NOTE(review): judging by the name, this presumably stands in for
    // characters that cannot be represented -- confirm against its users.
    const char LL_UNKNOWN_CHAR = '?';
    
    
    #if LL_DARWIN || LL_LINUX || LL_SOLARIS
    // Template specialization of char_traits for U16s. Only necessary on Mac and Linux (exists on Windows already)
    #include <cstring>
    
    namespace std
    {
    /**
     * @brief std::char_traits specialization for U16, so that
     * std::basic_string<U16> can be instantiated.
     *
     * Every operation that inspects or orders characters works on whole
     * char_type units. Only the bulk move/copy helpers use the byte-oriented
     * C library routines, with the length scaled by sizeof(char_type).
     */
    template<>
    struct char_traits<U16>
    {
    	typedef U16 		char_type;
    	typedef int 	    int_type;
    	typedef streampos 	pos_type;
    	typedef streamoff 	off_type;
    	typedef mbstate_t 	state_type;
    	
    	/// Assign a single character.
    	static void 
    		assign(char_type& dst, const char_type& src)
    	{ dst = src; }
    	
    	/// Character equality.
    	static bool 
    		eq(const char_type& lhs, const char_type& rhs)
    	{ return lhs == rhs; }
    	
    	/// Character ordering.
    	static bool 
    		lt(const char_type& lhs, const char_type& rhs)
    	{ return lhs < rhs; }
    	
    	/**
    	 * Lexicographically compare the first n characters of s1 and s2.
    	 *
    	 * Compares whole characters through lt(). memcmp() orders by
    	 * individual bytes, which disagrees with lt() for 16-bit units stored
    	 * little-endian.
    	 *
    	 * @return negative, zero or positive, like strcmp().
    	 */
    	static int 
    		compare(const char_type* s1, const char_type* s2, size_t n)
    	{
    		for (size_t i = 0; i < n; ++i)
    		{
    			if (lt(s1[i], s2[i]))
    			{
    				return -1;
    			}
    			if (lt(s2[i], s1[i]))
    			{
    				return 1;
    			}
    		}
    		return 0;
    	}
    	
    	/// Number of characters before the terminating zero character.
    	static size_t
    		length(const char_type* s)
    	{
    		const char_type* cur_char = s;
    		while (*cur_char != 0)
    		{
    			++cur_char;
    		}
    		return cur_char - s;
    	}
    	
    	/**
    	 * Find the first of the n characters at s that equals a.
    	 *
    	 * memchr() converts the needle to unsigned char and scans single
    	 * bytes, so it cannot locate a 16-bit value and may report a match in
    	 * the middle of a character. Scan element by element instead.
    	 *
    	 * @return pointer to the matching character, or NULL if there is none.
    	 */
    	static const char_type* 
    		find(const char_type* s, size_t n, const char_type& a)
    	{
    		for (size_t i = 0; i < n; ++i)
    		{
    			if (eq(s[i], a))
    			{
    				return s + i;
    			}
    		}
    		return NULL;
    	}
    	
    	/// Copy n characters; the ranges may overlap.
    	static char_type* 
    		move(char_type* dst, const char_type* src, size_t n)
    	{ return static_cast<char_type*>(memmove(dst, src, n * sizeof(char_type))); }
    	
    	/// Copy n characters; the ranges must not overlap.
    	static char_type* 
    		copy(char_type* dst, const char_type* src, size_t n)
    	{  return static_cast<char_type*>(memcpy(dst, src, n * sizeof(char_type))); }	/* Flawfinder: ignore */
    	
    	/// Fill n characters starting at s with the value a.
    	static char_type* 
    		assign(char_type* s, size_t n, char_type a)
    	{ 
    		// memset() writes bytes, so it cannot store a 16-bit fill value;
    		// assign each element explicitly.
    		for (size_t i = 0; i < n; ++i)
    		{
    			s[i] = a;
    		}
    		return s; 
    	}
    	
    	static char_type 
    		to_char_type(const int_type& c)
    	{ return static_cast<char_type>(c); }
    	
    	static int_type 
    		to_int_type(const char_type& c)
    	{ return static_cast<int_type>(c); }
    	
    	static bool 
    		eq_int_type(const int_type& lhs, const int_type& rhs)
    	{ return lhs == rhs; }
    	
    	static int_type 
    		eof() { return static_cast<int_type>(EOF); }
    	
    	/// Map eof() to 0 and return every other value unchanged.
    	static int_type 
    		not_eof(const int_type& c)
    	{ return (c == eof()) ? 0 : c; }
    };
    } // namespace std
    #endif
    
    /**
     * @brief Static helpers for classifying and case-converting single
     * characters (char and llwchar), together with the date/time tables and
     * setup routines declared below.
     */
    class LL_COMMON_API LLStringOps
    {
    private:
    	static long sPacificTimeOffset;
    	static long sLocalTimeOffset;
    	static bool sPacificDaylightTime;
    
    	static std::map<std::string, std::string> datetimeToCodes;
    
    public:
    
    	static std::vector<std::string> sWeekDayList;
    	static std::vector<std::string> sWeekDayShortList;
    	static std::vector<std::string> sMonthList;
    	static std::vector<std::string> sMonthShortList;
    	static std::string sDayFormat;
    
    	static std::string sAM;
    	static std::string sPM;
    
    	// Every char overload converts its argument to unsigned char before
    	// handing it to the C classification/conversion routine; the llwchar
    	// overloads call the corresponding wide-character routine directly.
    
    	// Case conversion
    	static char toUpper(char elem) { return toupper(static_cast<unsigned char>(elem)); }
    	static llwchar toUpper(llwchar elem) { return towupper(elem); }
    
    	static char toLower(char elem) { return tolower(static_cast<unsigned char>(elem)); }
    	static llwchar toLower(llwchar elem) { return towlower(elem); }
    
    	// Classification predicates: the int result of the C routine is
    	// reduced to a bool.
    	static bool isSpace(char elem) { return isspace(static_cast<unsigned char>(elem)) != 0; }
    	static bool isSpace(llwchar elem) { return iswspace(elem) != 0; }
    
    	static bool isUpper(char elem) { return isupper(static_cast<unsigned char>(elem)) != 0; }
    	static bool isUpper(llwchar elem) { return iswupper(elem) != 0; }
    
    	static bool isLower(char elem) { return islower(static_cast<unsigned char>(elem)) != 0; }
    	static bool isLower(llwchar elem) { return iswlower(elem) != 0; }
    
    	static bool isDigit(char a) { return isdigit(static_cast<unsigned char>(a)) != 0; }
    	static bool isDigit(llwchar a) { return iswdigit(a) != 0; }
    
    	static bool isPunct(char a) { return ispunct(static_cast<unsigned char>(a)) != 0; }
    	static bool isPunct(llwchar a) { return iswpunct(a) != 0; }
    
    	static bool isAlpha(char a) { return isalpha(static_cast<unsigned char>(a)) != 0; }
    	static bool isAlpha(llwchar a) { return iswalpha(a) != 0; }
    
    	static bool isAlnum(char a) { return isalnum(static_cast<unsigned char>(a)) != 0; }
    	static bool isAlnum(llwchar a) { return iswalnum(a) != 0; }
    
    	// Collation: the narrow overload forwards to strcoll(); the wide
    	// overload is defined out of line.
    	static S32	collate(const char* a, const char* b) { return strcoll(a, b); }
    	static S32	collate(const llwchar* a, const llwchar* b);
    
    	static void setupDatetimeInfo(bool pacific_daylight_time);
    
    	static void setupWeekDaysNames(const std::string& data);
    	static void setupWeekDaysShortNames(const std::string& data);
    	static void setupMonthNames(const std::string& data);
    	static void setupMonthShortNames(const std::string& data);
    	static void setupDayFormat(const std::string& data);
    
    	// Read-only accessors for the private time settings.
    	static long getPacificTimeOffset() { return sPacificTimeOffset; }
    	static long getLocalTimeOffset() { return sLocalTimeOffset; }
    	// Is the Pacific time zone (aka server time zone)
    	// currently in daylight savings time?
    	static bool getPacificDaylightTime() { return sPacificDaylightTime; }
    
    	static std::string getDatetimeCode (std::string key);
    
        // Express a value like 1234567 as "1.23M" 
        static std::string getReadableNumber(F64 num);
    
    };
    
    /**
     * @brief Return a string constructed from in without crashing if the
     * pointer is NULL.
     *
     * @param in C string to copy; may be NULL.
     * @param maxlen (second overload only) NOTE(review): presumably an upper
     * bound on the number of characters taken from in -- confirm against the
     * implementation.
     * @return the resulting std::string.
     */
    LL_COMMON_API std::string ll_safe_string(const char* in);
    LL_COMMON_API std::string ll_safe_string(const char* in, S32 maxlen);
    
    
    // Letting arbitrary non-string values be assigned into a format_map_t is
    // apparently *really* error-prone, so the map is keyed and valued with this
    // thin wrapper, which offers only the basic string constructors.
    class LLFormatMapString
    {
    public:
    	/// Construct an empty string.
    	LLFormatMapString()
    		: mString()
    	{}
    
    	/// Construct from a C string; the pointer is routed through ll_safe_string().
    	LLFormatMapString(const char* s)
    		: mString(ll_safe_string(s))
    	{}
    
    	/// Construct from a std::string.
    	LLFormatMapString(const std::string& s)
    		: mString(s)
    	{}
    
    	/// Implicit conversion back to std::string (returns a copy).
    	operator std::string() const
    	{
    		return mString;
    	}
    
    	/// Ordering by the wrapped string, so the type can be used as a map key.
    	bool operator<(const LLFormatMapString& rhs) const
    	{
    		return mString.compare(rhs.mString) < 0;
    	}
    
    	/// Length of the wrapped string.
    	std::size_t length() const
    	{
    		return mString.size();
    	}
    	
    private:
    	std::string mString;
    };
    
    template <class T>
    class LLStringUtilBase
    {
    private:
    	// Locale name shared by this instantiation; see setLocale()/getLocale().
    	static std::string sLocale;
    
    public:
    
    	// String type this instantiation operates on, and its size type.
    	typedef std::basic_string<T> string_type;
    	typedef typename string_type::size_type size_type;
    
    	
    public:
    	/////////////////////////////////////////////////////////////////////////////////////////
    	// Static Utility functions that operate on std::strings
    
    
    	
    	// Substitution table accepted by format() and simpleReplacement().
    	// Keys and values are LLFormatMapString.
    	typedef std::map<LLFormatMapString, LLFormatMapString> format_map_t;
    
    	/// Split @a instr into @a tokens. Considers any sequence of characters
    	/// from @a delims as a single field separator.
    	LL_COMMON_API static void getTokens(const string_type& instr,
    										std::vector<string_type >& tokens,
    										const string_type& delims);
    	/// Like the simple scan overload, but returns the scanned vector
    	/// instead of filling a caller-supplied one.
    
    	static std::vector<string_type> getTokens(const string_type& instr,
    											  const string_type& delims);
    
    	/// Adds support for @a keep_delims and @a quotes (either could be an
    	/// empty string). @a quotes defaults to an empty string.
    
    	static void getTokens(const string_type& instr,
    						  std::vector<string_type>& tokens,
    						  const string_type& drop_delims,
    						  const string_type& keep_delims,
    						  const string_type& quotes=string_type());
    
    	/// Like the keep_delims-and-quotes overload, but returns the scanned
    	/// vector.
    
    	static std::vector<string_type> getTokens(const string_type& instr,
    											  const string_type& drop_delims,
    											  const string_type& keep_delims,
    											  const string_type& quotes=string_type());
    
    	/// Adds support for @a escapes (could be an empty string). Unlike the
    	/// overload above, @a quotes has no default here.
    
    	static void getTokens(const string_type& instr,
    						  std::vector<string_type>& tokens,
    						  const string_type& drop_delims,
    						  const string_type& keep_delims,
    						  const string_type& quotes,
    						  const string_type& escapes);
    
    	/// Like the escapes overload, but returns the scanned vector.
    
    	static std::vector<string_type> getTokens(const string_type& instr,
    											  const string_type& drop_delims,
    											  const string_type& keep_delims,
    											  const string_type& quotes,
    											  const string_type& escapes);
    
    	// Formatting helpers. They are only declared in this header; each takes
    	// the string it produces (numStr / replacement / s) by non-const
    	// reference.
    	// NOTE(review): the meaning of the bool and S32 return values is not
    	// visible here -- confirm against the implementation.
    	LL_COMMON_API static void formatNumber(string_type& numStr, string_type decimals);
    	LL_COMMON_API static bool formatDatetime(string_type& replacement, string_type token, string_type param, S32 secFromEpoch);
    	// format() and simpleReplacement() each come in two flavours: the
    	// substitutions are supplied either as a format_map_t or as an LLSD.
    	LL_COMMON_API static S32 format(string_type& s, const format_map_t& substitutions);
    	LL_COMMON_API static S32 format(string_type& s, const LLSD& substitutions);
    	LL_COMMON_API static bool simpleReplacement(string_type& replacement, string_type token, const format_map_t& substitutions);
    	LL_COMMON_API static bool simpleReplacement(string_type& replacement, string_type token, const LLSD& substitutions);
    
    	// NOTE(review): presumably the setter/getter for sLocale -- confirm.
    	LL_COMMON_API static void setLocale (std::string inLocale);
    	LL_COMMON_API static std::string getLocale (void);
    
    	/// True when @a string is non-empty and @a i is no greater than its
    	/// size. Note that i == string.size() is accepted.
    	static bool isValidIndex(const string_type& string, size_type i)
    	{
    		if (string.empty())
    		{
    			return false;
    		}
    		// size_type is unsigned, so there is no lower bound to test.
    		return i <= string.size();
    	}
    
    
    	static bool contains(const string_type& string, T c, size_type i=0)
    	{
    		return string.find(c, i) != string_type::npos;
    	}
    
    
    	// trim() applies trimHead() and then trimTail() to the same string.
    	// All of these take the string by non-const reference and return
    	// nothing.
    	static void	trimHead(string_type& string);
    	static void	trimTail(string_type& string);
    	static void	trim(string_type& string)	{ trimHead(string); trimTail(string); }
    	// NOTE(review): presumably shortens string to at most count
    	// characters -- confirm against the definition.
    	static void truncate(string_type& string, size_type count);
    
    	// Case conversion of the whole string, applied to the argument itself.
    	static void	toUpper(string_type& string);
    	static void	toLower(string_type& string);
    
    	
    	// True if this is the head of s.
    	// NOTE(review): the original wording does not say which of string / s
    	// is the prefix of the other -- confirm against the definition.
    
    	static BOOL	isHead( const string_type& string, const T* s ); 
    
    
    	/**
    	 * @brief Returns true if string starts with substr
    	 *
    	 * If either string or substr is empty, this method returns false.
    	 *
    	 * @param string the string to examine.
    	 * @param substr the prefix to look for.
    	 */
    	static bool startsWith(
    
    		const string_type& string,
    		const string_type& substr);
    
    
    	/**
    	 * @brief Returns true if string ends in substr
    	 *
    	 * If either string or substr is empty, this method returns false.
    	 *
    	 * @param string the string to examine.
    	 * @param substr the suffix to look for.
    	 */
    	static bool endsWith(
    
    		const string_type& string,
    		const string_type& substr);
    
    	/**
    	 * get environment string value with proper Unicode handling
    	 * (key is always UTF-8)
    	 * detect absence by return value == dflt
    	 *
    	 * @param key  name of the environment variable, UTF-8.
    	 * @param dflt value returned when the variable is absent (default "").
    	 */
    	static string_type getenv(const std::string& key, const string_type& dflt="");
    	/**
    	 * get optional environment string value with proper Unicode handling
    	 * (key is always UTF-8)
    	 * detect absence by (! return value)
    	 *
    	 * @param key name of the environment variable, UTF-8.
    	 */
    	static boost::optional<string_type> getoptenv(const std::string& key);
    
    
    	// Editing helpers. Every function returning void takes the string by
    	// non-const reference.
    	// NOTE(review): the exact transformations are defined out of line and
    	// are not visible in this part of the header.
    	static void	addCRLF(string_type& string);
    	static void	removeCRLF(string_type& string);
    
    	static void removeWindowsCR(string_type& string);
    
    	static void	replaceTabsWithSpaces( string_type& string, size_type spaces_per_tab );
    	static void	replaceNonstandardASCII( string_type& string, T replacement );
    	static void	replaceChar( string_type& string, T target, T replacement );
    	// target and replacement are taken by value here.
    	static void replaceString( string_type& string, string_type target, string_type replacement );
    
    	// containsNonprintable() only inspects (const reference);
    	// stripNonprintable() takes a mutable reference.
    	static BOOL	containsNonprintable(const string_type& string);
    	static void	stripNonprintable(string_type& string);
    
    	/**
    	 * Double-quote an argument string if needed, unless it's already
    	 * double-quoted. Decide whether it's needed based on the presence of any
    	 * character in @a triggers (default space or double-quote). If we quote
    	 * it, escape any embedded double-quote with the @a escape string (default
    	 * backslash).
    	 *
    	 * Passing triggers="" means always quote, unless it's already double-quoted.
    	 */
    	// Defaults: triggers is a space plus a double-quote; escape is a
    	// single backslash.
    	static string_type quote(const string_type& str,
    							 const string_type& triggers=" \"",
    							 const string_type& escape="\\");
    
    
    	/**
    	 * @brief Unsafe way to make ascii characters. You should probably
    	 * only call this when interacting with the host operating system.
    	 * The 1 byte std::string does not work correctly.
    	 * The 2 and 4 byte std::string probably work, so LLWStringUtil::_makeASCII
    	 * should work.
    	 *
    	 * @param string the string to convert; taken by non-const reference.
    	 */
    
    	static void _makeASCII(string_type& string);
    
    
    	// Conversion to other data types
    
    	// One converter per target type. Each takes the text by const
    	// reference and the destination by non-const reference.
    	// NOTE(review): presumably the BOOL result reports whether the
    	// conversion succeeded -- confirm against the definitions.
    	static BOOL	convertToBOOL(const string_type& string, BOOL& value);
    	static BOOL	convertToU8(const string_type& string, U8& value);
    	static BOOL	convertToS8(const string_type& string, S8& value);
    	static BOOL	convertToS16(const string_type& string, S16& value);
    	static BOOL	convertToU16(const string_type& string, U16& value);
    	static BOOL	convertToU32(const string_type& string, U32& value);
    	static BOOL	convertToS32(const string_type& string, S32& value);
    	static BOOL	convertToF32(const string_type& string, F32& value);
    	static BOOL	convertToF64(const string_type& string, F64& value);
    
    
    	/////////////////////////////////////////////////////////////////////////////////////////
    	// Utility functions for working with char*'s and strings
    
    	// Like strcmp but also handles empty strings. Uses
    	// current locale.
    	// Overloaded for raw character pointers and for string_type.
    	static S32		compareStrings(const T* lhs, const T* rhs);
    
    	static S32		compareStrings(const string_type& lhs, const string_type& rhs);
    
    	
    	// Case insensitive version of above. Uses current locale on
    	// Win32, and falls back to a non-locale aware comparison on
    	// Linux.
    	// Overloaded for raw character pointers and for string_type.
    	static S32		compareInsensitive(const T* lhs, const T* rhs);
    
    	static S32		compareInsensitive(const string_type& lhs, const string_type& rhs);
    
    
    	// Case sensitive comparison with good handling of numbers.  Does not use current locale.
    	// a.k.a. strdictcmp()
    
    	static S32		compareDict(const string_type& a, const string_type& b);
    
    
    	// Case *in*sensitive comparison with good handling of numbers.  Does not use current locale.
    	// a.k.a. strdictcmp()
    	// NOTE(review): the a.k.a. line is identical to the one on
    	// compareDict() above -- confirm it is intended for both.
    
    	static S32		compareDictInsensitive(const string_type& a, const string_type& b);
    
    
    	// Puts compareDict() in a form appropriate for LL container classes to use for sorting.
    	// Returns BOOL rather than the S32 produced by compareDict().
    
    	static BOOL		precedesDict( const string_type& a, const string_type& b );
    
    
    	// A replacement for strncpy.
    	// If the dst buffer is dst_size bytes long or more, ensures that dst is null terminated and holds
    	// up to dst_size-1 characters of src.
    	// dst      - destination buffer
    	// src      - source characters
    	// dst_size - size of the destination buffer
    	static void		copy(T* dst, const T* src, size_type dst_size);
    	
    	// Copies src into dst at a given offset.  
    	// dst is taken by non-const reference; src is not modified.
    
    	static void		copyInto(string_type& dst, const string_type& src, size_type offset);
    
    	
    	/// True when @a c is an underscore or LLStringOps::isAlnum() accepts it.
    	static bool		isPartOfWord(T c)
    	{
    		if (c == static_cast<T>('_'))
    		{
    			return true;
    		}
    		return LLStringOps::isAlnum(c);
    	}
    
    
    #ifdef _DEBUG	
    	// Declared only when _DEBUG is defined.
    	LL_COMMON_API static void		testHarness();
    #endif
    
    private:
    
    	// Private helper. start is passed by non-const reference, and tokens is
    	// a caller-supplied vector.
    	// NOTE(review): presumably used by format() -- confirm.
    	// NOTE(review): the out-of-class definition of LLStringUtilBase<T>::null
    	// follows directly after this declaration, yet no declaration of `null`
    	// and no closing brace of the class are visible before it -- check that
    	// nothing was lost from this header.
    	LL_COMMON_API static size_type getSubstitution(const string_type& instr, size_type& start, std::vector<string_type >& tokens);
    
    // Out-of-class definitions of LLStringUtilBase's static data members.
    // `null` is default-constructed.
    template<class T> const std::basic_string<T> LLStringUtilBase<T>::null;
    
    template<class T> std::string LLStringUtilBase<T>::sLocale;
    
    // Convenience names: utilities for char strings, utilities for llwchar
    // strings, and the llwchar string type itself.
    typedef LLStringUtilBase<char> LLStringUtil;
    typedef LLStringUtilBase<llwchar> LLWStringUtil;
    typedef std::basic_string<llwchar> LLWString;
    
    //@ Use this where we want to disallow input in the form of "foo"
    //  This is used to catch places where english text is embedded in the code
    //  instead of in a translatable XUI file.
    //
    //  Construction from a C string must be spelled out explicitly, whereas a
    //  std::string converts implicitly.
    class LLStringExplicit : public std::string
    {
    public:
    	/// Explicit construction from a C string. Building a std::string from a
    	/// null pointer is undefined behavior, so NULL is treated as the empty
    	/// string.
    	explicit LLStringExplicit(const char* s) : std::string(s ? s : "") {}
    	/// Implicit construction from a std::string.
    	LLStringExplicit(const std::string& s) : std::string(s) {}
    	/// Substring construction: up to n characters of s starting at pos
    	/// (n defaults to "through the end of s").
    	LLStringExplicit(const std::string& s, size_type pos, size_type n = std::string::npos) : std::string(s, pos, n) {}
    };
    
    /// Comparison functor that forwards to LLStringUtil::precedesDict() and
    /// reduces its BOOL result to a bool.
    struct LLDictionaryLess
    {
    	bool operator()(const std::string& a, const std::string& b) const
    	{
    		return LLStringUtil::precedesDict(a, b) != 0;
    	}
    };
    
    
    /**
     * Simple support functions
     */
    
    /**
     * @brief chop off the trailing characters in a string.
     *
     * This function works on bytes rather than glyphs, so this will
     * incorrectly truncate non-single byte strings.
     * Use utf8str_truncate() for utf8 strings
     *
     * @param in the string to copy from.
     * @param count number of trailing bytes to drop. When count is greater
     * than or equal to the length of in, the result is empty.
     * @return a copy of in string minus the trailing count bytes.
     */
    inline std::string chop_tail_copy(
    	const std::string& in,
    	std::string::size_type count)
    {
    	// size_type is unsigned: without this guard, in.length() - count would
    	// wrap around for count > length and the whole string would be returned.
    	if (count >= in.length())
    	{
    		return std::string();
    	}
    	return std::string(in, 0, in.length() - count);
    }
    
    /**
     * NOTE(review): undocumented in this header; presumably reports whether
     * hex is a hexadecimal digit character -- confirm against the
     * implementation.
     */
    LL_COMMON_API bool is_char_hex(char hex);
    
    /**
     * @brief This translates a nybble stored as a hex value from 0-f back
     * to a nybble in the low order bits of the return byte.
     */
    LL_COMMON_API U8 hex_as_nybble(char hex);
    
    /**
     * @brief read the contents of a file into a string.
     *
     * Since this function has no concept of character encoding, most
     * anything you do with this method is ill-advised. Please avoid.
     * @param str [out] The string which will receive the file contents.
     * @param filename The full name of the file to read.
     * @return Returns true on success. If false, str is unmodified.
     */
    LL_COMMON_API bool _read_file_into_string(std::string& str, const std::string& filename);
    // NOTE(review): undocumented in this header; the criterion tested on elem
    // is defined in the implementation.
    LL_COMMON_API bool iswindividual(llwchar elem);
    
    /**
     * Unicode support
     */
    
    
    /// Generic conversion aliases: ll_convert<TO>(from) dispatches to the
    /// ll_convert_impl<TO, FROM> functor specialized for that pair of types.
    template<class TO, class FROM, class Enable=void>
    struct ll_convert_impl
    {
        // The call operator is declared but intentionally never defined here:
        // every supported (TO, FROM) combination supplies its own
        // specialization.
        TO operator()(const FROM& in) const;
    };
    
    /// Function-template front end, giving the ll_convert<TO>(from_value)
    /// spelling with FROM deduced from the argument.
    template<class TO, class FROM>
    TO ll_convert(const FROM& in)
    {
        ll_convert_impl<TO, FROM> converter;
        return converter(in);
    }
    
    /// Degenerate case: converting a type to itself returns a copy of the input.
    template<class T>
    struct ll_convert_impl<T, T>
    {
        T operator()(const T& in) const
        {
            return in;
        }
    };
    
    // Specialize ll_convert_impl<TO, FROM> to return EXPR.
    // EXPR may refer to the value being converted as `in`. The expansion ends
    // without a semicolon, so the caller writes one after the macro invocation.
    #define ll_convert_alias(TO, FROM, EXPR)                    \
    template<>                                                  \
    struct ll_convert_impl<TO, FROM>                            \
    {                                                           \
        TO operator()(const FROM& in) const { return EXPR; }    \
    }
    
    
    // Make the incoming string a utf8 string. Replaces any unknown glyph
    // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest
    // of the data may not be recovered.
    LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw);
    
    //
    // We should never use UTF16 except when communicating with Win32!
    
    // https://docs.microsoft.com/en-us/cpp/cpp/char-wchar-t-char16-t-char32-t
    // nat 2018-12-14: I consider the whole llutf16string thing a mistake, because
    // the Windows APIs we want to call are all defined in terms of wchar_t*
    // (or worse, LPCTSTR).
    // https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types
    
    // While there is no point coding for an ASCII-only world (! defined(UNICODE)),
    // use of U16 and llutf16string for Windows APIs locks in /Zc:wchar_t-. Going
    // forward, we should code in terms of wchar_t and std::wstring so as to
    // support either setting of /Zc:wchar_t.
    
    // The first link above states that char can be used to hold ASCII or any
    // multi-byte character set, and distinguishes wchar_t (UTF-16LE), char16_t
    // (UTF-16) and char32_t (UTF-32). Nonetheless, within this code base:
    // * char and std::string always hold UTF-8 (of which ASCII is a subset). It
    //   is a BUG if they are used to pass strings in any other multi-byte
    //   encoding.
    // * wchar_t and std::wstring should be our interface to Windows wide-string
    //   APIs, and therefore hold UTF-16LE.
    // * U16 and llutf16string are the previous but DEPRECATED UTF-16LE type. Do
    //   not introduce new uses of U16 or llutf16string for string data.
    // * llwchar and LLWString hold UTF-32 strings.
    // * Do not introduce char16_t or std::u16string.
    // * Do not introduce char32_t or std::u32string.
    
    // String of U16 units. This typedef may or may not be identical to
    // std::wstring, depending on LL_WCHAR_T_NATIVE.
    
    typedef std::basic_string<U16> llutf16string;
    
    
    // ll_convert_u16_alias(TO, FROM, EXPR) registers an ll_convert
    // specialization involving llutf16string. It expands to ll_convert_alias
    // only when llutf16string is a type distinct from std::wstring, and to
    // nothing otherwise.
    #if ! defined(LL_WCHAR_T_NATIVE)
    // wchar_t is identical to U16, and std::wstring is identical to llutf16string.
    // Defining an ll_convert alias involving llutf16string would collide with the
    // comparable preferred alias involving std::wstring. (In this scenario, if
    // you pass llutf16string, it will engage the std::wstring specialization.)
    #define ll_convert_u16_alias(TO, FROM, EXPR) // nothing
    
    #else  // defined(LL_WCHAR_T_NATIVE)
    
    // wchar_t is a distinct native type, so llutf16string is also a distinct
    // type, and there IS a point to converting separately to/from llutf16string.
    // (But why? Windows APIs are still defined in terms of wchar_t, and
    // in this scenario llutf16string won't work for them!)
    #define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
    
    
    #if LL_WINDOWS
    // LL_WCHAR_T_NATIVE is defined on non-Windows systems because, in fact,
    // wchar_t is native. Everywhere but Windows, we use it for llwchar (see
    // stdtypes.h). That makes LLWString identical to std::wstring, so these
    // aliases for std::wstring would collide with those for LLWString. Only
    // define on Windows, where converting between std::wstring and llutf16string
    // means copying chars.
    // Both aliases build the result from the [begin, end) range of the input.
    ll_convert_alias(llutf16string, std::wstring, llutf16string(in.begin(), in.end()));
    ll_convert_alias(std::wstring, llutf16string,  std::wstring(in.begin(), in.end()));
    #endif // LL_WINDOWS
    #endif // defined(LL_WCHAR_T_NATIVE)
    
    // llutf16string -> LLWString. Two overloads: with an explicit len, and
    // for the whole string.
    // NOTE(review): len presumably limits how much of the input is
    // converted -- confirm the unit against the implementation.
    LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len);
    LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str);
    
    // ll_convert<LLWString>(llutf16string) uses the whole-string overload.
    ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in));
    
    
    // LLWString -> llutf16string, again with and without an explicit len.
    LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len);
    LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str);
    
    // ll_convert<llutf16string>(LLWString) uses the whole-string overload.
    ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in));
    
    
    // std::string (UTF-8) -> llutf16string, with and without an explicit len.
    LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len);
    LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str );
    
    // ll_convert<llutf16string>(std::string) uses the whole-string overload.
    ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in));
    
    
    // std::string (UTF-8) -> LLWString, with and without an explicit len.
    LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len);
    LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str);
    // Same function, better name. JC
    // (Forwards to the whole-string overload of utf8str_to_wstring().)
    inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); }
    
    // best name of all: ll_convert<LLWString>(std::string)
    ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in));
    
    
    // Writes through outchars and returns an S32.
    // NOTE(review): presumably the return value is the number of chars
    // written, and the caller must supply a large enough buffer -- confirm.
    LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars);
    
    // LLWString -> std::string (UTF-8), with and without an explicit len.
    LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len);
    LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str);
    
    // ll_convert<std::string>(LLWString) uses the whole-string overload.
    ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in));
    
    // llutf16string -> std::string (UTF-8), with and without an explicit len.
    LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len);
    LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str);
    
    // ll_convert<std::string>(llutf16string) uses the whole-string overload.
    ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in));
    
    #if LL_WINDOWS
    // Windows-only overload of wstring_to_utf8str() for llutf16string; it
    // forwards to utf16str_to_utf8str().
    inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);}
    #endif
    
    
    // Length of this UTF32 string in bytes when transformed to UTF8
    LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr); 
    
    // Length in bytes of this wide char in a UTF8 string
    LL_COMMON_API S32 wchar_utf8_length(const llwchar wc); 
    
    // NOTE(review): undocumented in this header; judging by the name it
    // returns a lower-cased copy of a UTF-8 string -- confirm.
    LL_COMMON_API std::string utf8str_tolower(const std::string& utf8str);
    
    // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
    LL_COMMON_API S32 utf16str_wstring_length(const llutf16string &utf16str, S32 len);
    
    // Length in utf16string (UTF-16) of wlen wchars beginning at woffset.
    LL_COMMON_API S32 wstring_utf16_length(const LLWString & wstr, S32 woffset, S32 wlen);
    
    // Length in wstring (i.e., llwchar count) of a part of a wstring specified by utf16 length (i.e., utf16 units.)
    // unaligned is an optional out-parameter (defaults to NULL).
    // NOTE(review): its meaning is not stated here -- confirm against the implementation.
    LL_COMMON_API S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, S32 woffset, S32 utf16_length, BOOL *unaligned = NULL);
    
    /**
     * @brief Properly truncate a utf8 string to a maximum byte count.
     * 
     * The returned string may be shorter than max_len if the truncation
     * would otherwise fall in the middle of a glyph. If max_len is longer
     * than the string passed in, the return value == utf8str.
     * @param utf8str A valid utf8 string to truncate.
     * @param max_len The maximum number of bytes in the return value.
     * @return Returns a valid utf8 string with byte count <= max_len.
     */
    LL_COMMON_API std::string utf8str_truncate(const std::string& utf8str, const S32 max_len);
    
    // NOTE(review): undocumented in this header; presumably returns a trimmed
    // copy of utf8str -- confirm what is trimmed.
    LL_COMMON_API std::string utf8str_trim(const std::string& utf8str);
    
    // NOTE(review): undocumented in this header; presumably a case-insensitive
    // comparison of two UTF-8 strings -- confirm the sign convention of the
    // S32 result.
    LL_COMMON_API S32 utf8str_compare_insensitive(
    	const std::string& lhs,
    	const std::string& rhs);
    
    
    /**
    * @brief Properly truncate a utf8 string to a maximum character count.
    *
    * If symbol_len is longer than the string passed in, the return
    * value == utf8str.
    * @param utf8str A valid utf8 string to truncate.
    * @param symbol_len The maximum number of symbols in the return value.
    * @return Returns a valid utf8 string with symbol count <= symbol_len.
    */
    LL_COMMON_API std::string utf8str_symbol_truncate(const std::string& utf8str, const S32 symbol_len);
    
    
    /**
     * @brief Replace all occurrences of target_char with replace_char
     *
     * @param utf8str A utf8 string to process.
     * @param target_char The wchar to be replaced
     * @param replace_char The wchar which is written on replace
     * @return The processed string; the input is taken by const reference.
     */
    LL_COMMON_API std::string utf8str_substChar(
    	const std::string& utf8str,
    	const llwchar target_char,
    	const llwchar replace_char);
    
    // NOTE(review): undocumented in this header; judging by the name it
    // returns an ASCII-only version of utf8str -- confirm how non-ASCII
    // characters are handled.
    LL_COMMON_API std::string utf8str_makeASCII(const std::string& utf8str);
    
    // Hack - used for evil notecards.
    LL_COMMON_API std::string mbcsstring_makeASCII(const std::string& str); 
    
    // NOTE(review): undocumented in this header; judging by the name it
    // returns utf8str with CR/LF characters removed -- confirm.
    LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str);
    
    
    #if LL_WINDOWS
    /* @name Windows string helpers
     */
    //@{
    
    /**
     * @brief Implementation of the expected snprintf interface.
     *
     * If the size of the passed in buffer is not large enough to hold the string,
     * two bad things happen:
     * 1. resulting formatted string is NOT null terminated
     * 2. Depending on the platform, the return value could be a) the required
     *    size of the buffer to copy the entire formatted string or b) -1.
     *    On Windows with VS.Net 2003, it returns -1 e.g. 
     *
     * safe_snprintf always adds a NULL terminator so that the caller does not
     * need to check for return value or need to add the NULL terminator.
     * It does not, however change the return value - to let the caller know
     * that the passed in buffer size was not large enough to hold the
     * formatted string.
     *
     */
    
    // Deal with the differences on Windows
    namespace snprintf_hack
    {
    	LL_COMMON_API int snprintf(char *str, size_t size, const char *format, ...);
    }
    
    // Make snprintf_hack::snprintf available under the unqualified name
    // snprintf.
    using snprintf_hack::snprintf;
    
    /**
     * @brief Convert a wide string to std::string
     *
     * This replaces the unsafe W2A macro from ATL.
     *
     * @param in the wide string to convert.
     * @param code_page target code page; the overloads without this
     * parameter default to CP_UTF8.
     */
    
    LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page);
    
    LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8
    
    // std::wstring conveniences: both forward in.c_str() to the wchar_t*
    // overloads above.
    inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page)
    {
        return ll_convert_wide_to_string(in.c_str(), code_page);
    }
    inline std::string ll_convert_wide_to_string(const std::wstring& in)
    {
        return ll_convert_wide_to_string(in.c_str());
    }
    // ll_convert<std::string>(std::wstring) uses the default-code-page overload.
    ll_convert_alias(std::string, std::wstring, ll_convert_wide_to_string(in));
    
    // Reverse direction: std::string to std::wstring, with an explicit code
    // page or the default one.
    LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in,
                                                         unsigned int code_page);
    LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in);
                                                         // default CP_UTF8
    // ll_convert<std::wstring>(std::string) uses the default-code-page overload.
    ll_convert_alias(std::wstring, std::string, ll_convert_string_to_wide(in));
    
    /**
     * Convert a Windows wide string to our LLWString
     */
    // Declaration only: takes a std::wstring by const reference, returns an LLWString by value.
    LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in);
    // NOTE(review): ll_convert_alias is defined outside this section; presumably
    // it routes ll_convert<LLWString>(std::wstring) to the call below -- confirm.
    ll_convert_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in));
    
    /**
     * Convert LLWString to Windows wide string
     */
    // Declaration only: takes an LLWString by const reference, returns a std::wstring by value.
    LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in);
    // NOTE(review): ll_convert_alias is defined outside this section; presumably
    // it routes ll_convert<std::wstring>(LLWString) to the call below -- confirm.
    ll_convert_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in));
    
    /**
     * Converts incoming string into utf8 string
     *
     */
    // NOTE(review): the encoding assumed for `in` is not stated in this header --
    // confirm against the definition before passing already-UTF-8 data.
    LL_COMMON_API std::string ll_convert_string_to_utf8_string(const std::string& in);
    
    /// Get Windows message string for passed GetLastError() code
    // VS 2013 doesn't let us forward-declare this template, which is what we
    // started with, so the implementation could reference the specialization we
    // haven't yet declared. Somewhat weirdly, just stating the generic
    // implementation in terms of the specialization works, even in this order...
    
    // the general case is just a conversion from the sole implementation
    // Microsoft says DWORD is a typedef for unsigned long
    // https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types
    // so rather than drag windows.h into everybody's include space...
    template<typename STRING>
    STRING windows_message(unsigned long error)
    {
        // Obtain the message via the std::wstring specialization, then convert
        // that wide string to the requested STRING type.
        const std::wstring wide_message = windows_message<std::wstring>(error);
        return ll_convert<STRING>(wide_message);
    }
    
    /// There's only one real implementation
    // Explicit specialization for std::wstring. Only declared here; the generic
    // template above forwards to it and converts the result.
    template<>
    LL_COMMON_API std::wstring windows_message<std::wstring>(unsigned long error);
    
    /// Get Windows message string, implicitly calling GetLastError()
    template<typename STRING>
    STRING windows_message()
    {
        // Read the last-error code first, then delegate to the overload that
        // takes the code explicitly.
        const unsigned long last_error = GetLastError();
        return windows_message<STRING>(last_error);
    }
    
    
    
    // Windows branch of the conditional: the result is a boost::optional<std::wstring>.
    // NOTE(review): presumably an environment-variable lookup by `key`, with an
    // empty optional when the variable is not set -- confirm against the definition.
    LL_COMMON_API boost::optional<std::wstring> llstring_getoptenv(const std::string& key);
    
    #else // ! LL_WINDOWS
    
    // Non-Windows branch: same name and parameter, but the result is a
    // boost::optional<std::string>.
    LL_COMMON_API boost::optional<std::string>  llstring_getoptenv(const std::string& key);
    
    #endif // ! LL_WINDOWS
    
    
    /**
     * Many of the 'strip' and 'replace' methods of LLStringUtilBase need
     * specialization to work with the signed char type.
     * Sadly, it is not possible (AFAIK) to specialize a single method of
     * a template class.
     * That stuff should go here.
     */
    namespace LLStringFn
    {
    	/**
    	 * @brief Replace all non-printable characters with replacement in
    	 * string.
    	 * NOTE - this will zap non-ascii
    	 *
    	 * @param [in,out] string the string to modify. Out value is the string
    	 * with zero non-printable characters.
    	 * @param replacement The replacement character. Use LL_UNKNOWN_CHAR if unsure.
    	 */
    	LL_COMMON_API void replace_nonprintable_in_ascii(
    		std::basic_string<char>& string,
    		char replacement);
    
    
    	/**
    	 * @brief Replace all non-printable characters and pipe characters
    	 * with replacement in a string.
    	 * NOTE - this will zap non-ascii
    	 *
    	 * @param [in,out] str the string to modify. Out value is the string
    	 * with zero non-printable characters and zero pipe characters.
    	 * @param replacement The replacement character. Use LL_UNKNOWN_CHAR if unsure.
    	 */
    	LL_COMMON_API void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
    									   char replacement);
    
    
    	/**
    	 * @brief Remove all characters that are not allowed in XML 1.0.
    	 * Returns a copy of the string with those characters removed.
    	 * Works with US ASCII and UTF-8 encoded strings.  JC
    	 *
    	 * @param input The string to filter; it is taken by const reference.
    	 */
    	LL_COMMON_API std::string strip_invalid_xml(const std::string& input);
    
    
    	/**
    	 * @brief Replace all control characters (0 <= c < 0x20) with replacement in
    	 * string.   This is safe for utf-8
    	 *
    	 * @param [in,out] string the string to modify. Out value is the string
    	 * with zero control characters.
    	 * @param replacement The replacement character. Use LL_UNKNOWN_CHAR if unsure.
    	 */
    	LL_COMMON_API void replace_ascii_controlchars(
    		std::basic_string<char>& string,
    		char replacement);
    }
    
    ////////////////////////////////////////////////////////////
    // NOTE: LLStringUtil::format, getTokens, and support functions moved to llstring.cpp.
    // There is no LLWStringUtil::format implementation currently.
    
    // Calling these for anything other than LLStringUtil will produce link errors.
    
    
    ////////////////////////////////////////////////////////////
    
    
    // static
    // Return-by-value convenience form: runs the out-parameter getTokens()
    // overload with a single delimiter set and returns the collected tokens.
    template <class T>
    std::vector<typename LLStringUtilBase<T>::string_type>
    LLStringUtilBase<T>::getTokens(const string_type& instr, const string_type& delims)
    {
    	std::vector<string_type> result;
    	getTokens(instr, result, delims);
    	return result;
    }
    
    // static
    // Return-by-value convenience form: runs the out-parameter getTokens()
    // overload with drop delimiters, keep delimiters and quote characters, and
    // returns the collected tokens.
    template <class T>
    std::vector<typename LLStringUtilBase<T>::string_type>
    LLStringUtilBase<T>::getTokens(
    	const string_type& instr,
    	const string_type& drop_delims,
    	const string_type& keep_delims,
    	const string_type& quotes)
    {
    	std::vector<string_type> result;
    	getTokens(instr, result, drop_delims, keep_delims, quotes);
    	return result;
    }
    
    // static
    // Return-by-value convenience form: runs the out-parameter getTokens()
    // overload with drop delimiters, keep delimiters, quote characters and
    // escape characters, and returns the collected tokens.
    template <class T>
    std::vector<typename LLStringUtilBase<T>::string_type>
    LLStringUtilBase<T>::getTokens(
    	const string_type& instr,
    	const string_type& drop_delims,
    	const string_type& keep_delims,
    	const string_type& quotes,
    	const string_type& escapes)
    {
    	std::vector<string_type> result;
    	getTokens(instr, result, drop_delims, keep_delims, quotes, escapes);
    	return result;
    }
    
    namespace LLStringUtilBaseImpl
    {
    
    /**
     * Input string scanner helper for getTokens(), or really any other
     * character-parsing routine that may have to deal with escape characters.
     * This implementation defines the concept (also an interface, should you
     * choose to implement the concept by subclassing) and provides trivial
     * implementations for a string @em without escape processing.
     */
    template <class T>
    struct InString
    {
    	typedef std::basic_string<T> string_type;
    	typedef typename string_type::const_iterator const_iterator;
    
    	/// Scan the half-open range [b, e).
    	InString(const_iterator b, const_iterator e):
    		mIter(b),
    		mEnd(e)
    	{}
    	/// Subclasses override the virtual methods below, so destroy polymorphically.
    	virtual ~InString() {}
    
    	/// Have we consumed the whole input range?
    	bool done() const { return mIter == mEnd; }
    
    	/// Is the current character (*mIter) escaped? This implementation can
    	/// answer trivially because it doesn't support escapes.
    	virtual bool escaped() const { return false; }
    
    	/// Obtain the current character and advance @c mIter.
    	virtual T next() { return *mIter++; }
    
    	/// Does the current character match specified character?
    	virtual bool is(T ch) const { return (! done()) && *mIter == ch; }
    
    	/// Is the current character any one of the specified characters?
    	virtual bool oneof(const string_type& delims) const
    	{
    		return (! done()) && LLStringUtilBase<T>::contains(delims, *mIter);
    	}
    
    	/**
    	 * Scan forward from @a from until either @a delim or end. This is primarily
    	 * useful for processing quoted substrings.
    	 *
    	 * If we do see @a delim, append everything from @a from until (excluding)
    	 * @a delim to @a into, advance @c mIter to skip @a delim, and return @c
    	 * true.
    	 *
    	 * If we do not see @a delim, do not alter @a into or @c mIter and return
    	 * @c false. Do not pass GO, do not collect $200.
    	 *
    	 * @note The @c false case described above implements normal getTokens()
    	 * treatment of an unmatched open quote: treat the quote character as if
    	 * escaped, that is, simply collect it as part of the current token. Other
    	 * plausible behaviors directly affect the way getTokens() deals with an
    	 * unmatched quote: e.g. throwing an exception to treat it as an error, or
    	 * assuming a close quote beyond end of string (in which case return @c
    	 * true).
    	 */
    	virtual bool collect_until(string_type& into, const_iterator from, T delim)
    	{
    		const_iterator found = std::find(from, mEnd, delim);
    		// If we didn't find delim, change nothing, just tell caller.
    		if (found == mEnd)
    			return false;
    		// Found delim! Append everything between from and found.
    		into.append(from, found);
    		// advance past delim in input
    		mIter = found + 1;
    		return true;
    	}
    
    	/// Current scan position and end of input; subclasses access these directly.
    	const_iterator mIter, mEnd;
    };
    
    /// InString subclass that handles escape characters
    template <class T>
    class InEscString: public InString<T>
    {
    public:
    	typedef InString<T> super;
    	typedef typename super::string_type string_type;
    	typedef typename super::const_iterator const_iterator;
    	using super::done;
    
    	using super::mIter;
    	using super::mEnd;
    
    	InEscString(const_iterator b, const_iterator e, const string_type& escapes):