From c6108f0fadd54a60c8bbeecb3f3604073a934c73 Mon Sep 17 00:00:00 2001
From: Lynx Linden <lynx@lindenlab.com>
Date: Fri, 15 Jan 2010 11:07:37 +0000
Subject: [PATCH] EXT-4125: Regexes hurt my head!

Updated the regex to match a free URL in plain text with no http:
protocol. This now explicitly does not match e-mail addresses, such as
test@lindenlab.com (yay negative lookbehind regexes). It additionally
matches host names followed by a port and/or path, e.g., secondlife.com/status.

I've added a bunch more unit tests to assert positive and negative
matches for this regex, because no human can do this in their head.
---
 indra/llui/llurlentry.cpp            |  2 +-
 indra/llui/tests/llurlentry_test.cpp | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/indra/llui/llurlentry.cpp b/indra/llui/llurlentry.cpp
index 1b6dd1b264..4927e57a52 100644
--- a/indra/llui/llurlentry.cpp
+++ b/indra/llui/llurlentry.cpp
@@ -204,7 +204,7 @@ LLUrlEntryHTTPNoProtocol::LLUrlEntryHTTPNoProtocol()
 	mPattern = boost::regex("("
 				"\\bwww\\.\\S+\\.\\S+" // i.e. www.FOO.BAR
 				"|" // or
-				"\\b[^ \\t\\n\\r\\f\\v:/]+\\.(?:com|net|edu|org)[^[:space:][:alnum:]]*\\>" // i.e. FOO.net
+				"(?<!@)\\b[^[:space:]:@/]+\\.(?:com|net|edu|org)([/:]\\S*)?\\b" // i.e. FOO.net
 				")",
 				boost::regex::perl|boost::regex::icase);
 	mMenuName = "menu_url_http.xml";
diff --git a/indra/llui/tests/llurlentry_test.cpp b/indra/llui/tests/llurlentry_test.cpp
index 38cf7124ce..80be8fcbf7 100644
--- a/indra/llui/tests/llurlentry_test.cpp
+++ b/indra/llui/tests/llurlentry_test.cpp
@@ -571,6 +571,26 @@ namespace tut
 				  "MIT web site is at web.mit.edu and also www.mit.edu",
 				  "web.mit.edu");
 
+		testRegex("don't match e-mail addresses", r,
+				  "test@lindenlab.com",
+				  "");
+
+		testRegex(".com URL with path", r,
+				  "see secondlife.com/status for grid status",
+				  "secondlife.com/status");
+
+		testRegex(".com URL with port", r,
+				  "secondlife.com:80",
+				  "secondlife.com:80");
+
+		testRegex(".com URL with port and path", r,
+				  "see secondlife.com:80/status",
+				  "secondlife.com:80/status");
+
+		testRegex("www.*.com URL with port and path", r,
+				  "see www.secondlife.com:80/status",
+				  "www.secondlife.com:80/status");
+
 		testRegex("invalid .com URL [1]", r,
 				  "..com",
 				  "");
-- 
GitLab