From 1a9942a51c2b5db51adb75356f342665743d1f16 Mon Sep 17 00:00:00 2001
From: Callum Prentice <callum@gmail.com>
Date: Wed, 7 Oct 2020 16:43:01 -0700
Subject: [PATCH] Improve, rationalize and expand comments

---
 indra/llfilesystem/lldiskcache.cpp | 49 ++++++++++------------
 indra/llfilesystem/lldiskcache.h   | 65 ++++++++++++++++++++++++++++--
 2 files changed, 84 insertions(+), 30 deletions(-)

diff --git a/indra/llfilesystem/lldiskcache.cpp b/indra/llfilesystem/lldiskcache.cpp
index 455e27221ed..e2e50c775df 100644
--- a/indra/llfilesystem/lldiskcache.cpp
+++ b/indra/llfilesystem/lldiskcache.cpp
@@ -2,6 +2,12 @@
  * @file lldiskcache.cpp
  * @brief The disk cache implementation.
  *
+ * Note: Rather than keep the top level function comments up
+ * to date in both the source and header files, I elected to
+ * only have explicit comments about each function and variable
+ * in the header - look there for details. The same is true for
+ * description of how this code is supposed to work.
+ *
  * $LicenseInfo:firstyear=2009&license=viewerlgpl$
  * Second Life Viewer Source Code
  * Copyright (C) 2020, Linden Research, Inc.
@@ -40,10 +46,8 @@ LLDiskCache::LLDiskCache(const std::string cache_dir,
     mMaxSizeBytes(max_size_bytes),
     mEnableCacheDebugInfo(enable_cache_debug_info)
 {
-    // the prefix used for cache filenames to disambiguate them from other files
     mCacheFilenamePrefix = "sl_cache";
 
-    // create cache dir if it does not exist
     boost::filesystem::create_directory(cache_dir);
 }
 
@@ -126,7 +130,7 @@ void LLDiskCache::purge()
 const std::string LLDiskCache::assetTypeToString(LLAssetType::EType at)
 {
     /**
-     * Make use of the C++17 (or is it 14) feature that allows
+     * Make use of the handy C++17  feature that allows
      * for inline initialization of an std::map<>
      */
     typedef std::map<LLAssetType::EType, std::string> asset_type_to_name_t;
@@ -190,20 +194,12 @@ const std::string LLDiskCache::metaDataToFilepath(const std::string id,
     return file_path.str();
 }
 
-/**
- * Update the "last write time" of a file to "now". This must be called whenever a
- * file in the cache is read (not written) so that the last time the file was
- * accessed which is used in the mechanism for purging the cache, is up to date.
- */
 void LLDiskCache::updateFileAccessTime(const std::string file_path)
 {
     const std::time_t file_time = std::time(nullptr);
     boost::filesystem::last_write_time(file_path, file_time);
 }
 
-/**
- * 
- */
 const std::string LLDiskCache::getCacheInfo()
 {
     std::ostringstream cache_info;
@@ -219,14 +215,14 @@ const std::string LLDiskCache::getCacheInfo()
     return cache_info.str();
 }
 
-/**
- * Clear the cache by removing all the files in the cache directory
- * individually. It's important to maintain control of which directory
- * if passed in and not let the user inadvertently (or maliciously) set
- * it to an random location like your project source or OS system directory
- */
 void LLDiskCache::clearCache(const std::string cache_dir)
 {
+    /**
+     * See notes on performance in dirFileSize(..) - there may be
+     * a quicker way to do this by operating on the parent dir vs
+     * the component files but it's called infrequently so it's
+     * likely just fine
+     */
     if (boost::filesystem::is_directory(cache_dir))
     {
         for (auto& entry : boost::make_iterator_range(boost::filesystem::directory_iterator(cache_dir), {}))
@@ -242,20 +238,19 @@ void LLDiskCache::clearCache(const std::string cache_dir)
     }
 }
 
-/**
- * Utility function to get the total filesize of all files in a directory. It
- * used to test file extensions to only check cache files but that was removed.
- * There may be a better way that works directly on the folder (similar to
- * right clicking on a folder in the OS and asking for size vs right clicking
- * on all files and adding up manually) but this is very fast - less than 100ms
- * in my testing so, so long as it's not called frequently, it should be okay.
- * Note that's it's only currently used for logging/debugging so if performance
- * is ever an issue, optimizing this or removing it altogether, is an easy win.
- */
 uintmax_t LLDiskCache::dirFileSize(const std::string dir)
 {
     uintmax_t total_file_size = 0;
 
+    /**
+     * There may be a better way that works directly on the folder (similar to
+     * right clicking on a folder in the OS and asking for size vs right clicking
+     * on all files and adding up manually) but this is very fast - less than 100ms
+     * for 10,000 files in my testing so, so long as it's not called frequently,
+     * it should be okay. Note that's it's only currently used for logging/debugging
+     * so if performance is ever an issue, optimizing this or removing it altogether,
+     * is an easy win.
+     */
     if (boost::filesystem::is_directory(dir))
     {
         for (auto& entry : boost::make_iterator_range(boost::filesystem::directory_iterator(dir), {}))
diff --git a/indra/llfilesystem/lldiskcache.h b/indra/llfilesystem/lldiskcache.h
index 34a4fda7560..f718b7a3280 100644
--- a/indra/llfilesystem/lldiskcache.h
+++ b/indra/llfilesystem/lldiskcache.h
@@ -1,6 +1,41 @@
 /**
  * @file lldiskcache.h
- * @brief The disk cache implementation.
+ * @brief The disk cache implementation declarations.
+ *
+ * @Description:
+ * This code implements a disk cache using the following ideas:
+ * 1/ The metadata for a file can be encapsulated in the filename.
+      The filenames will be composed of the following fields:
+        Prefix:     Used to identify the file as a part of the cache.
+                    An additional reason for using a prefix is that it
+                    might be possible, either accidentally or maliciously
+                    to end up with the cache dir set to a non-cache
+                    location such as your OS system dir or a work folder.
+                    Purging files from that would obviously be a disaster
+                    so this is an extra step to help avoid that scenario.
+        ID:         Typically the asset ID (UUID) of the asset being
+                    saved but can be anything valid for a filename
+        Extra Info: A field for use in the future that can be used
+                    to store extra identifiers - e.g. the discard
+                    level of a JPEG2000 file
+        Asset Type: A text string created from the LLAssetType enum
+                    that identifies the type of asset being stored.
+        .asset      A file extension of .asset is used to help
+                    identify this as a Viewer asset file
+ * 2/ The time of last access for a file can be updated instantly
+ *    for file reads and automatically as part of the file writes.
+ * 3/ The purge algorithm collects a list of all files in the
+ *    directory, sorts them by date of last access (write) and then
+ *    deletes any files based on age until the total size of all
+ *    the files is less than the maximum size specified.
+ * 4/ An LLSingleton idiom is used since there will only ever be
+ *    a single cache and we want to access it from numerous places.
+ * 5/ Performance on my modest system seems very acceptable. For
+ *    example, in testing, I was able to purge a directory of
+ *    10,000 files, deleting about half of them in ~ 1700ms. For
+ *    the same sized directory of files, writing the last updated
+ *    time to each took less than 600ms indicating that this
+ *    important part of the mechanism has almost no overhead.
  *
  * $LicenseInfo:firstyear=2009&license=viewerlgpl$
  * Second Life Viewer Source Code
@@ -33,10 +68,32 @@ class LLDiskCache :
     public LLParamSingleton<LLDiskCache>
 {
     public:
+        /**
+         * Since this is using the LLSingleton pattern but we
+         * want to allow the constructor to be called first
+         * with various parameters, we also invoke the
+         * LLParamSingleton idiom and use it to initialize
+         * the class via a call in LLAppViewer.
+         */
         LLSINGLETON(LLDiskCache,
+                    /**
+                     * The full name of the cache folder - typically a
+                     * a child of the main Viewer cache directory. Defined
+                     * by the setting at 'DiskCacheDirName'
+                     */
                     const std::string cache_dir,
+                    /**
+                     * The maximum size of the cache in bytes - Defined by
+                     * the setting at 'DiskCacheMaxSizeMB' (* 1024 * 1024)
+                     */
                     const int max_size_bytes,
+                    /**
+                     * A flag that enables extra cache debugging so that
+                     * if there are bugs, we can ask uses to enable this
+                     * setting and send us their logs
+                     */
                     const bool enable_cache_debug_info);
+
         virtual ~LLDiskCache() = default;
 
     public:
@@ -54,7 +111,7 @@ class LLDiskCache :
         /**
          * Update the "last write time" of a file to "now". This must be called whenever a
          * file in the cache is read (not written) so that the last time the file was
-         * accessed which is used in the mechanism for purging the cache, is up to date.
+         * accessed is up to date (This is used in the mechanism for purging the cache)
          */
         void updateFileAccessTime(const std::string file_path);
 
@@ -65,7 +122,9 @@ class LLDiskCache :
         void purge();
 
         /**
-         * Clear the cache by removing the files in the cache directory individually
+         * Clear the cache by removing all the files in the specified cache
+         * directory individually. Only the files that contain a prefix defined
+         * by mCacheFilenamePrefix will be removed.
          */
         void clearCache(const std::string cache_dir);
 
-- 
GitLab