filesystem: Add LRU cache and full file scan for /data

OnInodes resolves all inodes. First looks in the static file map created once
in probes producer through CreateStaticDeviceToInodeMap.
Then searches in the LRU cache, currently sized 100.
Falls back on a full filesystem scan if there are still unresolved inodes.

Each inode is added to the InodeFileMap proto as resolved. If not resolved
even after the full file scan, just the inode number is added to the trace.
This ~shouldn't happen~ after a full scan.

Updated the LRU inode cache to take InodeMapValue for values.

Bug: 74584014
Change-Id: I20ecc75bb194909787ae42ccb49a26902d24cf52
diff --git a/src/traced/probes/filesystem/inode_file_data_source.cc b/src/traced/probes/filesystem/inode_file_data_source.cc
index bcb4bf7..77e85db 100644
--- a/src/traced/probes/filesystem/inode_file_data_source.cc
+++ b/src/traced/probes/filesystem/inode_file_data_source.cc
@@ -82,56 +82,129 @@
   }
 }
 
-void CreateDeviceToInodeMap(
+void CreateStaticDeviceToInodeMap(
     const std::string& root_directory,
-    std::map<BlockDeviceID, std::map<Inode, InodeMapValue>>* block_device_map) {
-  ScanFilesDFS(
-      root_directory,
-      [&block_device_map](BlockDeviceID block_device_id, Inode inode_number,
-                          const std::string& path,
-                          protos::pbzero::InodeFileMap_Entry_Type type) {
-        std::map<Inode, InodeMapValue>& inode_map =
-            (*block_device_map)[block_device_id];
-        inode_map[inode_number].SetType(type);
-        inode_map[inode_number].AddPath(path);
-        return true;
-      });
+    std::map<BlockDeviceID, std::map<Inode, InodeMapValue>>* static_file_map) {
+  ScanFilesDFS(root_directory,
+               [static_file_map](BlockDeviceID block_device_id,
+                                 Inode inode_number, const std::string& path,
+                                 protos::pbzero::InodeFileMap_Entry_Type type) {
+                 std::map<Inode, InodeMapValue>& inode_map =
+                     (*static_file_map)[block_device_id];
+                 inode_map[inode_number].SetType(type);
+                 inode_map[inode_number].AddPath(path);
+                 return true;
+               });
+}
+
+void FillInodeEntry(InodeFileMap* destination,
+                    Inode inode_number,
+                    const InodeMapValue& inode_map_value) {
+  auto* entry = destination->add_entries();
+  entry->set_inode_number(inode_number);
+  entry->set_type(inode_map_value.type());
+  for (const auto& path : inode_map_value.paths())
+    entry->add_paths(path.c_str());
 }
 
 InodeFileDataSource::InodeFileDataSource(
     TracingSessionID id,
-    std::map<BlockDeviceID, std::map<Inode, InodeMapValue>>*
-        system_partition_files,
+    std::map<BlockDeviceID, std::map<Inode, InodeMapValue>>* static_file_map,
+    LRUInodeCache* cache,
     std::unique_ptr<TraceWriter> writer)
     : session_id_(id),
-      system_partition_files_(system_partition_files),
+      static_file_map_(static_file_map),
+      cache_(cache),
       writer_(std::move(writer)),
       weak_factory_(this) {}
 
-bool InodeFileDataSource::AddInodeFileMapEntry(
-    InodeFileMap* inode_file_map,
-    BlockDeviceID block_device_id,
-    Inode inode_number,
-    const std::map<BlockDeviceID, std::map<Inode, InodeMapValue>>&
-        current_partition_map) {
-  auto block_device_entry = current_partition_map.find(block_device_id);
-  if (block_device_entry != current_partition_map.end()) {
-    auto inode_map = block_device_entry->second.find(inode_number);
-    if (inode_map != block_device_entry->second.end()) {
-      auto* entry = inode_file_map->add_entries();
-      entry->set_inode_number(inode_number);
-      entry->set_type(inode_map->second.type());
-      for (const auto& path : inode_map->second.paths())
-        entry->add_paths(path.c_str());
-      return true;
-    }
+void InodeFileDataSource::AddInodesFromFilesystemScan(
+    const std::string& root_directory,
+    BlockDeviceID provided_block_device_id,
+    std::set<Inode>* inode_numbers,
+    LRUInodeCache* cache,
+    InodeFileMap* destination) {
+  if (inode_numbers->empty())
+    return;
+  ScanFilesDFS(
+      root_directory,
+      [provided_block_device_id, inode_numbers, cache, destination](
+          BlockDeviceID block_device_id, Inode inode_number,
+          const std::string& path,
+          protos::pbzero::InodeFileMap_Entry_Type type) {
+        if (provided_block_device_id != block_device_id)
+          return true;
+        if (inode_numbers->find(inode_number) == inode_numbers->end())
+          return true;
+        std::pair<BlockDeviceID, Inode> key{block_device_id, inode_number};
+        auto cur_val = cache->Get(key);
+        if (cur_val != nullptr) {
+          cur_val->AddPath(path);
+          FillInodeEntry(destination, inode_number, *cur_val);
+        } else {
+          InodeMapValue new_val(InodeMapValue(type, {path}));
+          cache->Insert(key, new_val);
+          FillInodeEntry(destination, inode_number, new_val);
+        }
+        inode_numbers->erase(inode_number);
+        if (inode_numbers->empty())
+          return false;
+        return true;
+      });
+
+  // Could not be found, just add the inode number
+  PERFETTO_DLOG("%zu inodes not found", inode_numbers->size());
+  for (const auto& unresolved_inode : *inode_numbers) {
+    auto* entry = destination->add_entries();
+    entry->set_inode_number(unresolved_inode);
   }
-  return false;
+}
+
+void InodeFileDataSource::AddInodesFromStaticMap(BlockDeviceID block_device_id,
+                                                 std::set<Inode>* inode_numbers,
+                                                 InodeFileMap* destination) {
+  // Check if block device id exists in static file map
+  auto static_map_entry = static_file_map_->find(block_device_id);
+  if (static_map_entry == static_file_map_->end())
+    return;
+
+  uint64_t system_found_count = 0;
+  for (auto it = inode_numbers->begin(); it != inode_numbers->end();) {
+    Inode inode_number = *it;
+    // Check if inode number exists in static file map for given block device id
+    auto inode_it = static_map_entry->second.find(inode_number);
+    if (inode_it == static_map_entry->second.end()) {
+      ++it;
+      continue;
+    }
+    system_found_count++;
+    it = inode_numbers->erase(it);
+    FillInodeEntry(destination, inode_number, inode_it->second);
+  }
+  PERFETTO_DLOG("%" PRIu64 " inodes found in static file map",
+                system_found_count);
+}
+
+void InodeFileDataSource::AddInodesFromLRUCache(BlockDeviceID block_device_id,
+                                                std::set<Inode>* inode_numbers,
+                                                InodeFileMap* destination) {
+  uint64_t cache_found_count = 0;
+  for (auto it = inode_numbers->begin(); it != inode_numbers->end();) {
+    Inode inode_number = *it;
+    auto value = cache_->Get(std::make_pair(block_device_id, inode_number));
+    if (value == nullptr) {
+      ++it;
+      continue;
+    }
+    cache_found_count++;
+    it = inode_numbers->erase(it);
+    FillInodeEntry(destination, inode_number, *value);
+  }
+  PERFETTO_DLOG("%" PRIu64 " inodes found in cache", cache_found_count);
 }
 
 void InodeFileDataSource::OnInodes(
     const std::vector<std::pair<Inode, BlockDeviceID>>& inodes) {
-  PERFETTO_DLOG("Saw FtraceBundle with %zu inodes.", inodes.size());
   if (mount_points_.empty()) {
     mount_points_ = ParseMounts();
   }
@@ -142,37 +215,35 @@
     BlockDeviceID block_device_id = inodes_pair.second;
     inode_file_maps[block_device_id].emplace(inode_number);
   }
+  PERFETTO_DLOG("Saw %zu block devices.", inode_file_maps.size());
+
   // Write a TracePacket with an InodeFileMap proto for each block device id
   for (const auto& inode_file_map_data : inode_file_maps) {
     BlockDeviceID block_device_id = inode_file_map_data.first;
     std::set<Inode> inode_numbers = inode_file_map_data.second;
+    PERFETTO_DLOG("Saw %zu unique inode numbers.", inode_numbers.size());
 
     // New TracePacket for each InodeFileMap
     auto trace_packet = writer_->NewTracePacket();
     auto inode_file_map = trace_packet->set_inode_file_map();
 
-    // Add block device id
+    // Add block device id to InodeFileMap
     inode_file_map->set_block_device_id(block_device_id);
 
-    // Add mount points
+    // Add mount points to InodeFileMap
     auto range = mount_points_.equal_range(block_device_id);
     for (std::multimap<BlockDeviceID, std::string>::iterator it = range.first;
          it != range.second; ++it)
       inode_file_map->add_mount_points(it->second.c_str());
 
-    // Add entries for inodes in system
-    std::map<BlockDeviceID, std::set<Inode>> data_partition_inodes;
-    for (const auto& inode_number : inode_numbers) {
-      // Search in /system partition and add to InodeFileMap if found
-      bool in_system =
-          AddInodeFileMapEntry(inode_file_map, block_device_id, inode_number,
-                               *system_partition_files_);
-      if (!in_system) {
-        // TODO(fmayer): Add LRU and check before adding inode for full scan
-        data_partition_inodes[block_device_id].emplace(inode_number);
-      }
-    }
-
+    // Add entries to InodeFileMap as inodes are found and resolved to their
+    // paths/type
+    AddInodesFromStaticMap(block_device_id, &inode_numbers, inode_file_map);
+    AddInodesFromLRUCache(block_device_id, &inode_numbers, inode_file_map);
+    // TODO(azappone): Make root directory a mount point
+    std::string root_directory = "/data";
+    AddInodesFromFilesystemScan(root_directory, block_device_id, &inode_numbers,
+                                cache_, inode_file_map);
     trace_packet->Finalize();
   }
 }
diff --git a/src/traced/probes/filesystem/inode_file_data_source.h b/src/traced/probes/filesystem/inode_file_data_source.h
index af3efb9..db6d887 100644
--- a/src/traced/probes/filesystem/inode_file_data_source.h
+++ b/src/traced/probes/filesystem/inode_file_data_source.h
@@ -29,6 +29,7 @@
 #include "perfetto/tracing/core/basic_types.h"
 #include "perfetto/tracing/core/trace_writer.h"
 #include "src/traced/probes/filesystem/fs_mount.h"
+#include "src/traced/probes/filesystem/lru_inode_cache.h"
 
 #include "perfetto/trace/filesystem/inode_file_map.pbzero.h"
 
@@ -37,21 +38,6 @@
 using InodeFileMap = protos::pbzero::InodeFileMap;
 class TraceWriter;
 
-class InodeMapValue {
- public:
-  protos::pbzero::InodeFileMap_Entry_Type type() const { return entry_type_; }
-  std::set<std::string> paths() const { return paths_; }
-  void SetType(protos::pbzero::InodeFileMap_Entry_Type entry_type) {
-    entry_type_ = entry_type;
-  }
-  void SetPaths(std::set<std::string> paths) { paths_ = paths; }
-  void AddPath(std::string path) { paths_.emplace(path); }
-
- private:
-  protos::pbzero::InodeFileMap_Entry_Type entry_type_;
-  std::set<std::string> paths_;
-};
-
 void ScanFilesDFS(
     const std::string& root_directory,
     const std::function<bool(BlockDeviceID block_device_id,
@@ -59,33 +45,52 @@
                              const std::string& path,
                              protos::pbzero::InodeFileMap_Entry_Type type)>&);
 
-void CreateDeviceToInodeMap(
+// Creates block_device_map for /system partition
+void CreateStaticDeviceToInodeMap(
     const std::string& root_directory,
-    std::map<BlockDeviceID, std::map<Inode, InodeMapValue>>* block_device_map);
+    std::map<BlockDeviceID, std::map<Inode, InodeMapValue>>* static_file_map);
+
+void FillInodeEntry(InodeFileMap* destination,
+                    Inode inode_number,
+                    const InodeMapValue& inode_map_value);
 
 class InodeFileDataSource {
  public:
-  InodeFileDataSource(TracingSessionID,
-                      std::map<BlockDeviceID, std::map<Inode, InodeMapValue>>*
-                          system_partition_files,
-                      std::unique_ptr<TraceWriter> writer);
+  InodeFileDataSource(
+      TracingSessionID,
+      std::map<BlockDeviceID, std::map<Inode, InodeMapValue>>* static_file_map,
+      LRUInodeCache* cache,
+      std::unique_ptr<TraceWriter> writer);
 
   TracingSessionID session_id() const { return session_id_; }
   base::WeakPtr<InodeFileDataSource> GetWeakPtr() const;
 
+  // Called when Inodes are seen in the FtraceEventBundle
   void OnInodes(const std::vector<std::pair<Inode, BlockDeviceID>>& inodes);
 
-  bool AddInodeFileMapEntry(
-      InodeFileMap* inode_file_map,
-      BlockDeviceID block_device_id,
-      Inode inode,
-      const std::map<BlockDeviceID, std::map<Inode, InodeMapValue>>&
-          block_device_map);
+  // Filesystem scan starting from root directory to search for provided inode
+  // numbers. Adds all inode numbers to the InodeFileMap proto. Fills in cache
+  // for all inode numbers that get resolved from the scan.
+  void AddInodesFromFilesystemScan(const std::string& root_directory,
+                                   BlockDeviceID block_device_id,
+                                   std::set<Inode>* inode_numbers,
+                                   LRUInodeCache* cache,
+                                   InodeFileMap* destination);
+
+  // Search in /system partition and add inodes to InodeFileMap proto if found
+  void AddInodesFromStaticMap(BlockDeviceID block_device_id,
+                              std::set<Inode>* inode_numbers,
+                              InodeFileMap* destination);
+
+  // Search in LRUInodeCache and add inodes to InodeFileMap if found
+  void AddInodesFromLRUCache(BlockDeviceID block_device_id,
+                             std::set<Inode>* inode_numbers,
+                             InodeFileMap* destination);
 
  private:
   const TracingSessionID session_id_;
-  std::map<BlockDeviceID, std::map<Inode, InodeMapValue>>*
-      system_partition_files_;
+  std::map<BlockDeviceID, std::map<Inode, InodeMapValue>>* static_file_map_;
+  LRUInodeCache* cache_;
   std::multimap<BlockDeviceID, std::string> mount_points_;
   std::unique_ptr<TraceWriter> writer_;
   base::WeakPtrFactory<InodeFileDataSource> weak_factory_;  // Keep last.
diff --git a/src/traced/probes/filesystem/lru_inode_cache.cc b/src/traced/probes/filesystem/lru_inode_cache.cc
index ca9f30b..ee31566 100644
--- a/src/traced/probes/filesystem/lru_inode_cache.cc
+++ b/src/traced/probes/filesystem/lru_inode_cache.cc
@@ -17,9 +17,8 @@
 #include "src/traced/probes/filesystem/lru_inode_cache.h"
 
 namespace perfetto {
-namespace base {
 
-const LRUInodeCache::InodeValue* LRUInodeCache::Get(const InodeKey& k) {
+InodeMapValue* LRUInodeCache::Get(const InodeKey& k) {
   const auto& map_it = map_.find(k);
   if (map_it == map_.end()) {
     return nullptr;
@@ -29,17 +28,17 @@
   // We can borrow both elements of the pair stored in the list because
   // insert does not need them.
   Insert(map_it, std::move(list_entry->first), std::move(list_entry->second));
-  return &list_.cbegin()->second;
+  return &list_.begin()->second;
 }
 
-void LRUInodeCache::Insert(InodeKey k, InodeValue v) {
+void LRUInodeCache::Insert(InodeKey k, InodeMapValue v) {
   auto it = map_.find(k);
   return Insert(it, std::move(k), std::move(v));
 }
 
 void LRUInodeCache::Insert(typename MapType::iterator map_it,
                            InodeKey k,
-                           InodeValue v) {
+                           InodeMapValue v) {
   list_.emplace_front(k, std::move(v));
   if (map_it != map_.end()) {
     ListIteratorType& list_it = map_it->second;
@@ -56,5 +55,4 @@
     list_.erase(list_last_it);
   }
 }
-}  // namespace base
 }  // namespace perfetto
diff --git a/src/traced/probes/filesystem/lru_inode_cache.h b/src/traced/probes/filesystem/lru_inode_cache.h
index 762373d..0b36109 100644
--- a/src/traced/probes/filesystem/lru_inode_cache.h
+++ b/src/traced/probes/filesystem/lru_inode_cache.h
@@ -22,35 +22,34 @@
 #include <string>
 #include <tuple>
 
+#include "perfetto/traced/data_source_types.h"
+
 namespace perfetto {
-namespace base {
 
 // LRUInodeCache keeps up to |capacity| entries in a mapping from InodeKey
-// to InodeValue. This is used to map <block device, inode> tuples to file
+// to InodeMapValue. This is used to map <block device, inode> tuples to file
 // paths.
 class LRUInodeCache {
  public:
-  using InodeKey = std::pair<int64_t, int64_t>;
-  using InodeValue = std::string;
+  using InodeKey = std::pair<BlockDeviceID, Inode>;
 
   explicit LRUInodeCache(size_t capacity) : capacity_(capacity) {}
 
-  const LRUInodeCache::InodeValue* Get(const InodeKey& k);
-  void Insert(InodeKey k, LRUInodeCache::InodeValue v);
+  InodeMapValue* Get(const InodeKey& k);
+  void Insert(InodeKey k, InodeMapValue v);
 
  private:
-  using ItemType = std::pair<const InodeKey, const InodeValue>;
+  using ItemType = std::pair<const InodeKey, InodeMapValue>;
   using ListIteratorType = std::list<ItemType>::iterator;
   using MapType = std::map<const InodeKey, ListIteratorType>;
 
-  void Insert(MapType::iterator map_it, InodeKey k, InodeValue v);
+  void Insert(MapType::iterator map_it, InodeKey k, InodeMapValue v);
 
   const size_t capacity_;
   MapType map_;
   std::list<ItemType> list_;
 };
 
-}  // namespace base
 }  // namespace perfetto
 
 #endif  // SRC_TRACED_PROBES_FILESYSTEM_LRU_INODE_CACHE_H_
diff --git a/src/traced/probes/filesystem/lru_inode_cache_unittest.cc b/src/traced/probes/filesystem/lru_inode_cache_unittest.cc
index deb9286..14417ea 100644
--- a/src/traced/probes/filesystem/lru_inode_cache_unittest.cc
+++ b/src/traced/probes/filesystem/lru_inode_cache_unittest.cc
@@ -23,45 +23,62 @@
 #include <tuple>
 
 namespace perfetto {
-namespace base {
+
+bool operator==(const perfetto::InodeMapValue& lhs,
+                const perfetto::InodeMapValue& rhs);
+bool operator==(const perfetto::InodeMapValue& lhs,
+                const perfetto::InodeMapValue& rhs) {
+  return lhs.type() == rhs.type() && lhs.paths() == rhs.paths();
+}
+
 namespace {
 
 using ::testing::Eq;
 using ::testing::IsNull;
 using ::testing::Pointee;
 
-const std::pair<int64_t, int64_t> key1{0, 0};
-const std::pair<int64_t, int64_t> key2{0, 1};
-const std::pair<int64_t, int64_t> key3{0, 2};
+const std::pair<BlockDeviceID, Inode> key1{0, 0};
+const std::pair<BlockDeviceID, Inode> key2{0, 1};
+const std::pair<BlockDeviceID, Inode> key3{0, 2};
 
-constexpr char val1[] = "foo";
-constexpr char val2[] = "bar";
-constexpr char val3[] = "baz";
+InodeMapValue val1() {
+  return InodeMapValue(protos::pbzero::InodeFileMap_Entry_Type_DIRECTORY,
+                       std::set<std::string>{"Value 1"});
+}
+
+InodeMapValue val2() {
+  return InodeMapValue(protos::pbzero::InodeFileMap_Entry_Type_UNKNOWN,
+                       std::set<std::string>{"Value 2"});
+}
+
+InodeMapValue val3() {
+  return InodeMapValue(protos::pbzero::InodeFileMap_Entry_Type_UNKNOWN,
+                       std::set<std::string>{"Value 2"});
+}
 
 TEST(LRUInodeCacheTest, Basic) {
   LRUInodeCache cache(2);
-  cache.Insert(key1, val1);
-  EXPECT_THAT(cache.Get(key1), Pointee(Eq(val1)));
-  cache.Insert(key2, val2);
-  EXPECT_THAT(cache.Get(key1), Pointee(Eq(val1)));
-  EXPECT_THAT(cache.Get(key2), Pointee(Eq(val2)));
-  cache.Insert(key1, val2);
-  EXPECT_THAT(cache.Get(key1), Pointee(Eq(val2)));
+  cache.Insert(key1, val1());
+  EXPECT_THAT(cache.Get(key1), Pointee(Eq(val1())));
+  cache.Insert(key2, val2());
+  EXPECT_THAT(cache.Get(key1), Pointee(Eq(val1())));
+  EXPECT_THAT(cache.Get(key2), Pointee(Eq(val2())));
+  cache.Insert(key1, val2());
+  EXPECT_THAT(cache.Get(key1), Pointee(Eq(val2())));
 }
 
 TEST(LRUInodeCacheTest, Overflow) {
   LRUInodeCache cache(2);
-  cache.Insert(key1, val1);
-  cache.Insert(key2, val2);
-  EXPECT_THAT(cache.Get(key1), Pointee(Eq(val1)));
-  EXPECT_THAT(cache.Get(key2), Pointee(Eq(val2)));
-  cache.Insert(key3, val3);
+  cache.Insert(key1, val1());
+  cache.Insert(key2, val2());
+  EXPECT_THAT(cache.Get(key1), Pointee(Eq(val1())));
+  EXPECT_THAT(cache.Get(key2), Pointee(Eq(val2())));
+  cache.Insert(key3, val3());
   // key1 is the LRU and should be evicted.
   EXPECT_THAT(cache.Get(key1), IsNull());
-  EXPECT_THAT(cache.Get(key2), Pointee(Eq(val2)));
-  EXPECT_THAT(cache.Get(key3), Pointee(Eq(val3)));
+  EXPECT_THAT(cache.Get(key2), Pointee(Eq(val2())));
+  EXPECT_THAT(cache.Get(key3), Pointee(Eq(val3())));
 }
 
 }  // namespace
-}  // namespace base
 }  // namespace perfetto
diff --git a/src/traced/probes/probes_producer.cc b/src/traced/probes/probes_producer.cc
index 868ead9..a2591ab 100644
--- a/src/traced/probes/probes_producer.cc
+++ b/src/traced/probes/probes_producer.cc
@@ -186,10 +186,10 @@
   auto trace_writer = endpoint_->CreateTraceWriter(
       static_cast<BufferID>(source_config.target_buffer()));
   if (system_inodes_.empty())
-    CreateDeviceToInodeMap("/system/", &system_inodes_);
+    CreateStaticDeviceToInodeMap("/system/", &system_inodes_);
   auto file_map_source =
       std::unique_ptr<InodeFileDataSource>(new InodeFileDataSource(
-          session_id, &system_inodes_, std::move(trace_writer)));
+          session_id, &system_inodes_, &cache_, std::move(trace_writer)));
   file_map_sources_.emplace(id, std::move(file_map_source));
   AddWatchdogsTimer(id, source_config);
 }
diff --git a/src/traced/probes/probes_producer.h b/src/traced/probes/probes_producer.h
index 4fb5a76..1a0c777 100644
--- a/src/traced/probes/probes_producer.h
+++ b/src/traced/probes/probes_producer.h
@@ -33,6 +33,8 @@
 
 namespace perfetto {
 
+const uint64_t kLRUInodeCacheSize = 1000;
+
 class ProbesProducer : public Producer {
  public:
   ProbesProducer();
@@ -142,6 +144,7 @@
   std::map<DataSourceInstanceID, base::Watchdog::Timer> watchdogs_;
   std::map<DataSourceInstanceID, std::unique_ptr<InodeFileDataSource>>
       file_map_sources_;
+  LRUInodeCache cache_{kLRUInodeCacheSize};
   std::map<BlockDeviceID, std::map<Inode, InodeMapValue>> system_inodes_;
 };