OSDN Git Service

ANDROID: Add a tracepoint for mapping inode to full path
author Joel Fernandes <joelaf@google.com>
Thu, 14 Mar 2019 21:32:39 +0000 (17:32 -0400)
committer Joel Fernandes <joelaf@google.com>
Tue, 27 Aug 2019 23:26:28 +0000 (23:26 +0000)
This will be used by eBPF and the iorapd project for high-speed lookup
of file paths from inode/dev numbers. See the inodemap CL for more
details about how eBPF and iorapd use the tracepoint.

This is planned to be used by the inodemap BPF program. Also, ART folks
have been using this tracepoint for debugging "unknown inode number"
issues.

The tracepoint will be out of tree, and not sent upstream, since VFS
developers strictly do not accept tracepoints.

Test: Run "find /" command in emulator and measure completion time
with/without the tracepoint. find does a flood of lookups, which
stresses the tracepoint. No performance change observed.
Test: eBPF prototypes (wip) successfully read data from the tracepoint.

OOT Bug: 139663736
Bug: 135143784
Bug: 137393447
Change-Id: I657f374659673a9c8853530d73c0622dbdbab146
Signed-off-by: Joel Fernandes <joelaf@google.com>
(cherry picked from commit 987732fcbbe3ea78368c28e5a0d0d236be61420f)
(cherry picked from commit 2104283a8d7349011860d9bffb8a3d25456e6d20)

fs/namei.c
include/trace/events/namei.h [new file with mode: 0644]

index 9071bb1..1b03c6a 100644 (file)
@@ -40,6 +40,9 @@
 #include "internal.h"
 #include "mount.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/namei.h>
+
 /* [Feb-1997 T. Schoebel-Theuer]
  * Fundamental changes in the pathname lookup mechanisms (namei)
  * were necessary because of omirr.  The reason is that omirr needs
@@ -784,6 +787,81 @@ static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
        return dentry->d_op->d_revalidate(dentry, flags);
 }
 
+#define INIT_PATH_SIZE 64
+
+/*
+ * success_walk_trace - report a completed path walk via the inodepath event
+ * @nd: lookup state; nd->path is rendered to text, nd->inode is passed along
+ *
+ * The path is produced by d_path() and then passed through mangle_path()
+ * with "\n" as the escape set.  The first attempt uses an on-stack buffer;
+ * each retry doubles a GFP_KERNEL heap buffer until the name fits or
+ * PATH_MAX is exceeded.  On failure an "error:..." string is traced in
+ * place of the path.  Error strings are always formatted into the stack
+ * buffer with a bounded snprintf().
+ */
+static void success_walk_trace(struct nameidata *nd)
+{
+       char stack_buf[INIT_PATH_SIZE];
+       char *heap_buf = NULL;
+       char *out = stack_buf;  /* string handed to the tracepoint */
+       int size;
+
+       /* When eBPF/ tracepoint is disabled, keep overhead low. */
+       if (!trace_inodepath_enabled())
+               return;
+
+       for (size = INIT_PATH_SIZE; size <= PATH_MAX; size <<= 1) {
+               char *p, *end;
+
+               /* All but the first attempt use a fresh heap buffer. */
+               if (size != INIT_PATH_SIZE) {
+                       kfree(heap_buf);
+                       heap_buf = kmalloc(size, GFP_KERNEL);
+                       if (!heap_buf) {
+                               /* Old buffer is freed; use the stack. */
+                               out = stack_buf;
+                               snprintf(stack_buf, sizeof(stack_buf),
+                                        "error:buf_alloc_failed");
+                               break;
+                       }
+                       out = heap_buf;
+               }
+
+               p = d_path(&nd->path, out, size);
+               if (IS_ERR(p)) {
+                       /* Name did not fit: retry with twice the space. */
+                       if (PTR_ERR(p) == -ENAMETOOLONG)
+                               continue;
+
+                       out = stack_buf;
+                       snprintf(stack_buf, sizeof(stack_buf),
+                                "error:d_path_failed_%ld", -PTR_ERR(p));
+                       break;
+               }
+
+               /* A NULL return from mangle_path() is handled like a
+                * too-long name: retry with twice the space.
+                */
+               end = mangle_path(out, p, "\n");
+               if (end) {
+                       *end = '\0';
+                       break;
+               }
+       }
+
+       /* Every size up to PATH_MAX was tried without success. */
+       if (size > PATH_MAX) {
+               out = stack_buf;
+               snprintf(stack_buf, sizeof(stack_buf),
+                        "error:d_path_name_too_long");
+       }
+
+       trace_inodepath(nd->inode, out);
+       /* kfree(NULL) is a no-op, so no check is needed. */
+       kfree(heap_buf);
+}
+
 /**
  * complete_walk - successful completion of path walk
  * @nd:  pointer nameidata
@@ -806,15 +884,21 @@ static int complete_walk(struct nameidata *nd)
                        return -ECHILD;
        }
 
-       if (likely(!(nd->flags & LOOKUP_JUMPED)))
+       if (likely(!(nd->flags & LOOKUP_JUMPED))) {
+               success_walk_trace(nd);
                return 0;
+       }
 
-       if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE)))
+       if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE))) {
+               success_walk_trace(nd);
                return 0;
+       }
 
        status = dentry->d_op->d_weak_revalidate(dentry, nd->flags);
-       if (status > 0)
+       if (status > 0) {
+               success_walk_trace(nd);
                return 0;
+       }
 
        if (!status)
                status = -ESTALE;
diff --git a/include/trace/events/namei.h b/include/trace/events/namei.h
new file mode 100644 (file)
index 0000000..e8c3e21
--- /dev/null
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM namei
+
+#if !defined(_TRACE_INODEPATH_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_INODEPATH_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/device.h>
+#include <linux/kdev_t.h>
+
+TRACE_EVENT(inodepath, /* records dev, ino and path for one inode */
+               TP_PROTO(struct inode *inode, char *path),
+
+               TP_ARGS(inode, path),
+
+               TP_STRUCT__entry(
+                       /* Record layout:
+                        *   ino  - copied from inode->i_ino
+                        *   dev  - copied from inode->i_sb->s_dev and
+                        *          printed as major:minor
+                        *   path - copy of the string passed as @path
+                        *
+                        * dev_t and ino_t are arch dependent bit width,
+                        * so both are stored as unsigned long.
+                        * NOTE(review): unsigned long is itself only
+                        * 64 bits wide on 64-bit builds; confirm that
+                        * consumers do not assume 64-bit fields on
+                        * 32-bit kernels.
+                        */
+                       __field(unsigned long, ino)
+                       __field(unsigned long, dev)
+                       __string(path, path)
+               ),
+
+               TP_fast_assign(
+                       __entry->ino = inode->i_ino;
+                       __entry->dev = inode->i_sb->s_dev;
+                       __assign_str(path, path);
+               ),
+
+               TP_printk("dev %d:%d ino=%lu path=%s",
+                       MAJOR(__entry->dev), MINOR(__entry->dev),
+                       __entry->ino, __get_str(path))
+);
+#endif /* _TRACE_INODEPATH_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>