OSDN Git Service

Allow atoms to log fields in bytes format.
authorYao Chen <yaochen@google.com>
Wed, 24 Oct 2018 19:15:56 +0000 (12:15 -0700)
committerJack He <siyuanh@google.com>
Sat, 15 Dec 2018 21:51:08 +0000 (13:51 -0800)
There is an increasing number of requests to log data in complex formats to statsd, where the data
is not expected to be parsed or aggregated by statsd and is only uploaded as events.

Instead of making a hard-coded exception for each of these cases, this CL adds a feature to
annotate these fields in atoms.proto, and the stats-log-api-gen tool will produce byte array
interfaces for them.

Note that log_msg does not have a byte array type, only a string type, so when statsd receives the
log, these fields arrive as strings. Only when the atom is written to proto do we check whether the
field should be a bytes field and, if so, write it to protobuf in message format.

Change-Id: If53dd95c5826710c76d7fe982bf951a435dfc738
Merged-In: If53dd95c5826710c76d7fe982bf951a435dfc738
Fix: 118386797
Bug: 120635548
Test: unit test & manual test
(cherry picked from commit bbdd67d19f4912fbec00220b22e44c68eff5ab3f)

cmds/statsd/src/atom_field_options.proto
cmds/statsd/src/stats_log_util.cpp
tools/stats_log_api_gen/Collation.cpp
tools/stats_log_api_gen/Collation.h
tools/stats_log_api_gen/main.cpp
tools/stats_log_api_gen/test.proto
tools/stats_log_api_gen/test_collation.cpp

index a2a03b1..7dfe7d6 100644 (file)
@@ -64,10 +64,22 @@ message StateAtomFieldOption {
     optional StateField option = 1 [default = STATE_FIELD_UNSET];
 }
 
+// Used to generate StatsLog.write APIs.
+enum LogMode {
+    MODE_UNSET = 0;
+    // Log fields as their actual types e.g., all primary data types.
+    // Or fields that are hardcoded in stats_log_api_gen tool e.g., AttributionNode
+    MODE_AUTOMATIC = 1;
+    // Log fields in their proto binary format. These fields will not be parsed in statsd
+    MODE_BYTES = 2;
+}
+
 extend google.protobuf.FieldOptions {
     // Flags to decorate an atom that presents a state change.
     optional StateAtomFieldOption stateFieldOption = 50000;
 
     // Flags to decorate the uid fields in an atom.
     optional bool is_uid = 50001 [default = false];
+
+    optional LogMode log_mode = 50002 [default = MODE_AUTOMATIC];
 }
\ No newline at end of file
index a0ab3e4..444e5b7 100644 (file)
 #include <utils/Log.h>
 #include <utils/SystemClock.h>
 
+using android::util::AtomsInfo;
 using android::util::FIELD_COUNT_REPEATED;
 using android::util::FIELD_TYPE_BOOL;
+using android::util::FIELD_TYPE_FIXED64;
 using android::util::FIELD_TYPE_FLOAT;
 using android::util::FIELD_TYPE_INT32;
 using android::util::FIELD_TYPE_INT64;
-using android::util::FIELD_TYPE_UINT64;
-using android::util::FIELD_TYPE_FIXED64;
 using android::util::FIELD_TYPE_MESSAGE;
 using android::util::FIELD_TYPE_STRING;
+using android::util::FIELD_TYPE_UINT64;
 using android::util::ProtoOutputStream;
 
 namespace android {
@@ -294,8 +295,9 @@ void writeDimensionPathToProto(const std::vector<Matcher>& fieldMatchers,
 // }
 //
 //
-void writeFieldValueTreeToStreamHelper(const std::vector<FieldValue>& dims, size_t* index,
-                                       int depth, int prefix, ProtoOutputStream* protoOutput) {
+void writeFieldValueTreeToStreamHelper(int tagId, const std::vector<FieldValue>& dims,
+                                       size_t* index, int depth, int prefix,
+                                       ProtoOutputStream* protoOutput) {
     size_t count = dims.size();
     while (*index < count) {
         const auto& dim = dims[*index];
@@ -319,9 +321,31 @@ void writeFieldValueTreeToStreamHelper(const std::vector<FieldValue>& dims, size
                 case FLOAT:
                     protoOutput->write(FIELD_TYPE_FLOAT | fieldNum, dim.mValue.float_value);
                     break;
-                case STRING:
-                    protoOutput->write(FIELD_TYPE_STRING | fieldNum, dim.mValue.str_value);
+                case STRING: {
+                    bool isBytesField = false;
+                    // Bytes field is logged via string format in log_msg format. So here we check
+                    // if this string field is a byte field.
+                    std::map<int, std::vector<int>>::const_iterator itr;
+                    if (depth == 0 && (itr = AtomsInfo::kBytesFieldAtoms.find(tagId)) !=
+                                              AtomsInfo::kBytesFieldAtoms.end()) {
+                        const std::vector<int>& bytesFields = itr->second;
+                        for (int bytesField : bytesFields) {
+                            if (bytesField == fieldNum) {
+                                // This is a bytes field
+                                isBytesField = true;
+                                break;
+                            }
+                        }
+                    }
+                    if (isBytesField) {
+                        protoOutput->write(FIELD_TYPE_MESSAGE | fieldNum,
+                                           (const char*)dim.mValue.str_value.c_str(),
+                                           dim.mValue.str_value.length());
+                    } else {
+                        protoOutput->write(FIELD_TYPE_STRING | fieldNum, dim.mValue.str_value);
+                    }
                     break;
+                }
                 default:
                     break;
             }
@@ -337,7 +361,7 @@ void writeFieldValueTreeToStreamHelper(const std::vector<FieldValue>& dims, size
             }
             // Directly jump to the leaf value because the repeated position field is implied
             // by the position of the sub msg in the parent field.
-            writeFieldValueTreeToStreamHelper(dims, index, valueDepth,
+            writeFieldValueTreeToStreamHelper(tagId, dims, index, valueDepth,
                                               dim.mField.getPrefix(valueDepth), protoOutput);
             if (msg_token != 0) {
                 protoOutput->end(msg_token);
@@ -354,7 +378,7 @@ void writeFieldValueTreeToStream(int tagId, const std::vector<FieldValue>& value
     uint64_t atomToken = protoOutput->start(FIELD_TYPE_MESSAGE | tagId);
 
     size_t index = 0;
-    writeFieldValueTreeToStreamHelper(values, &index, 0, 0, protoOutput);
+    writeFieldValueTreeToStreamHelper(tagId, values, &index, 0, 0, protoOutput);
     protoOutput->end(atomToken);
 }
 
index ebdcdfd..61174d9 100644 (file)
@@ -47,7 +47,8 @@ AtomDecl::AtomDecl(const AtomDecl& that)
       fields(that.fields),
       primaryFields(that.primaryFields),
       exclusiveField(that.exclusiveField),
-      uidField(that.uidField) {}
+      uidField(that.uidField),
+      binaryFields(that.binaryFields) {}
 
 AtomDecl::AtomDecl(int c, const string& n, const string& m)
     :code(c),
@@ -116,6 +117,9 @@ java_type(const FieldDescriptor* field)
             if (field->message_type()->full_name() ==
                 "android.os.statsd.AttributionNode") {
               return JAVA_TYPE_ATTRIBUTION_CHAIN;
+            } else if (field->options().GetExtension(os::statsd::log_mode) ==
+                       os::statsd::LogMode::MODE_BYTES) {
+                return JAVA_TYPE_BYTE_ARRAY;
             } else {
                 return JAVA_TYPE_OBJECT;
             }
@@ -185,6 +189,8 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl,
   for (map<int, const FieldDescriptor *>::const_iterator it = fields.begin();
        it != fields.end(); it++) {
     const FieldDescriptor *field = it->second;
+    bool isBinaryField = field->options().GetExtension(os::statsd::log_mode) ==
+                         os::statsd::LogMode::MODE_BYTES;
 
     java_type_t javaType = java_type(field);
 
@@ -198,12 +204,19 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl,
                   field->name().c_str());
       errorCount++;
       continue;
-    } else if (javaType == JAVA_TYPE_BYTE_ARRAY) {
+    } else if (javaType == JAVA_TYPE_BYTE_ARRAY && !isBinaryField) {
       print_error(field, "Raw bytes type not allowed for field: %s\n",
                   field->name().c_str());
       errorCount++;
       continue;
     }
+
+    if (isBinaryField && javaType != JAVA_TYPE_BYTE_ARRAY) {
+      print_error(field, "Cannot mark field %s as bytes.\n",
+                  field->name().c_str());
+      errorCount++;
+      continue;
+    }
   }
 
   // Check that if there's an attribution chain, it's at position 1.
@@ -228,12 +241,16 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl,
        it != fields.end(); it++) {
     const FieldDescriptor *field = it->second;
     java_type_t javaType = java_type(field);
+    bool isBinaryField = field->options().GetExtension(os::statsd::log_mode) ==
+                         os::statsd::LogMode::MODE_BYTES;
 
     AtomField atField(field->name(), javaType);
     if (javaType == JAVA_TYPE_ENUM) {
       // All enums are treated as ints when it comes to function signatures.
       signature->push_back(JAVA_TYPE_INT);
       collate_enums(*field->enum_type(), &atField);
+    } else if (javaType == JAVA_TYPE_OBJECT && isBinaryField) {
+      signature->push_back(JAVA_TYPE_BYTE_ARRAY);
     } else {
       signature->push_back(javaType);
     }
@@ -275,6 +292,10 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl,
             errorCount++;
         }
     }
+    // Binary field validity is already checked above.
+    if (isBinaryField) {
+        atomDecl->binaryFields.push_back(it->first);
+    }
   }
 
   return errorCount;
index 5d2c302..a8b270c 100644 (file)
@@ -86,6 +86,8 @@ struct AtomDecl {
 
     int uidField = 0;
 
+    vector<int> binaryFields;
+
     AtomDecl();
     AtomDecl(const AtomDecl& that);
     AtomDecl(int code, const string& name, const string& message);
index e519909..597b055 100644 (file)
@@ -68,6 +68,8 @@ cpp_type_name(java_type_t type)
             return "double";
         case JAVA_TYPE_STRING:
             return "char const*";
+        case JAVA_TYPE_BYTE_ARRAY:
+            return "char const*";
         default:
             return "UNKNOWN";
     }
@@ -90,6 +92,8 @@ java_type_name(java_type_t type)
             return "double";
         case JAVA_TYPE_STRING:
             return "java.lang.String";
+        case JAVA_TYPE_BYTE_ARRAY:
+            return "byte[]";
         default:
             return "UNKNOWN";
     }
@@ -200,13 +204,40 @@ static int write_stats_log_cpp(FILE *out, const Atoms &atoms,
     }
 
     fprintf(out, "    return options;\n");
-    fprintf(out, "  }\n");
+    fprintf(out, "}\n");
 
     fprintf(out,
             "const std::map<int, StateAtomFieldOptions> "
             "AtomsInfo::kStateAtomsFieldOptions = "
             "getStateAtomFieldOptions();\n");
 
+    fprintf(out,
+            "static std::map<int, std::vector<int>> "
+            "getBinaryFieldAtoms() {\n");
+    fprintf(out, "    std::map<int, std::vector<int>> options;\n");
+    for (set<AtomDecl>::const_iterator atom = atoms.decls.begin();
+         atom != atoms.decls.end(); atom++) {
+        if (atom->binaryFields.size() == 0) {
+            continue;
+        }
+        fprintf(out,
+                "\n    // Adding binary fields for atom "
+                "(%d)%s\n",
+                atom->code, atom->name.c_str());
+
+        for (const auto& field : atom->binaryFields) {
+            fprintf(out, "    options[static_cast<int>(%s)].push_back(%d);\n",
+                    make_constant_name(atom->name).c_str(), field);
+        }
+    }
+
+    fprintf(out, "    return options;\n");
+    fprintf(out, "}\n");
+
+    fprintf(out,
+            "const std::map<int, std::vector<int>> "
+            "AtomsInfo::kBytesFieldAtoms = "
+            "getBinaryFieldAtoms();\n");
 
     fprintf(out, "int64_t lastRetryTimestampNs = -1;\n");
     fprintf(out, "const int64_t kMinRetryIntervalNs = NS_PER_SEC * 60 * 20; // 20 minutes\n");
@@ -600,6 +631,9 @@ write_stats_log_header(FILE* out, const Atoms& atoms, const AtomDecl &attributio
     fprintf(out,
             "  const static std::map<int, StateAtomFieldOptions> "
             "kStateAtomsFieldOptions;\n");
+    fprintf(out,
+            "  const static std::map<int, std::vector<int>> "
+            "kBytesFieldAtoms;");
     fprintf(out, "};\n");
 
     fprintf(out, "const static int kMaxPushedAtomId = %d;\n\n",
@@ -632,6 +666,8 @@ static void write_java_usage(FILE* out, const string& method_name, const string&
         field != atom.fields.end(); field++) {
         if (field->javaType == JAVA_TYPE_ATTRIBUTION_CHAIN) {
             fprintf(out, ", android.os.WorkSource workSource");
+        } else if (field->javaType == JAVA_TYPE_BYTE_ARRAY) {
+            fprintf(out, ", byte[] %s", field->name.c_str());
         } else {
             fprintf(out, ", %s %s", java_type_name(field->javaType), field->name.c_str());
         }
@@ -821,6 +857,8 @@ jni_type_name(java_type_t type)
             return "jdouble";
         case JAVA_TYPE_STRING:
             return "jstring";
+        case JAVA_TYPE_BYTE_ARRAY:
+            return "jbyteArray";
         default:
             return "UNKNOWN";
     }
@@ -868,6 +906,9 @@ jni_function_name(const string& method_name, const vector<java_type_t>& signatur
             case JAVA_TYPE_ATTRIBUTION_CHAIN:
               result += "_AttributionChain";
               break;
+            case JAVA_TYPE_BYTE_ARRAY:
+                result += "_bytes";
+                break;
             default:
                 result += "_UNKNOWN";
                 break;
@@ -893,6 +934,8 @@ java_type_signature(java_type_t type)
             return "D";
         case JAVA_TYPE_STRING:
             return "Ljava/lang/String;";
+        case JAVA_TYPE_BYTE_ARRAY:
+            return "[B";
         default:
             return "UNKNOWN";
     }
@@ -960,6 +1003,25 @@ write_stats_log_jni(FILE* out, const string& java_method_name, const string& cpp
                 fprintf(out, "    } else {\n");
                 fprintf(out, "        str%d = NULL;\n", argIndex);
                 fprintf(out, "    }\n");
+            } else if (*arg == JAVA_TYPE_BYTE_ARRAY) {
+                hadStringOrChain = true;
+                fprintf(out, "    jbyte* jbyte_array%d;\n", argIndex);
+                fprintf(out, "    const char* str%d;\n", argIndex);
+                fprintf(out, "    if (arg%d != NULL) {\n", argIndex);
+                fprintf(out,
+                        "        jbyte_array%d = "
+                        "env->GetByteArrayElements(arg%d, NULL);\n",
+                        argIndex, argIndex);
+                fprintf(out,
+                        "        str%d = "
+                        "reinterpret_cast<char*>(env->GetByteArrayElements(arg%"
+                        "d, NULL));\n",
+                        argIndex, argIndex);
+                fprintf(out, "    } else {\n");
+                fprintf(out, "        jbyte_array%d = NULL;\n", argIndex);
+                fprintf(out, "        str%d = NULL;\n", argIndex);
+                fprintf(out, "    }\n");
+
             } else if (*arg == JAVA_TYPE_ATTRIBUTION_CHAIN) {
                 hadStringOrChain = true;
                 for (auto chainField : attributionDecl.fields) {
@@ -1026,7 +1088,10 @@ write_stats_log_jni(FILE* out, const string& java_method_name, const string& cpp
                     }
                 }
             } else {
-                const char *argName = (*arg == JAVA_TYPE_STRING) ? "str" : "arg";
+                const char* argName = (*arg == JAVA_TYPE_STRING ||
+                                       *arg == JAVA_TYPE_BYTE_ARRAY)
+                                              ? "str"
+                                              : "arg";
                 fprintf(out, ", (%s)%s%d", cpp_type_name(*arg), argName, argIndex);
             }
             argIndex++;
@@ -1043,6 +1108,13 @@ write_stats_log_jni(FILE* out, const string& java_method_name, const string& cpp
                 fprintf(out, "        env->ReleaseStringUTFChars(arg%d, str%d);\n",
                         argIndex, argIndex);
                 fprintf(out, "    }\n");
+            } else if (*arg == JAVA_TYPE_BYTE_ARRAY) {
+                fprintf(out, "    if (str%d != NULL) { \n", argIndex);
+                fprintf(out,
+                        "        env->ReleaseByteArrayElements(arg%d, "
+                        "jbyte_array%d, 0);\n",
+                        argIndex, argIndex);
+                fprintf(out, "    }\n");
             } else if (*arg == JAVA_TYPE_ATTRIBUTION_CHAIN) {
                 for (auto chainField : attributionDecl.fields) {
                     if (chainField.javaType == JAVA_TYPE_INT) {
index 264a865..188b765 100644 (file)
@@ -109,6 +109,28 @@ message BadAttributionNodePosition {
   oneof event { BadAttributionNodePositionAtom bad = 1; }
 }
 
+message GoodEventWithBinaryFieldAtom {
+    oneof event { GoodBinaryFieldAtom field1 = 1; }
+}
+
+message ComplexField {
+    optional string str = 1;
+}
+
+message GoodBinaryFieldAtom {
+    optional int32 field1 = 1;
+    optional ComplexField bf = 2 [(android.os.statsd.log_mode) = MODE_BYTES];
+}
+
+message BadEventWithBinaryFieldAtom {
+    oneof event { BadBinaryFieldAtom field1 = 1; }
+}
+
+message BadBinaryFieldAtom {
+    optional int32 field1 = 1;
+    optional ComplexField bf = 2;
+}
+
 message BadStateAtoms {
     oneof event {
         BadStateAtom1 bad1 = 1;
index 1936d96..ad3bffa 100644 (file)
@@ -212,5 +212,19 @@ TEST(CollationTest, PassOnGoodStateAtomOptions) {
     EXPECT_EQ(0, errorCount);
 }
 
+TEST(CollationTest, PassOnGoodBinaryFieldAtom) {
+    Atoms atoms;
+    int errorCount =
+            collate_atoms(GoodEventWithBinaryFieldAtom::descriptor(), &atoms);
+    EXPECT_EQ(0, errorCount);
+}
+
+TEST(CollationTest, FailOnBadBinaryFieldAtom) {
+    Atoms atoms;
+    int errorCount =
+            collate_atoms(BadEventWithBinaryFieldAtom::descriptor(), &atoms);
+    EXPECT_TRUE(errorCount > 0);
+}
+
 }  // namespace stats_log_api_gen
 }  // namespace android
\ No newline at end of file