Anomaly detection for count reads from config

author Bookatz <bookatz@google.com>

Thu, 19 Oct 2017 17:13:49 +0000 (10:13 -0700)

committer Bookatz <bookatz@google.com>

Mon, 23 Oct 2017 20:35:01 +0000 (13:35 -0700)
author Bookatz <bookatz@google.com>
Thu, 19 Oct 2017 17:13:49 +0000 (10:13 -0700)
committer Bookatz <bookatz@google.com>
Mon, 23 Oct 2017 20:35:01 +0000 (13:35 -0700)
diff --git a/cmds/statsd/src/config/ConfigManager.cpp b/cmds/statsd/src/config/ConfigManager.cpp

index 88c7ebf..9ca7d62 100644 (file)
--- a/cmds/statsd/src/config/ConfigManager.cpp
+++ b/cmds/statsd/src/config/ConfigManager.cpp
@@ -144,6 +144,12 @@ static StatsdConfig build_fake_config() {
      metric->set_what("SCREEN_TURNED_ON");
      metric->mutable_bucket()->set_bucket_size_millis(30 * 1000L);
  
+    // Anomaly threshold for screen-on count.
+    Alert* alert = metric->add_alerts();
+    alert->set_number_of_buckets(6);
+    alert->set_trigger_if_sum_gt(10);
+    alert->set_refractory_period_secs(30);
+
      // Count process state changes, slice by uid.
      metric = config.add_count_metric();
      metric->set_metric_id(2);
diff --git a/cmds/statsd/src/metrics/CountAnomalyTracker.cpp b/cmds/statsd/src/metrics/CountAnomalyTracker.cpp

index e1c2b8b..7aa748f 100644 (file)
--- a/cmds/statsd/src/metrics/CountAnomalyTracker.cpp
+++ b/cmds/statsd/src/metrics/CountAnomalyTracker.cpp
@@ -17,23 +17,22 @@
  #define DEBUG true  // STOPSHIP if true
  #include "Log.h"
  
-#define VLOG(...) \
-    if (DEBUG) ALOGD(__VA_ARGS__);
-
  #include "CountAnomalyTracker.h"
  
+#include <time.h>
+
  namespace android {
  namespace os {
  namespace statsd {
  
-CountAnomalyTracker::CountAnomalyTracker(size_t numBuckets, int thresholdGt)
-    : mNumPastBuckets(numBuckets > 0 ? numBuckets - 1 : 0),
-      mPastBuckets(mNumPastBuckets > 0 ? (new int[mNumPastBuckets]) : nullptr),
-      mThresholdGt(thresholdGt) {
+CountAnomalyTracker::CountAnomalyTracker(const Alert& alert)
+    : mAlert(alert),
+      mNumPastBuckets(alert.number_of_buckets() > 0 ? alert.number_of_buckets() - 1 : 0),
+      mPastBuckets(mNumPastBuckets > 0 ? (new int[mNumPastBuckets]) : nullptr) {
  
      VLOG("CountAnomalyTracker() called");
-    if (numBuckets < 1) {
-        ALOGE("Cannot create CountAnomalyTracker with %zu buckets", numBuckets);
+    if (alert.number_of_buckets() < 1) {
+        ALOGE("Cannot create CountAnomalyTracker with %d buckets", alert.number_of_buckets());
      }
      reset(); // initialization
  }
@@ -84,22 +83,45 @@ void CountAnomalyTracker::reset() {
  }
  
  void CountAnomalyTracker::checkAnomaly(int currentCount) {
-    // Note that this works even if mNumPastBuckets < 1 (since then
-    // mSumPastCounters = 0 so the comparison is based only on currentCount).
+    // Skip the check if in refractory period.
+    if (time(nullptr) < mRefractoryPeriodEndsSec) {
+        VLOG("Skipping anomaly check since within refractory period");
+        return;
+    }
  
      // TODO: Remove these extremely verbose debugging log.
-    VLOG("Checking whether %d + %d > %d",
-         mSumPastCounters, currentCount, mThresholdGt);
+    VLOG("Checking whether %d + %d > %lld",
+         mSumPastCounters, currentCount, mAlert.trigger_if_sum_gt());
  
-    if (mSumPastCounters + currentCount > mThresholdGt) {
+    // Note that this works even if mNumPastBuckets < 1 (since then
+    // mSumPastCounters = 0 so the comparison is based only on currentCount).
+    if (mAlert.has_trigger_if_sum_gt() &&
+            mSumPastCounters + currentCount > mAlert.trigger_if_sum_gt()) {
          declareAnomaly();
      }
  }
  
  void CountAnomalyTracker::declareAnomaly() {
-    // TODO: check that not in refractory period.
-    // TODO: Do something.
-    ALOGI("An anomaly has occurred!");
+    // TODO(guardrail): Consider guarding against too short refractory periods.
+    time_t currTime = time(nullptr);
+    mRefractoryPeriodEndsSec = currTime + mAlert.refractory_period_secs();
+
+    // TODO: If we had access to the bucket_size_millis, consider calling reset()
+    // if (mAlert.refractory_period_secs() > mNumPastBuckets * bucket_size_millis * 1000).
+
+    if (mAlert.has_incidentd_details()) {
+        const Alert_IncidentdDetails& incident = mAlert.incidentd_details();
+        if (incident.has_alert_name()) {
+            ALOGW("An anomaly (%s) has occurred! Informing incidentd.",
+                  incident.alert_name().c_str());
+        } else {
+            // TODO: Can construct a name based on the criteria (and/or relay the criteria).
+            ALOGW("An anomaly (nameless) has occurred! Informing incidentd.");
+        }
+        // TODO: Send incidentd_details.name and incidentd_details.incidentd_sections to incidentd
+    } else {
+        ALOGW("An anomaly has occurred! (But informing incidentd not requested.)");
+    }
  }
  
  }  // namespace statsd
diff --git a/cmds/statsd/src/metrics/CountAnomalyTracker.h b/cmds/statsd/src/metrics/CountAnomalyTracker.h

index 449dee9..13c1ccd 100644 (file)
--- a/cmds/statsd/src/metrics/CountAnomalyTracker.h
+++ b/cmds/statsd/src/metrics/CountAnomalyTracker.h
@@ -17,8 +17,10 @@
  #ifndef COUNT_ANOMALY_TRACKER_H
  #define COUNT_ANOMALY_TRACKER_H
  
-#include <stdlib.h>
+#include "frameworks/base/cmds/statsd/src/statsd_config.pb.h" // Alert
+
  #include <memory> // unique_ptr
+#include <stdlib.h>
  
  namespace android {
  namespace os {
@@ -26,7 +28,7 @@ namespace statsd {
  
  class CountAnomalyTracker {
  public:
-    CountAnomalyTracker(size_t numBuckets, int thresholdGt);
+    CountAnomalyTracker(const Alert& alert);
  
      virtual ~CountAnomalyTracker();
  
@@ -43,6 +45,9 @@ public:
      void checkAnomaly(int currentCount);
  
  private:
+    // statsd_config.proto Alert message that defines this tracker.
+    const Alert mAlert;
+
      // Number of past buckets. One less than the total number of buckets needed
      // for the anomaly detection (since the current bucket is not in the past).
      const size_t mNumPastBuckets;
@@ -59,8 +64,9 @@ private:
      // Index of the oldest bucket (i.e. the next bucket to be overwritten).
      size_t mOldestBucketIndex = 0;
  
-    // If mSumPastCounters + currentCount > mThresholdGt --> Anomaly!
-    const int mThresholdGt;
+    // Timestamp that the refractory period (if this anomaly was declared) ends, in seconds.
+    // If an anomaly was never declared, set to 0.
+    time_t mRefractoryPeriodEndsSec = 0;
  
      void declareAnomaly();
  
diff --git a/cmds/statsd/src/metrics/CountMetricProducer.cpp b/cmds/statsd/src/metrics/CountMetricProducer.cpp

index 28cb503..7bb9c8a 100644 (file)
--- a/cmds/statsd/src/metrics/CountMetricProducer.cpp
+++ b/cmds/statsd/src/metrics/CountMetricProducer.cpp
@@ -39,9 +39,7 @@ CountMetricProducer::CountMetricProducer(const CountMetric& metric, const int co
                                           const sp<ConditionWizard>& wizard)
      // TODO: Pass in the start time from MetricsManager, instead of calling time() here.
      : MetricProducer((time(nullptr) * NANO_SECONDS_IN_A_SECOND), conditionIndex, wizard),
-      mMetric(metric),
-      // TODO: read mAnomalyTracker parameters from config file.
-      mAnomalyTracker(6, 10) {
+      mMetric(metric) {
      // TODO: evaluate initial conditions. and set mConditionMet.
      if (metric.has_bucket() && metric.bucket().has_bucket_size_millis()) {
          mBucketSizeNs = metric.bucket().bucket_size_millis() * 1000 * 1000;
@@ -49,6 +47,17 @@ CountMetricProducer::CountMetricProducer(const CountMetric& metric, const int co
          mBucketSizeNs = LLONG_MAX;
      }
  
+    mAnomalyTrackers.reserve(metric.alerts_size());
+    for (int i = 0; i < metric.alerts_size(); i++) {
+        const Alert& alert = metric.alerts(i);
+        if (alert.trigger_if_sum_gt() > 0 && alert.number_of_buckets() > 0) {
+            mAnomalyTrackers.push_back(std::make_unique<CountAnomalyTracker>(alert));
+        } else {
+            ALOGW("Ignoring invalid count metric alert: threshold=%lld num_buckets= %d",
+                  alert.trigger_if_sum_gt(), alert.number_of_buckets());
+        }
+    }
+
      // TODO: use UidMap if uid->pkg_name is required
      mDimension.insert(mDimension.begin(), metric.dimension().begin(), metric.dimension().end());
  
@@ -148,6 +157,11 @@ void CountMetricProducer::onMatchedLogEventInternal(
          count++;
      }
  
+    // TODO: Re-add anomaly detection (similar to):
+    // for (auto& tracker : mAnomalyTrackers) {
+    //     tracker->checkAnomaly(mCounter);
+    // }
+
      VLOG("metric %lld %s->%d", mMetric.metric_id(), eventKey.c_str(),
           mCurrentSlicedCounter[eventKey]);
  }
@@ -176,6 +190,11 @@ void CountMetricProducer::flushCounterIfNeeded(const uint64_t eventTimeNs) {
               counter.second);
      }
  
+    // TODO: Re-add anomaly detection (similar to):
+    // for (auto& tracker : mAnomalyTrackers) {
+    //     tracker->addPastBucket(mCounter, numBucketsForward);
+    //}
+
      // Reset counters
      mCurrentSlicedCounter.clear();
  
diff --git a/cmds/statsd/src/metrics/CountMetricProducer.h b/cmds/statsd/src/metrics/CountMetricProducer.h

index 8bbdb01..340c830 100644 (file)
--- a/cmds/statsd/src/metrics/CountMetricProducer.h
+++ b/cmds/statsd/src/metrics/CountMetricProducer.h
@@ -60,14 +60,14 @@ protected:
  private:
      const CountMetric mMetric;
  
-    CountAnomalyTracker mAnomalyTracker;
-
      // Save the past buckets and we can clear when the StatsLogReport is dumped.
      std::unordered_map<HashableDimensionKey, std::vector<CountBucketInfo>> mPastBuckets;
  
      // The current bucket.
      std::unordered_map<HashableDimensionKey, int> mCurrentSlicedCounter;
  
+    vector<unique_ptr<CountAnomalyTracker>> mAnomalyTrackers;
+
      void flushCounterIfNeeded(const uint64_t newEventTime);
  };
  
diff --git a/cmds/statsd/src/metrics/MetricsManager.cpp b/cmds/statsd/src/metrics/MetricsManager.cpp

index c19d462..1ffa58b 100644 (file)
--- a/cmds/statsd/src/metrics/MetricsManager.cpp
+++ b/cmds/statsd/src/metrics/MetricsManager.cpp
@@ -43,7 +43,7 @@ MetricsManager::MetricsManager(const StatsdConfig& config) {
  }
  
  MetricsManager::~MetricsManager() {
-    VLOG("~MetricManager()");
+    VLOG("~MetricsManager()");
  }
  
  bool MetricsManager::isConfigValid() const {
diff --git a/cmds/statsd/src/metrics/MetricsManager.h b/cmds/statsd/src/metrics/MetricsManager.h

index 56f57d3..2f91460 100644 (file)
--- a/cmds/statsd/src/metrics/MetricsManager.h
+++ b/cmds/statsd/src/metrics/MetricsManager.h
@@ -51,11 +51,11 @@ private:
      std::set<int> mTagIds;
  
      // We only store the sp of LogMatchingTracker, MetricProducer, and ConditionTracker in
-    // MetricManager. There are relationship between them, and the relationship are denoted by index
-    // instead of pointers. The reasons for this are: (1) the relationship between them are
-    // complicated, store index instead of pointers reduce the risk of A holds B's sp, and B holds
-    // A's sp. (2) When we evaluate matcher results, or condition results, we can quickly get the
-    // related results from a cache using the index.
+    // MetricsManager. There are relationships between them, and the relationships are denoted by
+    // index instead of pointers. The reasons for this are: (1) the relationship between them are
+    // complicated, so storing index instead of pointers reduces the risk that A holds B's sp, and B
+    // holds A's sp. (2) When we evaluate matcher results, or condition results, we can quickly get
+    // the related results from a cache using the index.
  
      // Hold all the log entry matchers from the config.
      std::vector<sp<LogMatchingTracker>> mAllLogEntryMatchers;
diff --git a/cmds/statsd/src/metrics/metrics_manager_util.h b/cmds/statsd/src/metrics/metrics_manager_util.h

index 38149a6..f23df9f 100644 (file)
--- a/cmds/statsd/src/metrics/metrics_manager_util.h
+++ b/cmds/statsd/src/metrics/metrics_manager_util.h
@@ -81,7 +81,7 @@ bool initMetrics(
          std::unordered_map<int, std::vector<int>>& conditionToMetricMap,
          std::unordered_map<int, std::vector<int>>& trackerToMetricMap);
  
-// Initialize MetricManager from StatsdConfig.
+// Initialize MetricsManager from StatsdConfig.
  // Parameters are the members of MetricsManager. See MetricsManager for declaration.
  bool initStatsdConfig(const StatsdConfig& config, std::set<int>& allTagIds,
                        std::vector<sp<LogMatchingTracker>>& allLogEntryMatchers,
diff --git a/cmds/statsd/src/statsd_config.proto b/cmds/statsd/src/statsd_config.proto

index afb3f2b..113ac62 100644 (file)
--- a/cmds/statsd/src/statsd_config.proto
+++ b/cmds/statsd/src/statsd_config.proto
@@ -114,7 +114,7 @@ message Alert {
  
    optional int32 refractory_period_secs = 4;
  
-  optional int64 trigger_if_gt = 5;
+  optional int64 trigger_if_sum_gt = 5;
  }
  
  message EventMetric {
author	Bookatz <bookatz@google.com>
	Thu, 19 Oct 2017 17:13:49 +0000 (10:13 -0700)
committer	Bookatz <bookatz@google.com>
	Mon, 23 Oct 2017 20:35:01 +0000 (13:35 -0700)
cmds/statsd/src/config/ConfigManager.cpp		patch \| blob \| history
cmds/statsd/src/metrics/CountAnomalyTracker.cpp		patch \| blob \| history
cmds/statsd/src/metrics/CountAnomalyTracker.h		patch \| blob \| history
cmds/statsd/src/metrics/CountMetricProducer.cpp		patch \| blob \| history
cmds/statsd/src/metrics/CountMetricProducer.h		patch \| blob \| history
cmds/statsd/src/metrics/MetricsManager.cpp		patch \| blob \| history
cmds/statsd/src/metrics/MetricsManager.h		patch \| blob \| history
cmds/statsd/src/metrics/metrics_manager_util.h		patch \| blob \| history
cmds/statsd/src/statsd_config.proto		patch \| blob \| history