services/common_time/clock_recovery.cpp

   1 /*
   2  * Copyright (C) 2011 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /*
  18  * A service that exchanges time synchronization information between
  19  * a master that defines a timeline and clients that follow the timeline.
  20  */
  21
  22 #define __STDC_LIMIT_MACROS
  23 #define LOG_TAG "common_time"
  24 #include <utils/Log.h>
  25 #include <stdint.h>
  26
  27 #include <common_time/local_clock.h>
  28 #include <assert.h>
  29
  30 #include "clock_recovery.h"
  31 #include "common_clock.h"
  32 #ifdef TIME_SERVICE_DEBUG
  33 #include "diag_thread.h"
  34 #endif
  35
  36 // Define log macro so we can make LOGV into LOGE when we are exclusively
  37 // debugging this code.
  38 #ifdef TIME_SERVICE_DEBUG
  39 #define LOG_TS ALOGE
  40 #else
  41 #define LOG_TS ALOGV
  42 #endif
  43
  44 namespace android {
  45
  46 ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock,
  47                                      CommonClock* common_clock) {
  48     assert(NULL != local_clock);
  49     assert(NULL != common_clock);
  50
  51     local_clock_  = local_clock;
  52     common_clock_ = common_clock;
  53
  54     local_clock_can_slew_ = local_clock_->initCheck() &&
  55                            (local_clock_->setLocalSlew(0) == OK);
  56     tgt_correction_ = 0;
  57     cur_correction_ = 0;
  58
  59     // Precompute the max rate at which we are allowed to change the VCXO
  60     // control.
  61     uint64_t N = 0x10000ull * 1000ull;
  62     uint64_t D = local_clock_->getLocalFreq() * kMinFullRangeSlewChange_mSec;
  63     LinearTransform::reduce(&N, &D);
  64     while ((N > INT32_MAX) || (D > UINT32_MAX)) {
  65         N >>= 1;
  66         D >>= 1;
  67         LinearTransform::reduce(&N, &D);
  68     }
  69     time_to_cur_slew_.a_to_b_numer = static_cast<int32_t>(N);
  70     time_to_cur_slew_.a_to_b_denom = static_cast<uint32_t>(D);
  71
  72     reset(true, true);
  73
  74 #ifdef TIME_SERVICE_DEBUG
  75     diag_thread_ = new DiagThread(common_clock_, local_clock_);
  76     if (diag_thread_ != NULL) {
  77         status_t res = diag_thread_->startWorkThread();
  78         if (res != OK)
  79             ALOGW("Failed to start A@H clock recovery diagnostic thread.");
  80     } else
  81         ALOGW("Failed to allocate diagnostic thread.");
  82 #endif
  83 }
  84
  85 ClockRecoveryLoop::~ClockRecoveryLoop() {
  86 #ifdef TIME_SERVICE_DEBUG
  87     diag_thread_->stopWorkThread();
  88 #endif
  89 }
  90
  91 // Constants.
  92 const float ClockRecoveryLoop::dT = 1.0;
  93 const float ClockRecoveryLoop::Kc = 1.0f;
  94 const float ClockRecoveryLoop::Ti = 15.0f;
  95 const float ClockRecoveryLoop::Tf = 0.05;
  96 const float ClockRecoveryLoop::bias_Fc = 0.01;
  97 const float ClockRecoveryLoop::bias_RC = (dT / (2 * 3.14159f * bias_Fc));
  98 const float ClockRecoveryLoop::bias_Alpha = (dT / (bias_RC + dT));
  99 const int64_t ClockRecoveryLoop::panic_thresh_ = 50000;
 100 const int64_t ClockRecoveryLoop::control_thresh_ = 10000;
 101 const float ClockRecoveryLoop::COmin = -100.0f;
 102 const float ClockRecoveryLoop::COmax = 100.0f;
 103 const uint32_t ClockRecoveryLoop::kMinFullRangeSlewChange_mSec = 300;
 104 const int ClockRecoveryLoop::kSlewChangeStepPeriod_mSec = 10;
 105
 106
 107 void ClockRecoveryLoop::reset(bool position, bool frequency) {
 108     Mutex::Autolock lock(&lock_);
 109     reset_l(position, frequency);
 110 }
 111
 112 uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data,
 113                                           uint32_t count) {
 114     uint32_t min_rtt = 0;
 115     for (uint32_t i = 1; i < count; ++i)
 116         if (data[min_rtt].rtt > data[i].rtt)
 117             min_rtt = i;
 118
 119     return min_rtt;
 120 }
 121
 122 bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time,
 123                                             int64_t nominal_common_time,
 124                                             int64_t rtt) {
 125     Mutex::Autolock lock(&lock_);
 126
 127     int64_t local_common_time = 0;
 128     common_clock_->localToCommon(local_time, &local_common_time);
 129     int64_t raw_delta = nominal_common_time - local_common_time;
 130
 131 #ifdef TIME_SERVICE_DEBUG
 132     ALOGE("local=%lld, common=%lld, delta=%lld, rtt=%lld\n",
 133          local_common_time, nominal_common_time,
 134          raw_delta, rtt);
 135 #endif
 136
 137     // If we have not defined a basis for common time, then we need to use these
 138     // initial points to do so.  In order to avoid significant initial error
 139     // from a particularly bad startup data point, we collect the first N data
 140     // points and choose the best of them before moving on.
 141     if (!common_clock_->isValid()) {
 142         if (startup_filter_wr_ < kStartupFilterSize) {
 143             DisciplineDataPoint& d =  startup_filter_data_[startup_filter_wr_];
 144             d.local_time = local_time;
 145             d.nominal_common_time = nominal_common_time;
 146             d.rtt = rtt;
 147             startup_filter_wr_++;
 148         }
 149
 150         if (startup_filter_wr_ == kStartupFilterSize) {
 151             uint32_t min_rtt = findMinRTTNdx(startup_filter_data_,
 152                     kStartupFilterSize);
 153
 154             common_clock_->setBasis(
 155                     startup_filter_data_[min_rtt].local_time,
 156                     startup_filter_data_[min_rtt].nominal_common_time);
 157         }
 158
 159         return true;
 160     }
 161
 162     int64_t observed_common;
 163     int64_t delta;
 164     float delta_f, dCO;
 165     int32_t tgt_correction;
 166
 167     if (OK != common_clock_->localToCommon(local_time, &observed_common)) {
 168         // Since we just checked to make certain that this conversion was valid,
 169         // and no one else in the system should be messing with it, if this
 170         // conversion is suddenly invalid, it is a good reason to panic.
 171         ALOGE("Failed to convert local time to common time in %s:%d",
 172                 __PRETTY_FUNCTION__, __LINE__);
 173         return false;
 174     }
 175
 176     // Implement a filter which should match NTP filtering behavior when a
 177     // client is associated with only one peer of lower stratum.  Basically,
 178     // always use the best of the N last data points, where best is defined as
 179     // lowest round trip time.  NTP uses an N of 8; we use a value of 6.
 180     //
 181     // TODO(johngro) : experiment with other filter strategies.  The goal here
 182     // is to mitigate the effects of high RTT data points which typically have
 183     // large asymmetries in the TX/RX legs.  Downside of the existing NTP
 184     // approach (particularly because of the PID controller we are using to
 185     // produce the control signal from the filtered data) are that the rate at
 186     // which discipline events are actually acted upon becomes irregular and can
 187     // become drawn out (the time between actionable event can go way up).  If
 188     // the system receives a strong high quality data point, the proportional
 189     // component of the controller can produce a strong correction which is left
 190     // in place for too long causing overshoot.  In addition, the integral
 191     // component of the system currently is an approximation based on the
 192     // assumption of a more or less homogeneous sampling of the error.  Its
 193     // unclear what the effect of undermining this assumption would be right
 194     // now.
 195
 196     // Two ideas which come to mind immediately would be to...
 197     // 1) Keep a history of more data points (32 or so) and ignore data points
 198     //    whose RTT is more than a certain number of standard deviations outside
 199     //    of the norm.
 200     // 2) Eliminate the PID controller portion of this system entirely.
 201     //    Instead, move to a system which uses a very wide filter (128 data
 202     //    points or more) with a sum-of-least-squares line fitting approach to
 203     //    tracking the long term drift.  This would take the place of the I
 204     //    component in the current PID controller.  Also use a much more narrow
 205     //    outlier-rejector filter (as described in #1) to drive a short term
 206     //    correction factor similar to the P component of the PID controller.
 207     assert(filter_wr_ < kFilterSize);
 208     filter_data_[filter_wr_].local_time           = local_time;
 209     filter_data_[filter_wr_].observed_common_time = observed_common;
 210     filter_data_[filter_wr_].nominal_common_time  = nominal_common_time;
 211     filter_data_[filter_wr_].rtt                  = rtt;
 212     filter_data_[filter_wr_].point_used           = false;
 213     uint32_t current_point = filter_wr_;
 214     filter_wr_ = (filter_wr_ + 1) % kFilterSize;
 215     if (!filter_wr_)
 216         filter_full_ = true;
 217
 218     uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_;
 219     uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end);
 220     // We only use packets with low RTTs for control. If the packet RTT
 221     // is less than the panic threshold, we can probably eat the jitter with the
 222     // control loop. Otherwise, take the packet only if it better than all
 223     // of the packets we have in the history. That way we try to track
 224     // something, even if it is noisy.
 225     if (current_point == min_rtt || rtt < control_thresh_) {
 226         delta_f = delta = nominal_common_time - observed_common;
 227
 228         last_error_est_valid_ = true;
 229         last_error_est_usec_ = delta;
 230
 231         // Compute the error then clamp to the panic threshold.  If we ever
 232         // exceed this amt of error, its time to panic and reset the system.
 233         // Given that the error in the measurement of the error could be as
 234         // high as the RTT of the data point, we don't actually panic until
 235         // the implied error (delta) is greater than the absolute panic
 236         // threashold plus the RTT.  IOW - we don't panic until we are
 237         // absoluely sure that our best case sync is worse than the absolute
 238         // panic threshold.
 239         int64_t effective_panic_thresh = panic_thresh_ + rtt;
 240         if ((delta > effective_panic_thresh) ||
 241             (delta < -effective_panic_thresh)) {
 242             // PANIC!!!
 243             reset_l(false, true);
 244             return false;
 245         }
 246
 247     } else {
 248         // We do not have a good packet to look at, but we also do not want to
 249         // free-run the clock at some crazy slew rate. So we guess the
 250         // trajectory of the clock based on the last controller output and the
 251         // estimated bias of our clock against the master.
 252         // The net effect of this is that CO == CObias after some extended
 253         // period of no feedback.
 254         delta_f = last_delta_f_ - dT*(CO - CObias);
 255         delta = delta_f;
 256     }
 257
 258     // Velocity form PI control equation.
 259     dCO = Kc * (1.0f + dT/Ti) * delta_f - Kc * last_delta_f_;
 260     CO += dCO * Tf; // Filter CO by applying gain <1 here.
 261
 262     // Save error terms for later.
 263     last_delta_f_ = delta_f;
 264
 265     // Clamp CO to +/- 100ppm.
 266     if (CO < COmin)
 267         CO = COmin;
 268     else if (CO > COmax)
 269         CO = COmax;
 270
 271     // Update the controller bias.
 272     CObias = bias_Alpha * CO + (1.0f - bias_Alpha) * lastCObias;
 273     lastCObias = CObias;
 274
 275     // Convert PPM to 16-bit int range. Add some guard band (-0.01) so we
 276     // don't get fp weirdness.
 277     tgt_correction = CO * 327.66;
 278
 279     // If there was a change in the amt of correction to use, update the
 280     // system.
 281     setTargetCorrection_l(tgt_correction);
 282
 283     LOG_TS("clock_loop %lld %f %f %f %d\n", raw_delta, delta_f, CO, CObias, tgt_correction);
 284
 285 #ifdef TIME_SERVICE_DEBUG
 286     diag_thread_->pushDisciplineEvent(
 287             local_time,
 288             observed_common,
 289             nominal_common_time,
 290             tgt_correction,
 291             rtt);
 292 #endif
 293
 294     return true;
 295 }
 296
 297 int32_t ClockRecoveryLoop::getLastErrorEstimate() {
 298     Mutex::Autolock lock(&lock_);
 299
 300     if (last_error_est_valid_)
 301         return last_error_est_usec_;
 302     else
 303         return ICommonClock::kErrorEstimateUnknown;
 304 }
 305
 306 void ClockRecoveryLoop::reset_l(bool position, bool frequency) {
 307     assert(NULL != common_clock_);
 308
 309     if (position) {
 310         common_clock_->resetBasis();
 311         startup_filter_wr_ = 0;
 312     }
 313
 314     if (frequency) {
 315         last_error_est_valid_ = false;
 316         last_error_est_usec_ = 0;
 317         last_delta_f_ = 0.0;
 318         CO = 0.0f;
 319         lastCObias = CObias = 0.0f;
 320         setTargetCorrection_l(0);
 321         applySlew_l();
 322     }
 323
 324     filter_wr_   = 0;
 325     filter_full_ = false;
 326 }
 327
 328 void ClockRecoveryLoop::setTargetCorrection_l(int32_t tgt) {
 329     // When we make a change to the slew rate, we need to be careful to not
 330     // change it too quickly as it can anger some HDMI sinks out there, notably
 331     // some Sony panels from the 2010-2011 timeframe.  From experimenting with
 332     // some of these sinks, it seems like swinging from one end of the range to
 333     // another in less that 190mSec or so can start to cause trouble.  Adding in
 334     // a hefty margin, we limit the system to a full range sweep in no less than
 335     // 300mSec.
 336     if (tgt_correction_ != tgt) {
 337         int64_t now = local_clock_->getLocalTime();
 338         status_t res;
 339
 340         tgt_correction_ = tgt;
 341
 342         // Set up the transformation to figure out what the slew should be at
 343         // any given point in time in the future.
 344         time_to_cur_slew_.a_zero = now;
 345         time_to_cur_slew_.b_zero = cur_correction_;
 346
 347         // Make sure the sign of the slope is headed in the proper direction.
 348         bool needs_increase = (cur_correction_ < tgt_correction_);
 349         bool is_increasing  = (time_to_cur_slew_.a_to_b_numer > 0);
 350         if (( needs_increase && !is_increasing) ||
 351             (!needs_increase &&  is_increasing)) {
 352             time_to_cur_slew_.a_to_b_numer = -time_to_cur_slew_.a_to_b_numer;
 353         }
 354
 355         // Finally, figure out when the change will be finished and start the
 356         // slew operation.
 357         time_to_cur_slew_.doReverseTransform(tgt_correction_,
 358                                              &slew_change_end_time_);
 359
 360         applySlew_l();
 361     }
 362 }
 363
 364 bool ClockRecoveryLoop::applySlew_l() {
 365     bool ret = true;
 366
 367     // If cur == tgt, there is no ongoing sleq rate change and we are already
 368     // finished.
 369     if (cur_correction_ == tgt_correction_)
 370         goto bailout;
 371
 372     if (local_clock_can_slew_) {
 373         int64_t now = local_clock_->getLocalTime();
 374         int64_t tmp;
 375
 376         if (now >= slew_change_end_time_) {
 377             cur_correction_ = tgt_correction_;
 378             next_slew_change_timeout_.setTimeout(-1);
 379         } else {
 380             time_to_cur_slew_.doForwardTransform(now, &tmp);
 381
 382             if (tmp > INT16_MAX)
 383                 cur_correction_ = INT16_MAX;
 384             else if (tmp < INT16_MIN)
 385                 cur_correction_ = INT16_MIN;
 386             else
 387                 cur_correction_ = static_cast<int16_t>(tmp);
 388
 389             next_slew_change_timeout_.setTimeout(kSlewChangeStepPeriod_mSec);
 390             ret = false;
 391         }
 392
 393         local_clock_->setLocalSlew(cur_correction_);
 394     } else {
 395         // Since we are not actually changing the rate of a HW clock, we don't
 396         // need to worry to much about changing the slew rate so fast that we
 397         // anger any downstream HDMI devices.
 398         cur_correction_ = tgt_correction_;
 399         next_slew_change_timeout_.setTimeout(-1);
 400
 401         // The SW clock recovery implemented by the common clock class expects
 402         // values expressed in PPM. CO is in ppm.
 403         common_clock_->setSlew(local_clock_->getLocalTime(), CO);
 404     }
 405
 406 bailout:
 407     return ret;
 408 }
 409
 410 int ClockRecoveryLoop::applyRateLimitedSlew() {
 411     Mutex::Autolock lock(&lock_);
 412
 413     int ret = next_slew_change_timeout_.msecTillTimeout();
 414     if (!ret) {
 415         if (applySlew_l())
 416             next_slew_change_timeout_.setTimeout(-1);
 417         ret = next_slew_change_timeout_.msecTillTimeout();
 418     }
 419
 420     return ret;
 421 }
 422
 423 }  // namespace android