libs/common_time/clock_recovery.cpp

   1 /*
   2  * Copyright (C) 2011 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /*
  18  * A service that exchanges time synchronization information between
  19  * a master that defines a timeline and clients that follow the timeline.
  20  */
  21
  22 #define __STDC_LIMIT_MACROS
  23 #define LOG_TAG "common_time"
  24 #include <utils/Log.h>
  25 #include <inttypes.h>
  26 #include <stdint.h>
  27
  28 #include <common_time/local_clock.h>
  29 #include <assert.h>
  30
  31 #include "clock_recovery.h"
  32 #include "common_clock.h"
  33 #ifdef TIME_SERVICE_DEBUG
  34 #include "diag_thread.h"
  35 #endif
  36
  37 // Define log macro so we can make LOGV into LOGE when we are exclusively
  38 // debugging this code.
  39 #ifdef TIME_SERVICE_DEBUG
  40 #define LOG_TS ALOGE
  41 #else
  42 #define LOG_TS ALOGV
  43 #endif
  44
  45 namespace android {
  46
  47 ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock,
  48                                      CommonClock* common_clock) {
  49     assert(NULL != local_clock);
  50     assert(NULL != common_clock);
  51
  52     local_clock_  = local_clock;
  53     common_clock_ = common_clock;
  54
  55     local_clock_can_slew_ = local_clock_->initCheck() &&
  56                            (local_clock_->setLocalSlew(0) == OK);
  57     tgt_correction_ = 0;
  58     cur_correction_ = 0;
  59
  60     // Precompute the max rate at which we are allowed to change the VCXO
  61     // control.
  62     uint64_t N = 0x10000ull * 1000ull;
  63     uint64_t D = local_clock_->getLocalFreq() * kMinFullRangeSlewChange_mSec;
  64     LinearTransform::reduce(&N, &D);
  65     while ((N > INT32_MAX) || (D > UINT32_MAX)) {
  66         N >>= 1;
  67         D >>= 1;
  68         LinearTransform::reduce(&N, &D);
  69     }
  70     time_to_cur_slew_.a_to_b_numer = static_cast<int32_t>(N);
  71     time_to_cur_slew_.a_to_b_denom = static_cast<uint32_t>(D);
  72
  73     reset(true, true);
  74
  75 #ifdef TIME_SERVICE_DEBUG
  76     diag_thread_ = new DiagThread(common_clock_, local_clock_);
  77     if (diag_thread_ != NULL) {
  78         status_t res = diag_thread_->startWorkThread();
  79         if (res != OK)
  80             ALOGW("Failed to start A@H clock recovery diagnostic thread.");
  81     } else
  82         ALOGW("Failed to allocate diagnostic thread.");
  83 #endif
  84 }
  85
  86 ClockRecoveryLoop::~ClockRecoveryLoop() {
  87 #ifdef TIME_SERVICE_DEBUG
  88     diag_thread_->stopWorkThread();
  89 #endif
  90 }
  91
  92 // Constants.
  93 const float ClockRecoveryLoop::dT = 1.0;
  94 const float ClockRecoveryLoop::Kc = 1.0f;
  95 const float ClockRecoveryLoop::Ti = 15.0f;
  96 const float ClockRecoveryLoop::Tf = 0.05;
  97 const float ClockRecoveryLoop::bias_Fc = 0.01;
  98 const float ClockRecoveryLoop::bias_RC = (dT / (2 * 3.14159f * bias_Fc));
  99 const float ClockRecoveryLoop::bias_Alpha = (dT / (bias_RC + dT));
 100 const int64_t ClockRecoveryLoop::panic_thresh_ = 50000;
 101 const int64_t ClockRecoveryLoop::control_thresh_ = 10000;
 102 const float ClockRecoveryLoop::COmin = -100.0f;
 103 const float ClockRecoveryLoop::COmax = 100.0f;
 104 const uint32_t ClockRecoveryLoop::kMinFullRangeSlewChange_mSec = 300;
 105 const int ClockRecoveryLoop::kSlewChangeStepPeriod_mSec = 10;
 106
 107
 108 void ClockRecoveryLoop::reset(bool position, bool frequency) {
 109     Mutex::Autolock lock(&lock_);
 110     reset_l(position, frequency);
 111 }
 112
 113 uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data,
 114                                           uint32_t count) {
 115     uint32_t min_rtt = 0;
 116     for (uint32_t i = 1; i < count; ++i)
 117         if (data[min_rtt].rtt > data[i].rtt)
 118             min_rtt = i;
 119
 120     return min_rtt;
 121 }
 122
 123 bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time,
 124                                             int64_t nominal_common_time,
 125                                             int64_t rtt) {
 126     Mutex::Autolock lock(&lock_);
 127
 128     int64_t local_common_time = 0;
 129     common_clock_->localToCommon(local_time, &local_common_time);
 130     int64_t raw_delta = nominal_common_time - local_common_time;
 131
 132 #ifdef TIME_SERVICE_DEBUG
 133     ALOGE("local=%lld, common=%lld, delta=%lld, rtt=%lld\n",
 134          local_common_time, nominal_common_time,
 135          raw_delta, rtt);
 136 #endif
 137
 138     // If we have not defined a basis for common time, then we need to use these
 139     // initial points to do so.  In order to avoid significant initial error
 140     // from a particularly bad startup data point, we collect the first N data
 141     // points and choose the best of them before moving on.
 142     if (!common_clock_->isValid()) {
 143         if (startup_filter_wr_ < kStartupFilterSize) {
 144             DisciplineDataPoint& d =  startup_filter_data_[startup_filter_wr_];
 145             d.local_time = local_time;
 146             d.nominal_common_time = nominal_common_time;
 147             d.rtt = rtt;
 148             startup_filter_wr_++;
 149         }
 150
 151         if (startup_filter_wr_ == kStartupFilterSize) {
 152             uint32_t min_rtt = findMinRTTNdx(startup_filter_data_,
 153                     kStartupFilterSize);
 154
 155             common_clock_->setBasis(
 156                     startup_filter_data_[min_rtt].local_time,
 157                     startup_filter_data_[min_rtt].nominal_common_time);
 158         }
 159
 160         return true;
 161     }
 162
 163     int64_t observed_common;
 164     int64_t delta;
 165     float delta_f, dCO;
 166     int32_t tgt_correction;
 167
 168     if (OK != common_clock_->localToCommon(local_time, &observed_common)) {
 169         // Since we just checked to make certain that this conversion was valid,
 170         // and no one else in the system should be messing with it, if this
 171         // conversion is suddenly invalid, it is a good reason to panic.
 172         ALOGE("Failed to convert local time to common time in %s:%d",
 173                 __PRETTY_FUNCTION__, __LINE__);
 174         return false;
 175     }
 176
 177     // Implement a filter which should match NTP filtering behavior when a
 178     // client is associated with only one peer of lower stratum.  Basically,
 179     // always use the best of the N last data points, where best is defined as
 180     // lowest round trip time.  NTP uses an N of 8; we use a value of 6.
 181     //
 182     // TODO(johngro) : experiment with other filter strategies.  The goal here
 183     // is to mitigate the effects of high RTT data points which typically have
 184     // large asymmetries in the TX/RX legs.  Downside of the existing NTP
 185     // approach (particularly because of the PID controller we are using to
 186     // produce the control signal from the filtered data) are that the rate at
 187     // which discipline events are actually acted upon becomes irregular and can
 188     // become drawn out (the time between actionable event can go way up).  If
 189     // the system receives a strong high quality data point, the proportional
 190     // component of the controller can produce a strong correction which is left
 191     // in place for too long causing overshoot.  In addition, the integral
 192     // component of the system currently is an approximation based on the
 193     // assumption of a more or less homogeneous sampling of the error.  Its
 194     // unclear what the effect of undermining this assumption would be right
 195     // now.
 196
 197     // Two ideas which come to mind immediately would be to...
 198     // 1) Keep a history of more data points (32 or so) and ignore data points
 199     //    whose RTT is more than a certain number of standard deviations outside
 200     //    of the norm.
 201     // 2) Eliminate the PID controller portion of this system entirely.
 202     //    Instead, move to a system which uses a very wide filter (128 data
 203     //    points or more) with a sum-of-least-squares line fitting approach to
 204     //    tracking the long term drift.  This would take the place of the I
 205     //    component in the current PID controller.  Also use a much more narrow
 206     //    outlier-rejector filter (as described in #1) to drive a short term
 207     //    correction factor similar to the P component of the PID controller.
 208     assert(filter_wr_ < kFilterSize);
 209     filter_data_[filter_wr_].local_time           = local_time;
 210     filter_data_[filter_wr_].observed_common_time = observed_common;
 211     filter_data_[filter_wr_].nominal_common_time  = nominal_common_time;
 212     filter_data_[filter_wr_].rtt                  = rtt;
 213     filter_data_[filter_wr_].point_used           = false;
 214     uint32_t current_point = filter_wr_;
 215     filter_wr_ = (filter_wr_ + 1) % kFilterSize;
 216     if (!filter_wr_)
 217         filter_full_ = true;
 218
 219     uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_;
 220     uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end);
 221     // We only use packets with low RTTs for control. If the packet RTT
 222     // is less than the panic threshold, we can probably eat the jitter with the
 223     // control loop. Otherwise, take the packet only if it better than all
 224     // of the packets we have in the history. That way we try to track
 225     // something, even if it is noisy.
 226     if (current_point == min_rtt || rtt < control_thresh_) {
 227         delta_f = delta = nominal_common_time - observed_common;
 228
 229         last_error_est_valid_ = true;
 230         last_error_est_usec_ = delta;
 231
 232         // Compute the error then clamp to the panic threshold.  If we ever
 233         // exceed this amt of error, its time to panic and reset the system.
 234         // Given that the error in the measurement of the error could be as
 235         // high as the RTT of the data point, we don't actually panic until
 236         // the implied error (delta) is greater than the absolute panic
 237         // threashold plus the RTT.  IOW - we don't panic until we are
 238         // absoluely sure that our best case sync is worse than the absolute
 239         // panic threshold.
 240         int64_t effective_panic_thresh = panic_thresh_ + rtt;
 241         if ((delta > effective_panic_thresh) ||
 242             (delta < -effective_panic_thresh)) {
 243             // PANIC!!!
 244             reset_l(false, true);
 245             return false;
 246         }
 247
 248     } else {
 249         // We do not have a good packet to look at, but we also do not want to
 250         // free-run the clock at some crazy slew rate. So we guess the
 251         // trajectory of the clock based on the last controller output and the
 252         // estimated bias of our clock against the master.
 253         // The net effect of this is that CO == CObias after some extended
 254         // period of no feedback.
 255         delta_f = last_delta_f_ - dT*(CO - CObias);
 256         delta = delta_f;
 257     }
 258
 259     // Velocity form PI control equation.
 260     dCO = Kc * (1.0f + dT/Ti) * delta_f - Kc * last_delta_f_;
 261     CO += dCO * Tf; // Filter CO by applying gain <1 here.
 262
 263     // Save error terms for later.
 264     last_delta_f_ = delta_f;
 265
 266     // Clamp CO to +/- 100ppm.
 267     if (CO < COmin)
 268         CO = COmin;
 269     else if (CO > COmax)
 270         CO = COmax;
 271
 272     // Update the controller bias.
 273     CObias = bias_Alpha * CO + (1.0f - bias_Alpha) * lastCObias;
 274     lastCObias = CObias;
 275
 276     // Convert PPM to 16-bit int range. Add some guard band (-0.01) so we
 277     // don't get fp weirdness.
 278     tgt_correction = CO * 327.66;
 279
 280     // If there was a change in the amt of correction to use, update the
 281     // system.
 282     setTargetCorrection_l(tgt_correction);
 283
 284     LOG_TS("clock_loop %" PRId64 " %f %f %f %d\n", raw_delta, delta_f, CO, CObias, tgt_correction);
 285
 286 #ifdef TIME_SERVICE_DEBUG
 287     diag_thread_->pushDisciplineEvent(
 288             local_time,
 289             observed_common,
 290             nominal_common_time,
 291             tgt_correction,
 292             rtt);
 293 #endif
 294
 295     return true;
 296 }
 297
 298 int32_t ClockRecoveryLoop::getLastErrorEstimate() {
 299     Mutex::Autolock lock(&lock_);
 300
 301     if (last_error_est_valid_)
 302         return last_error_est_usec_;
 303     else
 304         return ICommonClock::kErrorEstimateUnknown;
 305 }
 306
 307 void ClockRecoveryLoop::reset_l(bool position, bool frequency) {
 308     assert(NULL != common_clock_);
 309
 310     if (position) {
 311         common_clock_->resetBasis();
 312         startup_filter_wr_ = 0;
 313     }
 314
 315     if (frequency) {
 316         last_error_est_valid_ = false;
 317         last_error_est_usec_ = 0;
 318         last_delta_f_ = 0.0;
 319         CO = 0.0f;
 320         lastCObias = CObias = 0.0f;
 321         setTargetCorrection_l(0);
 322         applySlew_l();
 323     }
 324
 325     filter_wr_   = 0;
 326     filter_full_ = false;
 327 }
 328
 329 void ClockRecoveryLoop::setTargetCorrection_l(int32_t tgt) {
 330     // When we make a change to the slew rate, we need to be careful to not
 331     // change it too quickly as it can anger some HDMI sinks out there, notably
 332     // some Sony panels from the 2010-2011 timeframe.  From experimenting with
 333     // some of these sinks, it seems like swinging from one end of the range to
 334     // another in less that 190mSec or so can start to cause trouble.  Adding in
 335     // a hefty margin, we limit the system to a full range sweep in no less than
 336     // 300mSec.
 337     if (tgt_correction_ != tgt) {
 338         int64_t now = local_clock_->getLocalTime();
 339
 340         tgt_correction_ = tgt;
 341
 342         // Set up the transformation to figure out what the slew should be at
 343         // any given point in time in the future.
 344         time_to_cur_slew_.a_zero = now;
 345         time_to_cur_slew_.b_zero = cur_correction_;
 346
 347         // Make sure the sign of the slope is headed in the proper direction.
 348         bool needs_increase = (cur_correction_ < tgt_correction_);
 349         bool is_increasing  = (time_to_cur_slew_.a_to_b_numer > 0);
 350         if (( needs_increase && !is_increasing) ||
 351             (!needs_increase &&  is_increasing)) {
 352             time_to_cur_slew_.a_to_b_numer = -time_to_cur_slew_.a_to_b_numer;
 353         }
 354
 355         // Finally, figure out when the change will be finished and start the
 356         // slew operation.
 357         time_to_cur_slew_.doReverseTransform(tgt_correction_,
 358                                              &slew_change_end_time_);
 359
 360         applySlew_l();
 361     }
 362 }
 363
 364 bool ClockRecoveryLoop::applySlew_l() {
 365     bool ret = true;
 366
 367     // If cur == tgt, there is no ongoing sleq rate change and we are already
 368     // finished.
 369     if (cur_correction_ == tgt_correction_)
 370         goto bailout;
 371
 372     if (local_clock_can_slew_) {
 373         int64_t now = local_clock_->getLocalTime();
 374         int64_t tmp;
 375
 376         if (now >= slew_change_end_time_) {
 377             cur_correction_ = tgt_correction_;
 378             next_slew_change_timeout_.setTimeout(-1);
 379         } else {
 380             time_to_cur_slew_.doForwardTransform(now, &tmp);
 381
 382             if (tmp > INT16_MAX)
 383                 cur_correction_ = INT16_MAX;
 384             else if (tmp < INT16_MIN)
 385                 cur_correction_ = INT16_MIN;
 386             else
 387                 cur_correction_ = static_cast<int16_t>(tmp);
 388
 389             next_slew_change_timeout_.setTimeout(kSlewChangeStepPeriod_mSec);
 390             ret = false;
 391         }
 392
 393         local_clock_->setLocalSlew(cur_correction_);
 394     } else {
 395         // Since we are not actually changing the rate of a HW clock, we don't
 396         // need to worry to much about changing the slew rate so fast that we
 397         // anger any downstream HDMI devices.
 398         cur_correction_ = tgt_correction_;
 399         next_slew_change_timeout_.setTimeout(-1);
 400
 401         // The SW clock recovery implemented by the common clock class expects
 402         // values expressed in PPM. CO is in ppm.
 403         common_clock_->setSlew(local_clock_->getLocalTime(), CO);
 404     }
 405
 406 bailout:
 407     return ret;
 408 }
 409
 410 int ClockRecoveryLoop::applyRateLimitedSlew() {
 411     Mutex::Autolock lock(&lock_);
 412
 413     int ret = next_slew_change_timeout_.msecTillTimeout();
 414     if (!ret) {
 415         if (applySlew_l())
 416             next_slew_change_timeout_.setTimeout(-1);
 417         ret = next_slew_change_timeout_.msecTillTimeout();
 418     }
 419
 420     return ret;
 421 }
 422
 423 }  // namespace android