2 * Copyright (C) 2015 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 package android.net.ip;
19 import com.android.internal.annotations.GuardedBy;
21 import android.content.Context;
22 import android.net.LinkAddress;
23 import android.net.LinkProperties;
24 import android.net.LinkProperties.ProvisioningChange;
25 import android.net.ProxyInfo;
26 import android.net.RouteInfo;
27 import android.net.metrics.IpConnectivityLog;
28 import android.net.metrics.IpReachabilityEvent;
29 import android.net.netlink.NetlinkConstants;
30 import android.net.netlink.NetlinkErrorMessage;
31 import android.net.netlink.NetlinkMessage;
32 import android.net.netlink.NetlinkSocket;
33 import android.net.netlink.RtNetlinkNeighborMessage;
34 import android.net.netlink.StructNdaCacheInfo;
35 import android.net.netlink.StructNdMsg;
36 import android.net.netlink.StructNlMsgHdr;
37 import android.net.util.AvoidBadWifiTracker;
38 import android.os.PowerManager;
39 import android.os.SystemClock;
40 import android.system.ErrnoException;
41 import android.system.NetlinkSocketAddress;
42 import android.system.OsConstants;
43 import android.util.Log;
45 import java.io.InterruptedIOException;
46 import java.net.Inet6Address;
47 import java.net.InetAddress;
48 import java.net.InetSocketAddress;
49 import java.net.NetworkInterface;
50 import java.net.SocketAddress;
51 import java.net.SocketException;
52 import java.nio.ByteBuffer;
53 import java.util.Arrays;
54 import java.util.HashMap;
55 import java.util.HashSet;
56 import java.util.List;
62 * IpReachabilityMonitor.
64 * Monitors on-link IP reachability and notifies callers whenever any on-link
65 * addresses of interest appear to have become unresponsive.
67 * This code does not concern itself with "why" a neighbour might have become
68 * unreachable. Instead, it primarily reacts to the kernel's notion of IP
69 * reachability for each of the neighbours we know to be critically important
70 * to normal network connectivity. As such, it is often "just the messenger":
71 * the neighbours about which it warns are already deemed by the kernel to have become unreachable.
77 * 1. The "on-link neighbours of interest" found in a given LinkProperties
78 * instance are added to a "watch list" via #updateLinkProperties().
79 * This usually means all default gateways and any on-link DNS servers.
81 * 2. We listen continuously for netlink neighbour messages (RTM_NEWNEIGH,
82 * RTM_DELNEIGH), watching only for neighbours in the watch list.
84 * - A neighbour going into NUD_REACHABLE, NUD_STALE, NUD_DELAY, and
85 * even NUD_PROBE is perfectly normal; we merely record the new state.
87 * - A neighbour's entry may be deleted (RTM_DELNEIGH), for example due
88 * to garbage collection. This is not necessarily of immediate
89 * concern; we record the neighbour as moving to NUD_NONE.
91 * - A neighbour transitioning to NUD_FAILED (for any reason) is
92 * critically important and is handled as described below in #4.
94 * 3. All on-link neighbours in the watch list can be forcibly "probed" by
95 * calling #probeAll(). This should be called whenever it is important to
96 * verify that critical neighbours on the link are still reachable, e.g.
97 * when roaming between BSSIDs.
99 * - The kernel will send unicast ARP requests for IPv4 neighbours and
100 * unicast NS packets for IPv6 neighbours. The expected replies will likely be unicast.
103 * - The forced probing is done while holding a wakelock. The kernel may,
104 * however, initiate probing of a neighbour on its own, i.e. whenever
105 * a neighbour has expired from NUD_DELAY.
107 * - The kernel sends:
109 * /proc/sys/net/ipv{4,6}/neigh/<ifname>/ucast_solicit
111 * number of probes (usually 3) every:
113 * /proc/sys/net/ipv{4,6}/neigh/<ifname>/retrans_time_ms
115 * number of milliseconds (usually 1000ms). This normally results in
116 * 3 unicast packets, 1 per second.
118 * - If no response is received to any of the probe packets, the kernel
119 * marks the neighbour as being in state NUD_FAILED, and the listening
120 * process in #2 will learn of it.
122 * 4. We call the supplied Callback#notifyLost() function if the loss of a
123 * neighbour in NUD_FAILED would cause IPv4 or IPv6 configuration to
124 * become incomplete (a loss of provisioning).
126 * - For example, losing all our IPv4 on-link DNS servers (or losing
127 * our only IPv6 default gateway) constitutes a loss of IPv4 (IPv6)
128 * provisioning; Callback#notifyLost() would be called.
130 * - Since it can be non-trivial to reacquire certain IP provisioning
131 * state, it may be best for the link to disconnect completely and reconnect afresh.
136 public class IpReachabilityMonitor {
// Tag passed to every android.util.Log call made by this class.
137 private static final String TAG = "IpReachabilityMonitor";
// Compile-time switch guarding the Log.d() debug statements; off by default.
138 private static final boolean DBG = false;
// Separate switch guarding additional Log.d() statements (e.g. observer thread
// start/finish); off by default.
139 private static final boolean VDBG = false;
/**
 * Receives the monitor's loss notifications.
 *
 * The monitor invokes it when the failure of a watched neighbour amounts to a
 * loss of provisioning.
 */
public interface Callback {
    /**
     * Reports that a neighbour has been lost.
     *
     * This callback function must execute as quickly as possible as it is
     * run on the same thread that listens to kernel neighbor updates.
     *
     * TODO: refactor to something like notifyProvisioningLost(String msg).
     *
     * @param ip     the neighbour address associated with the failure
     * @param logMsg a human-readable description of the event, suitable for logging
     */
    void notifyLost(InetAddress ip, String logMsg);
}
// Monitor guarding the mutable state declared below: link properties, watch
// list, watch-list version and the running flag.
149 private final Object mLock = new Object();
// Wakelock acquired with a timeout in probeAll() while probes are outstanding.
150 private final PowerManager.WakeLock mWakeLock;
// Name and index of the interface being monitored; both set in the constructor.
151 private final String mInterfaceName;
152 private final int mInterfaceIndex;
// Recipient of loss notifications; null-checked before it is invoked.
153 private final Callback mCallback;
// May be null, in which case avoidingBadLinks() reports true.
154 private final AvoidBadWifiTracker mAvoidBadWifiTracker;
// Runnable that processes kernel neighbour messages, and the thread running it.
155 private final NetlinkSocketObserver mNetlinkSocketObserver;
156 private final Thread mObserverThread;
// Destination for the IpReachabilityEvent records produced by this class.
157 private final IpConnectivityLog mMetricsLog = new IpConnectivityLog();
// Copy of the LinkProperties most recently given to updateLinkProperties().
159 private LinkProperties mLinkProperties = new LinkProperties();
160 // TODO: consider a map to a private NeighborState class holding more
161 // information than a single NUD state entry.
// Watched neighbour address -> last recorded NUD state.
163 private Map<InetAddress, Short> mIpWatchList = new HashMap<>();
// Incremented each time the watch list is replaced or cleared.
165 private int mIpWatchListVersion;
// Set by the observer when it starts; cleared when it finishes, when socket
// setup fails, or when shutdown is requested.
167 private boolean mRunning;
168 // Time in milliseconds of the last forced probe request.
169 private volatile long mLastProbeTimeMs;
172 * Make the kernel perform neighbor reachability detection (IPv4 ARP or IPv6 ND)
173 * for the given IP address on the specified interface index.
175 * @return 0 if the request was successfully passed to the kernel; otherwise return
176 * a non-zero error code.
// Asks the kernel to probe one neighbour: builds a netlink neighbour message
// carrying NUD_PROBE for |ip| on |ifIndex|, sends it over a short-lived
// NETLINK_ROUTE socket and inspects the reply. Failures are logged with Log.e()
// and reflected in the local |errno| rather than propagated as exceptions.
// NOTE(review): this listing appears to have lost several short lines (closing
// braces, else arms, the final return); the notes below mark where that matters.
178 private static int probeNeighbor(int ifIndex, InetAddress ip) {
// Prefix shared by every log line emitted for this probe.
179 final String msgSnippet = "probing ip=" + ip.getHostAddress() + "%" + ifIndex;
180 if (DBG) { Log.d(TAG, msgSnippet); }
182 final byte[] msg = RtNetlinkNeighborMessage.newNewNeighborMessage(
183 1, ip, StructNdMsg.NUD_PROBE, ifIndex, null);
// Pessimistic default, overwritten once a reply or an exception tells us more.
185 int errno = -OsConstants.EPROTO;
// try-with-resources closes the socket on every exit path.
186 try (NetlinkSocket nlSocket = new NetlinkSocket(OsConstants.NETLINK_ROUTE)) {
// Used for both the send and the receive below; presumably milliseconds —
// confirm against NetlinkSocket.
187 final long IO_TIMEOUT = 300L;
188 nlSocket.connectToKernel();
189 nlSocket.sendMessage(msg, 0, msg.length, IO_TIMEOUT);
190 final ByteBuffer bytes = nlSocket.recvMessage(IO_TIMEOUT);
191 // recvMessage() guaranteed to not return null if it did not throw.
192 final NetlinkMessage response = NetlinkMessage.parse(bytes);
// A NetlinkErrorMessage with a non-null payload carries the kernel's verdict in
// its |error| field. (The explicit null test is redundant with instanceof.)
193 if (response != null && response instanceof NetlinkErrorMessage &&
194 (((NetlinkErrorMessage) response).getNlMsgError() != null)) {
195 errno = ((NetlinkErrorMessage) response).getNlMsgError().error;
// NOTE(review): whatever condition guards the error log below (presumably a
// non-zero errno) is not visible in this listing.
197 // TODO: consider ignoring EINVAL (-22), which appears to be
198 // normal when probing a neighbor for which the kernel does
199 // not already have / no longer has a link layer address.
200 Log.e(TAG, "Error " + msgSnippet + ", errmsg=" + response.toString());
// The lines that follow handle a reply that was not a usable error message: an
// unparsable reply is logged as raw hex, anything else via toString().
// NOTE(review): the declaration of |errmsg| and the else keywords separating
// these branches are not visible in this listing.
204 if (response == null) {
206 errmsg = "raw bytes: " + NetlinkConstants.hexify(bytes);
208 errmsg = response.toString();
210 Log.e(TAG, "Error " + msgSnippet + ", errmsg=" + errmsg);
212 } catch (ErrnoException e) {
213 Log.e(TAG, "Error " + msgSnippet, e);
// NOTE(review): no errno assignment is visible in this handler, unlike the two
// handlers below; a line may be missing here.
215 } catch (InterruptedIOException e) {
216 Log.e(TAG, "Error " + msgSnippet, e);
217 errno = -OsConstants.ETIMEDOUT;
218 } catch (SocketException e) {
219 Log.e(TAG, "Error " + msgSnippet, e);
220 errno = -OsConstants.EIO;
/**
 * Creates a monitor for the named interface without an AvoidBadWifiTracker.
 *
 * Delegates to the four-argument constructor, passing null as the tracker.
 *
 * @param context  forwarded to the four-argument constructor
 * @param ifName   name of the interface to monitor
 * @param callback recipient of loss notifications
 */
public IpReachabilityMonitor(Context context, String ifName, Callback callback) {
    this(context, ifName, callback, null);
}
229 public IpReachabilityMonitor(Context context, String ifName, Callback callback,
230 AvoidBadWifiTracker tracker) throws IllegalArgumentException {
231 mInterfaceName = ifName;
234 NetworkInterface netIf = NetworkInterface.getByName(ifName);
235 mInterfaceIndex = netIf.getIndex();
236 } catch (SocketException | NullPointerException e) {
237 throw new IllegalArgumentException("invalid interface '" + ifName + "': ", e);
239 mWakeLock = ((PowerManager) context.getSystemService(Context.POWER_SERVICE)).newWakeLock(
240 PowerManager.PARTIAL_WAKE_LOCK, TAG + "." + mInterfaceName);
241 mCallback = callback;
242 mAvoidBadWifiTracker = tracker;
243 mNetlinkSocketObserver = new NetlinkSocketObserver();
244 mObserverThread = new Thread(mNetlinkSocketObserver);
245 mObserverThread.start();
// NOTE(review): the signature of the method owning these three statements is
// not present in this listing; presumably a public stop()-style shutdown entry
// point — confirm against the full file.
// Clear the running flag so that the stillRunning() checks start failing.
249 synchronized (mLock) { mRunning = false; }
// Drop the stored link properties and empty the watch list.
250 clearLinkProperties();
// Release the observer's netlink socket.
251 mNetlinkSocketObserver.clearNetlinkSocket();
254 // TODO: add a public dump() method that can be called during a bug report.
// Renders the interface name/index, the watch-list version and each watched
// neighbour with its NUD state as one string; used by the DBG log statements.
256 private String describeWatchList() {
257 final String delimiter = ", ";
258 StringBuilder sb = new StringBuilder();
// Hold mLock so the version and the table are read together.
259 synchronized (mLock) {
260 sb.append("iface{" + mInterfaceName + "/" + mInterfaceIndex + "}, ");
261 sb.append("v{" + mIpWatchListVersion + "}, ");
262 sb.append("ntable=[");
// NOTE(review): the statements that test and update |firstTime| (presumably so
// the delimiter is skipped before the first entry) and the text that closes
// "ntable=[" are not visible in this listing.
263 boolean firstTime = true;
264 for (Map.Entry<InetAddress, Short> entry : mIpWatchList.entrySet()) {
268 sb.append(delimiter);
// Each entry is rendered as "<address>/<NUD state name>".
270 sb.append(entry.getKey().getHostAddress() + "/" +
271 StructNdMsg.stringForNudState(entry.getValue()));
275 return sb.toString();
278 private boolean isWatching(InetAddress ip) {
279 synchronized (mLock) {
280 return mRunning && mIpWatchList.containsKey(ip);
284 private boolean stillRunning() {
285 synchronized (mLock) {
290 private static boolean isOnLink(List<RouteInfo> routes, InetAddress ip) {
291 for (RouteInfo route : routes) {
292 if (!route.hasGateway() && route.matches(ip)) {
299 private short getNeighborStateLocked(InetAddress ip) {
300 if (mIpWatchList.containsKey(ip)) {
301 return mIpWatchList.get(ip);
303 return StructNdMsg.NUD_NONE;
// Replaces the stored LinkProperties and rebuilds the watch list from it: every
// gateway that is itself on-link, plus every on-link DNS server. An address that
// was already watched keeps its recorded NUD state; a new one starts at NUD_NONE.
306 public void updateLinkProperties(LinkProperties lp) {
307 if (!mInterfaceName.equals(lp.getInterfaceName())) {
308 // TODO: figure out whether / how to cope with interface changes.
309 Log.wtf(TAG, "requested LinkProperties interface '" + lp.getInterfaceName() +
310 "' does not match: " + mInterfaceName);
// NOTE(review): no early exit is visible after this mismatch report in this
// listing; confirm whether the update is meant to proceed.
314 synchronized (mLock) {
// Store a copy rather than the caller's instance.
315 mLinkProperties = new LinkProperties(lp);
316 Map<InetAddress, Short> newIpWatchList = new HashMap<>();
318 final List<RouteInfo> routes = mLinkProperties.getRoutes();
// Watch each route's gateway, provided the gateway is on-link.
319 for (RouteInfo route : routes) {
320 if (route.hasGateway()) {
321 InetAddress gw = route.getGateway();
322 if (isOnLink(routes, gw)) {
323 newIpWatchList.put(gw, getNeighborStateLocked(gw));
// Watch each on-link DNS server. These are read from the caller's |lp|, of
// which mLinkProperties is the copy made above.
328 for (InetAddress nameserver : lp.getDnsServers()) {
329 if (isOnLink(routes, nameserver)) {
330 newIpWatchList.put(nameserver, getNeighborStateLocked(nameserver));
// Swap in the rebuilt table and bump the version.
334 mIpWatchList = newIpWatchList;
335 mIpWatchListVersion++;
337 if (DBG) { Log.d(TAG, "watch: " + describeWatchList()); }
340 public void clearLinkProperties() {
341 synchronized (mLock) {
342 mLinkProperties.clear();
343 mIpWatchList.clear();
344 mIpWatchListVersion++;
346 if (DBG) { Log.d(TAG, "clear: " + describeWatchList()); }
// Works out whether the neighbours currently recorded as NUD_FAILED cost us
// provisioning: builds a "what if" copy of the link properties with the routes
// and DNS servers that depend on a failed neighbour removed, compares it with
// the real properties, and notifies the callback on LOST_PROVISIONING.
// |msg| is appended to the log message handed to the callback.
349 private void handleNeighborLost(String msg) {
350 InetAddress ip = null;
351 final ProvisioningChange delta;
352 synchronized (mLock) {
353 LinkProperties whatIfLp = new LinkProperties(mLinkProperties);
355 for (Map.Entry<InetAddress, Short> entry : mIpWatchList.entrySet()) {
// NOTE(review): the statement executed for non-failed entries (presumably a
// skip to the next entry) and the assignment of |ip| from the entry are not
// visible in this listing; the code below relies on |ip| being non-null.
356 if (entry.getValue() != StructNdMsg.NUD_FAILED) {
// Drop every route whose gateway is the failed neighbour.
361 for (RouteInfo route : mLinkProperties.getRoutes()) {
362 if (ip.equals(route.getGateway())) {
363 whatIfLp.removeRoute(route);
// Drop the neighbour as a DNS server, except for an IPv6 neighbour while bad
// links are not being avoided.
367 if (avoidingBadLinks() || !(ip instanceof Inet6Address)) {
368 // We should do this unconditionally, but alas we cannot: b/31827713.
369 whatIfLp.removeDnsServer(ip);
373 delta = LinkProperties.compareProvisioning(mLinkProperties, whatIfLp);
376 if (delta == ProvisioningChange.LOST_PROVISIONING) {
377 final String logMsg = "FAILURE: LOST_PROVISIONING, " + msg;
// |ip| is whichever failed neighbour the loop above handled last.
379 if (mCallback != null) {
380 // TODO: remove |ip| when the callback signature no longer has
381 // an InetAddress argument.
382 mCallback.notifyLost(ip, logMsg);
388 private boolean avoidingBadLinks() {
389 return (mAvoidBadWifiTracker != null) ? mAvoidBadWifiTracker.currentValue() : true;
// Forces a probe of every neighbour on the watch list. The addresses are first
// copied out of the watch list under mLock, then probed one at a time via
// probeNeighbor(), with each result logged as a PROBE event.
392 public void probeAll() {
393 Set<InetAddress> ipProbeList = new HashSet<InetAddress>();
394 synchronized (mLock) {
395 ipProbeList.addAll(mIpWatchList.keySet());
398 if (!ipProbeList.isEmpty() && stillRunning()) {
399 // Keep the CPU awake long enough to allow all ARP/ND
400 // probes a reasonable chance at success. See b/23197666.
402 // The wakelock we use is (by default) refcounted, and this version
403 // of acquire(timeout) queues a release message to keep acquisitions
404 // and releases balanced.
405 mWakeLock.acquire(getProbeWakeLockDuration());
408 for (InetAddress target : ipProbeList) {
// NOTE(review): the statement taken when the monitor has stopped (break or
// return) is not visible in this listing; it decides whether the timestamp
// below is still updated in that case.
409 if (!stillRunning()) {
412 final int returnValue = probeNeighbor(mInterfaceIndex, target);
413 logEvent(IpReachabilityEvent.PROBE, returnValue);
// Remembered so logNudFailed() can tell whether a later failure followed a probe.
415 mLastProbeTimeMs = SystemClock.elapsedRealtime();
418 private static long getProbeWakeLockDuration() {
419 // Ideally, this would be computed by examining the values of:
421 // /proc/sys/net/ipv[46]/neigh/<ifname>/ucast_solicit
425 // /proc/sys/net/ipv[46]/neigh/<ifname>/retrans_time_ms
427 // For now, just make some assumptions.
428 final long numUnicastProbes = 3;
429 final long retransTimeMs = 1000;
430 final long gracePeriodMs = 500;
431 return (numUnicastProbes * retransTimeMs) + gracePeriodMs;
434 private void logEvent(int probeType, int errorCode) {
435 int eventType = probeType | (errorCode & 0xff);
436 mMetricsLog.log(new IpReachabilityEvent(mInterfaceName, eventType));
439 private void logNudFailed(ProvisioningChange delta) {
440 long duration = SystemClock.elapsedRealtime() - mLastProbeTimeMs;
441 boolean isFromProbe = (duration < getProbeWakeLockDuration());
442 boolean isProvisioningLost = (delta == ProvisioningChange.LOST_PROVISIONING);
443 int eventType = IpReachabilityEvent.nudFailureEventType(isFromProbe, isProvisioningLost);
444 mMetricsLog.log(new IpReachabilityEvent(mInterfaceName, eventType));
447 // TODO: simplify the number of objects by making this extend Thread.
// Runnable handed to mObserverThread by the constructor: receives kernel
// neighbour messages on a netlink socket and feeds them to the enclosing monitor.
448 private final class NetlinkSocketObserver implements Runnable {
// Socket the neighbour messages are read from; created in setupNetlinkSocket().
449 private NetlinkSocket mSocket;
// NOTE(review): the method header for this body is not present in this listing;
// presumably Runnable.run() — confirm against the full file.
// Observer loop: mark the monitor running, set up the socket, then receive and
// parse kernel messages until the running flag is cleared.
453 if (VDBG) { Log.d(TAG, "Starting observing thread."); }
454 synchronized (mLock) { mRunning = true; }
457 setupNetlinkSocket();
458 } catch (ErrnoException | SocketException e) {
459 Log.e(TAG, "Failed to suitably initialize a netlink socket", e);
// Clearing the flag here keeps the receive loop below from starting.
460 synchronized (mLock) { mRunning = false; }
463 ByteBuffer byteBuffer;
464 while (stillRunning()) {
466 byteBuffer = recvKernelReply();
467 } catch (ErrnoException e) {
// Only warn when the monitor is still meant to be running.
// NOTE(review): the statement following this log (presumably leaving the loop)
// is not visible in this listing.
468 if (stillRunning()) { Log.w(TAG, "ErrnoException: ", e); }
// Timestamp taken right after the receive; it is passed to the parser below.
471 final long whenMs = SystemClock.elapsedRealtime();
// recvKernelReply() may hand back null; the statement taken in that case is not
// visible in this listing.
472 if (byteBuffer == null) {
475 parseNetlinkMessageBuffer(byteBuffer, whenMs);
478 clearNetlinkSocket();
480 synchronized (mLock) { mRunning = false; }
481 if (VDBG) { Log.d(TAG, "Finishing observing thread."); }
// Releases the observer's netlink socket, if there is one.
// NOTE(review): the statements inside the null check (presumably closing the
// socket and clearing the field) are not visible in this listing.
// NOTE(review): mSocket is accessed here without holding mLock, and this method
// is called both from the observer's own loop and from the enclosing class.
484 private void clearNetlinkSocket() {
485 if (mSocket != null) {
490 // TODO: Refactor the main loop to recreate the socket upon recoverable errors.
// (Re)creates the NETLINK_ROUTE socket and binds it to the RTMGRP_NEIGH
// multicast group with port id 0, after discarding any previous socket.
491 private void setupNetlinkSocket() throws ErrnoException, SocketException {
492 clearNetlinkSocket();
493 mSocket = new NetlinkSocket(OsConstants.NETLINK_ROUTE);
495 final NetlinkSocketAddress listenAddr = new NetlinkSocketAddress(
496 0, OsConstants.RTMGRP_NEIGH);
497 mSocket.bind(listenAddr);
// Log the address the socket actually ended up bound to.
// NOTE(review): any guard around this log and the end of the statement are not
// visible in this listing.
500 final NetlinkSocketAddress nlAddr = mSocket.getLocalAddress();
501 Log.d(TAG, "bound to sockaddr_nl{"
// NOTE(review): `& 0xffffffff` is an int mask and therefore a no-op; the cast to
// long then sign-extends, so a port id with the top bit set prints as negative.
// An unsigned rendering needs a long mask (0xffffffffL).
502 + ((long) (nlAddr.getPortId() & 0xffffffff)) + ", "
503 + nlAddr.getGroupsMask()
// Receives the next message from the observer's socket, passing 0 as the
// receive timeout.
// NOTE(review): the try keyword, the rethrow inside the EAGAIN test and the
// fall-through return (presumably null, which the caller checks for) are not
// visible in this listing.
508 private ByteBuffer recvKernelReply() throws ErrnoException {
510 return mSocket.recvMessage(0);
511 } catch (InterruptedIOException e) {
512 // Interruption or other error, e.g. another thread closed our file descriptor.
513 } catch (ErrnoException e) {
// EAGAIN is singled out from every other errno here.
514 if (e.errno != OsConstants.EAGAIN) {
// Walks every netlink message in |byteBuffer| and passes each rtnetlink
// neighbour message to evaluateRtNetlinkNeighborMessage() together with
// |whenMs|, the receive timestamp. Other message kinds are only logged.
// NOTE(review): the break/continue statements that end each rejection branch
// are not visible in this listing.
521 private void parseNetlinkMessageBuffer(ByteBuffer byteBuffer, long whenMs) {
522 while (byteBuffer.remaining() > 0) {
// Remember where this message starts so it can be dumped if parsing fails.
523 final int position = byteBuffer.position();
524 final NetlinkMessage nlMsg = NetlinkMessage.parse(byteBuffer);
525 if (nlMsg == null || nlMsg.getHeader() == null) {
526 byteBuffer.position(position);
527 Log.e(TAG, "unparsable netlink msg: " + NetlinkConstants.hexify(byteBuffer));
// A non-zero source port id is reported as a message not sent by the kernel.
531 final int srcPortId = nlMsg.getHeader().nlmsg_pid;
532 if (srcPortId != 0) {
// NOTE(review): `& 0xffffffff` is an int mask and therefore a no-op; the cast to
// long sign-extends, so large port ids print as negative. An unsigned rendering
// needs a long mask (0xffffffffL).
533 Log.e(TAG, "non-kernel source portId: " + ((long) (srcPortId & 0xffffffff)));
537 if (nlMsg instanceof NetlinkErrorMessage) {
538 Log.e(TAG, "netlink error: " + nlMsg);
540 } else if (!(nlMsg instanceof RtNetlinkNeighborMessage)) {
542 Log.d(TAG, "non-rtnetlink neighbor msg: " + nlMsg);
547 evaluateRtNetlinkNeighborMessage((RtNetlinkNeighborMessage) nlMsg, whenMs);
// Handles one neighbour message: ignores it unless it concerns our interface
// and a watched address, records the neighbour's new NUD state, and triggers
// the loss evaluation when that state is NUD_FAILED.
// NOTE(review): this method runs to the end of the visible listing, and several
// short lines (early returns, log guards, part of the |value| declaration) are
// not visible.
551 private void evaluateRtNetlinkNeighborMessage(
552 RtNetlinkNeighborMessage neighMsg, long whenMs) {
553 final StructNdMsg ndMsg = neighMsg.getNdHeader();
// Only messages for the monitored interface are of interest.
554 if (ndMsg == null || ndMsg.ndm_ifindex != mInterfaceIndex) {
558 final InetAddress destination = neighMsg.getDestination();
// ...and only for neighbours currently on the watch list.
559 if (!isWatching(destination)) {
563 final short msgType = neighMsg.getHeader().nlmsg_type;
564 final short nudState = ndMsg.ndm_state;
// Human-readable summary, used for logging and passed to handleNeighborLost().
565 final String eventMsg = "NeighborEvent{"
566 + "elapsedMs=" + whenMs + ", "
567 + destination.getHostAddress() + ", "
568 + "[" + NetlinkConstants.hexify(neighMsg.getLinkLayerAddress()) + "], "
569 + NetlinkConstants.stringForNlMsgType(msgType) + ", "
570 + StructNdMsg.stringForNudState(nudState)
574 Log.d(TAG, neighMsg.toString());
576 Log.d(TAG, eventMsg);
// Record the new state. The containsKey() re-check runs under mLock, separately
// from the isWatching() test above.
579 synchronized (mLock) {
580 if (mIpWatchList.containsKey(destination)) {
// A deleted entry (RTM_DELNEIGH) is recorded as NUD_NONE.
582 (msgType == NetlinkConstants.RTM_DELNEIGH)
583 ? StructNdMsg.NUD_NONE
585 mIpWatchList.put(destination, value);
// NUD_FAILED is the state that triggers the loss-of-provisioning evaluation.
589 if (nudState == StructNdMsg.NUD_FAILED) {
590 Log.w(TAG, "ALERT: " + eventMsg);
591 handleNeighborLost(eventMsg);