#include "RouteController.h"
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fib_rules.h>
+#include <net/if.h>
+#include <sys/stat.h>
+
+#include <private/android_filesystem_config.h>
+
+#include <map>
+
#include "Fwmark.h"
#include "UidRanges.h"
+#include "DummyNetwork.h"
+#include "android-base/file.h"
#define LOG_TAG "Netd"
#include "log/log.h"
#include "logwrap/logwrap.h"
+#include "netutils/ifc.h"
#include "resolv_netid.h"
-#include <arpa/inet.h>
-#include <fcntl.h>
-#include <linux/fib_rules.h>
-#include <map>
-#include <net/if.h>
-#include <sys/stat.h>
+using android::base::WriteStringToFile;
namespace {
// BEGIN CONSTANTS --------------------------------------------------------------------------------
const uint32_t RULE_PRIORITY_VPN_OVERRIDE_SYSTEM = 10000;
+const uint32_t RULE_PRIORITY_VPN_OVERRIDE_OIF = 10500;
const uint32_t RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL = 11000;
const uint32_t RULE_PRIORITY_SECURE_VPN = 12000;
+const uint32_t RULE_PRIORITY_PROHIBIT_NON_VPN = 12500;
const uint32_t RULE_PRIORITY_EXPLICIT_NETWORK = 13000;
const uint32_t RULE_PRIORITY_OUTPUT_INTERFACE = 14000;
const uint32_t RULE_PRIORITY_LEGACY_SYSTEM = 15000;
const uint32_t RULE_PRIORITY_TETHERING = 18000;
const uint32_t RULE_PRIORITY_IMPLICIT_NETWORK = 19000;
const uint32_t RULE_PRIORITY_BYPASSABLE_VPN = 20000;
-// const uint32_t RULE_PRIORITY_VPN_FALLTHROUGH = 21000;
+const uint32_t RULE_PRIORITY_VPN_FALLTHROUGH = 21000;
const uint32_t RULE_PRIORITY_DEFAULT_NETWORK = 22000;
const uint32_t RULE_PRIORITY_DIRECTLY_CONNECTED = 23000;
-const uint32_t RULE_PRIORITY_UNREACHABLE = 24000;
+const uint32_t RULE_PRIORITY_UNREACHABLE = 32000;
const uint32_t ROUTE_TABLE_LOCAL_NETWORK = 97;
const uint32_t ROUTE_TABLE_LEGACY_NETWORK = 98;
const char* const IP_VERSIONS[] = {"-4", "-6"};
const uid_t UID_ROOT = 0;
+const char* const IIF_LOOPBACK = "lo";
const char* const IIF_NONE = NULL;
const char* const OIF_NONE = NULL;
const bool ACTION_ADD = true;
const bool MODIFY_NON_UID_BASED_RULES = true;
const char* const RT_TABLES_PATH = "/data/misc/net/rt_tables";
-const int RT_TABLES_FLAGS = O_CREAT | O_TRUNC | O_WRONLY | O_NOFOLLOW | O_CLOEXEC;
const mode_t RT_TABLES_MODE = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; // mode 0644, rw-r--r--
+const unsigned ROUTE_FLUSH_ATTEMPTS = 2;
+
// Avoids "non-constant-expression cannot be narrowed from type 'unsigned int' to 'unsigned short'"
// warnings when using RTA_LENGTH(x) inside static initializers (even when x is already uint16_t).
constexpr uint16_t U16_RTA_LENGTH(uint16_t x) {
addTableName(entry.second, entry.first, &contents);
}
- int fd = open(RT_TABLES_PATH, RT_TABLES_FLAGS, RT_TABLES_MODE);
- if (fd == -1) {
- ALOGE("failed to create %s (%s)", RT_TABLES_PATH, strerror(errno));
+ if (!WriteStringToFile(contents, RT_TABLES_PATH, RT_TABLES_MODE, AID_SYSTEM, AID_WIFI)) {
+ ALOGE("failed to write to %s (%s)", RT_TABLES_PATH, strerror(errno));
return;
}
- // File creation is affected by umask, so make sure the right mode bits are set.
- if (fchmod(fd, RT_TABLES_MODE) == -1) {
- ALOGE("failed to set mode 0%o on %s (%s)", RT_TABLES_MODE, RT_TABLES_PATH, strerror(errno));
- }
- ssize_t bytesWritten = write(fd, contents.data(), contents.size());
- if (bytesWritten != static_cast<ssize_t>(contents.size())) {
- ALOGE("failed to write to %s (%zd vs %zu bytes) (%s)", RT_TABLES_PATH, bytesWritten,
- contents.size(), strerror(errno));
- }
- close(fd);
}
// Sends a netlink request and expects an ack.
// |iov| is an array of struct iovec that contains the netlink message payload.
// The netlink header is generated by this function based on |action| and |flags|.
// Returns -errno if there was an error or if the kernel reported an error.
+
+// Disable optimizations in ASan build.
+// ASan reports an out-of-bounds 32-bit(!) access in the first loop of the
+// function (over iov[]).
+#ifdef __clang__
+#if __has_feature(address_sanitizer)
+__attribute__((optnone))
+#endif
+#endif
WARN_UNUSED_RESULT int sendNetlinkRequest(uint16_t action, uint16_t flags, iovec* iov, int iovlen) {
nlmsghdr nlmsg = {
.nlmsg_type = action,
nlmsgerr err;
} response;
- int sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+ int sock = socket(AF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_ROUTE);
if (sock != -1 &&
connect(sock, reinterpret_cast<const sockaddr*>(&NETLINK_ADDRESS),
sizeof(NETLINK_ADDRESS)) != -1 &&
// Adds or removes a routing rule for IPv4 and IPv6.
//
-// + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the rule
-// returns ENETUNREACH.
+// + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the table is
+// unspecified. An unspecified table is not allowed when creating an FR_ACT_TO_TBL rule.
// + If |mask| is non-zero, the rule matches the specified fwmark and mask. Otherwise, |fwmark| is
// ignored.
// + If |iif| is non-NULL, the rule matches the specified incoming interface.
// range (inclusive). Otherwise, the rule matches packets from all UIDs.
//
// Returns 0 on success or negative errno on failure.
-WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
- uint32_t fwmark, uint32_t mask, const char* iif,
+WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint8_t ruleType,
+ uint32_t table, uint32_t fwmark, uint32_t mask, const char* iif,
const char* oif, uid_t uidStart, uid_t uidEnd) {
// Ensure that if you set a bit in the fwmark, it's not being ignored by the mask.
if (fwmark & ~mask) {
ALOGE("incompatible start and end UIDs (%u vs %u)", uidStart, uidEnd);
return -EUSERS;
}
+
bool isUidRule = (uidStart != INVALID_UID);
// Assemble a rule request and put it in an array of iovec structures.
fib_rule_hdr rule = {
- .action = static_cast<uint8_t>(table != RT_TABLE_UNSPEC ? FR_ACT_TO_TBL :
- FR_ACT_UNREACHABLE),
+ .action = ruleType,
+ // Note that here we're implicitly setting rule.table to 0. When we want to specify a
+ // non-zero table, we do this via the FRATTR_TABLE attribute.
};
+ // Don't ever create a rule that looks up table 0, because table 0 is the local table.
+ // It's OK to specify a table ID of 0 when deleting a rule, because that doesn't actually select
+ // table 0, it's a wildcard that matches anything.
+ if (table == RT_TABLE_UNSPEC && rule.action == FR_ACT_TO_TBL && action != RTM_DELRULE) {
+ ALOGE("RT_TABLE_UNSPEC only allowed when deleting rules");
+ return -ENOTUNIQ;
+ }
+
rtattr fraIifName = { U16_RTA_LENGTH(iifLength), FRA_IIFNAME };
rtattr fraOifName = { U16_RTA_LENGTH(oifLength), FRA_OIFNAME };
}
WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
+                                    uint32_t fwmark, uint32_t mask, const char* iif,
+                                    const char* oif, uid_t uidStart, uid_t uidEnd) {
+    // Convenience overload: every selector is forwarded unchanged and the rule type is fixed to
+    // FR_ACT_TO_TBL. Returns whatever the full overload returns.
+    const uint8_t ruleType = FR_ACT_TO_TBL;
+    return modifyIpRule(action, priority, ruleType, table, fwmark, mask, iif, oif, uidStart,
+                        uidEnd);
+}
+
+WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
uint32_t fwmark, uint32_t mask) {
return modifyIpRule(action, priority, table, fwmark, mask, IIF_NONE, OIF_NONE, INVALID_UID,
INVALID_UID);
return -ENOBUFS; // Cannot happen; parsePrefix only supports IPv4 and IPv6.
}
- // If an interface was specified, find the ifindex.
+ uint8_t type = RTN_UNICAST;
uint32_t ifindex;
- if (interface != OIF_NONE) {
- ifindex = if_nametoindex(interface);
- if (!ifindex) {
- ALOGE("cannot find interface %s", interface);
- return -ENODEV;
+ uint8_t rawNexthop[sizeof(in6_addr)];
+
+ if (nexthop && !strcmp(nexthop, "unreachable")) {
+ type = RTN_UNREACHABLE;
+ // 'interface' is likely non-NULL, as the caller (modifyRoute()) likely used it to lookup
+ // the table number. But it's an error to specify an interface ("dev ...") or a nexthop for
+ // unreachable routes, so nuke them. (IPv6 allows them to be specified; IPv4 doesn't.)
+ interface = OIF_NONE;
+ nexthop = NULL;
+ } else if (nexthop && !strcmp(nexthop, "throw")) {
+ type = RTN_THROW;
+ interface = OIF_NONE;
+ nexthop = NULL;
+ } else {
+ // If an interface was specified, find the ifindex.
+ if (interface != OIF_NONE) {
+ ifindex = if_nametoindex(interface);
+ if (!ifindex) {
+ ALOGE("cannot find interface %s", interface);
+ return -ENODEV;
+ }
}
- }
- // If a nexthop was specified, parse it as the same family as the prefix.
- uint8_t rawNexthop[sizeof(in6_addr)];
- if (nexthop && inet_pton(family, nexthop, rawNexthop) <= 0) {
- ALOGE("inet_pton failed for nexthop %s", nexthop);
- return -EINVAL;
+ // If a nexthop was specified, parse it as the same family as the prefix.
+ if (nexthop && inet_pton(family, nexthop, rawNexthop) <= 0) {
+ ALOGE("inet_pton failed for nexthop %s", nexthop);
+ return -EINVAL;
+ }
}
// Assemble a rtmsg and put it in an array of iovec structures.
rtmsg route = {
.rtm_protocol = RTPROT_STATIC,
- .rtm_type = RTN_UNICAST,
+ .rtm_type = type,
.rtm_family = family,
.rtm_dst_len = prefixLength,
+ .rtm_scope = static_cast<uint8_t>(nexthop ? RT_SCOPE_UNIVERSE : RT_SCOPE_LINK),
};
rtattr rtaDst = { U16_RTA_LENGTH(rawLength), RTA_DST };
}
return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
- mask.intValue, IIF_NONE, OIF_NONE, uidStart, uidEnd);
+ mask.intValue, IIF_LOOPBACK, OIF_NONE, uidStart, uidEnd);
}
// A rule to allow system apps to send traffic over this VPN even if they are not part of the target
//
// Supports apps that use SO_BINDTODEVICE or IP_PKTINFO options and the kernel that already knows
// the outgoing interface (typically for link-local communications).
-WARN_UNUSED_RESULT int modifyOutputInterfaceRule(const char* interface, uint32_t table,
- Permission permission, uid_t uidStart,
- uid_t uidEnd, bool add) {
+WARN_UNUSED_RESULT int modifyOutputInterfaceRules(const char* interface, uint32_t table,
+ Permission permission, uid_t uidStart,
+ uid_t uidEnd, bool add) {
Fwmark fwmark;
Fwmark mask;
fwmark.permission = permission;
mask.permission = permission;
+ // If this rule does not specify a UID range, then also add a corresponding high-priority rule
+ // for root (UID 0). This covers forwarded packets and system daemons such as the tethering
+ // DHCP server.
+ if (uidStart == INVALID_UID && uidEnd == INVALID_UID) {
+ if (int ret = modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_VPN_OVERRIDE_OIF,
+ table, fwmark.intValue, mask.intValue, IIF_NONE, interface,
+ UID_ROOT, UID_ROOT)) {
+ return ret;
+ }
+ }
+
return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_OUTPUT_INTERFACE, table,
fwmark.intValue, mask.intValue, IIF_NONE, interface, uidStart, uidEnd);
}
fwmark.intValue, mask.intValue);
}
+// Adds or removes (according to |action|) the rule that enables split-tunnel VPNs.
+//
+// A packet carrying the VPN's netId that finds no route in the VPN's own routing table may go over
+// the default network instead, provided it was not explicitly restricted to the VPN and it has
+// the permissions required by the default network.
+//
+// The rule points at the routing table looked up for |physicalInterface|. Returns -ESRCH when no
+// such table is known; otherwise returns the result of modifyIpRule().
+WARN_UNUSED_RESULT int modifyVpnFallthroughRule(uint16_t action, unsigned vpnNetId,
+                                                const char* physicalInterface,
+                                                Permission permission) {
+    const uint32_t physicalTable = getRouteTableForInterface(physicalInterface);
+    if (physicalTable == RT_TABLE_UNSPEC) {
+        return -ESRCH;
+    }
+
+    // Value to match: this VPN's netId, not explicitly selected, |permission| bits set.
+    Fwmark wanted;
+    wanted.netId = vpnNetId;
+    wanted.explicitlySelected = false;
+    wanted.permission = permission;
+
+    // Bits that take part in the comparison.
+    Fwmark compared;
+    compared.netId = FWMARK_NET_ID_MASK;
+    compared.explicitlySelected = true;
+    compared.permission = permission;
+
+    return modifyIpRule(action, RULE_PRIORITY_VPN_FALLTHROUGH, physicalTable, wanted.intValue,
+                        compared.intValue);
+}
+
+
// Add rules to allow legacy routes added through the requestRouteToHost() API.
WARN_UNUSED_RESULT int addLegacyRouteRules() {
Fwmark fwmark;
fwmark.intValue, mask.intValue);
}
+// Brings up the dummy interface (DummyNetwork::INTERFACE_NAME), then installs its
+// output-interface rules and an IPv4 and an IPv6 default route in its routing table.
+//
+// Returns 0 on success or a negative errno value on the first step that fails; later steps are
+// not attempted. Not marked WARN_UNUSED_RESULT, so callers may ignore the result.
+int configureDummyNetwork() {
+    const char* interface = DummyNetwork::INTERFACE_NAME;
+    uint32_t table = getRouteTableForInterface(interface);
+    if (table == RT_TABLE_UNSPEC) {
+        // getRouteTableForInterface has already logged an error.
+        return -ESRCH;
+    }
+
+    ifc_init();
+    int ret = ifc_up(interface);
+    // Capture errno right away: both ifc_close() and the logging call below may overwrite it,
+    // which would make us log and return an error unrelated to the ifc_up() failure.
+    const int upErrno = errno;
+    ifc_close();
+    if (ret) {
+        ALOGE("Can't bring up %s: %s", interface, strerror(upErrno));
+        return -upErrno;
+    }
+
+    if ((ret = modifyOutputInterfaceRules(interface, table, PERMISSION_NONE,
+                                          INVALID_UID, INVALID_UID, ACTION_ADD))) {
+        ALOGE("Can't create oif rules for %s: %s", interface, strerror(-ret));
+        return ret;
+    }
+
+    if ((ret = modifyIpRoute(RTM_NEWROUTE, table, interface, "0.0.0.0/0", NULL))) {
+        ALOGE("Can't add IPv4 default route to %s: %s", interface, strerror(-ret));
+        return ret;
+    }
+
+    if ((ret = modifyIpRoute(RTM_NEWROUTE, table, interface, "::/0", NULL))) {
+        ALOGE("Can't add IPv6 default route to %s: %s", interface, strerror(-ret));
+        return ret;
+    }
+
+    return 0;
+}
+
// Add a new rule to look up the 'main' table, with the same selectors as the "default network"
-// rule, but with a lower priority. Since the default network rule points to a table with a default
-// route, the rule we're adding will never be used for normal routing lookups. However, the kernel
-// may fall-through to it to find directly-connected routes when it validates that a nexthop (in a
-// route being added) is reachable.
+// rule, but with a lower priority. We will never create routes in the main table; it should only be
+// used for directly-connected routes implicitly created by the kernel when adding IP addresses.
+// This is necessary, for example, when adding a route through a directly-connected gateway: in
+// order to add the route, there must already be a directly-connected route that covers the gateway.
WARN_UNUSED_RESULT int addDirectlyConnectedRule() {
Fwmark fwmark;
Fwmark mask;
fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, UID_ROOT, UID_ROOT);
}
-// Add a rule to preempt the pre-defined "from all lookup main" rule. Packets that reach this rule
-// will be null-routed, and won't fall-through to the main table.
+// Add an explicit unreachable rule close to the end of the priority list to make it clear that
+// relying on the kernel-default "from all lookup main" rule at priority 32766 is not intended
+// behaviour. We do flush the kernel-default rules at startup, but having an explicit unreachable
+// rule will hopefully make things even clearer.
WARN_UNUSED_RESULT int addUnreachableRule() {
return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_UNREACHABLE, RT_TABLE_UNSPEC, MARK_UNSET,
MARK_UNSET);
if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
return ret;
}
- return modifyOutputInterfaceRule(interface, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
- INVALID_UID, INVALID_UID, add);
+ return modifyOutputInterfaceRules(interface, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
+ INVALID_UID, INVALID_UID, add);
}
WARN_UNUSED_RESULT int modifyPhysicalNetwork(unsigned netId, const char* interface,
add)) {
return ret;
}
- if (int ret = modifyOutputInterfaceRule(interface, table, permission, INVALID_UID, INVALID_UID,
+ if (int ret = modifyOutputInterfaceRules(interface, table, permission, INVALID_UID, INVALID_UID,
add)) {
return ret;
}
return modifyImplicitNetworkRule(netId, table, permission, add);
}
+// Adds (|add| true) or removes (|add| false) one FR_ACT_PROHIBIT rule per range in |uidRanges|, at
+// priority RULE_PRIORITY_PROHIBIT_NON_VPN. Each rule matches packets whose incoming interface is
+// IIF_LOOPBACK, whose UID lies in the range, and whose fwmark has protectedFromVpn clear.
+//
+// Stops at the first range that fails, leaving earlier ranges already modified. Returns 0 on
+// success or the negative errno reported by modifyIpRule().
+WARN_UNUSED_RESULT int modifyRejectNonSecureNetworkRule(const UidRanges& uidRanges, bool add) {
+    const uint16_t action = add ? RTM_NEWRULE : RTM_DELRULE;
+
+    // Compare on the protectedFromVpn bit, which must be unset. (Presumably Fwmark
+    // default-constructs to all zeroes so no other bit participates; confirm in Fwmark.h.)
+    Fwmark wanted;
+    wanted.protectedFromVpn = false;
+    Fwmark compared;
+    compared.protectedFromVpn = true;
+
+    for (const UidRanges::Range& uids : uidRanges.getRanges()) {
+        const int status = modifyIpRule(action, RULE_PRIORITY_PROHIBIT_NON_VPN, FR_ACT_PROHIBIT,
+                                        RT_TABLE_UNSPEC, wanted.intValue, compared.intValue,
+                                        IIF_LOOPBACK, OIF_NONE, uids.first, uids.second);
+        if (status != 0) {
+            return status;
+        }
+    }
+
+    return 0;
+}
+
WARN_UNUSED_RESULT int modifyVirtualNetwork(unsigned netId, const char* interface,
const UidRanges& uidRanges, bool secure, bool add,
bool modifyNonUidBasedRules) {
range.second, add)) {
return ret;
}
- if (int ret = modifyOutputInterfaceRule(interface, table, PERMISSION_NONE, range.first,
- range.second, add)) {
+ if (int ret = modifyOutputInterfaceRules(interface, table, PERMISSION_NONE, range.first,
+ range.second, add)) {
return ret;
}
}
}
int ret = modifyIpRoute(action, table, interface, destination, nexthop);
- // We allow apps to call requestRouteToHost() multiple times with the same route, so ignore
- // EEXIST failures when adding routes to legacy tables.
- if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST &&
- (tableType == RouteController::LEGACY_NETWORK ||
- tableType == RouteController::LEGACY_SYSTEM))) {
+ // Trying to add a route that already exists shouldn't cause an error.
+ if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST)) {
return ret;
}
- // If there's no nexthop, this is a directly connected route. Add it to the main table also, to
- // let the kernel find it when validating nexthops when global routes are added.
- if (!nexthop) {
- ret = modifyIpRoute(action, RT_TABLE_MAIN, interface, destination, NULL);
- // A failure with action == ADD && errno == EEXIST means that the route already exists in
- // the main table, perhaps because the kernel added it automatically as part of adding the
- // IP address to the interface. Ignore this, but complain about everything else.
- if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST)) {
- return ret;
- }
- }
-
return 0;
}
char tableString[UINT32_STRLEN];
snprintf(tableString, sizeof(tableString), "%u", table);
+ int ret = 0;
for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
const char* argv[] = {
IP_PATH,
"table",
tableString,
};
- if (android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv), NULL, false, false)) {
- ALOGE("failed to flush routes");
- return -EREMOTEIO;
+
+ // A flush works by dumping routes and deleting each route as it's returned, and it can
+ // fail if something else deletes the route between the dump and the delete. This can
+ // happen, for example, if an interface goes down while we're trying to flush its routes.
+ // So try multiple times and only return an error if the last attempt fails.
+ //
+ // TODO: replace this with our own netlink code.
+ unsigned attempts = 0;
+ int err;
+ do {
+ err = android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv),
+ NULL, false, false);
+ ++attempts;
+ } while (err != 0 && attempts < ROUTE_FLUSH_ATTEMPTS);
+ if (err) {
+ ALOGE("failed to flush %s routes in table %s after %d attempts",
+ IP_VERSIONS[i], tableString, attempts);
+ ret = -EREMOTEIO;
}
}
- interfaceToTable.erase(interface);
- return 0;
+ // If we failed to flush routes, the caller may elect to keep this interface around, so keep
+ // track of its name.
+ if (!ret) {
+ interfaceToTable.erase(interface);
+ }
+
+ return ret;
+}
+
+// Deletes rules at RULE_PRIORITY_TETHERING whose incoming interface is |inputInterface|, one per
+// request, until a delete request fails.
+//
+// The table is left as RT_TABLE_UNSPEC, which acts as a wildcard when deleting a rule. Returns 0
+// once a delete fails with -ENOENT (presumably meaning no matching rule is left), or the
+// negative errno of any other failure.
+WARN_UNUSED_RESULT int clearTetheringRules(const char* inputInterface) {
+    for (;;) {
+        const int status = modifyIpRule(RTM_DELRULE, RULE_PRIORITY_TETHERING, RT_TABLE_UNSPEC,
+                                        MARK_UNSET, MARK_UNSET, inputInterface, OIF_NONE,
+                                        INVALID_UID, INVALID_UID);
+        if (status != 0) {
+            return (status == -ENOENT) ? 0 : status;
+        }
+    }
}
} // namespace
if (int ret = addUnreachableRule()) {
return ret;
}
+ // Don't complain if we can't add the dummy network, since not all devices support it.
+ configureDummyNetwork();
+
updateTableNamesFile();
return 0;
}
if (int ret = flushRoutes(interface)) {
return ret;
}
+ if (int ret = clearTetheringRules(interface)) {
+ return ret;
+ }
updateTableNamesFile();
return 0;
}
return modifyPhysicalNetwork(netId, interface, oldPermission, ACTION_DEL);
}
+// Adds the reject-non-secure-network rule for every UID range in |uidRanges|.
+// Returns the result of modifyRejectNonSecureNetworkRule().
+int RouteController::addUsersToRejectNonSecureNetworkRule(const UidRanges& uidRanges) {
+    return modifyRejectNonSecureNetworkRule(uidRanges, ACTION_ADD);
+}
+
+// Removes the reject-non-secure-network rule for every UID range in |uidRanges|.
+// Returns the result of modifyRejectNonSecureNetworkRule().
+int RouteController::removeUsersFromRejectNonSecureNetworkRule(const UidRanges& uidRanges) {
+    const bool add = false;
+    return modifyRejectNonSecureNetworkRule(uidRanges, add);
+}
+
int RouteController::addUsersToVirtualNetwork(unsigned netId, const char* interface, bool secure,
const UidRanges& uidRanges) {
return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
int RouteController::disableTethering(const char* inputInterface, const char* outputInterface) {
return modifyTetheredNetwork(RTM_DELRULE, inputInterface, outputInterface);
}
+
+// Adds (RTM_NEWRULE) the VPN fallthrough rule for |vpnNetId| towards |physicalInterface|.
+// Returns the result of modifyVpnFallthroughRule().
+int RouteController::addVirtualNetworkFallthrough(unsigned vpnNetId, const char* physicalInterface,
+                                                  Permission permission) {
+    const uint16_t action = RTM_NEWRULE;
+    return modifyVpnFallthroughRule(action, vpnNetId, physicalInterface, permission);
+}
+
+// Removes (RTM_DELRULE) the VPN fallthrough rule for |vpnNetId| towards |physicalInterface|.
+// Returns the result of modifyVpnFallthroughRule().
+int RouteController::removeVirtualNetworkFallthrough(unsigned vpnNetId,
+                                                     const char* physicalInterface,
+                                                     Permission permission) {
+    const uint16_t action = RTM_DELRULE;
+    return modifyVpnFallthroughRule(action, vpnNetId, physicalInterface, permission);
+}