5
0
mirror of https://github.com/cwinfo/yggdrasil-go.git synced 2025-01-22 08:03:18 +00:00

Merge pull request #192 from yggdrasil-network/develop

Version 0.2.7
This commit is contained in:
Arceliar 2018-10-13 13:41:43 -05:00 committed by GitHub
commit b087e955fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 592 additions and 155 deletions

View File

@ -5,7 +5,7 @@ version: 2
jobs:
build:
docker:
- image: circleci/golang:1.9
- image: circleci/golang:1.11
working_directory: /go/src/github.com/{{ORG_NAME}}/{{REPO_NAME}}
@ -24,9 +24,16 @@ jobs:
command: |
sudo apt-get install -y alien
- run:
name: Test debug builds
command: |
./build -d
test -f yggdrasil && test -f yggdrasilctl
- run:
name: Build for Linux (including Debian packages and RPMs)
command: |
rm -f {yggdrasil,yggdrasilctl}
PKGARCH=amd64 sh contrib/deb/generate.sh && mv yggdrasil /tmp/upload/$CINAME-$CIVERSION-linux-amd64 && mv yggdrasilctl /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-linux-amd64;
PKGARCH=i386 sh contrib/deb/generate.sh && mv yggdrasil /tmp/upload/$CINAME-$CIVERSION-linux-i386 && mv yggdrasilctl /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-linux-i386;
PKGARCH=mipsel sh contrib/deb/generate.sh && mv yggdrasil /tmp/upload/$CINAME-$CIVERSION-linux-mipsel && mv yggdrasilctl /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-linux-mipsel;
@ -39,36 +46,42 @@ jobs:
- run:
name: Build for macOS
command: |
rm -f {yggdrasil,yggdrasilctl}
GOOS=darwin GOARCH=amd64 ./build && mv yggdrasil /tmp/upload/$CINAME-$CIVERSION-darwin-amd64 && mv yggdrasilctl /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-darwin-amd64;
GOOS=darwin GOARCH=386 ./build && mv yggdrasil /tmp/upload/$CINAME-$CIVERSION-darwin-i386 && mv yggdrasilctl /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-darwin-i386;
- run:
name: Build for OpenBSD
command: |
rm -f {yggdrasil,yggdrasilctl}
GOOS=openbsd GOARCH=amd64 ./build && mv yggdrasil /tmp/upload/$CINAME-$CIVERSION-openbsd-amd64 && mv yggdrasilctl /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-openbsd-amd64;
GOOS=openbsd GOARCH=386 ./build && mv yggdrasil /tmp/upload/$CINAME-$CIVERSION-openbsd-i386 && mv yggdrasilctl /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-openbsd-i386;
- run:
name: Build for FreeBSD
command: |
rm -f {yggdrasil,yggdrasilctl}
GOOS=freebsd GOARCH=amd64 ./build && mv yggdrasil /tmp/upload/$CINAME-$CIVERSION-freebsd-amd64 && mv yggdrasilctl /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-freebsd-amd64;
GOOS=freebsd GOARCH=386 ./build && mv yggdrasil /tmp/upload/$CINAME-$CIVERSION-freebsd-i386 && mv yggdrasilctl /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-freebsd-i386;
- run:
name: Build for NetBSD
command: |
rm -f {yggdrasil,yggdrasilctl}
GOOS=netbsd GOARCH=amd64 ./build && mv yggdrasil /tmp/upload/$CINAME-$CIVERSION-netbsd-amd64 && mv yggdrasilctl /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-netbsd-amd64;
GOOS=netbsd GOARCH=386 ./build && mv yggdrasil /tmp/upload/$CINAME-$CIVERSION-netbsd-i386 && mv yggdrasilctl /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-netbsd-i386;
- run:
name: Build for Windows
command: |
rm -f {yggdrasil,yggdrasilctl}
GOOS=windows GOARCH=amd64 ./build && mv yggdrasil.exe /tmp/upload/$CINAME-$CIVERSION-windows-amd64.exe && mv yggdrasilctl.exe /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-windows-amd64.exe;
GOOS=windows GOARCH=386 ./build && mv yggdrasil.exe /tmp/upload/$CINAME-$CIVERSION-windows-i386.exe && mv yggdrasilctl.exe /tmp/upload/$CINAME-$CIVERSION-yggdrasilctl-windows-i386.exe;
- run:
name: Build for EdgeRouter
command: |
rm -f {yggdrasil,yggdrasilctl}
git clone https://github.com/neilalexander/vyatta-yggdrasil /tmp/vyatta-yggdrasil;
cd /tmp/vyatta-yggdrasil;
BUILDDIR_YGG=$CIRCLE_WORKING_DIRECTORY ./build-edgerouter-x $CIRCLE_BRANCH;

View File

@ -25,6 +25,22 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- in case of vulnerabilities.
-->
## [0.2.7] - 2018-10-13
### Added
- Session firewall, which makes it possible to control who can open sessions with your node
- Add `getSwitchQueues` to admin socket
- Add `InterfacePeers` for configuring static peerings via specific network interfaces
- More output shown in `getSwitchPeers`
- FreeBSD service script in `contrib`
## Changed
- CircleCI builds are now built with Go 1.11 instead of Go 1.9
## Fixed
- Race condition in the switch table, reported by trn
- Debug builds are now tested by CircleCI as well as platform release builds
- Port number fixed on admin graph from unknown nodes
## [0.2.6] - 2018-07-31
### Added
- Configurable TCP timeouts to assist in peering over Tor/I2P

72
contrib/freebsd/yggdrasil Normal file
View File

@ -0,0 +1,72 @@
#!/bin/sh
#
# Put the yggdrasil and yggdrasilctl binaries into /usr/local/bin
# Then copy this script into /etc/rc.d/yggdrasil
# Finally, run:
# 1. chmod +x /etc/rc.d/yggdrasil /usr/local/bin/{yggdrasil,yggdrasilctl}
# 2. echo "yggdrasil_enable=yes" >> /etc/rc.d
# 3. service yggdrasil start
#
# PROVIDE: yggdrasil
# REQUIRE: networking
# KEYWORD:
. /etc/rc.subr
name="yggdrasil"
rcvar="yggdrasil_enable"
start_cmd="${name}_start"
stop_cmd="${name}_stop"
pidfile="/var/run/yggdrasil/${name}.pid"
command="/usr/sbin/daemon"
command_args="-P ${pidfile} -r -f ${yggdrasil_command}"
yggdrasil_start()
{
test ! -x /usr/local/bin/yggdrasil && (
logger -s -t yggdrasil "Warning: /usr/local/bin/yggdrasil is missing or not executable"
logger -s -t yggdrasil "Copy the yggdrasil binary into /usr/local/bin and then chmod +x /usr/local/bin/yggdrasil"
return 1
)
test ! -f /etc/yggdrasil.conf && (
logger -s -t yggdrasil "Generating new configuration file into /etc/yggdrasil.conf"
/usr/local/bin/yggdrasil -genconf > /etc/yggdrasil.conf
)
tap_path="$(cat /etc/yggdrasil.conf | egrep -o '/dev/tap[0-9]{1,2}$')"
tap_name="$(echo -n ${tap_path} | tr -d '/dev/')"
/sbin/ifconfig ${tap_name} >/dev/null 2>&1 || (
logger -s -t yggdrasil "Creating ${tap_name} adapter"
/sbin/ifconfig ${tap_name} create || logger -s -t yggdrasil "Failed to create ${tap_name} adapter"
)
test ! -d /var/run/yggdrasil && mkdir -p /var/run/yggdrasil
logger -s -t yggdrasil "Starting yggdrasil"
${command} ${command_args} /usr/local/bin/yggdrasil -useconffile /etc/yggdrasil.conf \
1>/var/log/yggdrasil.stdout.log \
2>/var/log/yggdrasil.stderr.log &
}
yggdrasil_stop()
{
logger -s -t yggdrasil "Stopping yggdrasil"
test -f /var/run/yggdrasil/${name}.pid && kill -TERM $(cat /var/run/yggdrasil/${name}.pid)
tap_path="$(cat /etc/yggdrasil.conf | grep /dev/tap | egrep -o '/dev/.*$')"
tap_name="$(echo -n ${tap_path} | tr -d '/dev/')"
/sbin/ifconfig ${tap_name} >/dev/null 2>&1 && (
logger -s -t yggdrasil "Destroying ${tap_name} adapter"
/sbin/ifconfig ${tap_name} destroy || logger -s -t yggdrasil "Failed to destroy ${tap_name} adapter"
)
}
load_rc_config $name
: ${yggdrasil_enable:=no}
run_rc_command "$1"

View File

@ -2,133 +2,145 @@
Note: This is a very rough early draft.
Yggdrasil is a routing protocol designed for scalable name-independent routing on internet-like graphs.
The design is built around a name-dependent routing scheme which uses distance on a spanning tree as a metric for greedy routing, and a kademlia-like distributed hash table to facilitate lookups of metric space routing information from static cryptographically generated identifiers.
This approach can find routes on any network, as it reduces to spanning tree routing in the worst case, but is observed to be particularly efficient on internet-like graphs.
In an effort to mitigate many forms of attacks, the routing scheme only uses information which is either cryptographically verifiable or based on observed local network state.
The implementation is distributed and runs on dynamic graphs, though this implementation may not converge quickly enough to be practical on networks with high node mobility.
This document attempts to give a rough overview of how some of the key parts of the protocol are implemented, as well as an explanation of why a few subtle points are handled the way they are.
Yggdrasil is an encrypted IPv6 network running in the [`200::/7` address range](https://en.wikipedia.org/wiki/Unique_local_address).
It is an experimental/toy network, so failure is acceptable, as long as it's instructive to see how it breaks if/when everything falls apart.
IP addresses are derived from cryptographic keys, to reduce the need for public key infrastructure.
A form of locator/identifier separation (similar in goal to [LISP](https://en.wikipedia.org/wiki/Locator/Identifier_Separation_Protocol)) is used to map static identifiers (IP addresses) onto dynamic routing information (locators), using a [distributed hash table](https://en.wikipedia.org/wiki/Distributed_hash_table) (DHT).
Locators are used to approximate the distance between nodes in the network, where the approximate distance is the length of a real worst-case-scenario path through the network.
This is (arguably) easier to secure and requires less information about the network than commonly used routing schemes.
While not technically a [compact routing scheme](https://arxiv.org/abs/0708.2309), tests on real-world networks suggest that routing in this style incurs stretch comparable to the name-dependent compact routing schemes designed for static networks.
Compared to compact routing schemes, Yggdrasil appears to have smaller average routing table sizes, works on dynamic networks, and is name-independent.
It currently lacks the provable bounds of compact routing schemes, and there's a serious argument to be made that it cheats by stretching the definition of some of the above terms, but the main point to be emphasized is that there are trade-offs between different concerns when trying to route traffic, and we'd rather make every part *good* than try to make any one part *perfect*.
In that sense, Yggdrasil seems to be competitive, on what are supposedly realistic networks, with compact routing schemes.
## Addressing
Addresses in Yggdrasil are derived from a truncated version of a `NodeID`.
The `NodeID` itself is a sha512sum of a node's permanent public Curve25519 key.
Each node's IPv6 address is then assigned from the lower half of the `fd00::/8` prefix using the following approach:
Yggdrasil uses a truncated version of a `NodeID` to assign addresses.
An address is assigned from the `200::/7` prefix, according to the following:
1. Begin with `0xfd` as the first byte of the address.
1. Begin with `0x02` as the first byte of the address, or `0x03` if it's a `/64` prefix.
2. Count the number of leading `1` bits in the NodeID.
3. Set the second byte of the address to the number of leading `1` bits, subject to the constraint that this is still in the lower half of the address range (it is unlikely that a node will have 128 or more leading `1` bits in a sha512sum hash, for the foreseeable future).
3. Set the second byte of the address to the number of leading `1` bits in the NodeID (8 bit unsigned integer, at most 255).
4. Append the NodeID to the remaining bits of the address, truncating the leading `1` bits and the first `0` bit, to a total address size of 128 bits.
The last bit of the first byte is used to flag if an address is for a router (`200::/8`), or part of an advertised prefix (`300::/8`), where each router owns a `/64` that matches their address (except with the eight bit set to 1 instead of 0).
This allows the prefix to be advertised to the router's LAN, so unsupported devices can still connect to the network (e.g. network printers).
The NodeID is a [sha512sum](https://en.wikipedia.org/wiki/SHA-512) of a node's public encryption key.
Addresses are checked that they match NodeID, to prevent address spoofing.
As such, while a 128 bit IPv6 address is likely too short to be considered secure by cryptographer standards, there is a significant cost in attempting to cause an address collision.
Addresses can be made more secure by brute force generating a large number of leading `1` bits in the NodeID.
When connecting to a node, the IP address is unpacked into the known bits of the NodeID and a matching bitmask to track which bits are significant.
A node is only communicated with if its `NodeID` matches its public key and the known `NodeID` bits from the address.
It is important to note that only `NodeID` is used internally for routing, so the addressing scheme could in theory be changed without breaking compatibility with intermediate routers.
This may become useful if the IPv6 address range ever needs to be changed, or if a new addressing format that allows for more significant bits is ever implemented by the OS.
This has been done once, when moving the address range from the `fd00::/8` ULA range to the reserved-but-[deprecated](https://tools.ietf.org/html/rfc4048) `200::/7` range.
Further addressing scheme changes could occur if, for example, an IPv7 format ever emerges.
### Cryptography
Public key encryption is done using the `golang.org/x/crypto/nacl/box`, which uses Curve25519, XSalsa20, and Poly1305 for key exchange, encryption, and authentication.
Public key encryption is done using the `golang.org/x/crypto/nacl/box`, which uses [Curve25519](https://en.wikipedia.org/wiki/Curve25519), [XSalsa20](https://en.wikipedia.org/wiki/Salsa20), and [Poly1305](https://en.wikipedia.org/wiki/Poly1305) for key exchange, encryption, and authentication (interoperable with [NaCl](https://en.wikipedia.org/wiki/NaCl_(software))).
Permanent keys are used only for protocol traffic, with random nonces generated on a per-packet basis using `crypto/rand` from Go's standard library.
Ephemeral session keys are generated for encapsulated IPv6 traffic, using the same set of primitives, with random initial nonces that are subsequently incremented.
Ephemeral session keys (for [forward secrecy](https://en.wikipedia.org/wiki/Forward_secrecy)) are generated for encapsulated IPv6 traffic, using the same set of primitives, with random initial nonces that are subsequently incremented.
A list of recently received session nonces is kept (as a bitmask) and checked to reject duplicated packets, in an effort to block duplicate packets and replay attacks.
A separate private key is generated and used for signing with Ed25519, which is used by the name-dependent routing layer to secure construction of the spanning tree, with a TreeID hash of a node's public Ed key being used to select the highest TreeID as the root of the tree.
A separate set of keys are generated and used for signing with [Ed25519](https://en.wikipedia.org/wiki/Ed25519), which is used by the routing layer to secure construction of a spanning tree.
### Prefixes
Recall that each node's address is in the lower half of the address range, I.e. `fd00::/9`. A `/64` prefix is made available to each node under `fd80::/9`, where the remaining bits of the prefix match the node's address under `fd00::/9`.
Recall that each node's address is in the lower half of the address range, I.e. `200::/8`. A `/64` prefix is made available to each node under `300::/8`, where the remaining bits of the prefix match the node's address under `200::/8`.
A node may optionally advertise a prefix on their local area network, which allows unsupported or legacy devices with IPv6 support to connect to the network.
Note that there are 64 fewer bits of `NodeID` available to check in each address from a routing prefix, so it makes sense to brute force a `NodeID` with more significant bits in the address if this approach is to be used.
Running `genkeys.go` will do this by default.
## Name-independent routing
## Locators and Routing
A distributed hash table is used to facilitate the lookup of a node's name-dependent routing `coords` from a `NodeID`.
A kademlia-like peer structure and xor metric are used in the DHT layout, but only peering info is used--there is no key:value store.
In contrast with standard kademlia, instead of using iterative parallel lookups, a recursive lookup strategy is used.
This is an intentional design decision to make the DHT more fragile--the intent is for DHT inconsistencies to lead to lookup failures, because of concerns that the iterative parallel approach may hide DHT bugs.
Locators are generated using information from a spanning tree (described below).
The result is that each node has a set of [coordinates in a greedy metric space](https://en.wikipedia.org/wiki/Greedy_embedding).
These coordinates are used as a distance label.
Given the coordinates of any two nodes, it is possible to calculate the length of some real path through the network between the two nodes.
In particular, the DHT is bootstrapped off of a node's one-hop neighbors, and I've observed that this causes a standard kademlia implementation to diverge in the general case.
To get around this, buckets are updated more aggressively, and the least recently pinged node from each bucket is flushed to make room for new nodes as soon as a response is heard from them.
This appears to fix the bootstrapping issues on all networks where they had been observed in testing, but recursive lookups are kept for the time being to continue monitoring the issue.
However, recursive lookups require fewer round trips, so they are expected to be lower latency.
As such, even if a switch to iterative parallel lookups was made, the recursive lookup functionality may be kept and used optimistically to minimize handshake time in stable networks.
Traffic is forwarded using a [greedy routing](https://en.wikipedia.org/wiki/Small-world_routing#Greedy_routing) scheme, where each node forwards the packet to a one-hop neighbor that is closer to the destination (according to this distance metric) than the current node.
In particular, when a packet needs to be forward, a node will forward it to whatever peer is closest to the destination in the greedy [metric space](https://en.wikipedia.org/wiki/Metric_space) used by the network, provided that the peer is closer to the destination than the current node.
Other than these differences, the DHT is more-or-less what you might expect from a kad implementation.
If no closer peers are idle, then the packet is queued in FIFO order, with separate queues per destination coords (currently, as a bit of a hack, IPv6 flow labels are embedeed after the end of the significant part of the coords, so queues distinguish between different traffic streams with the same destination).
Whenever the node finishes forwarding a packet to a peer, it checks the queues, and will forward the first packet from the queue with the maximum `<age of first packet>/<queue size in bytes>`, i.e. the bandwidth the queue is attempting to use, subject to the constraint that the peer is a valid next hop (i.e. closer to the destination than the current node).
If no non-empty queue is available, then the peer is added to the idle set, forward packets when the need arises.
## Name-dependent routing
This acts as a crude approximation of backpressure routing, where the remote queue sizes are assumed to be equal to the distance of a node from a destination (rather than communicating queue size information), and packets are never forwarded "backwards" through the network, but congestion on a local link is routed around when possible.
The queue selection strategy behaves similar to shortest-queue-first, in that a larger fration of available bandwith to sessions that attempt to use less bandwidth, and is loosely based on the rationale behind some proposed solutions to the [cake-cutting](https://en.wikipedia.org/wiki/Fair_cake-cutting) problem.
A spanning tree is constructed and used for name-dependent routing.
The basic idea is to use the distance between nodes *on the tree* as a distance metric, and then perform greedy routing in that metric space.
As the tree is constructed from a subset of the real links in the network, this distance metric (unlike the DHT's xor metric) has some relationship with the underlying physical network.
In the worst case, greedy routing with this metric reduces to routing on the spanning tree, which should be comparable to ethernet.
However, greedy routing can use any link, provided that the node on the other end of the link is closer to the destination, so this allows the use of off-tree shortcuts, with the possibility and effectiveness of this being topology dependent.
The main assumption that Yggdrasil's performance hinges on, is that this distance metric is close to real network distance, on average, in realistic networks.
The name dependent scheme is implemented in roughly the following way:
1. Each node generates a set of Ed25519 keys for signing routing messages, with a `TreeID` defined as the sha512sum of a node's public signing key.
2. If a node doesn't know a better (higher `TreeID`) root for the tree, then it makes itself the root of its own tree.
3. Nodes periodically send announcement messages to neighbors, which specify a sequence number for that node's current locator in the tree.
4. When a node A sees an unrecognized sequence number from a neighbor B, then A asks B to send them a locator.
5. This locator is sent in the form of a path from the root, through B, and ending at A.
6. Each hop in the path includes the public signing key of the next hop, and a signature for the full path from the root to the next hop, to prevent forgery of path information (similar to S-BGP).
7. The first hop, from the root, includes a signed sequence number which must increase (implemented as a unix timestamp, for convenience), which is used to detect root timeouts and prevent replays.
The highest `TreeID` approach to root selection is just to ensure that nodes select the same root, otherwise distance calculations wouldn't work.
Root selection has a minor effect on the stretch of the paths selected by the network, but this effect was seen to be small compared to the minimum stretch, for nearly all choices of root.
The current implementation tracks how long a neighbor has been advertising a locator for the same path, and it prefers to select a parent with a stable locator and a short distance to the root (maximize uptime/distance).
When forwarding traffic, the next hop is selected taking bandwidth to the next hop and distance to the destination into account (maximize bandwidth/distance), subject to the requirement that distance must always decrease.
The bandwidth estimation isn't very good, but it correlates well enough that e.g. when a slow wifi and a fast ethernet link to the same node are available, it typically uses the ethernet link.
However, if the ethernet link comes up while the wifi link is under heavy use, then it tends to keep using the wifi link until things settle down, and only switches to ethernet after the wireless link is no longer overloaded.
A better approach to bandwidth estimation could probably switch to the new link faster.
The queue size is limited to 4 MB. If a packet is added to a queue and the total size of all queues is larger than this threshold, then a random queue is selected (with odds proportional to relative queue sizes), and the first packet from that queue is dropped, with the process repeated until the total queue size drops below the allowed threshold.
Note that this forwarding procedure generalizes to nodes that are not one-hop neighbors, but the current implementation omits the use of more distant neighbors, as this is expected to be a minor optimization (it would add per-link control traffic to pass path-vector-like information about a subset of the network, which is a lot of overhead compared to the current setup).
## Other implementation details
### Spanning Tree
In case you hadn't noticed, this implementation is written in Go.
That decision was made because the designer and initial author (@Arceliar) felt like learning a new language when the implementation was started, and the Go language seemed like an OK choice for prototyping a network application.
While Go's GC pauses are small, they do exist, so this implementation probably isn't suited to applications that require very low latency and jitter.
A [spanning tree](https://en.wikipedia.org/wiki/Spanning_tree) is constructed with the tree rooted at the highest TreeID, where TreeID is equal to a sha512sum of a node's public [Ed25519](https://en.wikipedia.org/wiki/Ed25519) key (used for signing).
A node sends periodic advertisement messages to each neighbor.
The advertisement contains the coords that match the path from the root through the node, plus one additional hop from the node to the neighbor being advertised to.
Each hop in this advertisement includes a matching ed25519 signature.
These signatures prevent nodes from forging arbitrary routing advertisements.
Aside from that, an effort was made to write each part of it to be as "bad" (i.e. fragile) as could be managed while still being technically correct.
That's a decision made for debugging purposes: the intent is to make any bugs as obvious as possible, so they can more easily be found and fixed in a small or simulated network.
The first hop, from the root, also includes a sequence number, which must be updated periodically.
A node will blacklist the current root (keeping a record of the last sequence number observed) if the root fails to update for longer than some timeout (currently hard coded at 1 minute).
Normally, a root node will update their sequence number for frequently than this (once every 30 seconds).
Nodes are throttled to ignore updates with a new sequence number for some period after updating their most recently seen sequence number (currently this cooldown is 10 seconds).
The implementation chooses to set the sequence number equal to the unix time on the root's clock, so that a new (higher) sequence number will be selected if the root is restarted and the clock is not set back.
This implementation runs as an overlay network on top of regular IPv4 or IPv6 traffic.
It uses link-local IPv6 multicast traffic to automatically connect to devices on the same network, but it can also be fed a list of address:port pairs to connect to.
This can be used to e.g. set up two local networks and bridge them over the internet.
Other than the root node, every other node in the network must select one of its neighbors to use as their parent.
This selection is done by maximizing: `<uptime + timeout> / <distance to the root>`.
Here, `uptime` is the time between when we first and last received a message from the node which advertised the node's current location in the tree (resetting to zero if the location changes), and timeout is the time we wait before dropping a root due to inactivity.
This essentially means the numerator is at least as long as the amount of time between when the neighbor was first seen at its present location, and when the advertisement from the neighbor becomes invalid due to root timeout.
Resetting the uptime with each coordinate change causes nodes to favor long-lived stable paths over short-lived unstable ones, for the purposes of tree construction (indirectly impacting route selection).
## Performance
The distance metric between nodes is simply the distance between the nodes if they routed on the spanning tree.
This is equal to the sum of the distance from each node to the last common ancestor of the two nodes being compared.
The locator then consists of a root's key, timestamp, and coordinates representing each hop in the path from the root to the node.
In practice, only the coords are used for routing, while the root and timestamp, along with all the per-hop signatures, are needed to securely construct the spanning tree.
This section compares Yggdrasil with the results in [arxiv:0708.2309](https://arxiv.org/abs/0708.2309) (specifically table 1) from tests on the 9204-node [skitter](https://www.caida.org/tools/measurement/skitter/) network maps from [caida](https://www.caida.org/).
## Name-independent routing
A [simplified version](misc/sim/treesim-forward.py) of this routing scheme was written (long before the Yggdrasil implementation was started), and tested for comparison with the results from the above paper.
This version includes only the name-dependent part of the routing scheme, but the overhead of the name-independent portion is easy enough to check with the full implementation.
In summary:
A [Kademlia](https://en.wikipedia.org/wiki/Kademlia)-like Distributed Hash Table (DHT) is used as a distributed database that maps NodeIDs onto coordinates in the spanning tree metric space.
The DHT is Kademlia-like in that it uses the `xor` metric and structures the hash table into k-buckets (with 2 nodes per bucket in the normal case, plus some additional slots for keyspace neighbors and one-hop neighbors at the router level).
It differs from kademlia in that there are no values in the key:value store -- it only stores information about DHT peers.
1. Multiplicative stretch is approximately 1.08 with Yggdrasil, using unweighted links undirected links, as in the paper.
2. A modified version can get this as low as 1.01, but it depends on knowing the degree of each one-hop neighbor, which it is not obviously possible to cryptographically secure, and it requires using source routing to find a path from A to B and from B to A, and then have both nodes use whichever path was observed to be shorter.
3. In either case, approximately 6 routing table entries are needed, on average, for the name-dependent routing scheme, where each node needs one routing table entry per one-hop neighbor.
4. Approximately 30 DHT entries are needed to facilitate name-independent routing.
This requires a lookup and caches the results, so old information needs to time out to work on dynamic networks.
The schemes it's being compared to only work on static networks, where a similar approach would be fine, so this seems like a reasonably fair comparison.
The stretch of that initial lookup can be *very* high, but it's only for a couple of round trips to look up keys and then do the ephemeral key exchange, so this may be an acceptable tradeoff (it's probably more expensive than a DNS lookup, but is similar in principle and effect).
5. Both the name-dependent and name-independent routing table entries are of a size proportional to the length of the path between the root and the node, which is at most the diameter of the network after things have fully converged, but name-dependent routing table entries tend to be much larger in practice due to the size of cryptographic signatures (64 bytes for a signature + 32 for the signing key).
6. The name-dependent routing scheme only sends messages about one-hop neighbors on the link between those neighbors, so if you measure things by per *link* overhead instead of per *node*, then this doesn't seem so bad to me.
7. The name-independent routing scheme scales like a DHT running as an overlay on top of the router-level topology, so the per-link and per-node overhead are going to be topology dependent.
This hasn't been studied in a lot of detail, but for realistic topologies, where yggdrasil routing seems to approximate shortest path routing, academic research has shown that shortest path routing does not lead to congestion.
The main complication is that, when the DHT is bootstrapped off of a node's one-hop neighbors, with no special measures taken about which nodes are included in each bucket, then the network may diverge (settle into a stable bad state, where at least some lookups will always fail).
The current strategy is to place additional preferences on which nodes are kept in each bucket -- in particular, we try to keep the closest nodes in xor space in each bucket.
This seems to mitigate the issue in some quick tests, but it's a topic that could use additional study.
The designer (@Arceliar) believes that the main reason Yggdrasil performs so well is because it stores information about all one-hop neighbors.
Consider that, if Yggdrasil did not maintain state about all one-hop neighbors, but the protocol still had the ability to forward to all of them through some mechanism (i.e. source routing), then the OS still needs a way to forward traffic to them.
In most cases, this would require some form of per-neighbor state to be stored by the OS, either because there's one dedicated interface per peer or because there are entries in an arp/NDP table to reach multiple devices over a shared switch.
So while compact routing schemes have nice theoretical limits, which do not require even as much state as one entry per one-hop neighbor, that property does not seem realistic if the implementation is running at the router level (as opposed to the AS level).
As such, keeping one entry per neighbor may be reasonable, especially if nodes with a high degree have proportionally more resources available to them, but it is possible that something may have been overlooked in the design.
Other than these differences, the DHT is more-or-less what you might expect from a kad implementation.
## Disclaimer
To summarize the entire routing procedure, when given only a node's IP address, the goal is to find a route to the destination.
That happens through 3 steps:
1. The address is unpacked into the known bits of a NodeID and a bitmask to signal which bits of the NodeID are known (the unknown bits are ignored).
2. A DHT search is performed, which normally results in a response from the node closest in the DHT keyspace to the target `NodeID`. The response contains the node's curve25519 public key, which is checked to match the `NodeID` (and therefore the address), as well as the node's coordinates.
3. Using the keys and coords from the above step, an ephemeral key exchange occurs between the source and destination nodes. These ephemeral session keys are used to encrypt any ordinary IPv6 traffic that may be encapsulated and sent between the nodes.
From that point, the session keys and coords are cached and used to encrypt and send traffic between nodes. This is *mostly* transparent to the user: the initial DHT lookup and key exchange takes at least 2 round trips, so there's some delay before session setup completes and normal IPv6 traffic can flow. This is similar to the delay caused by a DNS lookup, although it generally takes longer, as a DHT lookup requires multiple iterations to reach the destination.
## Project Status and Plans
The current (Go) implementation is considered alpha, so compatibility with future versions is neither guaranteed nor expected.
While users are discouraged from running anything truly critical on top of it, as of writing, it seems reliable enough for day-to-day use.
As an "alpha" quality release, Yggdrasil *should* at least be able to detect incompatible versions when it sees them, and warn the users that an update may be needed.
A "beta" quality release should know enough to be compatible in the face of wire format changes, and reasonably feature complete.
A "stable" 1.0 release, if it ever happens, would probably be feature complete, with no expectation of future wire format changes, and free of known critical bugs.
Roughly speaking, there are a few obvious ways the project could turn out:
1. The developers could lose interest before it goes anywhere.
2. The project could be reasonably complete (beta or stable), but never gain a significant number of users.
3. The network may grow large enough that fundamental (non-fixable) design problems appear, which is hopefully a learning experience, but the project may die as a result.
4. The network may grow large, but never hit any design problems, in which case we need to think about either moving the important parts into other projects ([cjdns](https://github.com/cjdelisle/cjdns)) or rewriting compatible implementations that are better optimized for the target platforms (e.g. a linux kernel module).
That last one is probably impossible, because the speed of light would *eventually* become a problem, for a sufficiently large network.
If the only thing limiting network growth turns out to be the underlying physics, then that arguably counts as a win.
Also, note that some design decisions were made for ease-of-programming or ease-of-testing reasons, and likely need to be reconsidered at some point.
In particular, Yggdrasil currently uses TCP for connections with one-hop neighbors, which introduces an additional layer of buffering that can lead to increased and/or unstable latency in congested areas of the network.
This is a draft version of documentation for a work-in-progress protocol.
The design and implementation should be considered pre-alpha, with any and all aspects subject to change in light of ongoing R&D.
It is possible that this document and the code base may fall out of sync with eachother.
Some details that are known to be likely to change, packet formats in particular, have been omitted.

View File

@ -90,6 +90,10 @@ func (a *admin) init(c *Core, listenaddr string) {
}
return admin_info{"switchpeers": switchpeers}, nil
})
a.addHandler("getSwitchQueues", []string{}, func(in admin_info) (admin_info, error) {
queues := a.getData_getSwitchQueues()
return admin_info{"switchqueues": queues.asMap()}, nil
})
a.addHandler("getDHT", []string{}, func(in admin_info) (admin_info, error) {
sort := "ip"
dht := make(admin_info)
@ -112,8 +116,14 @@ func (a *admin) init(c *Core, listenaddr string) {
}
return admin_info{"sessions": sessions}, nil
})
a.addHandler("addPeer", []string{"uri"}, func(in admin_info) (admin_info, error) {
if a.addPeer(in["uri"].(string)) == nil {
a.addHandler("addPeer", []string{"uri", "[interface]"}, func(in admin_info) (admin_info, error) {
// Set sane defaults
intf := ""
// Has interface been specified?
if itf, ok := in["interface"]; ok {
intf = itf.(string)
}
if a.addPeer(in["uri"].(string), intf) == nil {
return admin_info{
"added": []string{
in["uri"].(string),
@ -390,12 +400,12 @@ func (a *admin) printInfos(infos []admin_nodeInfo) string {
}
// addPeer triggers a connection attempt to a node.
func (a *admin) addPeer(addr string) error {
func (a *admin) addPeer(addr string, sintf string) error {
u, err := url.Parse(addr)
if err == nil {
switch strings.ToLower(u.Scheme) {
case "tcp":
a.core.tcp.connect(u.Host)
a.core.tcp.connect(u.Host, sintf)
case "socks":
a.core.tcp.connectSOCKS(u.Host, u.Path[1:])
default:
@ -407,7 +417,7 @@ func (a *admin) addPeer(addr string) error {
if strings.HasPrefix(addr, "tcp:") {
addr = addr[4:]
}
a.core.tcp.connect(addr)
a.core.tcp.connect(addr, "")
return nil
}
return nil
@ -504,12 +514,43 @@ func (a *admin) getData_getSwitchPeers() []admin_nodeInfo {
{"ip", net.IP(addr[:]).String()},
{"coords", fmt.Sprint(coords)},
{"port", elem.port},
{"bytes_sent", atomic.LoadUint64(&peer.bytesSent)},
{"bytes_recvd", atomic.LoadUint64(&peer.bytesRecvd)},
}
peerInfos = append(peerInfos, info)
}
return peerInfos
}
// getData_getSwitchQueues returns info from Core.switchTable for an queue data.
func (a *admin) getData_getSwitchQueues() admin_nodeInfo {
var peerInfos admin_nodeInfo
switchTable := a.core.switchTable
getSwitchQueues := func() {
queues := make([]map[string]interface{}, 0)
for k, v := range switchTable.queues.bufs {
nexthop := switchTable.bestPortForCoords([]byte(k))
queue := map[string]interface{}{
"queue_id": k,
"queue_size": v.size,
"queue_packets": len(v.packets),
"queue_port": nexthop,
}
queues = append(queues, queue)
}
peerInfos = admin_nodeInfo{
{"queues", queues},
{"queues_count", len(switchTable.queues.bufs)},
{"queues_size", switchTable.queues.size},
{"highest_queues_count", switchTable.queues.maxbufs},
{"highest_queues_size", switchTable.queues.maxsize},
{"maximum_queues_size", switch_buffer_maxSize},
}
}
a.core.switchTable.doAdmin(getSwitchQueues)
return peerInfos
}
// getData_getDHT returns info from Core.dht for an admin response.
func (a *admin) getData_getDHT() []admin_nodeInfo {
var infos []admin_nodeInfo
@ -656,6 +697,16 @@ func (a *admin) getResponse_dot() []byte {
newInfo.name = "?"
newInfo.key = key
newInfo.options = "fontname=\"sans serif\" style=dashed color=\"#999999\" fontcolor=\"#999999\""
coordsSplit := coordSlice(newInfo.key)
if len(coordsSplit) != 0 {
portStr := coordsSplit[len(coordsSplit)-1]
portUint, err := strconv.ParseUint(portStr, 10, 64)
if err == nil {
newInfo.port = switchPort(portUint)
}
}
infos[key] = newInfo
}
}

View File

@ -2,19 +2,21 @@ package config
// NodeConfig defines all configuration values needed to run a signle yggdrasil node
type NodeConfig struct {
Listen string `comment:"Listen address for peer connections. Default is to listen for all\nTCP connections over IPv4 and IPv6 with a random port."`
AdminListen string `comment:"Listen address for admin connections Default is to listen for local\nconnections either on TCP/9001 or a UNIX socket depending on your\nplatform. Use this value for yggdrasilctl -endpoint=X."`
Peers []string `comment:"List of connection strings for static peers in URI format, i.e.\ntcp://a.b.c.d:e or socks://a.b.c.d:e/f.g.h.i:j"`
ReadTimeout int32 `comment:"Read timeout for connections, specified in milliseconds. If less than 6000 and not negative, 6000 (the default) is used. If negative, reads won't time out."`
AllowedEncryptionPublicKeys []string `comment:"List of peer encryption public keys to allow or incoming TCP\nconnections from. If left empty/undefined then all connections\nwill be allowed by default."`
EncryptionPublicKey string `comment:"Your public encryption key. Your peers may ask you for this to put\ninto their AllowedEncryptionPublicKeys configuration."`
EncryptionPrivateKey string `comment:"Your private encryption key. DO NOT share this with anyone!"`
SigningPublicKey string `comment:"Your public signing key. You should not ordinarily need to share\nthis with anyone."`
SigningPrivateKey string `comment:"Your private signing key. DO NOT share this with anyone!"`
MulticastInterfaces []string `comment:"Regular expressions for which interfaces multicast peer discovery\nshould be enabled on. If none specified, multicast peer discovery is\ndisabled. The default value is .* which uses all interfaces."`
IfName string `comment:"Local network interface name for TUN/TAP adapter, or \"auto\" to select\nan interface automatically, or \"none\" to run without TUN/TAP."`
IfTAPMode bool `comment:"Set local network interface to TAP mode rather than TUN mode if\nsupported by your platform - option will be ignored if not."`
IfMTU int `comment:"Maximux Transmission Unit (MTU) size for your local TUN/TAP interface.\nDefault is the largest supported size for your platform. The lowest\npossible value is 1280."`
Listen string `comment:"Listen address for peer connections. Default is to listen for all\nTCP connections over IPv4 and IPv6 with a random port."`
AdminListen string `comment:"Listen address for admin connections Default is to listen for local\nconnections either on TCP/9001 or a UNIX socket depending on your\nplatform. Use this value for yggdrasilctl -endpoint=X."`
Peers []string `comment:"List of connection strings for static peers in URI format, i.e.\ntcp://a.b.c.d:e or socks://a.b.c.d:e/f.g.h.i:j."`
InterfacePeers map[string][]string `comment:"List of connection strings for static peers in URI format, arranged\nby source interface, i.e. { \"eth0\": [ tcp://a.b.c.d:e ] }. Note that\nSOCKS peerings will NOT be affected by this option and should go in\nthe \"Peers\" section instead."`
ReadTimeout int32 `comment:"Read timeout for connections, specified in milliseconds. If less\nthan 6000 and not negative, 6000 (the default) is used. If negative,\nreads won't time out."`
AllowedEncryptionPublicKeys []string `comment:"List of peer encryption public keys to allow or incoming TCP\nconnections from. If left empty/undefined then all connections\nwill be allowed by default."`
EncryptionPublicKey string `comment:"Your public encryption key. Your peers may ask you for this to put\ninto their AllowedEncryptionPublicKeys configuration."`
EncryptionPrivateKey string `comment:"Your private encryption key. DO NOT share this with anyone!"`
SigningPublicKey string `comment:"Your public signing key. You should not ordinarily need to share\nthis with anyone."`
SigningPrivateKey string `comment:"Your private signing key. DO NOT share this with anyone!"`
MulticastInterfaces []string `comment:"Regular expressions for which interfaces multicast peer discovery\nshould be enabled on. If none specified, multicast peer discovery is\ndisabled. The default value is .* which uses all interfaces."`
IfName string `comment:"Local network interface name for TUN/TAP adapter, or \"auto\" to select\nan interface automatically, or \"none\" to run without TUN/TAP."`
IfTAPMode bool `comment:"Set local network interface to TAP mode rather than TUN mode if\nsupported by your platform - option will be ignored if not."`
IfMTU int `comment:"Maximux Transmission Unit (MTU) size for your local TUN/TAP interface.\nDefault is the largest supported size for your platform. The lowest\npossible value is 1280."`
SessionFirewall SessionFirewall `comment:"The session firewall controls who can send/receive network traffic\nto/from. This is useful if you want to protect this node without\nresorting to using a real firewall. This does not affect traffic\nbeing routed via this node to somewhere else. Rules are prioritised as\nfollows: blacklist, whitelist, always allow outgoing, direct, remote."`
//Net NetConfig `comment:"Extended options for connecting to peers over other networks."`
}
@ -23,3 +25,12 @@ type NetConfig struct {
Tor TorConfig `comment:"Experimental options for configuring peerings over Tor."`
I2P I2PConfig `comment:"Experimental options for configuring peerings over I2P."`
}
type SessionFirewall struct {
Enable bool `comment:"Enable or disable the session firewall. If disabled, network traffic\nfrom any node will be allowed. If enabled, the below rules apply."`
AllowFromDirect bool `comment:"Allow network traffic from directly connected peers."`
AllowFromRemote bool `comment:"Allow network traffic from remote nodes on the network that you are\nnot directly peered with."`
AlwaysAllowOutbound bool `comment:"Allow outbound network traffic regardless of AllowFromDirect or\nAllowFromRemote. This does allow a remote node to send unsolicited\ntraffic back to you for the length of the session."`
WhitelistEncryptionPublicKeys []string `comment:"List of public keys from which network traffic is always accepted,\nregardless of AllowFromDirect or AllowFromRemote."`
BlacklistEncryptionPublicKeys []string `comment:"List of public keys from which network traffic is always rejected,\nregardless of the whitelist, AllowFromDirect or AllowFromRemote."`
}

View File

@ -107,6 +107,15 @@ func (c *Core) Start(nc *config.NodeConfig, log *log.Logger) error {
return err
}
c.sessions.setSessionFirewallState(nc.SessionFirewall.Enable)
c.sessions.setSessionFirewallDefaults(
nc.SessionFirewall.AllowFromDirect,
nc.SessionFirewall.AllowFromRemote,
nc.SessionFirewall.AlwaysAllowOutbound,
)
c.sessions.setSessionFirewallWhitelist(nc.SessionFirewall.WhitelistEncryptionPublicKeys)
c.sessions.setSessionFirewallBlacklist(nc.SessionFirewall.BlacklistEncryptionPublicKeys)
if err := c.router.start(); err != nil {
c.log.Println("Failed to start router")
return err
@ -182,8 +191,8 @@ func (c *Core) SetLogger(log *log.Logger) {
// Adds a peer. This should be specified in the peer URI format, i.e.
// tcp://a.b.c.d:e, udp://a.b.c.d:e, socks://a.b.c.d:e/f.g.h.i:j
func (c *Core) AddPeer(addr string) error {
return c.admin.addPeer(addr)
func (c *Core) AddPeer(addr string, sintf string) error {
return c.admin.addPeer(addr, sintf)
}
// Adds an expression to select multicast interfaces for peer discovery. This

View File

@ -399,7 +399,7 @@ func (c *Core) DEBUG_maybeSendUDPKeys(saddr string) {
////////////////////////////////////////////////////////////////////////////////
func (c *Core) DEBUG_addPeer(addr string) {
err := c.admin.addPeer(addr)
err := c.admin.addPeer(addr, "")
if err != nil {
panic(err)
}
@ -427,7 +427,7 @@ func (c *Core) DEBUG_addSOCKSConn(socksaddr, peeraddr string) {
//*
func (c *Core) DEBUG_setupAndStartGlobalTCPInterface(addrport string) {
if err := c.tcp.init(c, addrport); err != nil {
if err := c.tcp.init(c, addrport, 0); err != nil {
c.log.Println("Failed to start TCP interface:", err)
panic(err)
}
@ -438,7 +438,7 @@ func (c *Core) DEBUG_getGlobalTCPAddr() *net.TCPAddr {
}
func (c *Core) DEBUG_addTCPConn(saddr string) {
c.tcp.call(saddr, nil)
c.tcp.call(saddr, nil, "")
}
//*/

View File

@ -153,6 +153,6 @@ func (m *multicast) listen() {
}
addr.Zone = from.Zone
saddr := addr.String()
m.core.tcp.connect(saddr)
m.core.tcp.connect(saddr, "")
}
}

View File

@ -17,7 +17,7 @@ import (
type peers struct {
core *Core
mutex sync.Mutex // Synchronize writes to atomic
ports atomic.Value //map[Port]*peer, use CoW semantics
ports atomic.Value //map[switchPort]*peer, use CoW semantics
authMutex sync.RWMutex
allowedEncryptionPublicKeys map[boxPubKey]struct{}
}
@ -127,7 +127,7 @@ func (ps *peers) removePeer(port switchPort) {
return
} // Can't remove self peer
ps.core.router.doAdmin(func() {
ps.core.switchTable.unlockedRemovePeer(port)
ps.core.switchTable.forgetPeer(port)
})
ps.mutex.Lock()
oldPorts := ps.getPorts()

View File

@ -184,6 +184,10 @@ func (s *searches) checkDHTRes(info *searchInfo, res *dhtRes) bool {
sinfo, isIn := s.core.sessions.getByTheirPerm(&res.Key)
if !isIn {
sinfo = s.core.sessions.createSession(&res.Key)
if sinfo == nil {
// nil if the DHT search finished but the session wasn't allowed
return true
}
_, isIn := s.core.sessions.getByTheirPerm(&res.Key)
if !isIn {
panic("This should never happen")

View File

@ -6,6 +6,7 @@ package yggdrasil
import (
"bytes"
"encoding/hex"
"time"
)
@ -107,6 +108,13 @@ type sessions struct {
byTheirPerm map[boxPubKey]*handle
addrToPerm map[address]*boxPubKey
subnetToPerm map[subnet]*boxPubKey
// Options from the session firewall
sessionFirewallEnabled bool
sessionFirewallAllowsDirect bool
sessionFirewallAllowsRemote bool
sessionFirewallAlwaysAllowsOutbound bool
sessionFirewallWhitelist []string
sessionFirewallBlacklist []string
}
// Initializes the session struct.
@ -121,6 +129,84 @@ func (ss *sessions) init(core *Core) {
ss.lastCleanup = time.Now()
}
// Enable or disable the session firewall
func (ss *sessions) setSessionFirewallState(enabled bool) {
ss.sessionFirewallEnabled = enabled
}
// Set the session firewall defaults (first parameter is whether to allow
// sessions from direct peers, second is whether to allow from remote nodes).
func (ss *sessions) setSessionFirewallDefaults(allowsDirect bool, allowsRemote bool, alwaysAllowsOutbound bool) {
ss.sessionFirewallAllowsDirect = allowsDirect
ss.sessionFirewallAllowsRemote = allowsRemote
ss.sessionFirewallAlwaysAllowsOutbound = alwaysAllowsOutbound
}
// Set the session firewall whitelist - nodes always allowed to open sessions.
func (ss *sessions) setSessionFirewallWhitelist(whitelist []string) {
ss.sessionFirewallWhitelist = whitelist
}
// Set the session firewall blacklist - nodes never allowed to open sessions.
func (ss *sessions) setSessionFirewallBlacklist(blacklist []string) {
ss.sessionFirewallBlacklist = blacklist
}
// Determines whether the session with a given publickey is allowed based on
// session firewall rules.
func (ss *sessions) isSessionAllowed(pubkey *boxPubKey, initiator bool) bool {
// Allow by default if the session firewall is disabled
if !ss.sessionFirewallEnabled {
return true
}
// Prepare for checking whitelist/blacklist
var box boxPubKey
// Reject blacklisted nodes
for _, b := range ss.sessionFirewallBlacklist {
key, err := hex.DecodeString(b)
if err == nil {
copy(box[:boxPubKeyLen], key)
if box == *pubkey {
return false
}
}
}
// Allow whitelisted nodes
for _, b := range ss.sessionFirewallWhitelist {
key, err := hex.DecodeString(b)
if err == nil {
copy(box[:boxPubKeyLen], key)
if box == *pubkey {
return true
}
}
}
// Allow outbound sessions if appropriate
if ss.sessionFirewallAlwaysAllowsOutbound {
if initiator {
return true
}
}
// Look and see if the pubkey is that of a direct peer
var isDirectPeer bool
for _, peer := range ss.core.peers.ports.Load().(map[switchPort]*peer) {
if peer.box == *pubkey {
isDirectPeer = true
break
}
}
// Allow direct peers if appropriate
if ss.sessionFirewallAllowsDirect && isDirectPeer {
return true
}
// Allow remote nodes if appropriate
if ss.sessionFirewallAllowsRemote && !isDirectPeer {
return true
}
// Finally, default-deny if not matching any of the above rules
return false
}
// Gets the session corresponding to a given handle.
func (ss *sessions) getSessionForHandle(handle *handle) (*sessionInfo, bool) {
sinfo, isIn := ss.sinfos[*handle]
@ -174,6 +260,11 @@ func (ss *sessions) getByTheirSubnet(snet *subnet) (*sessionInfo, bool) {
// Creates a new session and lazily cleans up old/timedout existing sessions.
// This includse initializing session info to sane defaults (e.g. lowest supported MTU).
func (ss *sessions) createSession(theirPermKey *boxPubKey) *sessionInfo {
if ss.sessionFirewallEnabled {
if !ss.isSessionAllowed(theirPermKey, true) {
return nil
}
}
sinfo := sessionInfo{}
sinfo.core = ss.core
sinfo.theirPermPub = *theirPermKey
@ -311,6 +402,12 @@ func (ss *sessions) sendPingPong(sinfo *sessionInfo, isPong bool) {
func (ss *sessions) handlePing(ping *sessionPing) {
// Get the corresponding session (or create a new session)
sinfo, isIn := ss.getByTheirPerm(&ping.SendPermPub)
// Check the session firewall
if !isIn && ss.sessionFirewallEnabled {
if !ss.isSessionAllowed(&ping.SendPermPub, false) {
return
}
}
if !isIn || sinfo.timedout() {
if isIn {
sinfo.close()

View File

@ -161,11 +161,13 @@ type switchTable struct {
parent switchPort // Port of whatever peer is our parent, or self if we're root
drop map[sigPubKey]int64 // Tstamp associated with a dropped root
mutex sync.RWMutex // Lock for reads/writes of switchData
data switchData
updater atomic.Value //*sync.Once
table atomic.Value //lookupTable
packetIn chan []byte // Incoming packets for the worker to handle
idleIn chan switchPort // Incoming idle notifications from peer links
data switchData //
updater atomic.Value // *sync.Once
table atomic.Value // lookupTable
packetIn chan []byte // Incoming packets for the worker to handle
idleIn chan switchPort // Incoming idle notifications from peer links
admin chan func() // Pass a lambda for the admin socket to query stuff
queues switch_buffers // Queues - not atomic so ONLY use through admin chan
}
// Initializes the switchTable struct.
@ -181,6 +183,7 @@ func (t *switchTable) init(core *Core, key sigPubKey) {
t.drop = make(map[sigPubKey]int64)
t.packetIn = make(chan []byte, 1024)
t.idleIn = make(chan switchPort, 1024)
t.admin = make(chan func())
}
// Safely gets a copy of this node's locator.
@ -239,7 +242,9 @@ func (t *switchTable) cleanRoot() {
// Removes a peer.
// Must be called by the router mainLoop goroutine, e.g. call router.doAdmin with a lambda that calls this.
// If the removed peer was this node's parent, it immediately tries to find a new parent.
func (t *switchTable) unlockedRemovePeer(port switchPort) {
func (t *switchTable) forgetPeer(port switchPort) {
t.mutex.Lock()
defer t.mutex.Unlock()
delete(t.data.peers, port)
t.updater.Store(&sync.Once{})
if port != t.parent {
@ -538,6 +543,22 @@ func switch_getPacketStreamID(packet []byte) string {
return string(switch_getPacketCoords(packet))
}
// Find the best port for a given set of coords
func (t *switchTable) bestPortForCoords(coords []byte) switchPort {
table := t.getTable()
var best switchPort
bestDist := table.self.dist(coords)
for to, elem := range table.elems {
dist := elem.locator.dist(coords)
if !(dist < bestDist) {
continue
}
best = to
bestDist = dist
}
return best
}
// Handle an incoming packet
// Either send it to ourself, or to the first idle peer that's free
// Returns true if the packet has been handled somehow, false if it should be queued
@ -582,6 +603,8 @@ type switch_packetInfo struct {
time time.Time // Timestamp of when the packet arrived
}
const switch_buffer_maxSize = 4 * 1048576 // Maximum 4 MB
// Used to keep track of buffered packets
type switch_buffer struct {
packets []switch_packetInfo // Currently buffered packets, which may be dropped if it grows too large
@ -589,8 +612,10 @@ type switch_buffer struct {
}
type switch_buffers struct {
bufs map[string]switch_buffer // Buffers indexed by StreamID
size uint64 // Total size of all buffers, in bytes
bufs map[string]switch_buffer // Buffers indexed by StreamID
size uint64 // Total size of all buffers, in bytes
maxbufs int
maxsize uint64
}
func (b *switch_buffers) cleanup(t *switchTable) {
@ -606,8 +631,8 @@ func (b *switch_buffers) cleanup(t *switchTable) {
delete(b.bufs, streamID)
}
}
const maxSize = 4 * 1048576 // Maximum 4 MB
for b.size > maxSize {
for b.size > switch_buffer_maxSize {
// Drop a random queue
target := rand.Uint64() % b.size
var size uint64 // running total
@ -635,16 +660,16 @@ func (b *switch_buffers) cleanup(t *switchTable) {
// Handles incoming idle notifications
// Loops over packets and sends the newest one that's OK for this peer to send
// Returns true if the peer is no longer idle, false if it should be added to the idle list
func (t *switchTable) handleIdle(port switchPort, bufs *switch_buffers) bool {
func (t *switchTable) handleIdle(port switchPort) bool {
to := t.core.peers.getPorts()[port]
if to == nil {
return true
}
var best string
var bestPriority float64
bufs.cleanup(t)
t.queues.cleanup(t)
now := time.Now()
for streamID, buf := range bufs.bufs {
for streamID, buf := range t.queues.bufs {
// Filter over the streams that this node is closer to
// Keep the one with the smallest queue
packet := buf.packets[0]
@ -656,17 +681,17 @@ func (t *switchTable) handleIdle(port switchPort, bufs *switch_buffers) bool {
}
}
if bestPriority != 0 {
buf := bufs.bufs[best]
buf := t.queues.bufs[best]
var packet switch_packetInfo
// TODO decide if this should be LIFO or FIFO
packet, buf.packets = buf.packets[0], buf.packets[1:]
buf.size -= uint64(len(packet.bytes))
bufs.size -= uint64(len(packet.bytes))
t.queues.size -= uint64(len(packet.bytes))
if len(buf.packets) == 0 {
delete(bufs.bufs, best)
delete(t.queues.bufs, best)
} else {
// Need to update the map, since buf was retrieved by value
bufs.bufs[best] = buf
t.queues.bufs[best] = buf
}
to.sendPacket(packet.bytes)
return true
@ -677,9 +702,8 @@ func (t *switchTable) handleIdle(port switchPort, bufs *switch_buffers) bool {
// The switch worker does routing lookups and sends packets to where they need to be
func (t *switchTable) doWorker() {
var bufs switch_buffers
bufs.bufs = make(map[string]switch_buffer) // Packets per PacketStreamID (string)
idle := make(map[switchPort]struct{}) // this is to deduplicate things
t.queues.bufs = make(map[string]switch_buffer) // Packets per PacketStreamID (string)
idle := make(map[switchPort]struct{}) // this is to deduplicate things
for {
select {
case bytes := <-t.packetIn:
@ -688,19 +712,47 @@ func (t *switchTable) doWorker() {
// There's nobody free to take it right now, so queue it for later
packet := switch_packetInfo{bytes, time.Now()}
streamID := switch_getPacketStreamID(packet.bytes)
buf := bufs.bufs[streamID]
buf, bufExists := t.queues.bufs[streamID]
buf.packets = append(buf.packets, packet)
buf.size += uint64(len(packet.bytes))
bufs.size += uint64(len(packet.bytes))
bufs.bufs[streamID] = buf
bufs.cleanup(t)
t.queues.size += uint64(len(packet.bytes))
// Keep a track of the max total queue size
if t.queues.size > t.queues.maxsize {
t.queues.maxsize = t.queues.size
}
t.queues.bufs[streamID] = buf
if !bufExists {
// Keep a track of the max total queue count. Only recalculate this
// when the queue is new because otherwise repeating len(dict) might
// cause unnecessary processing overhead
if len(t.queues.bufs) > t.queues.maxbufs {
t.queues.maxbufs = len(t.queues.bufs)
}
}
t.queues.cleanup(t)
}
case port := <-t.idleIn:
// Try to find something to send to this peer
if !t.handleIdle(port, &bufs) {
if !t.handleIdle(port) {
// Didn't find anything ready to send yet, so stay idle
idle[port] = struct{}{}
}
case f := <-t.admin:
f()
}
}
}
// Passed a function to call.
// This will send the function to t.admin and block until it finishes.
func (t *switchTable) doAdmin(f func()) {
// Pass this a function that needs to be run by the router's main goroutine
// It will pass the function to the router and wait for the router to finish
done := make(chan struct{})
newF := func() {
f()
close(done)
}
t.admin <- newF
<-done
}

View File

@ -64,13 +64,13 @@ func (iface *tcpInterface) getAddr() *net.TCPAddr {
}
// Attempts to initiate a connection to the provided address.
func (iface *tcpInterface) connect(addr string) {
iface.call(addr, nil)
func (iface *tcpInterface) connect(addr string, intf string) {
iface.call(addr, nil, intf)
}
// Attempst to initiate a connection to the provided address, viathe provided socks proxy address.
func (iface *tcpInterface) connectSOCKS(socksaddr, peeraddr string) {
iface.call(peeraddr, &socksaddr)
iface.call(peeraddr, &socksaddr, "")
}
// Initializes the struct.
@ -110,20 +110,24 @@ func (iface *tcpInterface) listener() {
// If the dial is successful, it launches the handler.
// When finished, it removes the outgoing call, so reconnection attempts can be made later.
// This all happens in a separate goroutine that it spawns.
func (iface *tcpInterface) call(saddr string, socksaddr *string) {
func (iface *tcpInterface) call(saddr string, socksaddr *string, sintf string) {
go func() {
callname := saddr
if sintf != "" {
callname = fmt.Sprintf("%s/%s", saddr, sintf)
}
quit := false
iface.mutex.Lock()
if _, isIn := iface.calls[saddr]; isIn {
if _, isIn := iface.calls[callname]; isIn {
quit = true
} else {
iface.calls[saddr] = struct{}{}
iface.calls[callname] = struct{}{}
defer func() {
// Block new calls for a little while, to mitigate livelock scenarios
time.Sleep(default_tcp_timeout)
time.Sleep(time.Duration(rand.Intn(1000)) * time.Millisecond)
iface.mutex.Lock()
delete(iface.calls, saddr)
delete(iface.calls, callname)
iface.mutex.Unlock()
}()
}
@ -134,6 +138,9 @@ func (iface *tcpInterface) call(saddr string, socksaddr *string) {
var conn net.Conn
var err error
if socksaddr != nil {
if sintf != "" {
return
}
var dialer proxy.Dialer
dialer, err = proxy.SOCKS5("tcp", *socksaddr, nil, proxy.Direct)
if err != nil {
@ -151,7 +158,41 @@ func (iface *tcpInterface) call(saddr string, socksaddr *string) {
},
}
} else {
conn, err = net.Dial("tcp", saddr)
dialer := net.Dialer{}
if sintf != "" {
ief, err := net.InterfaceByName(sintf)
if err != nil {
return
} else {
if ief.Flags&net.FlagUp == 0 {
return
}
addrs, err := ief.Addrs()
if err == nil {
dst, err := net.ResolveTCPAddr("tcp", saddr)
if err != nil {
return
}
for _, addr := range addrs {
src, _, err := net.ParseCIDR(addr.String())
if err != nil {
continue
}
if (src.To4() != nil) == (dst.IP.To4() != nil) && src.IsGlobalUnicast() {
dialer.LocalAddr = &net.TCPAddr{
IP: src,
Port: 0,
}
break
}
}
if dialer.LocalAddr == nil {
return
}
}
}
}
conn, err = dialer.Dial("tcp", saddr)
if err != nil {
return
}
@ -307,17 +348,18 @@ func (iface *tcpInterface) handler(sock net.Conn, incoming bool) {
// Put all of our cleanup here...
p.core.peers.removePeer(p.port)
}()
us, _, _ := net.SplitHostPort(sock.LocalAddr().String())
them, _, _ := net.SplitHostPort(sock.RemoteAddr().String())
themNodeID := getNodeID(&info.box)
themAddr := address_addrForNodeID(themNodeID)
themAddrString := net.IP(themAddr[:]).String()
themString := fmt.Sprintf("%s@%s", themAddrString, them)
iface.core.log.Println("Connected:", themString)
iface.core.log.Println("Connected:", themString, "source", us)
err = iface.reader(sock, in) // In this goroutine, because of defers
if err == nil {
iface.core.log.Println("Disconnected:", themString)
iface.core.log.Println("Disconnected:", themString, "source", us)
} else {
iface.core.log.Println("Disconnected:", themString, "with error:", err)
iface.core.log.Println("Disconnected:", themString, "source", us, "with error:", err)
}
return
}

View File

@ -60,11 +60,15 @@ func generateConfig(isAutoconf bool) *nodeConfig {
cfg.SigningPublicKey = hex.EncodeToString(spub[:])
cfg.SigningPrivateKey = hex.EncodeToString(spriv[:])
cfg.Peers = []string{}
cfg.InterfacePeers = map[string][]string{}
cfg.AllowedEncryptionPublicKeys = []string{}
cfg.MulticastInterfaces = []string{".*"}
cfg.IfName = defaults.GetDefaults().DefaultIfName
cfg.IfMTU = defaults.GetDefaults().DefaultIfMTU
cfg.IfTAPMode = defaults.GetDefaults().DefaultIfTAPMode
cfg.SessionFirewall.Enable = false
cfg.SessionFirewall.AllowFromDirect = true
cfg.SessionFirewall.AllowFromRemote = true
return &cfg
}
@ -231,14 +235,20 @@ func main() {
// configure them. The loop ensures that disconnected peers will eventually
// be reconnected with.
go func() {
if len(cfg.Peers) == 0 {
if len(cfg.Peers) == 0 && len(cfg.InterfacePeers) == 0 {
return
}
for {
for _, p := range cfg.Peers {
n.core.AddPeer(p)
for _, peer := range cfg.Peers {
n.core.AddPeer(peer, "")
time.Sleep(time.Second)
}
for intf, intfpeers := range cfg.InterfacePeers {
for _, peer := range intfpeers {
n.core.AddPeer(peer, intf)
time.Sleep(time.Second)
}
}
time.Sleep(time.Minute)
}
}()

View File

@ -183,6 +183,54 @@ func main() {
fmt.Println("Coords:", coords)
}
}
case "getswitchqueues":
maximumqueuesize := float64(4194304)
portqueues := make(map[float64]float64)
portqueuesize := make(map[float64]float64)
portqueuepackets := make(map[float64]float64)
v := res["switchqueues"].(map[string]interface{})
if queuecount, ok := v["queues_count"].(float64); ok {
fmt.Printf("Active queue count: %d queues\n", uint(queuecount))
}
if queuesize, ok := v["queues_size"].(float64); ok {
fmt.Printf("Active queue size: %d bytes\n", uint(queuesize))
}
if highestqueuecount, ok := v["highest_queues_count"].(float64); ok {
fmt.Printf("Highest queue count: %d queues\n", uint(highestqueuecount))
}
if highestqueuesize, ok := v["highest_queues_size"].(float64); ok {
fmt.Printf("Highest queue size: %d bytes\n", uint(highestqueuesize))
}
if m, ok := v["maximum_queues_size"].(float64); ok {
fmt.Printf("Maximum queue size: %d bytes\n", uint(maximumqueuesize))
maximumqueuesize = m
}
if queues, ok := v["queues"].([]interface{}); ok {
if len(queues) != 0 {
fmt.Println("Active queues:")
for _, v := range queues {
queueport := v.(map[string]interface{})["queue_port"].(float64)
queuesize := v.(map[string]interface{})["queue_size"].(float64)
queuepackets := v.(map[string]interface{})["queue_packets"].(float64)
queueid := v.(map[string]interface{})["queue_id"].(string)
portqueues[queueport] += 1
portqueuesize[queueport] += queuesize
portqueuepackets[queueport] += queuepackets
queuesizepercent := (100 / maximumqueuesize) * queuesize
fmt.Printf("- Switch port %d, Stream ID: %v, size: %d bytes (%d%% full), %d packets\n",
uint(queueport), []byte(queueid), uint(queuesize),
uint(queuesizepercent), uint(queuepackets))
}
}
}
if len(portqueuesize) > 0 && len(portqueuepackets) > 0 {
fmt.Println("Aggregated statistics by switchport:")
for k, v := range portqueuesize {
queuesizepercent := (100 / (portqueues[k] * maximumqueuesize)) * v
fmt.Printf("- Switch port %d, size: %d bytes (%d%% full), %d packets\n",
uint(k), uint(v), uint(queuesizepercent), uint(portqueuepackets[k]))
}
}
case "addpeer", "removepeer", "addallowedencryptionpublickey", "removeallowedencryptionpublickey":
if _, ok := res["added"]; ok {
for _, v := range res["added"].([]interface{}) {