diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..51cbf65 --- /dev/null +++ b/Gemfile @@ -0,0 +1,11 @@ +gem "jekyll", "~> 3.8" + +gem "jekyll-theme-minimal", "~> 0.1.1" + +gem "jekyll-feed", "~> 0.11.0" + +gem "jekyll-sitemap", "~> 1.2" + +gem "github-pages", group: :jekyll_plugins + +gem "nokogiri", ">= 1.10.4" diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..71bab45 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,251 @@ +GEM + specs: + activesupport (4.2.11.1) + i18n (~> 0.7) + minitest (~> 5.1) + thread_safe (~> 0.3, >= 0.3.4) + tzinfo (~> 1.1) + addressable (2.6.0) + public_suffix (>= 2.0.2, < 4.0) + coffee-script (2.4.1) + coffee-script-source + execjs + coffee-script-source (1.11.1) + colorator (1.1.0) + commonmarker (0.17.13) + ruby-enum (~> 0.5) + concurrent-ruby (1.1.5) + dnsruby (1.61.2) + addressable (~> 2.5) + em-websocket (0.5.1) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0.6.0) + ethon (0.12.0) + ffi (>= 1.3.0) + eventmachine (1.2.7) + execjs (2.7.0) + faraday (0.15.4) + multipart-post (>= 1.2, < 3) + ffi (1.11.1) + forwardable-extended (2.6.0) + gemoji (3.0.1) + github-pages (198) + activesupport (= 4.2.11.1) + github-pages-health-check (= 1.16.1) + jekyll (= 3.8.5) + jekyll-avatar (= 0.6.0) + jekyll-coffeescript (= 1.1.1) + jekyll-commonmark-ghpages (= 0.1.5) + jekyll-default-layout (= 0.1.4) + jekyll-feed (= 0.11.0) + jekyll-gist (= 1.5.0) + jekyll-github-metadata (= 2.12.1) + jekyll-mentions (= 1.4.1) + jekyll-optional-front-matter (= 0.3.0) + jekyll-paginate (= 1.1.0) + jekyll-readme-index (= 0.2.0) + jekyll-redirect-from (= 0.14.0) + jekyll-relative-links (= 0.6.0) + jekyll-remote-theme (= 0.3.1) + jekyll-sass-converter (= 1.5.2) + jekyll-seo-tag (= 2.5.0) + jekyll-sitemap (= 1.2.0) + jekyll-swiss (= 0.4.0) + jekyll-theme-architect (= 0.1.1) + jekyll-theme-cayman (= 0.1.1) + jekyll-theme-dinky (= 0.1.1) + jekyll-theme-hacker (= 0.1.1) + jekyll-theme-leap-day (= 0.1.1) + jekyll-theme-merlot (= 0.1.1) 
+ jekyll-theme-midnight (= 0.1.1) + jekyll-theme-minimal (= 0.1.1) + jekyll-theme-modernist (= 0.1.1) + jekyll-theme-primer (= 0.5.3) + jekyll-theme-slate (= 0.1.1) + jekyll-theme-tactile (= 0.1.1) + jekyll-theme-time-machine (= 0.1.1) + jekyll-titles-from-headings (= 0.5.1) + jemoji (= 0.10.2) + kramdown (= 1.17.0) + liquid (= 4.0.0) + listen (= 3.1.5) + mercenary (~> 0.3) + minima (= 2.5.0) + nokogiri (>= 1.8.5, < 2.0) + rouge (= 2.2.1) + terminal-table (~> 1.4) + github-pages-health-check (1.16.1) + addressable (~> 2.3) + dnsruby (~> 1.60) + octokit (~> 4.0) + public_suffix (~> 3.0) + typhoeus (~> 1.3) + html-pipeline (2.11.1) + activesupport (>= 2) + nokogiri (>= 1.4) + http_parser.rb (0.6.0) + i18n (0.9.5) + concurrent-ruby (~> 1.0) + jekyll (3.8.5) + addressable (~> 2.4) + colorator (~> 1.0) + em-websocket (~> 0.5) + i18n (~> 0.7) + jekyll-sass-converter (~> 1.0) + jekyll-watch (~> 2.0) + kramdown (~> 1.14) + liquid (~> 4.0) + mercenary (~> 0.3.3) + pathutil (~> 0.9) + rouge (>= 1.7, < 4) + safe_yaml (~> 1.0) + jekyll-avatar (0.6.0) + jekyll (~> 3.0) + jekyll-coffeescript (1.1.1) + coffee-script (~> 2.2) + coffee-script-source (~> 1.11.1) + jekyll-commonmark (1.3.1) + commonmarker (~> 0.14) + jekyll (>= 3.7, < 5.0) + jekyll-commonmark-ghpages (0.1.5) + commonmarker (~> 0.17.6) + jekyll-commonmark (~> 1) + rouge (~> 2) + jekyll-default-layout (0.1.4) + jekyll (~> 3.0) + jekyll-feed (0.11.0) + jekyll (~> 3.3) + jekyll-gist (1.5.0) + octokit (~> 4.2) + jekyll-github-metadata (2.12.1) + jekyll (~> 3.4) + octokit (~> 4.0, != 4.4.0) + jekyll-mentions (1.4.1) + html-pipeline (~> 2.3) + jekyll (~> 3.0) + jekyll-optional-front-matter (0.3.0) + jekyll (~> 3.0) + jekyll-paginate (1.1.0) + jekyll-readme-index (0.2.0) + jekyll (~> 3.0) + jekyll-redirect-from (0.14.0) + jekyll (~> 3.3) + jekyll-relative-links (0.6.0) + jekyll (~> 3.3) + jekyll-remote-theme (0.3.1) + jekyll (~> 3.5) + rubyzip (>= 1.2.1, < 3.0) + jekyll-sass-converter (1.5.2) + sass (~> 3.4) + jekyll-seo-tag 
(2.5.0) + jekyll (~> 3.3) + jekyll-sitemap (1.2.0) + jekyll (~> 3.3) + jekyll-swiss (0.4.0) + jekyll-theme-architect (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-cayman (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-dinky (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-hacker (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-leap-day (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-merlot (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-midnight (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-minimal (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-modernist (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-primer (0.5.3) + jekyll (~> 3.5) + jekyll-github-metadata (~> 2.9) + jekyll-seo-tag (~> 2.0) + jekyll-theme-slate (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-tactile (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-time-machine (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-titles-from-headings (0.5.1) + jekyll (~> 3.3) + jekyll-watch (2.2.1) + listen (~> 3.0) + jemoji (0.10.2) + gemoji (~> 3.0) + html-pipeline (~> 2.2) + jekyll (~> 3.0) + kramdown (1.17.0) + liquid (4.0.0) + listen (3.1.5) + rb-fsevent (~> 0.9, >= 0.9.4) + rb-inotify (~> 0.9, >= 0.9.7) + ruby_dep (~> 1.2) + mercenary (0.3.6) + mini_portile2 (2.4.0) + minima (2.5.0) + jekyll (~> 3.5) + jekyll-feed (~> 0.9) + jekyll-seo-tag (~> 2.1) + minitest (5.11.3) + multipart-post (2.1.1) + nokogiri (1.10.3) + mini_portile2 (~> 2.4.0) + octokit (4.14.0) + sawyer (~> 0.8.0, >= 0.5.3) + pathutil (0.16.2) + forwardable-extended (~> 2.6) + public_suffix (3.1.1) + rb-fsevent (0.10.3) + rb-inotify (0.10.0) + ffi (~> 1.0) + rouge (2.2.1) + ruby-enum (0.7.2) + i18n + ruby_dep (1.5.0) + rubyzip (1.2.3) + safe_yaml (1.0.5) + sass (3.7.4) + sass-listen (~> 4.0.0) + sass-listen (4.0.0) + rb-fsevent 
(~> 0.9, >= 0.9.4) + rb-inotify (~> 0.9, >= 0.9.7) + sawyer (0.8.2) + addressable (>= 2.3.5) + faraday (> 0.8, < 2.0) + terminal-table (1.8.0) + unicode-display_width (~> 1.1, >= 1.1.1) + thread_safe (0.3.6) + typhoeus (1.3.1) + ethon (>= 0.9.0) + tzinfo (1.2.5) + thread_safe (~> 0.1) + unicode-display_width (1.6.0) + +PLATFORMS + ruby + +DEPENDENCIES + github-pages + jekyll (~> 3.8) + jekyll-feed (~> 0.11.0) + jekyll-sitemap (~> 1.2) + jekyll-theme-minimal (~> 0.1.1) + +BUNDLED WITH + 1.16.1 diff --git a/_posts/2018-07-28-addressing.md b/_posts/2018-07-28-addressing.md index a50c7a5..0183681 100644 --- a/_posts/2018-07-28-addressing.md +++ b/_posts/2018-07-28-addressing.md @@ -100,7 +100,7 @@ So full connection process looks something like the following: 5. The node checks that the destination `NodeID` and bitmask match the `NodeID` of the closest node (if not, then it means the destination node doesn't exist / is offline / is unreachable while the network re-converges due some disruption). 6. The node sends a session ping to the destination. 7. The node receives a session pong from the destination, learning their public ephemeral key. -8. The nodes can now send regular IPv6 traffic to eachother, encrypted with the ephemeral shared secret, using the session's cached `coords` to avoid future lookups (unless the session is unresponsive for too long, in which case any new sends will also trigger a ping, or a new DHT lookup if the pings fail). +8. The nodes can now send regular IPv6 traffic to each other, encrypted with the ephemeral shared secret, using the session's cached `coords` to avoid future lookups (unless the session is unresponsive for too long, in which case any new sends will also trigger a ping, or a new DHT lookup if the pings fail). 
### Conclusion diff --git a/_posts/2018-08-18-congestion-control.md b/_posts/2018-08-18-congestion-control.md index d8a29ad..7d82362 100644 --- a/_posts/2018-08-18-congestion-control.md +++ b/_posts/2018-08-18-congestion-control.md @@ -27,7 +27,7 @@ This post attempts to explain Yggdrasil's congestion control mechanism, why past The first thing to try is not to implement any explicit buffering in Yggdrasil. Packets are received from a socket, we look up where the packet needs to go next, and then we send on that socket. -This immediately leads to blocking network operations and poor performance, so we need need separate read and write threads (goroutines, in our case). +This immediately leads to blocking network operations and poor performance, so we need separate read and write threads (goroutines, in our case). Initially, we used buffered channels and non-blocking channel sends. This means that, instead of the reader goroutine writing to the socket to send, it would pass it to a channel which a dedicated writer goroutine would read from. The problem with this approach is that Go channels with non-blocking sends are [FIFO](https://en.wikipedia.org/wiki/FIFO_(computing_and_electronics)) and [tail dropped](https://en.wikipedia.org/wiki/Tail_drop). @@ -56,7 +56,7 @@ What we want is for multiple streams of traffic to be handled independently, to Then, we can reward different traffic streams to prioritize based on lowest bandwidth (i.e. size of queue / age of oldest packet in queue, with a separate queue per traffic stream). Now we let traffic streams compete for bandwidth. The winning strategy, to get more bandwidth during times of congestion, is to attempt to use *less* bandwidth, which I argue is exactly the behavior we want to encourage. -Streams of traffic that play nice get a fair share of bandwidth, which includes pretty much every sane TCP implementation, and streams that flood goto timeout. 
+Streams of traffic that play nice get a fair share of bandwidth, which includes pretty much every sane TCP implementation, and streams that flood go to timeout. ### Yggdrasil's congestion control @@ -99,7 +99,7 @@ Still, because we won't really know without trying, adding the required new pack Yggdrasil has gone through a number of different congestion control mechanisms since the TCP link layer was introduced. The current congestion control mechanism rewards traffic streams which utilize less bandwidth by prioritizing them higher than streams using more bandwidth. -Cooperative stream obtain a fair share of bandwidth, while stream which attempt to use more than their fair share are given lower priority, and are forced to throttle down as a result. +Cooperative streams obtain a fair share of bandwidth, while streams which attempt to use more than their fair share are given lower priority, and are forced to throttle down as a result. When packet drops become necessary, a random drop mechanism is used which penalizes large queues the most, which should signal congestion to the worst offenders. Much of this is a precursor to backpressure routing, which, if it works out in practice as well as it does on paper, should give the network a nearly-optimal latency/bandwidth trade-off. diff --git a/_posts/2018-11-06-crypto-key-routing.md b/_posts/2018-11-06-crypto-key-routing.md index a49dac2..1f3ac64 100644 --- a/_posts/2018-11-06-crypto-key-routing.md +++ b/_posts/2018-11-06-crypto-key-routing.md @@ -33,62 +33,58 @@ is released: TunnelRouting: { Enable: false - IPv6Destinations: {} - IPv6Sources: [] - IPv4Destinations: {} - IPv4Sources: [] + IPv6RemoteSubnets: {} + IPv6LocalSubnets: [] + IPv4RemoteSubnets: {} + IPv4LocalSubnets: [] } ``` -The `IPv6Destinations` and `IPv4Destinations` options are used to specify +The `IPv6RemoteSubnets` and `IPv4RemoteSubnets` options are used to specify crypto-key routes. 
The subnet refers to an encryption public key of another node, for example: ``` -IPv6Destinations: { +IPv6RemoteSubnets: { "fd64:642b:1a20::/48": "ef78da7fc983c6c210609529921a701ca3e43fa5cfd79f5f20cc67bf66e45c1a", "fd25:8a33:9311:a53b::/64": "417fd0a66a104f050ae3544b3bc03eeb3648dded4a8c1fb085d65ffa25e83d6e" } ``` As with a typical routing table, more specific routes are preferred. -The `IPv6Sources` and `IPv4Sources` options are used to specify which source -addresses are eligible to be routed across a tunnel. For IPv6, this is optional -as each Yggdrasil node already has an address and a routed subnet, and these are -always allowed as source addresses. Specifying additional source addresses means -that you can use the tunnel to route between networks. For IPv4, this is -mandatory as Yggdrasil nodes do not have IPv4 addresses by default. +The `IPv6LocalSubnets` and `IPv4LocalSubnets` options are used to specify which source +addresses are eligible to be routed across a tunnel. These options are mandatory. ### Bridging networks Assume that node A is `a.a.a.a/24` and `aaaa:aaaa:aaaa:aaaa::/64` and node B is `b.b.b.b/24` and `bbbb:bbbb:bbbb:bbbb::/64`. 
On node A, use the following `TunnelRouting` configuration: ``` Enable: true -IPv6Destinations: { +IPv6RemoteSubnets: { "bbbb:bbbb:bbbb:bbbb::/64": "xxxxxxxxxxxxxx" } -IPv6Sources: { +IPv6LocalSubnets: { aaaa:aaaa:aaaa:aaaa::/64 } -IPv4Destinations: { +IPv4RemoteSubnets: { "b.b.b.b/24": "xxxxxxxxxxxxxx" } -IPv4Sources: { +IPv4LocalSubnets: { a.a.a.a/24 } ``` On node B, use the reverse `TunnelRouting` configuration: ``` Enable: true -IPv6Destinations: { +IPv6RemoteSubnets: { "aaaa:aaaa:aaaa:aaaa::/64": "xxxxxxxxxxxxxx" } -IPv6Sources: { +IPv6LocalSubnets: { bbbb:bbbb:bbbb:bbbb::/64 } -IPv4Destinations: { +IPv4RemoteSubnets: { "a.a.a.a/24": "xxxxxxxxxxxxxx" } -IPv4Sources: { +IPv4LocalSubnets: { b.b.b.b/24 } ``` diff --git a/_posts/2019-01-08-history.md b/_posts/2019-01-08-history.md index 6bce200..31a8d3a 100644 --- a/_posts/2019-01-08-history.md +++ b/_posts/2019-01-08-history.md @@ -56,7 +56,7 @@ Then, when a node needs to forward a packet, it checks the tree location of each This is explained in more detail in earlier blog posts, if you're not familiar with how Yggdrasil routes and care to read more. In our package delivery example, imagine if the streets in Alice's town were laid out in a grid, and then named and numbered systematically by blocks, with street signs to label where any off-grid bypasses go. -Alice and friends still haven't bought maps, but they they know each other's *addresses* instead. +Alice and friends still haven't bought maps, but they know each other's *addresses* instead. So, if Alice wants to contact Carol, she first travels to Bob's house and asks him for Carol's address. Now, when she wants to deliver a package to Carol, she can simply follow the block structure of the town until she arrives on Carol's block, and she has the option to take any bypass she happens to come across if it brings her closer to Carol's place. 
That's basically how routing on the tree, or taking an off-tree shortcut, work in Yggdrasil's greedy routing scheme, except with a tree instead of a grid (which, in addition to working everywhere, seems to work *well* in the places we care about). diff --git a/_posts/2019-03-24-peering.md b/_posts/2019-03-24-peering.md index 2350c6e..b4764cb 100644 --- a/_posts/2019-03-24-peering.md +++ b/_posts/2019-03-24-peering.md @@ -28,7 +28,7 @@ In addition, the number of peers you want to add depends on what you want to do. ### What happens when things go wrong -Lets imagine we have some nodes in New York, and initially they follow the peering rules outlined above. Now suppose that two of these nodes decide that they want to add connections to London. In Yggdrasil, nodes tend to select parents that minimize latency to the root, which happens to be a node in Paris at the time I'm writing this. As a result, both of the NY nodes are likely to select their respective London peers as their parents. If the nodes are following the peering rules, then at least one of them has also decided to peer with the other, so they have a shortcut they can use to talk to each-other (or any descendants in the tree). +Let's imagine we have some nodes in New York, and initially they follow the peering rules outlined above. Now suppose that two of these nodes decide that they want to add connections to London. In Yggdrasil, nodes tend to select parents that minimize latency to the root, which happens to be a node in Paris at the time I'm writing this. As a result, both of the NY nodes are likely to select their respective London peers as their parents. If the nodes are following the peering rules, then at least one of them has also decided to peer with the other, so they have a shortcut they can use to talk to each other (or any descendants in the tree). 
However, if they ignore the peering rules and *don't* peer with each other, then they are likely to route through London instead of communicating over their local mesh network. A shorter path exists, through their local mesh network, but it's not one that the network *must* know about for routing to work, so they won't necessarily know about it. As a result, the latency between these two nodes (or decedents thereof) will likely be an order of magnitude more than it needs to be (and probably lower bandwidth as well). diff --git a/_posts/2019-08-03-release-v0-3-6.md b/_posts/2019-08-03-release-v0-3-6.md new file mode 100644 index 0000000..6248e08 --- /dev/null +++ b/_posts/2019-08-03-release-v0-3-6.md @@ -0,0 +1,114 @@ +--- +layout: post +title: "Version 0.3.6" +date: 2019-08-03 08:00:00 -0000 +author: Neil Alexander +--- + +### New release! + +It's been nearly five months since we released version 0.3.5 of Yggdrasil. In +that time we've seen the node count rise to over 400 nodes on the public network +at times (over 80% of which are running the latest released version) and we've +gained valuable insight into the kinds of challenges that our users have. We've +worked to fix a number of bugs and to improve Yggdrasil. + +In terms of lines of code changed, version 0.3.6 is the biggest release of +Yggdrasil to date, with several thousands of lines of code affected. It +represents a massive refactoring exercise in which we've broken up and +modularised the code, dividing core Yggdrasil functionality, TUN/TAP, admin +socket and multicast features into their own respective Go packages. + +### Fixes + +Most of the user-facing changes in this release are fairly minimal, however some +bugs have been corrected. A complete list is available in the [changelog](/changelog.html). + +Highlights include peers now being added correctly even when one or more +configured peers are unavailable or unreachable. 
Multicast interfaces are also +being evaluated more frequently now, which can help if an interface becomes +available or goes down after Yggdrasil has already started. + +A number of bugs have been fixed in the TUN/TAP and IP-specific code, including +problems that affected ICMPv6 and Neighbour Discovery in TAP mode specifically. +This helps reliability on platforms where TAP mode is used more commonly, e.g. +on BSD platforms or on Windows, although this also improves TAP support on Linux +too. + +### Refactoring and API + +Around the previous release, it became obvious to us that our codebase was +turning into a monolith. We had pretty much all of the necessary behaviour in +a single `yggdrasil` package to run a single node, but this made our codebase +inflexible and difficult to maintain and extend. It also meant that Yggdrasil +was virtually impossible to integrate into other applications. + +Our refactoring efforts in version 0.3.6 mean that our codebase is now easier to +manage and to understand. It also includes the first taste of our API! The +API makes it possible to take the Yggdrasil core, drop it into your own Go +application and use the Yggdrasil network as a fully end-to-end encrypted and +distributed transport layer. We've also moved all of the IP-specific code into +the TUN/TAP module, which means that Yggdrasil's core now provides a completely +protocol-agnostic transport. + +Documentation on how to use the API to integrate Yggdrasil into your own +applications will follow soon—watch this space! In the meantime, [`godoc` can be +used to examine our new API functions](https://godoc.org/github.com/yggdrasil-network/yggdrasil-go/src/yggdrasil). + +Please note though that **API functions are not yet finalised and may be subject +to change** in future versions. Yggdrasil is still alpha-grade software at this +point so all of the usual warnings apply. 
+ +### Platform Support + +We enjoy great support from our community in bringing and packaging Yggdrasil on +new platforms. Since the release of version 0.3.5, the following third-party +packages have cropped up, and we are very grateful to the maintainers: + +- A [new RPM build](https://copr.fedorainfracloud.org/coprs/leisteth/yggdrasil/) for Red Hat, Fedora, CentOS etc. +- An [AUR package](https://aur.archlinux.org/packages/yggdrasil-git/) for Arch Linux +- A [Void package](https://github.com/void-linux/void-packages/tree/master/srcpkgs/yggdrasil) for Void Linux +- A [MacPorts package](https://github.com/macports/macports-ports/blob/master/net/yggdrasil-go/Portfile) for macOS + +We expect that any third-party packages which have not yet been updated for +v0.3.6 will be updated soon! + +We are aware of a few outstanding issues with Windows, which are largely related +to one or two bugs in the [Water](https://github.com/songgao/water) library +which we use for TUN/TAP support. We are hoping to address these problems with +the maintainer of this library soon. Using Yggdrasil in router-only mode does +work as expected, but some bugs when using the TAP adapter still remain. In the +meantime, we'd certainly welcome any assistance in maintaining the Windows port +of Yggdrasil. + +The iOS build has been largely neglected due to API changes, although hopefully +a new TestFlight build for version 0.3.6 will be available before too long. + +### Upgrading + +We recommend that all Yggdrasil users always run the latest version of the code +wherever possible, so please upgrade as soon as it is convenient. New downloads +are available from our [Builds](/builds.html) page and +[Neil](https://github.com/neilalexander)'s S3 repositories are up-to-date for +Debian and EdgeRouter installs. + +If you have installed through a package manager, you should be able to upgrade +in-place as soon as the new packages are available. 
On macOS, you can simply +install the new `.pkg` from the builds page over the top of the old one. On +Windows, and on any installation where the binary was installed by hand, you can +simply replace the `yggdrasil` and `yggdrasilctl` binaries with the newly +released builds. + +Building from source is simple if you have Git and Go 1.11 or later installed: +``` +git clone https://github.com/yggdrasil-network/yggdrasil-go +cd yggdrasil-go +./build +``` + +### Feedback + +We always welcome feedback, so please do feel free to join us either in our +[Matrix channel](https://riot.im/app/#/room/%23yggdrasil:matrix.org) or on IRC +in `#yggdrasil` on Freenode. You can also raise bug reports and issues in [our +GitHub repository](https://github.com/yggdrasil-network/yggdrasil-go/issues). diff --git a/_posts/2019-08-19-awdl.md b/_posts/2019-08-19-awdl.md new file mode 100644 index 0000000..94344dc --- /dev/null +++ b/_posts/2019-08-19-awdl.md @@ -0,0 +1,237 @@ +--- +layout: post +title: "Meshing using Apple Wireless Direct Link (AWDL)" +date: 2019-08-19 08:00:00 -0000 +author: Neil Alexander +--- + +### Wireless without borders + +I was mostly prompted to write this post in response to a [Hacker News +thread](https://news.ycombinator.com/item?id=20735462) recently, which announced +the release of an open-source AirDrop implementation called +[OpenDrop](https://github.com/seemoo-lab/opendrop), from the same team at Seemoo +Lab who produced an open-source implementation of the Apple Wireless Direct Link +(AWDL) protocol called [OWL](https://github.com/seemoo-lab/owl). AWDL is the +secret sauce behind AirDrop, peer-to-peer AirPlay and some other Apple wireless +technologies. Even though everything covered in this post was done some time +ago, I have never spent the time to document it. + +With a few exceptions, most wireless networks in the world operate in +"infrastructure mode" which is where a wireless access point serves one or more +wireless clients. 
Think of your Wi-Fi at home, at work or in a coffee shop. +However, as implied by the name, reliable and usable infrastructure Wi-Fi is +often only available in certain physical locations with "good infrastructure". +If you wanted to connect some devices together anywhere not served by an +infrastructure Wi-Fi network, or in a location where you can't suddenly plug in +a wireless access point, you may not have many options (Bluetooth aside). + +AWDL is designed to avoid this problem by extending the 802.11 wireless standard +to allow client devices to communicate directly with each other, without the +help of the central wireless access point. You can walk out into a field with a +couple of iPhones or Macs and they can use AWDL to discover each other and +exchange data, peer-to-peer. Even better is that nearby devices that are +connected to different infrastructure Wi-Fi networks can still communicate with +each other using AWDL! + +### The science + +Normally, when connected to a wireless access point, wireless clients remain +locked to the specific radio channel that the AP is using. AWDL works by +instructing the wireless adapter in the device to "hop" between channels so that +it can not only remain connected to the wireless access point, but can also +listen to other nearby devices. + +Devices announce their presence and information about their services on a +"social channel" for other devices to hear, effectively creating peer-to-peer +service discovery. Once two devices have decided that they want to communicate +directly, they agree to jump to another channel for real data exchange so that +they don't interrupt existing Wi-Fi networks or, indeed, the social channel. +These "hops" between wireless channels happen so quickly that there's very +little disruption to what the user is doing with their Wi-Fi connection already +(except for some minor wireless performance degradation - to be covered later). 
+ +A number of papers have been published by the OWLink team on the inner workings +of the AWDL protocol, which can be [found +here](https://owlink.org/publications/). In particular, [this +paper](https://arxiv.org/pdf/1808.03156.pdf) from Mobicom 2018 contains a +significant amount of detail about the AWDL protocol itself, channel hopping +techniques and security considerations, amongst other things. + +### Mesh opportunities + +Yggdrasil is designed to create a mesh network automatically out of +interconnected nodes - the idea being that all nodes can route to all other +nodes on the mesh network by routing through other nodes. + +Today, many of these connections happen between nodes across the Internet, since +the community is still relatively small and geographically dispersed. A node +joining the Yggdrasil network needs to only peer with a single device that is +already connected to the wider network in order to participate in the +fully-routable mesh. + +However, it's not the goal of Yggdrasil to remain something that we just toy +with over the Internet. We want to build a protocol that can scale globally and +work ad-hoc, even in places where infrastructure might not be particularly +strong otherwise. We think that one of Yggdrasil's greatest strengths is that it +is very close to zero-configuration, beyond giving it a very small number of +configuration options, and it should scale well too in principle. + +Yggdrasil can already discover potential peers on the same network segment by +using multicast service discovery, which sounds a lot like what AWDL does on the +social channel. You can configure which interfaces Yggdrasil beacons on with the +`MulticastInterfaces` configuration directive. + +I wanted to know if we could blend the two so that Yggdrasil could automatically +discover other nearby devices and initiate peering connections with them using +AWDL. 
+ +### Getting started + +Macs are a good target for developing and testing AWDL-aware applications as +AWDL is exposed to userspace through a network adapter called `awdl0`. It sits +there with a link-local IPv6 address, you can run `tcpdump` or Wireshark on it +to listen to AWDL traffic and you can even ping multicast group addresses on the +interface and get responses from other nearby devices, e.g. using `ping6 +ff02::1%awdl0`! However, Apple devices don't always keep AWDL alive and +listening all of the time. + +On macOS, the AWDL driver is only woken up when either AirDrop is being +actively used in Finder, or where a `NetService` has been created (usually +through Objective-C or Swift) which requests peer-to-peer networking. AWDL is +normally kept alive long enough to satisfy connectivity for these sessions and +then will be sent back to sleep after a period of idleness. + +On iOS, the story is somewhat similar to above, except that AWDL is often woken +up as soon as the device is unlocked if AirDrop is enabled. The `NetService` API +otherwise functions the same way. + +tvOS is the outlier in that it seems to wake up and listen to AWDL randomly, +even when the device is otherwise asleep, presumably because it is advertising +the ability to receive incoming AirPlay sessions to nearby devices. + +From a user perspective, the `awdl0` interface looks entirely unremarkable. It +behaves largely like any other ethernet interface, carrying regular IPv6 +traffic. In the background it's a bit more complicated, as the AWDL driver +performs traffic filtering for security reasons, namely, to stop someone sat +next to you in the airport from browsing your file shares. Regular listening +sockets won't accept connections over AWDL unless a specific socket option was +configured on the socket before it started listening. + +Multicast traffic, however, does largely get passed through the filter +untouched. Bingo. 
+ +### Waking up AWDL + +The `NetService` API is effectively a wrapper around multicast DNS-SD, which in +Apple's colourful language, is affectionately known as Bonjour. The API has the +added benefit of being able to tell the operating system to wake up the AWDL +driver pretty much on demand on behalf of "peer-to-peer" services. + +So all we would need to do to wake up AWDL is to call the `NetService` API, +publish a service that requests peer-to-peer functionality and let the operating +system do the hard work for us. Yggdrasil, being written in Go, didn't have any +concept of `NetService` but thankfully we were able to use Cgo to do this +instead. + +We wrote a Cgo function which calls the NetService API and advertises our new +fake service, `_yggdrasil._tcp`, which causes the operating system to wake up +the AWDL driver. Amazingly this worked. + +Yggdrasil doesn't actually use DNS-SD - we currently use a custom-formatted +multicast beacon on a different multicast group. It is planned to eventually +migrate to something more standard, like DNS-SD, for service discovery. However, +in this instance, registering a fake DNS-SD service was just enough to wake up +AWDL. + +### Peering automatically + +Once the driver is active, the regular Yggdrasil multicast beacons on the +`ff02::114` multicast group address seem to be passed through to the driver +normally and the Yggdrasil nodes running on each machine start to hear each +other's calls. + +The only thing that remained to be done was to configure the sockets with the +aforementioned socket option to allow them to communicate over the AWDL +interface. This socket option is called `SO_RECV_ANYIF` and is defined in +`sys/socket.h` on Darwin as `0x1104`. + +We configure the socket option on our TCP peering socket: +``` +err = unix.SetsockoptInt(int(fd), syscall.SOL_SOCKET, 0x1104, 1) +if err != nil { + ... 
+} +``` + +Now that the Yggdrasil nodes can hear each other's advertisements over the +`awdl0` interface, the regular automatic peering process kicks in and a TCP +session is opened between the two devices, creating a peering. The net result? +AWDL peerings! +``` +$ sudo yggdrasilctl getSwitchPeers + bytes_recvd bytes_sent coords endpoint ip port proto +1 244278 313907 [3 5 5 2 1] fe80::xxxx:xxxx:xxxx:xxxx%awdl0 xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 1 tcp +``` + +To further cement the experiment, we can actually disconnect the two devices +from each other, or connect to different Wi-Fi networks automatically, and the +peering over the `awdl0` interface still continues to function! + +An `iperf3` test over Yggdrasil using the new AWDL link looks fairly good - the +devices are sat next to each other: +``` +[ ID] Interval Transfer Bandwidth +[ 5] 0.00-1.00 sec 15.4 MBytes 129 Mbits/sec +[ 5] 1.00-2.00 sec 16.9 MBytes 141 Mbits/sec +[ 5] 2.00-3.00 sec 15.9 MBytes 133 Mbits/sec +[ 5] 3.00-4.00 sec 17.6 MBytes 147 Mbits/sec +[ 5] 4.00-5.00 sec 16.8 MBytes 141 Mbits/sec +[ 5] 5.00-6.00 sec 16.2 MBytes 136 Mbits/sec +[ 5] 6.00-7.00 sec 12.5 MBytes 105 Mbits/sec +[ 5] 7.00-8.00 sec 12.7 MBytes 106 Mbits/sec +[ 5] 8.00-9.00 sec 14.9 MBytes 125 Mbits/sec +[ 5] 9.00-10.00 sec 13.5 MBytes 113 Mbits/sec +``` + +### Observations and iOS + +As the `iperf3` test above shows, the link performance is actually quite good! +It routinely exceeds 100mbps, although this is between only two devices. I have +not been able to test this with Yggdrasil nodes running over AWDL in any +particular density due to only having a limited number of Macs to hand. + +One thing that I did notice though is that, while AWDL is active, my wireless +connection to my home Wi-Fi network does reduce in speed somewhat. This is to be +expected, given that the wireless chipset is hopping between channels rather +than spending all of its time on a single channel. 
+ +Sadly we weren't able to reproduce this test using iOS Testflight builds of +Yggdrasil. On iOS, we implement Yggdrasil as a VPN service which is subject to a +number of probably reasonable restrictions imposed by the OS, which presumably +exist to stop VPN extensions from spying on you. + +We were able to create a `NetService` from within the VPN extension and the +service beacons were advertised as expected, however, we weren't able to +initiate any other kind of connections over the `awdl0` interface. After a chat +with an engineer at Apple, it turns out that the `awdl0` interface isn't scoped +for use within a VPN extension, thus squashing our hopes and dreams of being +able to sprinkle this kind of magic onto our iOS port of Yggdrasil. We have a +feature request radar open with Apple in the hope that they may be able to +change this restriction in the future. + +But we were able to get this to work on macOS and that, itself, is quite +awesome. + +### Conclusion + +Yggdrasil doesn't enable AWDL by default because of the reduction in wireless +performance that AWDL being active can cause. Therefore, to enable AWDL peering, +you must add the `awdl0` interface specifically into the `MulticastInterfaces` +configuration option in `yggdrasil.conf`. However, we do have working support +for connecting Macs together and meshing automatically using AWDL, and you can +enable it very easily if you wish to experiment! + +We'd love to hear if you are peering Yggdrasil nodes using AWDL, or have +performed any more extensive tests of how it performs in real-world scenarios - +join us on our [Matrix channel](https://matrix.to/#/#yggdrasil:matrix.org)! 
diff --git a/_posts/2019-09-01-actors.md b/_posts/2019-09-01-actors.md new file mode 100644 index 0000000..ad0bee4 --- /dev/null +++ b/_posts/2019-09-01-actors.md @@ -0,0 +1,171 @@ +--- +layout: post +title: "Acting out" +date: 2019-09-01 21:00:00 +0000 +author: Arceliar +--- + +### Overture + +We've recently rewritten much of Yggdrasil's internals to change from Go's native [communicating sequential processes](https://en.wikipedia.org/wiki/Communicating_sequential_processes) (goroutine+channel) style to using an asynchronous [actor model](https://en.wikipedia.org/wiki/Actor_model) approach to concurrency. While this change should be invisible to the average user, it dramatically changes what we developers need to think about when working on the code. I thought it would be useful to explain a little about the motivation for rewriting things this way, and what the consequences are. + +Caution: theatre puns and references throughout, because `Actor`s. + +### Exposition + +Yggdrasil is written in the Go programming language. Go makes it easy to start a function running concurrently, and gives developers the tools they need to make concurrently executing functions communicate, but it's not always easy to use them correctly. To be clear, the things I'm about to rant about are all fixable. Working around them is a normal thing to do in Go. More importantly, it's a case where doing things the obvious way (which is sometimes even safe in isolation) leads to *wrong* behavior in a larger program. I prefer models where the obvious thing is still correct, and non-obvious things are only needed as a performance optimization. + +#### Composition + +There's a common pattern that has emerged many times in the Yggdrasil code base. We'll have a `struct` with some mutable fields that need reading or updating, such as information about a particular cryptographic session, or the switch's table of idle peers and buffered traffic. 
Since shared mutable state is hard, and Go is all about "[Share Memory By Communicating](https://blog.golang.org/share-memory-by-communicating)", we'll have packets get passed to a dedicated worker goroutine that "owns" that particular `struct`. The worker uses information from the packet and the owned `struct` to do whatever it needs to do, updates these things accordingly, and passes the packet along to the next goroutine in the pipeline. + +This often results in a "`for select`" pattern, where goroutines sit in an infinite `for` loop and `select` on several channels, to wait for packets to process or various types of signals from other goroutines. There are a few ways around it (with heavy use of `reflect` or `chan interface{}`, for example), but in most cases, every `select` statement needs to fully enumerate every behavior that the goroutine may need to engage in at that point in the code. If there's a common set of `case`s that always need to be handled, and then a few exceptional `case`s that may or may not matter (possibly when the associated `struct`s the workers are using are similar but not exactly the same types, or as the state of a `struct`'s fields change), then that typically involves multiple `select` statements with only the addition or modification of one or two `case`s. + +Go embraces composition in its type system, but `select` statements (and channel operations in general) make execution resistant to composition. + +#### Deadlocks + +The "`for select`" pattern is safe, as far as I know, if the flow of messages through the program forms a directed acyclic graph. However, in our case, cycles emerge if we try to handle things in the obvious way.
For example, a cryptographic session needs to somehow get outbound encrypted traffic to the switch, but incoming encrypted traffic also needs to make it from the switch to the sessions for decryption (via the router, which is responsible for, among other things, identifying which session is associated with the traffic). + +When cycles of goroutines naively pass messages over channels, deadlocks are all but inevitable. There are a few ways to address this, but they're not always appropriate. Ideally, we would change the design to remove cycles, but this is not always possible, and may require significant changes to the workflow in cases where it is possible. In practice, what we'd actually do is either buffer messages (having some dedicated reader goroutine to take the message, add it to a slice, and then pass it to the real destination ASAP) or drop messages entirely (with a `select` statement that aborts and does cleanup in a `default` case, or by having a dedicated reader that drops messages more intelligently, such as from the front of the queue, under the assumption that older messages are less useful). + +#### Leaks + +Typically, when a goroutine is started, it continues to run until either the function returns or the program exits. For this reason, if a goroutine executes any statements which can block (such as a channel operation), it's important to include some `case` which signals that it's time to return. Forgetting to do this can result in goroutine leaks. [Never start a goroutine without knowing how it will stop](https://dave.cheney.net/2016/12/22/never-start-a-goroutine-without-knowing-how-it-will-stop), or so the experts say. + +This is sometimes harder than it needs to be. To be blunt, the single producer N consumer cases are fine, you just close the channel and have all the consumers take this as a signal to exit. Anything involving multiple producers requires some sort of signaling to indicate that all producers have exited.
Since you're using a channel already, the obvious option is a `select` statement with another channel that closes to signal shutdown, and then something like e.g. a [`sync.WaitGroup`](https://golang.org/pkg/sync/#WaitGroup) to wait for all producers to exit before closing the channel. Until your number of producers needs to change at runtime, and you realize that this races if you start to `Wait` before `Add`ing everything to the group, so you need to implement a custom counter, and be careful that additions and subtractions can also race and cause it to shut down early. And have fun solving it, because with how much `select` resists composition and code reuse, you're going to be implementing the same patterns over, and over, and over, and over... + +It's not that this is some impossible problem to solve, it's just that Go's take on the [CSP](https://en.wikipedia.org/wiki/Communicating_sequential_processes), combined with the rest of the tools the language gives you, makes it easy and concise to run things the *wrong* way, and leads to comparatively complex and delicate code when trying to run them the right way. At least, that's my personal view of it based on my experience so far, but it probably varies some based on the problem the code is trying to solve. + +### Rising action + +The [actor model](https://en.wikipedia.org/wiki/Actor_model) is another programming paradigm that embraces concurrency with a "share memory by communicating" philosophy. + +For our purposes, an actor is basically a data type with a few special properties: +1. It has an inbox where messages to the actor are placed. +2. It has an associated unit of execution, such as a thread, which processes messages from the inbox one at a time. +3. Rather than exposing ordinary functions for other code to call, the actor exposes *behaviors*. A behavior is a function which has no return value, and is executed only for its side effects.
When an actor `A` calls a behavior of an actor `B`, what really happens is that `A` places a message in `B`'s inbox, and `B` processes that message by executing some code. + +Different implementations differ on details after that, such as what order messages are processed in, if actors are allowed to wait for a particular type of message before continuing, whether actors run locally or are distributed across a cluster, etc., but they tend to all include some version of the broad strokes above. + +### Turing point + + + +I'm particularly fond of the [pony](https://ponylang.io) programming language's take on the actor model. I really can't say enough nice things about their approach, and fully describing it is beyond the scope of this blog post, but if you come out of here with an interest in the actor model, then I highly recommend checking out that language. Maybe watch a few of the talks from the developers that have been posted to YouTube, or read their papers about what is *easily* the most promising approach to garbage collection I've ever come across. + +Anyway, I don't actually work on anything written in pony, but I like their version of the actor model so much that I decided to see if I could trick Go's runtime into faking it. The result is [`phony`](https://github.com/Arceliar/phony), which manages to do most of what I want in under 70 lines of code. When we write code using this asynchronous message passing style, instead of ordinary goroutines+channels, the implications are pretty significant: + +1. There are no deadlocks. Message sends always succeed, and are quite fast (it doesn't even require [CAS](https://en.wikipedia.org/wiki/Compare-and-swap) instructions in the normal case). +2. Inbox sizes stay small due to backpressure: if the sender sees that the receiver's inbox has too many pending messages, it will schedule itself to stop at some deadlock-free safe point in the future, to wait until the receiver signals that it's handled the message. +3. 
`Actor`s are *shockingly* lightweight: on a modern 64-bit processor, an idle `Actor`'s only resources are 24 bytes for an empty `Inbox`, some of which is padding that may not apply if embedded into a struct. In particular, an idle `Actor` with an empty `Inbox` has no associated goroutine, so it requires no stack. +4. The lack of a goroutine also means that idle `Actor`s, even cycles of `Actor`s, can be garbage collected automatically. +5. Any `struct` that embeds an `Inbox` satisfies the `Actor` interface. Since `Actor`s encapsulate their own unit of execution, it means the range of behaviors that unit of execution can engage in is encoded into the type system and can even be abstracted through `interface` types. In my opinion, the resulting code is cleaner, easier to read and understand, and far easier to reuse or extend than the `for select` pattern from goroutine+channel use. + +### Falling action + +I'm happy enough with the current state of `phony` that I decided to start migrating the `yggdrasil-go` code base to use it. This is still a work in progress (there are some non-`Actor` goroutines around the edges of the code, mostly in main `Accept` loops and that sort of thing), but the hot paths are now `Actor` based. + +Most of this was done in a weekend and came together with surprisingly little pain. I had exactly 2 crashes the entire time (1 accidental `nil` pointer dereference and 1 legitimate bug I needed to fix in `phony`), and more importantly, 0 deadlocks. Most things just worked as intended the first time they compiled. There were a few bugs to work out when I was rewriting the `link` code, but nothing compared to the mess I had to deal with when writing the old code (which was a couple of horrifying interdependent `for select` loops to build a state machine). + +So by now you're probably wondering what any of this looks like in practice.
Just to give a generic example, suppose we have some struct with an exported function that needs to run code on a worker goroutine. We could end up with something like the following when writing Go in the CSP style: + +```Go + +// This is the function we want the worker to run. +func (n *NonActorStruct) theFunction(arg1 Type1, arg2 Type2) { + // this is where the code we actually care about goes, the rest is basically boilerplate +} + +// This is the struct that we want the worker to own and manipulate. +type NonActorStruct struct { + inputForTheFunction chan argsForTheFunction + // fields we care about, plus maybe more channels for other things +} + +// Needed to initialize the channel to a working state +func NewNonActorStruct() *NonActorStruct { + n := NonActorStruct{ + inputForTheFunction: make(chan argsForTheFunction), + } + return &n +} + +// This is just a helper struct to carry arguments for the function. +type argsForTheFunction struct { + Arg1 Type1 + Arg2 Type2 +} + +// This is the function we export. +func (n *NonActorStruct) RunTheFunction(arg1 Type1, arg2 Type2) { + n.inputForTheFunction<-argsForTheFunction{arg1, arg2} +} + +// This is needed to start the worker, otherwise things block. +func (n *NonActorStruct) Start() { + go func() { + for { + select{ + // cases for other things we may need to do would also be here + // presumably at least one is involved in safely shutting down + case args := <-n.inputForTheFunction: + // We could possibly have a switch statement here + // Then switch on the arg type to pick which function to run + n.theFunction(args.Arg1, args.Arg2) + } + } + }() +} + +// This is needed to stop the worker when we're done. +func (n *NonActorStruct) Stop() { + // Actual implemenation depends on what else the worker does in its loop, + // but it probably just sends a specific message and/or closes some channel. 
+} + +// Then to use the code, we have something like: +myStruct := NewNonActorStruct() +myStruct.Start() +defer myStruct.Stop() // Or arrange this to happen somewhere else +myStruct.RunTheFunction(arg1, arg2) +``` + + +When migrating to the actor model, the basic pattern that emerged was to embed a `phony.Inbox` into any `struct` we wanted to make into a `phony.Actor`, and then define functions of the struct like so: + +```Go + +// This is the function we want the worker to run. +func (a *ActorStruct) theFunction(arg1 Type1, arg2 Type2) { + // this is where the code we actually care about goes, the rest is basically boilerplate +} + +// This is the struct that we want the worker to own and manipulate. +type ActorStruct struct { + phony.Inbox // This defines the Act function, satisfying the Actor interface + // fields we care about +} + +// This is the function we export. +func (a *ActorStruct) RunTheFunction(from phony.Actor, arg1 Type1, arg2 Type2) { + a.Act(from, func() { + a.theFunction(arg1, arg2) + }) +} + +// And then to use it, an Actor x would run something like: +myActor := new(ActorStruct) +myActor.RunTheFunction(x, arg1, arg2) +``` + +And that's about it. The first argument to `myActor.RunTheFunction` is also `nil`able, for when we have non-`Actor` code that needs to send a message; it just means there's no backpressure to slow down the non-`Actor` code if it's sending messages faster than the `Actor` can handle them. A `phony.Block` function exists to help non-`Actor`s wait for an `Actor` to process a message before continuing, since this seems like a common enough use case (especially when a package wants to export a non-`Actor` interface that uses `Actor` code internally). + +What's great is that we don't need to think about starting or stopping workers, deadlocks and leaks are not possible outside of blocking operations (e.g. I/O), and we can add or reuse behaviors just as easily as any function. I find the code easier to read and reason about too.
+ +I/O is one rough spot, since an `Actor` can block on a `Read` or a `Write` and not process incoming messages as a result. This isn't really any worse than working with normal Go code, and the pattern we've adopted is to have separate `Actor`s for `Read` and `Write`, where one mostly just sits in a `Read` loop and sends the results (and/or error) somewhere whenever a `Read` finishes. These two workers can be children of some parent `Actor`, which is the only one the rest of the code needs to know about, and then all we need to remember to do is close the `ReadWriteCloser` (e.g. socket) at some point when we're done. This is the sort of thing that we'll eventually want to write a standard `struct` for, update our code everywhere to use it, and then never have to think about it again. In the meantime, we have a couple of very similar implementations for working with sockets or the tun/tap device. + +### Dénouement + +The Go language makes concurrency easy, but for some problems it can be difficult to do safely out-of-the-box. However, the language provides the tools needed to implement an actor model approach very easily. While I won't claim that the actor model is a panacea for all development woes, Yggdrasil by its very nature requires us to think about networks of nodes communicating asynchronously, so it makes sense to use a programming paradigm that lets us model that approach more explicitly in our code base. Outside of a couple of corner cases (namely blocking I/O for the network sockets and the tun/tap device), we expect this to obviate any need to even think about deadlocks, make development easier moving forward, and generally lead to a better user experience as a result. The code migration is still a work in progress, but `Actor`s have replaced `for select` workers along the hot paths through the code (minus 1 crypto worker pool in the session code) and will slowly replace synchronization primitives in the remaining code base.
The current code has been merged into our `develop` branch, and I'm quite excited to see it land in Yggdrasil `v0.3.9`, along with the usual bug fixes and incremental improvements, which we plan to release in the near future. + diff --git a/_posts/2020-02-21-release-v0-3-13.md b/_posts/2020-02-21-release-v0-3-13.md new file mode 100644 index 0000000..e4f2fac --- /dev/null +++ b/_posts/2020-02-21-release-v0-3-13.md @@ -0,0 +1,121 @@ +--- +layout: post +title: "Release v0.3.13" +date: 2020-02-21 09:00:00 +0000 +author: Neil Alexander +--- + +### Release time! + +Our last Yggdrasil release, v0.3.12, was merged a couple of months ago at the +end of November. For the most part we have seen good stability with the v0.3.12 +builds, not to mention good adoption (with the crawler showing over 500 nodes +running it). Today we are releasing our next version, v0.3.13. + +Many of our releases tend not to warrant blog post entries, especially given +that the changelog documents the changes. However, there's some fairly big news +points associated with this version therefore this post aims to discuss them in +a bit more detail. + +#### TUN adapter changes + +The first big talking point is that this is the first Yggdrasil release that +departs entirely from the Water library and replaces it with the Wireguard TUN +library. There are a few reasons why we decided to switch from Water to the +Wireguard library, but one of the most prominent is that it gives us better TUN +support across all platforms and allows us to finally remove TAP support +altogether. + +At a high-level, TUN interfaces are effectively emulating "Layer 3" interfaces - +they deal only in IP packets - whereas TAP interfaces are emulating "Layer 2" +full-fat Ethernet interfaces. 
+ +To run in TAP mode, Yggdrasil not only had to add and remove Ethernet headers +for each packet, but it also had to provide an entire NDP implementation and +track MAC addresses in order to trick the host operating system into believing +that there was a real Ethernet domain on the other end of the adapter. Needless +to say, the amount of boilerplate code needed to make TAP mode work correctly +was significant and much of that code was very fragile. + +Although we implemented NDP, we did not ever get around to implementing ARP, +which also meant that sending tunnel-routed IPv4 traffic over TAP interfaces +invariably did not work either. We have now been able to remove much of this +code and simplify the TUN code massively, closing the gaps between some of our +supported platforms. + +There is one platform that is negatively impacted by this change and that's +NetBSD. The Wireguard TUN package that we are using currently has **no support +for NetBSD**, so we are also removing NetBSD as a supported target until the +necessary code appears upstream. To our knowledge, we don't have a base of +NetBSD users anyway, but we will aim to re-add this soon. + +The `IfTAPMode` configuration option has now been removed from Yggdrasil +entirely and it will be ignored if specified. **If you are using TAP mode today, +then this will affect you**. Please make sure to check your Yggdrasil +configuration since this may result in interface naming changes and you may have +to update network settings in your host operating system. + +Initially we added TAP support into Yggdrasil as it was the only way to support +Windows, since the OpenVPN driver that we used at the time only supported TAP +mode. Thankfully, this is no longer a problem, as the Wireguard project have +also released [Wintun](https://wintun.net), which is supported by the Wireguard +TUN library.
The net result is that we gain TUN support on Windows and the +performance is *far* better than the buggy OpenVPN driver, which is a nice segue +into... + +#### Windows installer and performance + +We have spent a lot of time trying to improve the installation and setup +experience on Windows. This mostly falls into two areas. + +The first is that using the Wintun driver has *massively* improved performance, +in some cases by hundreds of MB/s, and starting the Yggdrasil process is now +much more reliable too - it should no longer be necessary to restart Yggdrasil +due to cases of the TAP adapter not being set up or configured correctly. + +The second is that we now automatically generate Windows `.msi` installers using +AppVeyor, which means that installing or upgrading Yggdrasil is now simpler than +ever. It is no longer necessary to create directories, copy files and register +Windows services by hand - a marked improvement! + +The installer also bundles the Wintun driver and it is installed automatically +if required, therefore there is no longer a need to hunt down and install the +OpenVPN TAP driver separately. We hope that these changes will help to encourage +adoption of Yggdrasil on Windows platforms by significantly reducing the barrier +to entry. + +As in the previous section, Yggdrasil on Windows has gone from supporting TAP +mode only to now supporting TUN mode only. **This may mean that you need to +review your configuration**. If you no longer need the OpenVPN TAP driver on +your system, it is best to entirely uninstall it. It is also important to make +sure that the `IfName` configuration option in your `yggdrasil.conf` does not +specify the same name as an existing OpenVPN TAP interface or Yggdrasil may fail +to start. + +#### End of the v0.3 release cycle + +Generally we try, where possible, to avoid making any changes which would damage +backward compatibility with previous versions.
The last version that had +breaking changes was v0.2.1 - over a year and a half ago. However, maintaining +backward compatibility so tightly also prevents us from improving the Yggdrasil +design in various ways. + +Therefore, unless any serious bugs or security vulnerabilities appear, it is +very likely that this version will be the last in the v0.3 release cycle. +Instead, we will start working on the v0.4 release, which is likely to include a +number of breaking protocol changes and will be incompatible with v0.3 releases +as a result. + +More information will be announced on the types of changes in v0.4 as they +happen - expect to see more blog posts and chatter in the Matrix channel on this +subject - but we will aim to give as much notice as possible before releases +occur that contain breaking changes. + +#### Final mentions + +In addition to the release notes above, I'd like to relay the message that +[@mwarning](https://github.com/mwarning) has a proposal open for a Google Summer +of Code (GSoC) project under the Freifunk umbrella, comparing a number of mesh +routing protocols including Yggdrasil. More information about the proposal is +available [here](https://projects.freifunk.net/#/projects?project=freifunk_meshnet_protocol_evaluation&lang=en). +If you are interested, please reach out! 
diff --git a/admin.md b/admin.md index c28cc70..cf5caf2 100644 --- a/admin.md +++ b/admin.md @@ -118,7 +118,7 @@ For each IPv6 address: - `bytes_recvd` (`uint64`) contains the number of bytes received from that peer - `endpoint` (`string`) contains the connected IPv4/IPv6 address and port of the peering - `port` (`uint8`) contains the local switch port number for that peer -- `uptime` (`uint32`) contains the number of seconds since the peer connection was established +- `uptime` (`float64`) contains the number of seconds since the peer connection was established #### `addPeer` @@ -252,7 +252,7 @@ Returns: #### `removeRoute` Expects: -- `subnet=` `string` for the subnet to remove the route route for +- `subnet=` `string` for the subnet to remove the route for - `box_pub_key=` `string` for the public key that is routed to Removes an existing crypto-key route. diff --git a/assets/css/style.scss b/assets/css/style.scss index 5fdf139..805ee4b 100644 --- a/assets/css/style.scss +++ b/assets/css/style.scss @@ -22,15 +22,93 @@ div.wrapper section p a code { section { flex: auto; max-width: 100vw; + margin-right: 20px; + @media screen and (max-width: 768px) { + margin-top: 20px; + border: 0; + float: none; + position: relative; + flex: initial; + word-break: break-word; + overflow: hidden; + table { + thead { + th { + min-width: 90px; + } + } + } + } } header { flex: initial; position: relative; - width: 240px !important; margin-right: 1em; + @media screen and (max-width: 768px) { + > p { + padding-right: 20%; + } + font-size: 0; + margin: 0; + padding: 0; + &:before { + content:'\2261'; + display: block; + position: absolute; + left: 0px; + font-size: 40px; + top: -38px; + } + img { + display: none; + } + > a { + position: absolute; + top: -23px; + left: 50px; + } + a { + &:hover { + font-weight: initial; + } + } + &:hover { + padding: 0; + font-size: initial; + > a { + pointer-events: none; + position: initial; + } + position: fixed; + overflow-y: scroll; + 
background-color: white; + top: 0px; + left: 0; + right: 0; + bottom: 0; + width: 100%; + z-index: 100; + padding-top: 20px; + padding-bottom: 50px; + padding-left: 20px; + p { + a { + font-size: 24px; + padding-top: 2px; + padding-bottom: 2px; + display: block; + } + br { + display: none; + } + } + &:before { + display: none; + } + } + } } - pre { overflow-x: scroll; } @@ -77,3 +155,9 @@ div.blogpost div#overview div#excerpt { text-decoration: none; font-size: 1em; } + +a:hover, a:focus { + color:#0be; + font-weight: initial; + text-decoration: underline; +} diff --git a/builds.md b/builds.md index 2bcd579..ecf9458 100644 --- a/builds.md +++ b/builds.md @@ -9,6 +9,8 @@ After every push to the `master` branch of the [GitHub repository](https://githu If you prefer to live dangerously and want to test bleeding-edge features or changes, development builds are [available from the `develop` branch](builds-develop.md), although these may not be as stable. +Looking for Windows installers? [Click here to go to AppVeyor](https://ci.appveyor.com/project/neilalexander/yggdrasil-go/build/artifacts). + For convenience, the latest builds are linked below: