diff --git a/CHANGELOG.md b/CHANGELOG.md index fdac254..bbf0bfa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - in case of vulnerabilities. --> +## [0.3.10] - 2019-10-10 +### Added +- The core library now includes several unit tests for peering and `yggdrasil.Conn` connections + +### Changed +- On recent Linux kernels, Yggdrasil will now set the `tcp_congestion_control` algorithm used for its own TCP sockets to [BBR](https://github.com/google/bbr), which reduces latency under load +- The systemd service configuration in `contrib` (and, by extension, some of our packages) now attemps to load the `tun` module, in case TUN/TAP support is available but not loaded, and it restricts Yggdrasil to the `CAP_NET_ADMIN` capability for managing the TUN/TAP adapter, rather than letting it do whatever the (typically `root`) user can do + +### Fixed +- The `yggdrasil.Conn.RemoteAddr()` function no longer blocks, fixing a deadlock when CKR is used while under heavy load + ## [0.3.9] - 2019-09-27 ### Added - Yggdrasil will now complain more verbosely when a peer URI is incorrectly formatted diff --git a/contrib/systemd/yggdrasil.service b/contrib/systemd/yggdrasil.service index 37859e7..0223dd9 100644 --- a/contrib/systemd/yggdrasil.service +++ b/contrib/systemd/yggdrasil.service @@ -8,6 +8,8 @@ Group=yggdrasil ProtectHome=true ProtectSystem=true SyslogIdentifier=yggdrasil +CapabilityBoundSet=CAP_NET_ADMIN +ExecStartPre=+/sbin/modprobe tun ExecStartPre=/bin/sh -ec "if ! test -s /etc/yggdrasil.conf; \ then umask 077; \ yggdrasil -genconf > /etc/yggdrasil.conf; \ diff --git a/go.mod b/go.mod index d86101b..83c2292 100644 --- a/go.mod +++ b/go.mod @@ -1,7 +1,7 @@ module github.com/yggdrasil-network/yggdrasil-go require ( - github.com/Arceliar/phony v0.0.0-20190907031509-af5bdbeecab6 + github.com/Arceliar/phony v0.0.0-20191005181740-21679e75e3f0 github.com/gologme/log v0.0.0-20181207131047-4e5d8ccb38e8 github.com/hashicorp/go-syslog v1.0.0 github.com/hjson/hjson-go v3.0.1-0.20190209023717-9147687966d9+incompatible diff --git a/go.sum b/go.sum index cdabc40..bca7c23 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,5 @@ -github.com/Arceliar/phony v0.0.0-20190907031509-af5bdbeecab6 h1:zMj5Q1V0yF4WNfV/FpXG6iXfPJ965Xc5asR2vHXanXc= -github.com/Arceliar/phony v0.0.0-20190907031509-af5bdbeecab6/go.mod h1:6Lkn+/zJilRMsKmbmG1RPoamiArC6HS73xbwRyp3UyI= +github.com/Arceliar/phony v0.0.0-20191005181740-21679e75e3f0 h1:IOFsvAMFkgnKfSQHxXTeqb1+ODFeR5px1HCHU86KF30= +github.com/Arceliar/phony v0.0.0-20191005181740-21679e75e3f0/go.mod h1:6Lkn+/zJilRMsKmbmG1RPoamiArC6HS73xbwRyp3UyI= github.com/gologme/log v0.0.0-20181207131047-4e5d8ccb38e8 h1:WD8iJ37bRNwvETMfVTusVSAi0WdXTpfNVGY2aHycNKY= github.com/gologme/log v0.0.0-20181207131047-4e5d8ccb38e8/go.mod h1:gq31gQ8wEHkR+WekdWsqDuf8pXTUZA9BnnzTuPz1Y9U= github.com/hashicorp/go-syslog v1.0.0 h1:KaodqZuhUoZereWVIYmpUgZysurB1kBLX2j0MwMrUAE= @@ -31,4 +31,5 @@ golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e h1:FDhOuMEY4JVRztM/gsbk+IKUQ8kj74bxZrgw87eMMVc= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/src/tuntap/conn.go b/src/tuntap/conn.go index 31875d9..6db46b2 100644 --- a/src/tuntap/conn.go +++ b/src/tuntap/conn.go @@ -31,7 +31,7 @@ func (s *tunConn) close() { } func (s *tunConn) _close_from_tun() { - s.conn.Close() + go s.conn.Close() // Just in case it blocks on actor operations delete(s.tun.addrToConn, s.addr) delete(s.tun.subnetToConn, s.snet) func() { diff --git a/src/yggdrasil/conn.go b/src/yggdrasil/conn.go index eb2bb74..bb5964b 100644 --- a/src/yggdrasil/conn.go +++ b/src/yggdrasil/conn.go @@ -96,7 +96,7 @@ func (c *Conn) setMTU(from phony.Actor, mtu uint16) { c.Act(from, func() { c.mtu = mtu }) } -// This should never be called from the router goroutine, used in the dial functions +// This should never be called from an actor, used in the dial functions func (c *Conn) search() error { var err error done := make(chan struct{}) @@ -115,9 +115,13 @@ func (c *Conn) search() error { default: if sinfo != nil { // Finish initializing the session - sinfo.setConn(nil, c) + c.session = sinfo + c.session.setConn(nil, c) + c.nodeID = crypto.GetNodeID(&c.session.theirPermPub) + for i := range c.nodeMask { + c.nodeMask[i] = 0xFF + } } - c.session = sinfo err = e close(done) } @@ -133,12 +137,6 @@ func (c *Conn) search() error { if c.session == nil && err == nil { panic("search failed but returned no error") } - if c.session != nil { - c.nodeID = crypto.GetNodeID(&c.session.theirPermPub) - for i := range c.nodeMask { - c.nodeMask[i] = 0xFF - } - } return err } @@ -294,6 +292,9 @@ func (c *Conn) writeNoCopy(msg FlowKeyMessage) error { var cancel util.Cancellation var doCancel bool phony.Block(c, func() { cancel, doCancel = c._getDeadlineCancellation(c.writeDeadline) }) + if doCancel { + defer cancel.Cancel(nil) + } var err error select { case <-cancel.Finished(): @@ -353,10 +354,8 @@ func (c *Conn) LocalAddr() crypto.NodeID { // RemoteAddr returns the complete node ID of the remote side of the connection. func (c *Conn) RemoteAddr() crypto.NodeID { - // TODO warn that this can block while waiting for the Conn actor to run, so don't call it from other actors... - var n crypto.NodeID - phony.Block(c, func() { n = *c.nodeID }) - return n + // RemoteAddr is set during the dial or accept, and isn't changed, so it's safe to access directly + return *c.nodeID } // SetDeadline is equivalent to calling both SetReadDeadline and diff --git a/src/yggdrasil/core_test.go b/src/yggdrasil/core_test.go new file mode 100644 index 0000000..364ed0b --- /dev/null +++ b/src/yggdrasil/core_test.go @@ -0,0 +1,204 @@ +package yggdrasil + +import ( + "bytes" + "math/rand" + "os" + "testing" + "time" + + "github.com/gologme/log" + + "github.com/yggdrasil-network/yggdrasil-go/src/config" +) + +// GenerateConfig produces default configuration with suitable modifications for tests. +func GenerateConfig() *config.NodeConfig { + cfg := config.GenerateConfig() + cfg.AdminListen = "none" + cfg.Listen = []string{"tcp://127.0.0.1:0"} + cfg.IfName = "none" + + return cfg +} + +// GetLoggerWithPrefix creates a new logger instance wih prefix. +// If verbose is set to true, three log levels are enabled: "info", "warn", "error". +func GetLoggerWithPrefix(prefix string, verbose bool) *log.Logger { + l := log.New(os.Stderr, prefix, log.Flags()) + if !verbose { + return l + } + l.EnableLevel("info") + l.EnableLevel("warn") + l.EnableLevel("error") + return l +} + +// CreateAndConnectTwo creates two nodes. nodeB connects to nodeA. +// Verbosity flag is passed to logger. +func CreateAndConnectTwo(t testing.TB, verbose bool) (nodeA *Core, nodeB *Core) { + nodeA = new(Core) + _, err := nodeA.Start(GenerateConfig(), GetLoggerWithPrefix("A: ", verbose)) + if err != nil { + t.Fatal(err) + } + + nodeB = new(Core) + _, err = nodeB.Start(GenerateConfig(), GetLoggerWithPrefix("B: ", verbose)) + if err != nil { + t.Fatal(err) + } + + err = nodeB.AddPeer("tcp://"+nodeA.link.tcp.getAddr().String(), "") + if err != nil { + t.Fatal(err) + } + + if l := len(nodeA.GetPeers()); l != 1 { + t.Fatal("unexpected number of peers", l) + } + if l := len(nodeB.GetPeers()); l != 1 { + t.Fatal("unexpected number of peers", l) + } + + return nodeA, nodeB +} + +// WaitConnected blocks until either nodes negotiated DHT or 5 seconds passed. +func WaitConnected(nodeA, nodeB *Core) bool { + // It may take up to 3 seconds, but let's wait 5. + for i := 0; i < 50; i++ { + time.Sleep(100 * time.Millisecond) + if len(nodeA.GetSwitchPeers()) > 0 && len(nodeB.GetSwitchPeers()) > 0 { + return true + } + } + return false +} + +// CreateEchoListener creates a routine listening on nodeA. It expects repeats messages of length bufLen. +// It returns a channel used to synchronize the routine with caller. +func CreateEchoListener(t testing.TB, nodeA *Core, bufLen int, repeats int) chan struct{} { + // Listen. Doing it here guarantees that there will be something to try to connect when it returns. + listener, err := nodeA.ConnListen() + if err != nil { + t.Fatal(err) + } + + // Start routine + done := make(chan struct{}) + go func() { + defer listener.Close() + conn, err := listener.Accept() + if err != nil { + t.Error(err) + return + } + defer conn.Close() + buf := make([]byte, bufLen) + + for i := 0; i < repeats; i++ { + n, err := conn.Read(buf) + if err != nil { + t.Error(err) + return + } + if n != bufLen { + t.Error("missing data") + return + } + _, err = conn.Write(buf) + if err != nil { + t.Error(err) + } + } + done <- struct{}{} + }() + + return done +} + +// TestCore_Start_Connect checks if two nodes can connect together. +func TestCore_Start_Connect(t *testing.T) { + CreateAndConnectTwo(t, true) +} + +// TestCore_Start_Transfer checks that messages can be passed between nodes (in both directions). +func TestCore_Start_Transfer(t *testing.T) { + nodeA, nodeB := CreateAndConnectTwo(t, true) + + msgLen := 1500 + done := CreateEchoListener(t, nodeA, msgLen, 1) + + if !WaitConnected(nodeA, nodeB) { + t.Fatal("nodes did not connect") + } + + // Dial + dialer, err := nodeB.ConnDialer() + if err != nil { + t.Fatal(err) + } + conn, err := dialer.Dial("nodeid", nodeA.NodeID().String()) + if err != nil { + t.Fatal(err) + } + defer conn.Close() + msg := make([]byte, msgLen) + rand.Read(msg) + conn.Write(msg) + if err != nil { + t.Fatal(err) + } + buf := make([]byte, msgLen) + _, err = conn.Read(buf) + if err != nil { + t.Fatal(err) + } + if bytes.Compare(msg, buf) != 0 { + t.Fatal("expected echo") + } + <-done +} + +// BenchmarkCore_Start_Transfer estimates the possible transfer between nodes (in MB/s). +func BenchmarkCore_Start_Transfer(b *testing.B) { + nodeA, nodeB := CreateAndConnectTwo(b, false) + + msgLen := 1500 // typical MTU + done := CreateEchoListener(b, nodeA, msgLen, b.N) + + if !WaitConnected(nodeA, nodeB) { + b.Fatal("nodes did not connect") + } + + // Dial + dialer, err := nodeB.ConnDialer() + if err != nil { + b.Fatal(err) + } + conn, err := dialer.Dial("nodeid", nodeA.NodeID().String()) + if err != nil { + b.Fatal(err) + } + defer conn.Close() + msg := make([]byte, msgLen) + rand.Read(msg) + buf := make([]byte, msgLen) + + b.SetBytes(int64(b.N * msgLen)) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + conn.Write(msg) + if err != nil { + b.Fatal(err) + } + _, err = conn.Read(buf) + if err != nil { + b.Fatal(err) + } + } + <-done +} diff --git a/src/yggdrasil/tcp_linux.go b/src/yggdrasil/tcp_linux.go new file mode 100644 index 0000000..7eda3b5 --- /dev/null +++ b/src/yggdrasil/tcp_linux.go @@ -0,0 +1,31 @@ +// +build linux + +package yggdrasil + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +// WARNING: This context is used both by net.Dialer and net.Listen in tcp.go + +func (t *tcp) tcpContext(network, address string, c syscall.RawConn) error { + var control error + var bbr error + + control = c.Control(func(fd uintptr) { + bbr = unix.SetsockoptString(int(fd), unix.IPPROTO_TCP, unix.TCP_CONGESTION, "bbr") + }) + + // Log any errors + if bbr != nil { + t.link.core.log.Debugln("Failed to set tcp_congestion_control to bbr for socket, SetsockoptString error:", bbr) + } + if control != nil { + t.link.core.log.Debugln("Failed to set tcp_congestion_control to bbr for socket, Control error:", control) + } + + // Return nil because errors here are not considered fatal for the connection, it just means congestion control is suboptimal + return nil +} diff --git a/src/yggdrasil/tcp_other.go b/src/yggdrasil/tcp_other.go index 47bd772..44c3d76 100644 --- a/src/yggdrasil/tcp_other.go +++ b/src/yggdrasil/tcp_other.go @@ -1,4 +1,4 @@ -// +build !darwin +// +build !darwin,!linux package yggdrasil