mirror of
https://github.com/cwinfo/yggdrasil-go.git
synced 2024-11-25 23:01:38 +00:00
store 'faster' relationships between all pairs of peers, to make fallback easier when a parent goes offline
This commit is contained in:
parent
38093219fd
commit
dcfe55dae8
@ -18,10 +18,12 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
const switch_timeout = time.Minute
|
const (
|
||||||
const switch_updateInterval = switch_timeout / 2
|
switch_timeout = time.Minute
|
||||||
const switch_throttle = switch_updateInterval / 2
|
switch_updateInterval = switch_timeout / 2
|
||||||
const switch_faster_threshold = 2880 // 1 update per 30 seconds for 24 hours
|
switch_throttle = switch_updateInterval / 2
|
||||||
|
switch_faster_threshold = 240 //Number of switch updates before switching to a faster parent
|
||||||
|
)
|
||||||
|
|
||||||
// The switch locator represents the topology and network state dependent info about a node, minus the signatures that go with it.
|
// The switch locator represents the topology and network state dependent info about a node, minus the signatures that go with it.
|
||||||
// Nodes will pick the best root they see, provided that the root continues to push out updates with new timestamps.
|
// Nodes will pick the best root they see, provided that the root continues to push out updates with new timestamps.
|
||||||
@ -119,13 +121,13 @@ func (x *switchLocator) isAncestorOf(y *switchLocator) bool {
|
|||||||
|
|
||||||
// Information about a peer, used by the switch to build the tree and eventually make routing decisions.
|
// Information about a peer, used by the switch to build the tree and eventually make routing decisions.
|
||||||
type peerInfo struct {
|
type peerInfo struct {
|
||||||
key sigPubKey // ID of this peer
|
key sigPubKey // ID of this peer
|
||||||
locator switchLocator // Should be able to respond with signatures upon request
|
locator switchLocator // Should be able to respond with signatures upon request
|
||||||
degree uint64 // Self-reported degree
|
degree uint64 // Self-reported degree
|
||||||
time time.Time // Time this node was last seen
|
time time.Time // Time this node was last seen
|
||||||
faster uint16 // Counter of how often a node is faster than the current parent, penalized extra if slower
|
faster map[switchPort]uint64 // Counters of how often this node is faster than each other peer (keyed by that peer's port), penalized extra if slower
|
||||||
port switchPort // Interface number of this peer
|
port switchPort // Interface number of this peer
|
||||||
msg switchMsg // The wire switchMsg used
|
msg switchMsg // The wire switchMsg used
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is just a uint64 with a named type for clarity reasons.
|
// This is just a uint64 with a named type for clarity reasons.
|
||||||
@ -350,8 +352,6 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep
|
|||||||
prevKey = hop.Next
|
prevKey = hop.Next
|
||||||
}
|
}
|
||||||
sender.msg = *msg
|
sender.msg = *msg
|
||||||
oldSender, isIn := t.data.peers[fromPort]
|
|
||||||
sender.faster = oldSender.faster
|
|
||||||
sender.port = fromPort
|
sender.port = fromPort
|
||||||
sender.time = now
|
sender.time = now
|
||||||
// Decide what to do
|
// Decide what to do
|
||||||
@ -370,34 +370,39 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
doUpdate := false
|
doUpdate := false
|
||||||
|
oldSender := t.data.peers[fromPort]
|
||||||
if !equiv(&sender.locator, &oldSender.locator) {
|
if !equiv(&sender.locator, &oldSender.locator) {
|
||||||
doUpdate = true
|
doUpdate = true
|
||||||
}
|
}
|
||||||
// Check if faster than the current parent, and update sender.faster accordingly
|
// Update the matrix of peer "faster" thresholds
|
||||||
switch {
|
if reprocessing {
|
||||||
case reprocessing:
|
sender.faster = oldSender.faster
|
||||||
// Don't change anything if we're just reprocessing old messages.
|
} else {
|
||||||
case !isIn:
|
sender.faster = make(map[switchPort]uint64, len(oldSender.faster))
|
||||||
// Not known, sender.faster == 0, but set it explicitly just to make that obvious to the reader.
|
for port, peer := range t.data.peers {
|
||||||
sender.faster = 0
|
if port == fromPort {
|
||||||
case msg.Root != oldSender.locator.root:
|
continue
|
||||||
// This is a new root.
|
}
|
||||||
// Honestly not sure if we should reset or do something else. For now, we'll just leave it alone.
|
switch {
|
||||||
case sender.port == t.parent:
|
case msg.Root != peer.locator.root:
|
||||||
// This is the current parent. If roots change, there's a good chance that they're still the best route to the root, so we probably don't want them to converge towards 0.
|
// Different roots, blindly guess that the relationships will stay the same?
|
||||||
// If we leave them alone, then when a different node gets parented, this one will get penalized by a couple of points, so it hopefully shouldn't flap too hard to leave this alone for now.
|
sender.faster[port] = oldSender.faster[peer.port]
|
||||||
case sender.locator.tstamp <= t.data.locator.tstamp:
|
case sender.locator.tstamp <= peer.locator.tstamp:
|
||||||
// This timestamp came in slower than our parent's, so we should penalize them by more than we reward faster nodes.
|
// Slower than this node, penalize (more than the reward amount)
|
||||||
if sender.faster > 1 {
|
if oldSender.faster[port] > 1 {
|
||||||
sender.faster -= 2
|
sender.faster[port] = oldSender.faster[peer.port] - 2
|
||||||
} else {
|
} else {
|
||||||
// If exactly 1, don't let it roll under
|
sender.faster[port] = 0
|
||||||
sender.faster = 0
|
}
|
||||||
|
default:
|
||||||
|
// The sender was faster than this peer, so increment its counter, saturating at switch_faster_threshold so it can never overflow
|
||||||
|
if oldSender.faster[peer.port] < switch_faster_threshold {
|
||||||
|
sender.faster[port] = oldSender.faster[peer.port] + 1
|
||||||
|
} else {
|
||||||
|
sender.faster[port] = switch_faster_threshold
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
default:
|
|
||||||
// They sent us an update faster than our parent did, so reward them.
|
|
||||||
// FIXME make sure this can't ever roll over. It shouldn't be possible, we'd switch to them as a parent first, but still...
|
|
||||||
sender.faster++
|
|
||||||
}
|
}
|
||||||
// Update sender
|
// Update sender
|
||||||
t.data.peers[fromPort] = sender
|
t.data.peers[fromPort] = sender
|
||||||
@ -433,30 +438,30 @@ func (t *switchTable) unlockedHandleMsg(msg *switchMsg, fromPort switchPort, rep
|
|||||||
case noParent:
|
case noParent:
|
||||||
// We currently have no working parent, and at this point in the switch statement, anything is better than nothing.
|
// We currently have no working parent, and at this point in the switch statement, anything is better than nothing.
|
||||||
updateRoot = true
|
updateRoot = true
|
||||||
case sender.faster > switch_faster_threshold:
|
case sender.faster[t.parent] >= switch_faster_threshold:
|
||||||
// The sender is reliably faster than the current parent.
|
// The sender is reliably faster than the current parent.
|
||||||
updateRoot = true
|
updateRoot = true
|
||||||
case reprocessing && len(sender.locator.coords) < len(oldParent.locator.coords):
|
case reprocessing && sender.faster[t.parent] > oldParent.faster[sender.port]:
|
||||||
// We're reprocessing old messages to find a new parent.
|
// The sender seems to be reliably faster than the current parent, so switch to them instead.
|
||||||
// That means we're in the middle of a route flap.
|
|
||||||
// We don't know how often each node is faster than the others, only relative to the old parent.
|
|
||||||
// If any of them was faster than the old parent, then we'd probably already be using them.
|
|
||||||
// So the best we can really do is pick the shortest route and hope it's OK as a starting point.
|
|
||||||
// TODO: Find some way to reliably store relative order between all peers. Basically a pxp "faster" matrix, more likely a faster port->uint map per peer, but preferably not literally that, since it'd be tedious to manage and probably slows down updates.
|
|
||||||
updateRoot = true
|
updateRoot = true
|
||||||
case sender.port != t.parent:
|
case sender.port != t.parent:
|
||||||
// Ignore further cases if the sender isn't our parent.
|
// Ignore further cases if the sender isn't our parent.
|
||||||
case !equiv(&sender.locator, &t.data.locator):
|
case !reprocessing && !equiv(&sender.locator, &t.data.locator):
|
||||||
// Special case:
|
// Special case:
|
||||||
// If coords changed, then this may now be a worse parent than before.
|
// If coords changed, then we need to penalize this node somehow, to prevent flapping.
|
||||||
// Re-parent the node (de-parent and reprocess the message).
|
// First, reset all faster-related info to 0.
|
||||||
// Then reprocess *all* messages to look for a better parent.
|
// Then, de-parent the node and reprocess all messages to find a new parent.
|
||||||
// This is so we don't keep using this node as our parent if there's something better.
|
|
||||||
t.parent = 0
|
t.parent = 0
|
||||||
t.unlockedHandleMsg(msg, fromPort, true)
|
sender.faster = nil
|
||||||
for _, info := range t.data.peers {
|
for _, peer := range t.data.peers {
|
||||||
t.unlockedHandleMsg(&info.msg, info.port, true)
|
if peer.port == sender.port {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
delete(peer.faster, sender.port)
|
||||||
|
t.unlockedHandleMsg(&peer.msg, peer.port, true)
|
||||||
}
|
}
|
||||||
|
// Process the sender last, to avoid keeping them as a parent if at all possible.
|
||||||
|
t.unlockedHandleMsg(&sender.msg, sender.port, true)
|
||||||
case now.Sub(t.time) < switch_throttle:
|
case now.Sub(t.time) < switch_throttle:
|
||||||
// We've already gotten an update from this root recently, so ignore this one to avoid flooding.
|
// We've already gotten an update from this root recently, so ignore this one to avoid flooding.
|
||||||
case sender.locator.tstamp > t.data.locator.tstamp:
|
case sender.locator.tstamp > t.data.locator.tstamp:
|
||||||
|
Loading…
Reference in New Issue
Block a user