5
0
mirror of https://github.com/cwinfo/yggdrasil-map synced 2024-11-24 20:11:37 +00:00

WIP update crawler for ygg future branch

This commit is contained in:
Arceliar 2021-05-22 22:47:16 -05:00
parent e1638133b6
commit b47b1fffd4
2 changed files with 93 additions and 97 deletions

View File

@ -1,97 +0,0 @@
import json
import socket
import sys
import time
# Optional command line "<host> <port>" points the crawler at an external
# admin endpoint; with any other argument count it talks to localhost:9001.
host_port = (
    (sys.argv[1], int(sys.argv[2])) if len(sys.argv) == 3 else ('localhost', 9001)
)
def getDHTPingRequest(key, coords, target=None):
    """Build the JSON text of a keepalive "dhtPing" admin request.

    key and coords are substituted into the "box_pub_key" and "coords"
    fields. A "target" field is included only when target is truthy.
    Values are inserted verbatim (no JSON escaping is applied).
    """
    if not target:
        return '{{"keepalive":true, "request":"dhtPing", "box_pub_key":"{}", "coords":"{}"}}'.format(key, coords)
    with_target = '{{"keepalive":true, "request":"dhtPing", "box_pub_key":"{}", "coords":"{}", "target":"{}"}}'
    return with_target.format(key, coords, target)
def doRequest(req):
    """Send one admin request to host_port and return the decoded JSON reply.

    req is the request text. A fresh TCP connection is opened for every
    call and is always closed again before returning.

    Returns the parsed reply, or None if connecting, sending, receiving or
    JSON decoding fails for any reason (best effort: callers treat None as
    "no answer").
    """
    ygg = None
    try:
        ygg = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        ygg.connect(host_port)
        # Sockets carry bytes; encode text requests so this works on both
        # Python 2 (where str already is bytes) and Python 3.
        if not isinstance(req, bytes):
            req = req.encode('utf-8')
        # sendall() keeps writing until the whole request has been sent.
        ygg.sendall(req)
        # A single read of up to 15 KiB is expected to hold the whole reply.
        reply = ygg.recv(1024*15)
        return json.loads(reply.decode('utf-8'))
    except Exception:
        # Deliberately broad, but unlike a bare "except:" this lets
        # KeyboardInterrupt/SystemExit through so the crawl can be aborted.
        return None
    finally:
        # Without this every request leaked an open socket.
        if ygg is not None:
            ygg.close()
def getNodeInfo(key, coords):
    """Ask the admin endpoint for the nodeinfo of the node at key/coords.

    key and coords are substituted verbatim into the "box_pub_key" and
    "coords" fields of a keepalive "getNodeInfo" request.

    Returns the decoded JSON reply, or None when the request fails.
    """
    req = '{{"keepalive":true, "request":"getNodeInfo", "box_pub_key":"{}", "coords":"{}"}}'.format(key, coords)
    # Same connect/send/receive/decode sequence as every other request, so
    # reuse the shared helper instead of duplicating the socket handling.
    return doRequest(req)
# Crawl bookkeeping, each table keyed by node address:
#   visited  - nodes added after a successful lookup response
#   rumored  - rumors about nodes that still have to be pinged
#   timedout - nodes that were tried; handleResponse removes the entry again
#              once the node is recorded as visited
visited, rumored, timedout = {}, {}, {}
def handleResponse(address, info, data):
    """Record the result of pinging one node and print it when newly visited.

    address -- node address, used as key in visited/rumored/timedout
    info    -- mapping holding the node's 'box_pub_key' and 'coords'
    data    -- decoded dhtPing reply, or None/empty if the request failed

    The node is entered in ``timedout`` first. If the reply carries
    response/nodes, every listed node not yet visited is added to
    ``rumored``; the pinged node itself is then moved to ``visited`` and one
    JSON member ``"address": ["coords", time[, "name"]]`` is written to
    stdout (preceded by a ",\\n" separator for all but the first node).
    """
    global visited
    global rumored
    global timedout
    pub_key = str(info['box_pub_key'])
    coords = str(info['coords'])
    # Assume the worst until a usable reply proves otherwise.
    timedout[str(address)] = {'box_pub_key': pub_key, 'coords': coords}
    if not data or 'response' not in data or 'nodes' not in data['response']:
        return
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for addr, rumor in data['response']['nodes'].items():
        if addr not in visited:
            rumored[addr] = rumor
    if address in visited:
        return
    # TODO? remove this, it's debug output that happens to be in the same format as yakamo's "current" json file
    now = time.time()
    visited[str(address)] = {'box_pub_key': pub_key, 'coords': coords, 'time': now}
    timedout.pop(address, None)
    nodeinfo = getNodeInfo(pub_key, coords)
    #print "\nDEBUG:", info, nodeinfo
    if len(visited) > 1:
        sys.stdout.write(",\n")
    nodename = None
    try:
        if nodeinfo and 'response' in nodeinfo and 'nodeinfo' in nodeinfo['response'] and 'name' in nodeinfo['response']['nodeinfo']:
            name = nodeinfo['response']['nodeinfo']['name']
            if not isinstance(name, (str, type(u''))):
                name = str(name)
            # json.dumps adds the quotes AND escapes '"', '\\' and control
            # characters; plain string concatenation produced invalid JSON
            # for such names.
            nodename = json.dumps(name)
    except Exception:
        # The name is optional decoration: on a malformed nodeinfo reply
        # fall back to printing the entry without it.
        nodename = None
    if nodename:
        sys.stdout.write('"{}": ["{}", {}, {}]'.format(address, info['coords'], int(now), nodename))
    else:
        sys.stdout.write('"{}": ["{}", {}]'.format(address, info['coords'], int(now)))
    sys.stdout.flush()
# End handleResponse
# Get self info
selfInfo = doRequest('{"keepalive":true, "request":"getSelf"}')
if not selfInfo or 'response' not in selfInfo or 'self' not in selfInfo['response']:
    # doRequest returns None on any failure; stop with a clear message
    # instead of a TypeError traceback from indexing None.
    sys.stderr.write("crawl: getSelf request to the admin endpoint failed\n")
    sys.exit(1)
# Initialize dicts of visited/rumored nodes
rumored.update(selfInfo['response']['self'])
# Loop over rumored nodes and ping them, adding to visited if they respond
sys.stdout.write('{"yggnodes": {\n')
while len(rumored) > 0:
    # Pick any pending node. It stays in rumored while it is being handled
    # and is dropped afterwards, even if the reply rumored it again.
    k = next(iter(rumored))
    v = rumored[k]
    handleResponse(k, v, doRequest(getDHTPingRequest(v['box_pub_key'], v['coords'])))
    # These next two are imperfect workarounds to deal with old kad nodes
    #handleResponse(k, v, doRequest(getDHTPingRequest(v['box_pub_key'], v['coords'], '0'*128)))
    #handleResponse(k, v, doRequest(getDHTPingRequest(v['box_pub_key'], v['coords'], 'f'*128)))
    del rumored[k]
sys.stdout.write('\n}}\n')
#End
# TODO do something with the results
#print visited
#print timedout

93
scripts/crawl.py Normal file
View File

@ -0,0 +1,93 @@
import json
import socket
import sys
import time
# Admin endpoint selection from the command line:
#   <host> <port>  -> TCP connection to that host and port
#   <path>         -> UNIX socket at that path
#   anything else  -> UNIX socket at the default path
if len(sys.argv) == 3:
    socktype, sockaddr = socket.AF_INET, (sys.argv[1], int(sys.argv[2]))
else:
    socktype = socket.AF_UNIX
    sockaddr = sys.argv[1] if len(sys.argv) == 2 else "/var/run/yggdrasil.sock"
def getPeersRequest(key):
    """Return the JSON text of a keepalive "debugGetPeers" request for key.

    key is inserted verbatim (no JSON escaping is applied).
    """
    template = '{{"keepalive":true, "request":"debugGetPeers", "key":"{}"}}'
    return template.format(key)
def doRequest(req):
    """Send one admin request and return the decoded JSON reply.

    req is the request text. A fresh stream socket of family ``socktype``
    is connected to ``sockaddr`` for every call and is always closed again
    before returning.

    Returns the parsed reply, or None if connecting, sending, receiving or
    JSON decoding fails for any reason (best effort: callers treat None as
    "no answer").
    """
    ygg = None
    try:
        ygg = socket.socket(socktype, socket.SOCK_STREAM)
        ygg.connect(sockaddr)
        # Sockets carry bytes; encode text requests so this works on both
        # Python 2 (where str already is bytes) and Python 3.
        if not isinstance(req, bytes):
            req = req.encode('utf-8')
        # sendall() keeps writing until the whole request has been sent.
        ygg.sendall(req)
        # A single read of up to 15 KiB is expected to hold the whole reply.
        reply = ygg.recv(1024*15)
        return json.loads(reply.decode('utf-8'))
    except Exception:
        # Deliberately broad, but unlike a bare "except:" this lets
        # KeyboardInterrupt/SystemExit through so the crawl can be aborted.
        return None
    finally:
        # Without this every request leaked an open socket.
        if ygg is not None:
            ygg.close()
# Crawl bookkeeping, each a set of node keys:
#   visited  - nodes added after a successful lookup response
#   rumored  - rumors about nodes that still have to be pinged
#   timedout - nodes handleResponse will not put back into rumored
visited, rumored, timedout = set(), set(), set()
def handleResponse(address, data):
    """Process the debugGetPeers reply for one node and print the node.

    address -- key of the node that was queried
    data    -- decoded debugGetPeers reply, or None if the request failed

    Every key listed in the reply that is neither visited nor timed out is
    added to ``rumored``. The node's coords (debugGetSelf) and optional
    name (getNodeInfo) are then fetched, one JSON member
    ``"address": ["coords", time[, "name"]]`` is written to stdout
    (preceded by a ",\\n" separator for all but the first node), and the
    node is added to ``visited``.

    A node whose lookup fails at any step is added to ``timedout`` so that
    other nodes listing it as a peer do not queue it again.
    """
    global visited
    global rumored
    global timedout
    if address in visited:
        return
    if not data or 'response' not in data:
        timedout.add(address)
        return
    # .items()/.values() work on both Python 2 and 3.
    for _, entry in data['response'].items():
        if 'keys' not in entry:
            continue
        for key in entry['keys']:
            if key in visited or key in timedout:
                continue
            rumored.add(key)
    self_reply = doRequest('{{"keepalive":true, "request":"debugGetSelf", "key":"{}"}}'.format(address))
    # doRequest returns None on failure; guard before the membership test,
    # which would otherwise raise TypeError.
    if not self_reply or 'response' not in self_reply:
        timedout.add(address)
        return
    coords = None
    for entry in self_reply['response'].values():
        if 'Coords' not in entry:
            continue
        coords = str(entry['Coords'])
        break
    if coords is None:
        timedout.add(address)
        return
    nodename = None
    nodeinfo = doRequest('{{"keepalive":true, "request":"getNodeInfo", "key":"{}"}}'.format(address))
    try:
        if nodeinfo and 'response' in nodeinfo and 'nodeinfo' in nodeinfo['response'] and 'name' in nodeinfo['response']['nodeinfo']:
            name = nodeinfo['response']['nodeinfo']['name']
            if not isinstance(name, (str, type(u''))):
                name = str(name)
            # json.dumps adds the quotes AND escapes '"', '\\' and control
            # characters; plain string concatenation produced invalid JSON
            # for such names.
            nodename = json.dumps(name)
    except Exception:
        # The name is optional decoration: on a malformed nodeinfo reply
        # fall back to printing the entry without it.
        nodename = None
    now = time.time()
    if len(visited) > 0:
        sys.stdout.write(",\n")
    if nodename:
        sys.stdout.write('"{}": ["{}", {}, {}]'.format(address, coords, int(now), nodename))
    else:
        sys.stdout.write('"{}": ["{}", {}]'.format(address, coords, int(now)))
    sys.stdout.flush()
    visited.add(address)
# End handleResponse
# Get self info
selfInfo = doRequest('{"keepalive":true, "request":"getSelf"}')
if not selfInfo or 'response' not in selfInfo or 'self' not in selfInfo['response']:
    # doRequest returns None on any failure; stop with a clear message
    # instead of a TypeError traceback from indexing None.
    sys.stderr.write("crawl: getSelf request to the admin socket failed\n")
    sys.exit(1)
# Seed the set of rumored nodes with the key(s) reported by getSelf
for v in selfInfo['response']['self'].values():
    rumored.add(v['key'])
# Loop over rumored nodes and ping them, adding to visited if they respond
sys.stdout.write('{"yggnodes": {\n')
while len(rumored) > 0:
    # Pick any pending key. It stays in rumored while it is being handled
    # and is dropped afterwards.
    k = next(iter(rumored))
    # Ask for the peers of the node being handled (k). The previous code
    # passed v['key'], a leftover from the seeding loop, so every request
    # queried the same node.
    handleResponse(k, doRequest(getPeersRequest(k)))
    rumored.remove(k)
sys.stdout.write('\n}}\n')
#End
# TODO do something with the results
#print visited
#print timedout