From b47b1fffd445a652344903aa7515408b00a62391 Mon Sep 17 00:00:00 2001
From: Arceliar
Date: Sat, 22 May 2021 22:47:16 -0500
Subject: [PATCH] WIP update crawler for ygg future branch

---
Reviewer note: the scripts/crawl.py payload below was amended during review
(the crawl loop now queries the rumored key itself, failed lookups are
recorded in `timedout`, a failed request no longer raises TypeError, sockets
are closed), so the blob id on its "index" line no longer matches the content.
The indentation of the removed lines was reconstructed as 2-space; verify it
against the real scripts/crawl-dht.py before applying.

 scripts/crawl-dht.py |  97 -------------------------------------------
 scripts/crawl.py     | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 112 insertions(+), 97 deletions(-)
 delete mode 100644 scripts/crawl-dht.py
 create mode 100644 scripts/crawl.py

diff --git a/scripts/crawl-dht.py b/scripts/crawl-dht.py
deleted file mode 100644
index fe30b39..0000000
--- a/scripts/crawl-dht.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import json
-import socket
-import sys
-import time
-
-#gives the option to get data from an external server instead and send that
-#if no options given it will default to localhost instead
-if len(sys.argv) == 3:
-  host_port = (sys.argv[1], int(sys.argv[2]))
-else:
-  host_port = ('localhost', 9001)
-
-def getDHTPingRequest(key, coords, target=None):
-  if target:
-    return '{{"keepalive":true, "request":"dhtPing", "box_pub_key":"{}", "coords":"{}", "target":"{}"}}'.format(key, coords, target)
-  else:
-    return '{{"keepalive":true, "request":"dhtPing", "box_pub_key":"{}", "coords":"{}"}}'.format(key, coords)
-
-def doRequest(req):
-  try:
-    ygg = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    ygg.connect(host_port)
-    ygg.send(req)
-    data = json.loads(ygg.recv(1024*15))
-    return data
-  except:
-    return None
-
-def getNodeInfo(key, coords):
-  try:
-    req = '{{"keepalive":true, "request":"getNodeInfo", "box_pub_key":"{}", "coords":"{}"}}'.format(key, coords)
-    ygg = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    ygg.connect(host_port)
-    ygg.send(req)
-    data = json.loads(ygg.recv(1024*15))
-    return data
-  except:
-    return None
-
-visited = dict() # Add nodes after a successful lookup response
-rumored = dict() # Add rumors about nodes to ping
-timedout = dict()
-def handleResponse(address, info, data):
-  global visited
-  global rumored
-  global timedout
-  timedout[str(address)] = {'box_pub_key':str(info['box_pub_key']), 'coords':str(info['coords'])}
-  if not data: return
-  if 'response' not in data: return
-  if 'nodes' not in data['response']: return
-  for addr,rumor in data['response']['nodes'].iteritems():
-    if addr in visited: continue
-    rumored[addr] = rumor
-  if address not in visited:
-    # TODO? remove this, it's debug output that happens to be in the same format as yakamo's "current" json file
-    now = time.time()
-    visited[str(address)] = {'box_pub_key':str(info['box_pub_key']), 'coords':str(info['coords']), 'time':now}
-    if address in timedout: del timedout[address]
-    nodeinfo = getNodeInfo(str(info['box_pub_key']), str(info['coords']))
-    #print "\nDEBUG:", info, nodeinfo
-    if len(visited) > 1: sys.stdout.write(",\n")
-    nodename = None
-    try:
-      if nodeinfo and 'response' in nodeinfo and 'nodeinfo' in nodeinfo['response'] and 'name' in nodeinfo['response']['nodeinfo']:
-        nodename = '"' + str(nodeinfo['response']['nodeinfo']['name']) + '"'
-    except:
-      pass
-    if nodename:
-      sys.stdout.write('"{}": ["{}", {}, {}]'.format(address, info['coords'], int(now), nodename))
-    else:
-      sys.stdout.write('"{}": ["{}", {}]'.format(address, info['coords'], int(now)))
-    sys.stdout.flush()
-# End handleResponse
-
-# Get self info
-selfInfo = doRequest('{"keepalive":true, "request":"getSelf"}')
-
-# Initialize dicts of visited/rumored nodes
-for k,v in selfInfo['response']['self'].iteritems(): rumored[k] = v
-
-# Loop over rumored nodes and ping them, adding to visited if they respond
-print '{"yggnodes": {'
-while len(rumored) > 0:
-  for k,v in rumored.iteritems():
-    handleResponse(k, v, doRequest(getDHTPingRequest(v['box_pub_key'], v['coords'])))
-    # These next two are imperfect workarounds to deal with old kad nodes
-    #handleResponse(k, v, doRequest(getDHTPingRequest(v['box_pub_key'], v['coords'], '0'*128)))
-    #handleResponse(k, v, doRequest(getDHTPingRequest(v['box_pub_key'], v['coords'], 'f'*128)))
-    break
-  del rumored[k]
-print '\n}}'
-#End
-
-# TODO do something with the results
-
-#print visited
-#print timedout
diff --git a/scripts/crawl.py b/scripts/crawl.py
new file mode 100644
index 0000000..6bad223
--- /dev/null
+++ b/scripts/crawl.py
@@ -0,0 +1,112 @@
+import json
+import socket
+import sys
+import time
+
+# Where to find the yggdrasil admin socket:
+#   crawl.py <host> <port>  connects over TCP to <host>:<port>
+#   crawl.py <path>         connects to the UNIX socket at <path>
+#   crawl.py                connects to the UNIX socket at /var/run/yggdrasil.sock
+if len(sys.argv) == 3:
+  socktype = socket.AF_INET
+  sockaddr = (sys.argv[1], int(sys.argv[2]))
+elif len(sys.argv) == 2:
+  socktype = socket.AF_UNIX
+  sockaddr = sys.argv[1]
+else:
+  socktype = socket.AF_UNIX
+  sockaddr = "/var/run/yggdrasil.sock"
+
+def getPeersRequest(key):
+  """Build the debugGetPeers admin request for the node identified by `key`."""
+  return '{{"keepalive":true, "request":"debugGetPeers", "key":"{}"}}'.format(key)
+
+def doRequest(req):
+  """Send the request string `req` to the admin socket and return the parsed JSON reply.
+
+  Returns None when the socket cannot be used or the reply does not parse.
+  """
+  ygg = None
+  try:
+    ygg = socket.socket(socktype, socket.SOCK_STREAM)
+    ygg.connect(sockaddr)
+    # Explicit encode/decode keeps this working on both Python 2 and Python 3
+    ygg.sendall(req.encode('utf-8'))
+    # Only one recv is done, so a reply that does not arrive whole fails to parse
+    return json.loads(ygg.recv(1024*15).decode('utf-8'))
+  except (socket.error, ValueError):
+    return None
+  finally:
+    # A new connection is opened per request, so always release it
+    if ygg is not None: ygg.close()
+
+visited = set() # Add nodes after a successful lookup response
+rumored = set() # Add rumors about nodes to ping
+timedout = set() # Add nodes whose peer lookup gave no usable response
+def handleResponse(address, data):
+  """Handle the debugGetPeers reply `data` received for the node `address`.
+
+  Keys listed in the reply that are not yet visited or timed out are added to
+  `rumored`. The node's coords (and name, if any) are then requested and one
+  entry of the JSON output is written to stdout.
+  """
+  global visited
+  global rumored
+  global timedout
+  if address in visited: return
+  if not data or 'response' not in data:
+    # Record the failure so that later rumors do not queue this node again
+    timedout.add(address)
+    return
+  for k,v in data['response'].items():
+    if 'keys' not in v: continue
+    for key in v['keys']:
+      if key in visited: continue
+      if key in timedout: continue
+      rumored.add(key)
+  selfInfo = doRequest('{{"keepalive":true, "request":"debugGetSelf", "key":"{}"}}'.format(address))
+  # doRequest returns None on failure, which must be ruled out before indexing
+  if not selfInfo or 'response' not in selfInfo: return
+  coords = None
+  for _,v in selfInfo['response'].items():
+    if 'Coords' not in v: continue
+    coords = str(v['Coords'])
+    break
+  if coords is None: return
+  nodename = None
+  nodeinfo = doRequest('{{"keepalive":true, "request":"getNodeInfo", "key":"{}"}}'.format(address))
+  try:
+    # The name is chosen by the remote node, so let json.dumps quote and escape it
+    nodename = json.dumps(nodeinfo['response']['nodeinfo']['name'])
+  except (KeyError, TypeError):
+    pass
+  now = time.time()
+  if len(visited) > 0: sys.stdout.write(",\n")
+  if nodename:
+    sys.stdout.write('"{}": ["{}", {}, {}]'.format(address, coords, int(now), nodename))
+  else:
+    sys.stdout.write('"{}": ["{}", {}]'.format(address, coords, int(now)))
+  sys.stdout.flush()
+  visited.add(address)
+# End handleResponse
+
+# Get self info
+selfInfo = doRequest('{"keepalive":true, "request":"getSelf"}')
+if not selfInfo or 'response' not in selfInfo:
+  sys.exit("crawl.py: no usable getSelf response from {}".format(sockaddr))
+for k,v in selfInfo['response']['self'].items(): rumored.add(v['key'])
+
+# Loop over rumored nodes and ping them, adding to visited if they respond
+print('{"yggnodes": {')
+while len(rumored) > 0:
+  k = next(iter(rumored))
+  # Ask the rumored node itself for its peers
+  handleResponse(k, doRequest(getPeersRequest(k)))
+  rumored.discard(k)
+print('\n}}')
+#End
+
+# TODO do something with the results
+
+#print visited
+#print timedout