Quote from: revizorrro on January 01, 2026, 04:50:24 AM
Can you share script for crawling nodes? I found one at github, but after scanning it shows only 270 nodes with coords, then total ~4300

To construct the map I used the already-crawled node data found at http://[207:2896:e6c0:b868:b11a:8552:cb8c:d663] ; it has a list of nodes and, for each node, the list of peers it knows. You need to build the tree that gets placed at /static/graph.json on the web server. To do that, iterate over yggcrawlfull and run a BFS. Here is the code for it; not clean code, written with minimum effort, for Python 3:
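For reference, the script below ends up writing a /static/graph.json with this shape (the values here are shortened placeholders, not real nodes):
Code Select
{
  "nodes": [
    {"id": "201:5c8b:...", "label": "201:5c8b:...", "name": "201:5c8b:...", "key": "68dd..."}
  ],
  "edges": [
    {"sourceID": "201:5c8b:...", "targetID": "203:abcd:..."}
  ]
}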
Code Select
def main():
    import json
    from collections import deque
    import sys, requests, time
    import pygraphviz as pgv
    import networkx as nx
    from networkx.algorithms import centrality
    # all network data is in yggcrawlfull.json
    url = "http://178.20.46.171/yggcrawlfull.json"
    file_path = 'yggcrawlfull.json'
    '''
    Format of yggcrawlfull.json:
    [
      {
        "remote": null,
        "address": "201:5c8b:b25b:2a0d:c1e5:4ce9:9d14:bd25",
        "key": "68dd1369357c8f86acc598bad0b68da8f993f351747e05605f0f93c11cd3e3a8",
        "checked": true,
        "nodeinfo": '{\n "68dd1369357c8f86acc598bad0b68da8f993f351747e05605f0f93c11cd3e3a8": {\n "buildarch": "amd64",\n "buildname": "yggdrasil",\n "buildplatform": "linux",\n "buildversion": "0.5.12"\n }\n}\n',
        "peers": '{\n "201:5c8b:b25b:2a0d:c1e5:4ce9:9d14:bd25": {\n "keys": [\n "00340a5fffffffe3d55ff7bade852943129075bc5ce561f5eb6594a2707476a1",\n "00340a5fffffffe3d55ff7bade852943129075bc5ce561f5eb6594a2707476a1",\n "000002692ce4bca396c7b037b6fab696dddb076b566fca5e43339fc2cc116f23",\n "00340a5fffffffe3d55ff7bade852943129075bc5ce561f5eb6594a2707476a1",\n "2bab442a5e424cb576a952ab1d374dbe2c95cc6e39ce1dd328ea1754f833d700",\n "2bab442a5e424cb576a952ab1d374dbe2c95cc6e39ce1dd328ea1754f833d700",\n "000002692ce4bca396c7b037b6fab696dddb076b566fca5e43339fc2cc116f23"\n ]\n }\n}\n',
        "name": ""
      },
      ...
    ]
    '''
    # download the latest dump
    response = requests.get(url)
    response.raise_for_status()  # make sure we really fetched the latest data
    with open(file_path, 'wb') as file:
        file.write(response.content)
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        print("Error:", e)
        sys.exit(1)
    def getIPv6(key):
        # map a public key to its IPv6 address, falling back to the key itself
        try:
            return nodes[key]['id']
        except KeyError:
            return key

    def getRoutingTable(node_str):
        # 'peers' is stored as a JSON string inside the dump; json.loads
        # replaces the original eval() call: same result, no code execution
        if node_str:
            return json.loads(node_str)
        return None
    nodes = {}  # keyed by public key
    for node in data:
        if node['checked']:
            table_updated = getRoutingTable(node['peers'])
            if table_updated:  # keep only nodes connected to someone
                try:
                    atts = {'id': node['address'], 'peers': table_updated[node['address']]['keys']}
                except KeyError:
                    print('KeyError while parsing data for', node['address'])
                    atts = {'id': node['address'], 'peers': []}
                nodes[node['key']] = atts
    # the nodes dict has the following structure:
    # nodes[key] = { 'id': <ipv6 address>, 'peers': <list of peer keys> }
    # 'all_nodes' is the list of keys gathered above
    all_nodes = list(nodes.keys())

    def get_neighbors(key):
        # returns the list of connected peer keys of a node
        # (example topology: Root <-> A <-> C, and Root <-> B)
        return nodes[key]['peers']

    # --- Step 1: Identify the root ---
    # the Yggdrasil root is the node with the lowest address/key
    root = min(all_nodes)

    # --- Step 2: Build the tree via BFS ---
    # a set tracks the nodes already attached to the tree
    visited = {root}
    queue = deque([root])
    # the output lists
    json_nodes = []
    for n in nodes:
        node = {'id': getIPv6(n), 'label': getIPv6(n), 'name': getIPv6(n), 'key': n}
        json_nodes.append(node)
    json_edges = []
    # Neighbors could be fetched dynamically inside the loop, but pre-fetching
    # them into a map (done above) avoids network lag during processing.
    while queue:
        current_parent = queue.popleft()
        # get the neighbors of the current node
        try:
            neighbors = get_neighbors(current_parent)
        except KeyError:
            print('key error', current_parent)
            continue
        for neighbor in neighbors:
            if neighbor not in visited:
                # a new node: 'current_parent' is its parent
                visited.add(neighbor)
                queue.append(neighbor)
                # add the edge to the visualization list;
                # src is parent, dest is child (standard tree direction)
                json_edges.append({
                    "sourceID": getIPv6(current_parent),
                    "targetID": getIPv6(neighbor)
                })
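    # Worked example with the toy topology mentioned above
    # (Root <-> A <-> C, and Root <-> B): BFS starts at Root and emits
    # Root->A and Root->B, then visits A and emits A->C; the back-link
    # from C to A is skipped because A is already visited, so the result
    # is a spanning tree rather than the full peering mesh.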
    #connected = set()
    #for e in json_edges:
    #    connected.add(e['sourceID'])
    #    connected.add(e['targetID'])
    #json_nodes = [n for n in json_nodes if n['id'] not in connected]

    # --- Step 3: Construct the final JSON ---
    final_output = {
        "nodes": json_nodes,
        "edges": json_edges
    }
    output_path = "graph.json"
    with open(output_path, "w") as f:
        json.dump(final_output, f, indent=2)
    def position_nodes(nodes, edges):
        G = pgv.AGraph(strict=True, directed=False, size='10!')
        for n in nodes:
            if n['id'] == '':
                print('node empty')
                continue
            G.add_node(n['id'], label=n['id'], coords='[1]')
        for e in edges:
            G.add_edge(e['sourceID'], e['targetID'], len=1.0)
        # default maxiter is 1000 for neato
        G.layout(prog='neato', args='-Gepsilon=0.001 -Gdefaultdist=1.0 -GK=0.3 -Gpos=(1000.0,1000.0) -Gmaxiter=10000')
        return G

    graph = position_nodes(json_nodes, json_edges)
    def compute_centrality(G):
        # rebuild the pygraphviz graph as a networkx graph
        ng = nx.Graph()
        for start in G.iternodes():
            for other in G.neighbors(start):
                ng.add_edge(start, other)
        # exact betweenness is slow; k-sampling speeds it up
        # (k may not exceed the number of nodes)
        k = min(1000, ng.number_of_nodes())
        return centrality.betweenness_centrality(ng, k=k)
    def canonalize_ip(ip):
        # zero-pad each group; note this does not expand a compressed '::'
        return ':'.join(i.rjust(4, '0') for i in ip.split(':'))
    def get_graph_json(G):
        max_neighbors = 1
        for n in G.iternodes():
            neighbors = len(G.neighbors(n))
            if neighbors > max_neighbors:
                max_neighbors = neighbors
        print('Max neighbors: %d' % max_neighbors)
        out_data = {
            'created': int(time.time()),
            'nodes': [],
            'edges': []
        }
        centralities = compute_centrality(G)
        #db = load_db()
        for n in G.iternodes():
            neighbor_ratio = len(G.neighbors(n)) / float(max_neighbors)
            pos = n.attr['pos'].split(',', 1)
            node_centrality = centralities[n]  # renamed so it does not shadow the imported module
            size = 5 * (1 + node_centrality)
            #name = db.get(canonalize_ip(n['label']))
            # if the label isn't the default value, set name to that instead
            #if n.attr['label'] != n['label'].split(':')[-1]: name = n.attr['label']
            out_data['nodes'].append({
                'id': n,
                'label': n,
                'name': n,
                'coords': '[1]',
                'x': float(pos[0]),
                'y': float(pos[1]),
                'color': _gradient_color(neighbor_ratio, [(100, 100, 100), (0, 0, 0)]),
                'size': size * 1.01,
                'centrality': '%.4f' % node_centrality
            })
        for e in G.iteredges():
            out_data['edges'].append({
                'sourceID': e[0],
                'targetID': e[1]
            })
        return json.dumps(out_data)
    def _gradient_color(ratio, colors):
        # linear interpolation between evenly spaced color stops
        jump = 1.0 / (len(colors) - 1)
        gap_num = int(ratio / (jump + 0.0000001))
        a = colors[gap_num]
        b = colors[gap_num + 1]
        t = (ratio - gap_num * jump) * (len(colors) - 1)
        r = a[0] + (b[0] - a[0]) * t
        g = a[1] + (b[1] - a[1]) * t
        bl = a[2] + (b[2] - a[2]) * t  # renamed: the original reused 'b' for both the stop and the blue channel
        return '#%02x%02x%02x' % (int(r), int(g), int(bl))
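    # For the two color stops used above this is a plain linear blend, e.g.:
    #   _gradient_color(0.0, [(100, 100, 100), (0, 0, 0)]) -> '#646464'
    #   _gradient_color(1.0, [(100, 100, 100), (0, 0, 0)]) -> '#000000'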
    # catch any KeyError; inconsistencies in the node input list are not handled here
    #FIXME
    try:
        js = get_graph_json(graph)
        with open('/root/web/static/graph.json', 'w+') as f:
            f.write(js)
    except Exception as e:
        print('error while building graph.json, trying again in 30 minutes...', e)
try:
    main()
except Exception:
    pass

I have also written code for crawling the network myself and generating a list of reachable nodes:
Code Select
import ast
import json
import subprocess
import multiprocessing
# iterate over the addresses we are connected to and collect all data for each
# node: address, key, nodeinfo, peers, name
'''
Output format:
[
  {
    "address": "",
    "key": "",
    "nodeinfo": null,
    "peers": "",
    "name": ""
  },
  ...
]
'''
# wrapper functions for yggdrasilctl calls
def remote_getself(key):
    '''
    key: public key of the remote node
    returns: the list of public keys in the remote node's routing table, or None
    '''
    command = ["yggdrasilctl", "debug_remotegetself", "key=" + str(key)]
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode:
        print("ERROR: key", key)
        return None
    # ast.literal_eval is a safe drop-in for the original eval() on dict-like output
    parsed = ast.literal_eval(result.stdout)
    for i in parsed:
        return parsed[i]['keys']
    return None
def getself():
    '''
    returns: our own node identifier, taken as the last field of the output
    '''
    command = ["yggdrasilctl", "getself"]
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode:
        print("ERROR: getself")
        return None
    # flatten the tab-separated output into a comma list and take the last field
    out = result.stdout.strip().replace("\t\n", ",").replace("\t", ",")
    return out.split(",")[-1]
def gettree():
    '''
    returns: our own routing tree as a flat list of fields
    '''
    command = ["yggdrasilctl", "gettree"]
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode:
        print("ERROR: gettree")
        return None
    return result.stdout.replace("\t\n", ",").replace("\t", ",").replace("\n", ",").split(",")
def remote_gettree(key):
    '''
    key: public key of the remote node
    returns: (address, list of public keys in the remote node's routing table),
             or (None, None) on error
    '''
    command = ["yggdrasilctl", "debug_remotegettree", "key=" + str(key)]
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode:
        print("ERROR: key", key)
        return None, None
    parsed = ast.literal_eval(result.stdout)
    for k in parsed:
        try:
            return k, parsed[k]['keys']
        except KeyError:
            return None, None
    return None, None  # empty reply
def getnodeinfo(key):
    '''
    key: public key of the remote node
    returns: the nodeinfo dict, or {} if the output cannot be parsed
    '''
    command = ["yggdrasilctl", "getnodeinfo", "key=" + str(key)]
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode:
        print("ERROR: key", key)
        return None
    # expected format as described at https://yggdrasil-network.github.io/nodeinfo.html
    # (the original evaluated an undefined variable 'string' here)
    try:
        return ast.literal_eval(result.stdout)
    except (ValueError, SyntaxError):
        return {}
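# Intended usage of the wrappers above (the key is a placeholder, not a real node):
#   address, peers = remote_gettree("68dd...e3a8")  # -> (ipv6, [peer keys]) or (None, None)
#   info = getnodeinfo("68dd...e3a8")               # -> nodeinfo dict, {} if unparseable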
self_tree = gettree() or []
to_visit = set()
for i in self_tree:
    addr = i.strip()
    if len(addr) == 64:  # keep only fields that look like public keys
        to_visit.add(addr)
final_tree = dict()
final_tree[getself()] = ""
visited = [getself()]
print("processing nodes...")
# TODO: rewrite recursively to allow wider multi-threading
while len(to_visit) > 0:
    print("\rTo process:", len(to_visit), end="")
    key = to_visit.pop()
    if key in visited:
        continue
    visited.append(key)
    # fetch the node's data (address, peers, nodeinfo); the two yggdrasilctl
    # calls run at the same time in a small pool
    with multiprocessing.Pool(processes=2) as pool:
        async_remote_gettree = pool.apply_async(remote_gettree, args=(key,))
        async_getnodeinfo = pool.apply_async(getnodeinfo, args=(key,))
        address, peers = async_remote_gettree.get()
        nodeinfo = async_getnodeinfo.get()
    final_tree[key] = {
        "address": address,
        "key": key,
        "nodeinfo": nodeinfo,
        "peers": peers,
    }
    if address is None and peers is None:
        continue
    for elem in peers:
        to_visit.add(elem)
total_nodes = len(visited)
print("\nTotal:", total_nodes, "nodes")
final_list = []
for node in visited:
    # the original appended final_tree[key] for every node; use the per-node entry
    entry = final_tree.get(node)
    if entry:  # skips the empty placeholder stored for our own node
        final_list.append(entry)
output = json.dumps(final_list, indent=2)
file_path = 'yggcrawlfull.json'
with open(file_path, 'w+') as f:
    f.write(output)
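A quick way to sanity-check the resulting dump, assuming it is run from the same directory as yggcrawlfull.json:
Code Select
import json

with open('yggcrawlfull.json') as f:
    nodes = json.load(f)

# count how many crawled nodes actually reported a peer list
with_peers = sum(1 for n in nodes if n.get('peers'))
print(len(nodes), 'nodes crawled,', with_peers, 'with a peer list')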