Quote from: revizorrro on January 01, 2026, 04:50:24 AM
Can you share the script for crawling nodes? I found one on GitHub, but after a scan it shows only 270 nodes with coords, out of ~4300 total.

To construct the map I used already-crawled nodes found at http://[207:2896:e6c0:b868:b11a:8552:cb8c:d663] ; it has the list of nodes and the list of peers each node knows. You need to build the tree and place it at /static/graph.json on the web server. To build it, iterate over yggcrawlfull and do a BFS. Here is the code for that: not clean code, but minimum effort, for Python 3:
import json
import sys
import time
from collections import deque

import requests
import pygraphviz as pgv
import networkx as nx
from networkx.algorithms import centrality
# all network data in yggcrawlfull.json
url = "http://178.20.46.171/yggcrawlfull.json"
file_path = 'yggcrawlfull.json'
'''
[
{
"remote": null
"address": "201:5c8b:b25b:2a0d:c1e5:4ce9:9d14:bd25"
"key": "68dd1369357c8f86acc598bad0b68da8f993f351747e05605f0f93c11cd3e3a8"
"checked": true
"nodeinfo": '{\n "68dd1369357c8f86acc598bad0b68da8f993f351747e05605f0f93c11cd3e3a8": {\n "buildarch": "amd64",\n "buildname": "yggdrasil",\n "buildplatform": "linux",\n "buildversion": "0.5.12"\n }\n}\n'
"peers": '{\n "201:5c8b:b25b:2a0d:c1e5:4ce9:9d14:bd25": {\n "keys": [\n "00340a5fffffffe3d55ff7bade852943129075bc5ce561f5eb6594a2707476a1",\n "00340a5fffffffe3d55ff7bade852943129075bc5ce561f5eb6594a2707476a1",\n "000002692ce4bca396c7b037b6fab696dddb076b566fca5e43339fc2cc116f23",\n "00340a5fffffffe3d55ff7bade852943129075bc5ce561f5eb6594a2707476a1",\n "2bab442a5e424cb576a952ab1d374dbe2c95cc6e39ce1dd328ea1754f833d700",\n "2bab442a5e424cb576a952ab1d374dbe2c95cc6e39ce1dd328ea1754f833d700",\n "000002692ce4bca396c7b037b6fab696dddb076b566fca5e43339fc2cc116f23"\n ]\n }\n}\n'
"name": ""},
...
]
'''
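# note: the 'peers' and 'nodeinfo' fields above are themselves JSON strings,
# so a single entry can be unpacked like this (illustrative sketch):
#   peer_keys = json.loads(entry['peers'])[entry['address']]['keys']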
response = requests.get(url)
response.raise_for_status()  # fail loudly if we didn't fetch the latest data
with open(file_path, 'wb') as file:
    file.write(response.content)
try:
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
except Exception as e:
    print("Error:", e)
    sys.exit(1)
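# quick sanity check (optional): how many crawled entries actually answered
print(len(data), 'entries total,', sum(1 for n in data if n['checked']), 'answered the crawler')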
def getIPv6(key):
    # map a public key to its IPv6 address; fall back to the key itself
    try:
        return nodes[key]['id']
    except KeyError:
        return key

def getRoutingTable(node_str):
    # 'peers' is stored as a JSON string (see the sample above);
    # json.loads is safer than eval() for data received from remote nodes
    if node_str:
        return json.loads(node_str)
    return None
nodes = {}  # keyed by public key
for node in data:
    if node['checked']:
        table_updated = getRoutingTable(node['peers'])
        if table_updated:  # keep only nodes connected to someone
            try:
                atts = {'id': node['address'], 'peers': table_updated[node['address']]['keys']}
            except KeyError:
                print('KeyError while parsing data')
                atts = {'id': node['address'], 'peers': []}
            nodes[node['key']] = atts
# the nodes dict has the following structure:
# nodes[key] = {'id': <ipv6 address>, 'peers': <list of peer public keys>}
# all_nodes is the list of public keys collected above
all_nodes = list(nodes.keys())

def get_neighbors(key):
    # return the list of peer public keys this node reports
    return nodes[key]['peers']
# --- Step 1: Identify Root ---
# the tree root is taken to be the lowest key (all_nodes holds public keys)
root = min(all_nodes)
# --- Step 2: Build the Tree via BFS ---
# We use a set to keep track of nodes we have already attached to the tree
visited = {root}
queue = deque([root])
# The output lists
json_nodes = []
for n in nodes:
    node = {'id': getIPv6(n), 'label': getIPv6(n), 'name': getIPv6(n), 'key': n}
    json_nodes.append(node)
json_edges = []
# neighbours come from the local 'nodes' dict, so this loop makes no network
# calls; with a live API you would pre-fetch them into a map to avoid lag
while queue:
    current_parent = queue.popleft()
    # get the neighbours of the current node
    try:
        neighbors = get_neighbors(current_parent)
    except KeyError:
        print('key error', current_parent)
        continue
    for neighbor in neighbors:
        if neighbor not in visited:
            # a new node: 'current_parent' is its parent in the tree
            visited.add(neighbor)
            queue.append(neighbor)
            # add the edge to the visualization list;
            # source is the parent, target is the child (standard tree direction)
            json_edges.append({
                "sourceID": getIPv6(current_parent),
                "targetID": getIPv6(neighbor)
            })
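# sanity check: a BFS tree has exactly len(visited) - 1 edges; any node the
# crawl saw but the BFS never reached from the root is absent from json_edges
print('tree: %d nodes reachable, %d edges' % (len(visited), len(json_edges)))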
#connected = set()
#for e in json_edges:
# connected.add(e['sourceID'])
# connected.add(e['targetID'])
#
#json_nodes = [n for n in json_nodes if n['id'] not in connected]
# --- Step 3: Construct Final JSON ---
final_output = {
"nodes": json_nodes,
"edges": json_edges
}
output_path = "graph.json"
with open(output_path, "w") as f:
    json.dump(final_output, f, indent=2)
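# graph.json now has the shape:
# {"nodes": [{"id": ..., "label": ..., "name": ..., "key": ...}, ...],
#  "edges": [{"sourceID": ..., "targetID": ...}, ...]}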
def position_nodes(nodes, edges):
    # lay the graph out with graphviz 'neato' so every node gets x,y coordinates
    G = pgv.AGraph(strict=True, directed=False, size='10!')
    for n in nodes:
        if n['id'] == '':
            print('node empty')
            continue
        G.add_node(n['id'], label=n['id'], coords='[1]')
    for e in edges:
        G.add_edge(e['sourceID'], e['targetID'], len=1.0)
    # neato's default maxiter is 1000; raise it for better convergence
    G.layout(prog='neato', args='-Gepsilon=0.001 -Gdefaultdist=1.0 -GK=0.3 -Gpos=(1000.0,1000.0) -Gmaxiter=10000')
    return G
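# after layout() each node carries a 'pos' attribute like '123.45,678.90',
# which get_graph_json() below reads back as x/y coordinates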
graph = position_nodes(json_nodes, json_edges)
def compute_centrality(G):
    # convert the pygraphviz graph into networkx and compute betweenness centrality
    ng = nx.Graph()
    for start in G.iternodes():
        for other in G.neighbors(start):
            ng.add_edge(start, other)
    # exact betweenness is slow; sample k pivots for a speedup
    # (the min() guards against graphs smaller than the sample size)
    return centrality.betweenness_centrality(ng, k=min(1000, ng.number_of_nodes()))
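# betweenness values come back normalised to [0, 1], so the node sizes computed
# below (size = 5 * (1 + centrality)) land in the range 5..10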
def canonalize_ip(ip):
    # zero-pad each group so addresses have a consistent textual form
    return ':'.join(i.rjust(4, '0') for i in ip.split(':'))
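# example: canonalize_ip('201:5c8b:b25b:2a0d:c1e5:4ce9:9d14:bd25')
# returns '0201:5c8b:b25b:2a0d:c1e5:4ce9:9d14:bd25'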
def get_graph_json(G):
    # find the highest neighbour count, used to scale node colour
    max_neighbors = 1
    for n in G.iternodes():
        neighbors = len(G.neighbors(n))
        if neighbors > max_neighbors:
            max_neighbors = neighbors
    print('Max neighbors: %d' % max_neighbors)
    out_data = {
        'created': int(time.time()),
        'nodes': [],
        'edges': []
    }
    centralities = compute_centrality(G)
    #db = load_db()
    for n in G.iternodes():
        neighbor_ratio = len(G.neighbors(n)) / float(max_neighbors)
        pos = n.attr['pos'].split(',', 1)
        centrality = centralities[n]
        # scale node size with betweenness centrality
        size = 5 * (1 + centrality)
        #name = db.get(canonalize_ip(n['label']))
        # if the label isn't the default value, use it as the name instead
        #if n.attr['label'] != n['label'].split(':')[-1]: name = n.attr['label']
        out_data['nodes'].append({
            'id': n,
            'label': n,
            'name': n,
            'coords': '[1]',
            'x': float(pos[0]),
            'y': float(pos[1]),
            'color': _gradient_color(neighbor_ratio, [(100, 100, 100), (0, 0, 0)]),
            'size': size * 1.01,
            'centrality': '%.4f' % centrality
        })
    for e in G.iteredges():
        out_data['edges'].append({
            'sourceID': e[0],
            'targetID': e[1]
        })
    return json.dumps(out_data)
def _gradient_color(ratio, colors):
    # linearly interpolate between colour stops for the given ratio in [0, 1]
    jump = 1.0 / (len(colors) - 1)
    gap_num = int(ratio / (jump + 0.0000001))
    a = colors[gap_num]
    b = colors[gap_num + 1]
    ratio = (ratio - gap_num * jump) * (len(colors) - 1)
    r = a[0] + (b[0] - a[0]) * ratio
    g = a[1] + (b[1] - a[1]) * ratio
    bl = a[2] + (b[2] - a[2]) * ratio  # separate name, avoids shadowing stop 'b'
    return '#%02x%02x%02x' % (int(r), int(g), int(bl))
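# examples: _gradient_color(0.0, [(100, 100, 100), (0, 0, 0)]) -> '#646464'
#           _gradient_color(1.0, [(100, 100, 100), (0, 0, 0)]) -> '#000000'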
# FIXME: KeyErrors here come from inconsistencies in the input node list,
# which are not handled yet
try:
    js = get_graph_json(graph)
    with open('/root/web/static/graph.json', 'w+') as f:
        f.write(js)
except Exception as e:
    print('error while building graph.json, will try again in 30 minutes:', e)
The second script is the crawler itself: it walks the network via yggdrasilctl, starting from our own routing tree, and writes yggcrawlfull.json.

import json
import subprocess
import multiprocessing
# iterate over all reachable nodes and collect their data: address, key, nodeinfo, peers, name
'''[
{
"address": "",
"key": "",
"nodeinfo": null,
"peers": "",
"name":
},
...
]
'''
# wrapper functions for yggdrasilctl calls
def remote_getself(address):
    '''
    address: public key of the remote node
    returns: a list of public keys in the routing table of the remote node
    (not used below, kept for reference)
    '''
    command = ["yggdrasilctl", "debug_remotegetself", "key=" + str(address)]
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode:
        print("ERROR: key", address)
        return None
    # note: eval() of remote output is risky; json.loads would be safer if the
    # admin socket output is plain JSON
    parsed = eval(result.stdout)
    for i in parsed:
        return parsed[i]['keys']
def getself():
    '''
    returns: our own public key (the last field of the getself output)
    '''
    command = ["yggdrasilctl", "getself"]
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode:
        print("ERROR: getself")
        return None
    result = result.stdout.strip().replace("\t\n", ",").replace("\t", ",")
    return result.split(",")[-1]
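# the replace() chain above assumes getself prints tab-separated label/value
# lines; flattening them into one comma list and taking the last field gives the key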
def gettree():
    '''
    returns: the entries of our own routing tree, as a flat list
    '''
    command = ["yggdrasilctl", "gettree"]
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode:
        print("ERROR: gettree")
        return None
    return result.stdout.replace("\t\n", ",").replace("\t", ",").replace("\n", ",").split(",")
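# the flat comma-separated list still contains headers and other columns; the
# 64-character hex entries filtered out below are the public keys we care about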
def remote_gettree(address):
    '''
    address: public key of the remote node
    returns: (address, list of public keys in the remote node's routing tree)
    '''
    command = ["yggdrasilctl", "debug_remotegettree", "key=" + str(address)]
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode:
        print("ERROR: key", address)
        return None, None
    parsed = eval(result.stdout)
    for k in parsed:
        try:
            return k, parsed[k]['keys']
        except KeyError:
            return None, None
def getnodeinfo(address):
    '''
    address: public key of the remote node
    returns: dict with the remote node's nodeinfo, or {} on parse failure
    '''
    command = ["yggdrasilctl", "getnodeinfo", "key=" + str(address)]
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode:
        print("ERROR: key", address)
        return None
    # expected format is described at https://yggdrasil-network.github.io/nodeinfo.html
    try:
        return eval(result.stdout)
    except Exception:
        return {}
self_tree = gettree()
to_visit = set()
for i in self_tree:
    addr = i.strip()
    if len(addr) == 64:  # keep only well-formed 64-char hex public keys
        to_visit.add(addr)
self_key = getself()
final_tree = dict()
final_tree[self_key] = ""
visited = {self_key}  # a set makes the membership test below O(1)
print("processing nodes...")
#TODO write recursive to use multi-threading in wide
while len(to_visit) > 0:
print("\rTo Proccess:",len(to_visit), end="")
key = to_visit.pop()
if key in visited:
continue
else:
visited.append(key)
#print('visiting',key)
# get infor of node: address, nodeinfo, peers
address, peers = remote_gettree(key)
nodeinfo = getnodeinfo(key)
with multiprocessing.Pool(processes=2) as pool: # call two functions at the same time
async_remote_gettree = pool.apply_async(remote_gettree, args=(key,))
async_getnodeinfo = pool.apply_async(getnodeinfo, args=(key,))
address, peers = async_remote_gettree.get()
nodeinfo = async_getnodeinfo.get()
final_tree[key] = {
"address": address,
"key": key,
"nodeinfo": nodeinfo,
"peers": peers,
}
if address == None and peers == None:
continue
else:
for elem in peers:
to_visit.add(elem)
total_nodes = len(visited)
print("\nTotal:", total_nodes, "nodes")
final_list = []
for node in visited:
    record = final_tree.get(node)
    if record:  # skip the empty placeholder entry for our own node
        final_list.append(record)
output = json.dumps(final_list, indent=2)
file_path = 'yggcrawlfull.json'
with open(file_path, 'w+') as f:
    f.write(output)
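To publish the result the way the URL above does, any static file server pointed at the directory holding yggcrawlfull.json will do; a minimal sketch (my assumption, not necessarily the setup used here) is Python's built-in server, with a cron entry re-running the crawler every 30 minutes (the script path is a hypothetical placeholder):

python3 -m http.server 80 --bind ::
*/30 * * * * python3 /path/to/crawler.py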
Quote from: asgard on December 30, 2025, 07:56:14 PM
I created my instance of that, not exactly the same, but it works:
http://[227:bc24:9a82:e290:7f4a:f992:2ef6:5b34]:8008/

Can you share the script for crawling nodes?

docker run -p [ipv6]:80:80 ... makes the container expose port 80 on that specific IPv6 address; you can use as many addresses as you want. To avoid port conflicts, make sure no application is listening on port 80 of the host machine.

dig @324:71e:281a:9ed3::53 website.ygg

returns an SOA record for ns.alfis.name. ..., meaning it serves .ygg domains.
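For instance, a concrete sketch of that docker invocation (the image name and the IPv6 address are hypothetical placeholders, not from this thread):

docker run -d -p '[200:1234:5678::1]:80:80' my-ygg-map-image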