# Description: Collect node information from ZoomEye and FOFA
import base64
import json
import random
import re
import socket

import requests
import urllib3
from bs4 import BeautifulSoup
from urllib3.exceptions import InsecureRequestWarning

from ipSearch.xdbSearcher import XdbSearcher

# Silence the certificate warnings triggered by the verify=False probes below
urllib3.disable_warnings(InsecureRequestWarning)


def search_ip(ip):  # Look up geolocation, ISP, etc. for an IP via ip2region
    dbPath = "./ipSearch/ip2region.xdb"
    vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbPath)
    searcher = XdbSearcher(dbfile=dbPath, vectorIndex=vi)
    region_str = searcher.search(ip)
    searcher.close()  # release the xdb file handle
    return region_str
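
def format_ip_info(ip):
    # Shared by use_zoomeye and use_fofainfo. ip2region returns
    # "国家|区域|省|市|ISP" with "0" as the placeholder for missing fields;
    # strip the separators, placeholders and the 省/市 suffixes, collapse
    # immediately repeated words (e.g. "浙江 浙江"), then append three random
    # digits so generated labels stay distinct.
    region = search_ip(ip).replace("|", " ").replace("0", "").replace("省", "").replace("市", "")
    label = region + "." + "".join(random.sample("0123456789", 3))
    return re.sub(r'\b(\w+)\s+\1\b', r'\1', label).replace(" ", "")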

def use_zoomeye():  # Collect nodes from ZoomEye
    url = 'https://www.zoomeye.org/api/search'
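    # Search endpoint behind the zoomeye.org web UI (no API key); 't' appears
    # to select the asset types to search (IPv4/IPv6 hosts and web assets)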
    params = {
        'q': '"Tailscale"+"DERP"+country:"CN"',
        'page': '1',
        'pageSize': '20',
        't': 'v4+v6+web'
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36'
    }
    info = requests.get(url, params=params, headers=headers, timeout=10)
    if info.status_code == 200:
        data = info.json()["matches"]
        node_list = []
        for i in data:
            if i["portinfo"]["service"] == "https":
                ip = i["ip"]
                port = i["portinfo"]["port"]
                ip_info = format_ip_info(ip)
                # Append the node's details to the result list
                node_list.append({"ip": ip, "port": port, "info": ip_info})
        return True, node_list
    else:
        return False, []

def use_fofainfo(): # Collect nodes from FOFA
    url = 'https://fofa.info/result'
    params = {
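        # FOFA takes the search expression base64-encoded in the qbase64 parameter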
        'qbase64': base64.b64encode('body="DERP" && body="Tailscale" && country="CN"'.encode('utf-8')).decode('utf-8')
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36'
    }
    info = requests.get(url, params=params, headers=headers, timeout=10)
    if info.status_code == 200:
        soup = BeautifulSoup(info.text, "html.parser")
        node_list = []
        # Each search result links to the node as <a href="https://host[:port]" target="_blank">
        for link in soup.find_all("a", href=re.compile(r"^https://")):
            host = link["href"].replace("https://", "", 1)
            ip, _, port = host.partition(":")
            if not port:
                port = "443"  # no explicit port, assume the HTTPS default
                ip = socket.gethostbyname(ip)  # port-less entries are hostnames
            ip_info = format_ip_info(ip)
            # Append the node's details to the result list
            node_list.append({"ip": ip, "port": port, "info": ip_info})
        return True, node_list
    else:
        return False, []

if __name__ == "__main__":
    # Collect nodes from ZoomEye
    status, zoomeye_node_list = use_zoomeye()
    if status:
        print("ZoomEye collection succeeded")
    else:
        print("ZoomEye collection failed")
    # Collect nodes from FOFA
    status, fofa_node_list = use_fofainfo()
    if status:
        print("FOFA collection succeeded")
    else:
        print("FOFA collection failed")
    # Merge the node lists from both sources
    node_list = zoomeye_node_list + fofa_node_list
    print(f"Collected {len(node_list)} nodes in total")
    # Write the nodes to a YAML file for later use
    with open("temp_nodes.yaml", "w", encoding="utf-8") as f:
        for node in node_list:
            # Probe the node, treating connection errors and timeouts as unavailable
            try:
                reachable = requests.get(f"https://{node['ip']}:{node['port']}",
                                         timeout=3, verify=False).status_code == 200
            except requests.RequestException:
                reachable = False
            if reachable:
                f.write(f"- ip: {node['ip']}\n")
                f.write(f"  port: {node['port']}\n")
                f.write(f"  info: {node['info']}\n")
                print(f"Node {node['ip']}:{node['port']} is reachable")
            else:
                print(f"Node {node['ip']}:{node['port']} is unavailable")
    print("Nodes written to temp_nodes.yaml")