171 lines
5.1 KiB
Python
Raw Permalink Normal View History

# Copyright 2022 The Ip2Region Authors. All rights reserved.
# Use of this source code is governed by a Apache2.0-style
# license that can be found in the LICENSE file.
#
# Created by luckydog on 2022/6/29.
# Copyright © 2022年 luckydog. All rights reserved.
#
import socket
import struct
import io
import sys
# xdb默认参数
HeaderInfoLength = 256
VectorIndexRows = 256
VectorIndexCols = 256
VectorIndexSize = 8
SegmentIndexSize = 14
class XdbSearcher(object):
__f = None
# the minimal memory allocation.
vectorIndex = None
# 整个读取xdb保存在内存中
contentBuff = None
@staticmethod
def loadVectorIndexFromFile(dbfile):
try:
f = io.open(dbfile, "rb")
f.seek(HeaderInfoLength)
vi_len = VectorIndexRows * VectorIndexCols * SegmentIndexSize
vector_data = f.read(vi_len)
f.close()
return vector_data
except IOError as e:
print("[Error]: %s" % e)
@staticmethod
def loadContentFromFile(dbfile):
try:
f = io.open(dbfile, "rb")
all_data = f.read()
f.close()
return all_data
except IOError as e:
print("[Error]: %s" % e)
def __init__(self, dbfile=None, vectorIndex=None, contentBuff=None):
self.initDatabase(dbfile, vectorIndex, contentBuff)
def search(self, ip):
if isinstance(ip, str):
if not ip.isdigit(): ip = self.ip2long(ip)
return self.searchByIPLong(ip)
else:
return self.searchByIPLong(ip)
def searchByIPStr(self, ip):
if not ip.isdigit(): ip = self.ip2long(ip)
return self.searchByIPLong(ip)
def searchByIPLong(self, ip):
# locate the segment index block based on the vector index
sPtr = ePtr = 0
il0 = (int)((ip >> 24) & 0xFF)
il1 = (int)((ip >> 16) & 0xFF)
idx = il0 * VectorIndexCols * VectorIndexSize + il1 * VectorIndexSize
if self.vectorIndex is not None:
sPtr = self.getLong(self.vectorIndex, idx)
ePtr = self.getLong(self.vectorIndex, idx + 4)
elif self.contentBuff is not None:
sPtr = self.getLong(self.contentBuff, HeaderInfoLength + idx)
ePtr = self.getLong(self.contentBuff, HeaderInfoLength + idx + 4)
else:
self.__f.seek(HeaderInfoLength + idx)
buffer_ptr = self.__f.read(8)
sPtr = self.getLong(buffer_ptr, 0)
ePtr = self.getLong(buffer_ptr, 4)
# binary search the segment index block to get the region info
dataLen = dataPtr = int(-1)
l = int(0)
h = int((ePtr - sPtr) / SegmentIndexSize)
while l <= h:
m = int((l + h) >> 1)
p = int(sPtr + m * SegmentIndexSize)
# read the segment index
buffer_sip = self.readBuffer(p, SegmentIndexSize)
sip = self.getLong(buffer_sip, 0)
if ip < sip:
h = m - 1
else:
eip = self.getLong(buffer_sip, 4)
if ip > eip:
l = m + 1
else:
dataLen = self.getInt2(buffer_sip, 8)
dataPtr = self.getLong(buffer_sip, 10)
break
# empty match interception
if dataPtr < 0:
return ""
buffer_string = self.readBuffer(dataPtr, dataLen)
return_string = buffer_string.decode("utf-8")
return return_string
def readBuffer(self, offset, length):
buffer = None
# check the in-memory buffer first
if self.contentBuff is not None:
buffer = self.contentBuff[offset:offset + length]
return buffer
# read from the file handle
if self.__f is not None:
self.__f.seek(offset)
buffer = self.__f.read(length)
return buffer
def initDatabase(self, dbfile, vi, cb):
"""
" initialize the database for search
" param: dbFile, vectorIndex, contentBuff
"""
try:
if cb is not None:
self.__f = None
self.vectorIndex = None
self.contentBuff = cb
else:
self.__f = io.open(dbfile, "rb")
self.vectorIndex = vi
except IOError as e:
print("[Error]: %s" % e)
sys.exit()
def ip2long(self, ip):
_ip = socket.inet_aton(ip)
return struct.unpack("!L", _ip)[0]
def isip(self, ip):
p = ip.split(".")
if len(p) != 4: return False
for pp in p:
if not pp.isdigit(): return False
if len(pp) > 3: return False
if int(pp) > 255: return False
return True
def getLong(self, b, offset):
if len(b[offset:offset + 4]) == 4:
return struct.unpack('I', b[offset:offset + 4])[0]
return 0
def getInt2(self, b, offset):
return ((b[offset] & 0x000000FF) | (b[offset+1] & 0x0000FF00))
def close(self):
if self.__f is not None:
self.__f.close()
self.vectorIndex = None
self.contentBuff = None