171 lines
5.1 KiB
Python
171 lines
5.1 KiB
Python
|
# Copyright 2022 The Ip2Region Authors. All rights reserved.
|
|||
|
# Use of this source code is governed by an Apache2.0-style
|
|||
|
# license that can be found in the LICENSE file.
|
|||
|
#
|
|||
|
# Created by luckydog on 2022/6/29.
|
|||
|
# Copyright © 2022年 luckydog. All rights reserved.
|
|||
|
#
|
|||
|
|
|||
|
import socket
|
|||
|
import struct
|
|||
|
import io
|
|||
|
import sys
|
|||
|
|
|||
|
|
|||
|
# xdb default parameters
# Number of bytes skipped at the start of the file before the vector index.
HeaderInfoLength = 256
# The vector index is a VectorIndexRows x VectorIndexCols table addressed by
# the first two bytes of the IP address.
VectorIndexRows = 256
VectorIndexCols = 256
# Bytes per vector-index entry: two 4-byte pointers (start, end).
VectorIndexSize = 8
# Bytes per segment-index entry: start ip (4) + end ip (4) + data length (2)
# + data pointer (4).
SegmentIndexSize = 14


class XdbSearcher(object):
    """Look up the region string stored for an IPv4 address in an xdb file.

    Three access modes are supported, selected by the constructor arguments:

    * ``contentBuff`` given: every read is served from that in-memory copy
      of the whole file and no file handle is opened.
    * ``dbfile`` + ``vectorIndex`` given: the vector index is read from
      memory, segment index and region data are read from the file.
    * ``dbfile`` only: everything is read from the file.
    """

    # File handle used when the content is not fully cached in memory.
    __f = None

    # Cached vector index (the minimal memory allocation).
    vectorIndex = None
    # The whole xdb file read into memory.
    contentBuff = None

    @staticmethod
    def loadVectorIndexFromFile(dbfile):
        """Read the vector index block from ``dbfile``.

        Returns the raw bytes of the vector index, or ``None`` (after
        printing the error) when the file cannot be read.
        """
        try:
            # ``with`` closes the handle even if seek/read raises.
            with io.open(dbfile, "rb") as f:
                f.seek(HeaderInfoLength)
                # Entries are VectorIndexSize bytes wide (this is the stride
                # searchByIPLong uses), not SegmentIndexSize.
                vi_len = VectorIndexRows * VectorIndexCols * VectorIndexSize
                return f.read(vi_len)
        except IOError as e:
            print("[Error]: %s" % e)
            return None

    @staticmethod
    def loadContentFromFile(dbfile):
        """Read the entire ``dbfile`` into memory.

        Returns the file content as bytes, or ``None`` (after printing the
        error) when the file cannot be read.
        """
        try:
            with io.open(dbfile, "rb") as f:
                return f.read()
        except IOError as e:
            print("[Error]: %s" % e)
            return None

    def __init__(self, dbfile=None, vectorIndex=None, contentBuff=None):
        """Create a searcher; see the class docstring for the modes."""
        self.initDatabase(dbfile, vectorIndex, contentBuff)

    def search(self, ip):
        """Return the region string for ``ip``.

        ``ip`` may be a dotted-quad string, a string of decimal digits, or
        an integer. Returns ``""`` when no segment matches.
        """
        if isinstance(ip, str):
            return self.searchByIPStr(ip)
        return self.searchByIPLong(ip)

    def searchByIPStr(self, ip):
        """Return the region string for an IP given as a string.

        An all-digit string is taken as the integer form of the address;
        anything else is parsed as a dotted quad by ``ip2long``.
        """
        # The numeric form must become an int, otherwise the bit shifts in
        # searchByIPLong fail on a str.
        ip = int(ip) if ip.isdigit() else self.ip2long(ip)
        return self.searchByIPLong(ip)

    def searchByIPLong(self, ip):
        """Return the region string for an IP given as an integer.

        Returns ``""`` when no segment contains ``ip``.
        """
        # Locate the segment index block based on the vector index: the
        # first two bytes of the address select the row and the column.
        sPtr = ePtr = 0
        il0 = (ip >> 24) & 0xFF
        il1 = (ip >> 16) & 0xFF
        idx = il0 * VectorIndexCols * VectorIndexSize + il1 * VectorIndexSize

        if self.vectorIndex is not None:
            sPtr = self.getLong(self.vectorIndex, idx)
            ePtr = self.getLong(self.vectorIndex, idx + 4)
        elif self.contentBuff is not None:
            sPtr = self.getLong(self.contentBuff, HeaderInfoLength + idx)
            ePtr = self.getLong(self.contentBuff, HeaderInfoLength + idx + 4)
        else:
            self.__f.seek(HeaderInfoLength + idx)
            buffer_ptr = self.__f.read(VectorIndexSize)
            sPtr = self.getLong(buffer_ptr, 0)
            ePtr = self.getLong(buffer_ptr, 4)

        # Binary search the segment index block to get the region info.
        # ePtr addresses the last entry itself, so ``high`` is inclusive.
        dataLen = dataPtr = -1
        low = 0
        high = (ePtr - sPtr) // SegmentIndexSize
        while low <= high:
            mid = (low + high) >> 1
            p = sPtr + mid * SegmentIndexSize
            # read the segment index
            buffer_sip = self.readBuffer(p, SegmentIndexSize)
            sip = self.getLong(buffer_sip, 0)
            if ip < sip:
                high = mid - 1
                continue
            eip = self.getLong(buffer_sip, 4)
            if ip > eip:
                low = mid + 1
                continue
            dataLen = self.getInt2(buffer_sip, 8)
            dataPtr = self.getLong(buffer_sip, 10)
            break

        # empty match interception
        if dataPtr < 0:
            return ""

        return self.readBuffer(dataPtr, dataLen).decode("utf-8")

    def readBuffer(self, offset, length):
        """Return ``length`` bytes starting at ``offset``.

        The in-memory content buffer is used when present, otherwise the
        open file handle. Returns ``None`` when neither is available.
        """
        # check the in-memory buffer first
        if self.contentBuff is not None:
            return self.contentBuff[offset:offset + length]

        # read from the file handle
        if self.__f is not None:
            self.__f.seek(offset)
            return self.__f.read(length)

        return None

    def initDatabase(self, dbfile, vi, cb):
        """Initialize the database for search.

        param: dbfile, vi (vector index), cb (content buffer)

        When ``cb`` is given it is used exclusively and no file is opened.
        Otherwise ``dbfile`` is opened and ``vi`` (possibly ``None``) is
        kept as the cached vector index. If the file cannot be opened the
        error is printed and ``sys.exit()`` is called.
        """
        try:
            if cb is not None:
                self.__f = None
                self.vectorIndex = None
                self.contentBuff = cb
            else:
                self.__f = io.open(dbfile, "rb")
                self.vectorIndex = vi
        except IOError as e:
            # NOTE(review): exiting the interpreter from library code is
            # harsh; kept as-is so existing callers see the same behavior.
            print("[Error]: %s" % e)
            sys.exit()

    def ip2long(self, ip):
        """Convert a dotted-quad IPv4 string to its unsigned integer value."""
        _ip = socket.inet_aton(ip)
        return struct.unpack("!L", _ip)[0]

    def isip(self, ip):
        """Return True if ``ip`` is four dot-separated decimal parts in 0..255."""
        parts = ip.split(".")
        if len(parts) != 4:
            return False
        for part in parts:
            if not part.isdigit():
                return False
            if len(part) > 3:
                return False
            if int(part) > 255:
                return False
        return True

    def getLong(self, b, offset):
        """Decode the unsigned 32-bit integer stored at ``b[offset:offset+4]``.

        Returns 0 when fewer than four bytes are available at ``offset``.
        """
        chunk = b[offset:offset + 4]
        if len(chunk) == 4:
            # Explicit little-endian, matching the low-byte-first order
            # getInt2 uses, so the result does not depend on the host CPU.
            return struct.unpack("<I", chunk)[0]
        return 0

    def getInt2(self, b, offset):
        """Decode the unsigned 16-bit integer stored at ``b[offset:offset+2]``.

        The low byte comes first.
        """
        # The high byte must be shifted into place; masking the raw byte
        # with 0xFF00 always yields 0 and would cap the value at 255.
        return (b[offset] & 0xFF) | ((b[offset + 1] & 0xFF) << 8)

    def close(self):
        """Close the file handle, if any, and drop the cached buffers."""
        if self.__f is not None:
            self.__f.close()
            # Forget the closed handle so later reads do not touch it.
            self.__f = None
        self.vectorIndex = None
        self.contentBuff = None
|