# Copyright 2022 The Ip2Region Authors. All rights reserved.
# Use of this source code is governed by an Apache2.0-style
# license that can be found in the LICENSE file.
#
# Created by luckydog on 2022/6/29.
# Copyright © 2022年 luckydog. All rights reserved.
#
|
||
import socket
|
||
import struct
|
||
import io
|
||
import sys
|
||
|
||
|
||
# Default layout parameters of the xdb file format.
# Size in bytes of the file header that precedes the vector index.
HeaderInfoLength = 256
# The vector index is a rows x cols table addressed by the two highest
# bytes of the IP address.
VectorIndexRows = 256
VectorIndexCols = 256
# Bytes per vector index entry: start pointer + end pointer, 4 bytes each.
VectorIndexSize = 8
# Bytes per segment index entry: start IP (4), end IP (4),
# data length (2), data pointer (4).
SegmentIndexSize = 14


class XdbSearcher(object):
    """Look up the region string of an IPv4 address in an xdb database.

    The lookup mode is chosen by the constructor arguments:

    * ``contentBuff`` given: the whole database is served from memory and
      no file is opened.
    * ``dbfile`` and ``vectorIndex`` given: the vector index is served from
      memory, segment index and region data are read from the file.
    * only ``dbfile`` given: everything is read from the file on demand.
    """

    # Handle of the opened xdb file; stays None in content-buffer mode.
    __f = None

    # Cached vector index (the mode with the minimal memory allocation).
    vectorIndex = None
    # The whole xdb file content kept in memory.
    contentBuff = None

    @staticmethod
    def loadVectorIndexFromFile(dbfile):
        """Read the vector index section of ``dbfile``.

        Returns the raw bytes of the vector index, or None (after printing
        the error) when the file cannot be opened or read.
        """
        try:
            with io.open(dbfile, "rb") as f:
                f.seek(HeaderInfoLength)
                # One entry per (row, col) slot, VectorIndexSize bytes each.
                vi_len = VectorIndexRows * VectorIndexCols * VectorIndexSize
                return f.read(vi_len)
        except IOError as e:
            print("[Error]: %s" % e)
            return None

    @staticmethod
    def loadContentFromFile(dbfile):
        """Read the complete content of ``dbfile``.

        Returns the file content as bytes, or None (after printing the
        error) when the file cannot be opened or read.
        """
        try:
            with io.open(dbfile, "rb") as f:
                return f.read()
        except IOError as e:
            print("[Error]: %s" % e)
            return None

    def __init__(self, dbfile=None, vectorIndex=None, contentBuff=None):
        """Create a searcher; see the class docstring for the argument modes."""
        self.initDatabase(dbfile, vectorIndex, contentBuff)

    def search(self, ip):
        """Return the region string for ``ip``.

        ``ip`` may be a dotted-quad string, a string of decimal digits, or an
        integer. An empty string is returned when no segment matches.
        """
        if isinstance(ip, str):
            return self.searchByIPStr(ip)
        return self.searchByIPLong(ip)

    def searchByIPStr(self, ip):
        """Return the region string for ``ip`` given as a string.

        An all-digit string is taken as the integer form of the address,
        anything else is parsed as a dotted-quad address.
        """
        # A digit-only string must become an int before the bit operations
        # in searchByIPLong; passing the str through raised TypeError.
        ip = int(ip) if ip.isdigit() else self.ip2long(ip)
        return self.searchByIPLong(ip)

    def searchByIPLong(self, ip):
        """Return the region string for ``ip`` given as an unsigned integer.

        Returns an empty string when no segment contains ``ip``.
        """
        # Locate the segment index block based on the vector index.
        il0 = (ip >> 24) & 0xFF
        il1 = (ip >> 16) & 0xFF
        idx = il0 * VectorIndexCols * VectorIndexSize + il1 * VectorIndexSize

        if self.vectorIndex is not None:
            sPtr = self.getLong(self.vectorIndex, idx)
            ePtr = self.getLong(self.vectorIndex, idx + 4)
        elif self.contentBuff is not None:
            sPtr = self.getLong(self.contentBuff, HeaderInfoLength + idx)
            ePtr = self.getLong(self.contentBuff, HeaderInfoLength + idx + 4)
        else:
            self.__f.seek(HeaderInfoLength + idx)
            buffer_ptr = self.__f.read(VectorIndexSize)
            sPtr = self.getLong(buffer_ptr, 0)
            ePtr = self.getLong(buffer_ptr, 4)

        # Binary search the segment index block to get the region info.
        # ePtr is used as the offset of the last entry, so the upper bound
        # of the search is inclusive.
        dataLen = dataPtr = -1
        low = 0
        high = (ePtr - sPtr) // SegmentIndexSize
        while low <= high:
            mid = (low + high) >> 1
            pos = sPtr + mid * SegmentIndexSize
            # Read the segment index entry.
            buffer_sip = self.readBuffer(pos, SegmentIndexSize)
            sip = self.getLong(buffer_sip, 0)
            if ip < sip:
                high = mid - 1
                continue
            eip = self.getLong(buffer_sip, 4)
            if ip > eip:
                low = mid + 1
                continue
            dataLen = self.getInt2(buffer_sip, 8)
            dataPtr = self.getLong(buffer_sip, 10)
            break

        # Empty match interception.
        if dataPtr < 0:
            return ""

        buffer_string = self.readBuffer(dataPtr, dataLen)
        return buffer_string.decode("utf-8")

    def readBuffer(self, offset, length):
        """Return ``length`` bytes starting at ``offset`` of the database.

        The in-memory content buffer is preferred, otherwise the opened file
        is used. Returns None when neither is available.
        """
        # Check the in-memory buffer first.
        if self.contentBuff is not None:
            return self.contentBuff[offset:offset + length]

        # Read from the file handle.
        if self.__f is not None:
            self.__f.seek(offset)
            return self.__f.read(length)

        return None

    def initDatabase(self, dbfile, vi, cb):
        """Initialize the database for search.

        param: dbfile, vi (vector index), cb (content buffer).
        When ``cb`` is given it takes precedence and ``dbfile`` / ``vi`` are
        ignored; otherwise ``dbfile`` is opened and ``vi`` is kept as the
        cached vector index. On IOError the error is printed and the
        process exits.
        """
        try:
            if cb is not None:
                self.__f = None
                self.vectorIndex = None
                self.contentBuff = cb
            else:
                self.__f = io.open(dbfile, "rb")
                self.vectorIndex = vi
        except IOError as e:
            print("[Error]: %s" % e)
            sys.exit()

    def ip2long(self, ip):
        """Convert an IPv4 address string to its unsigned integer value."""
        _ip = socket.inet_aton(ip)
        return struct.unpack("!L", _ip)[0]

    def isip(self, ip):
        """Return True when ``ip`` is four dot-separated decimal parts, each
        at most three digits long and not greater than 255."""
        parts = ip.split(".")
        if len(parts) != 4:
            return False
        for part in parts:
            if not part.isdigit() or len(part) > 3 or int(part) > 255:
                return False
        return True

    def getLong(self, b, offset):
        """Decode the little-endian unsigned 32-bit integer at ``offset``.

        Returns 0 when fewer than four bytes are available at ``offset``.
        """
        chunk = b[offset:offset + 4]
        if len(chunk) == 4:
            # Explicit little-endian so the result does not depend on the
            # byte order of the host.
            return struct.unpack('<I', chunk)[0]
        return 0

    def getInt2(self, b, offset):
        """Decode the little-endian unsigned 16-bit integer at ``offset``."""
        # The second byte is the high byte and has to be shifted into place;
        # masking it with 0xFF00 always produced 0.
        return (b[offset] & 0xFF) | ((b[offset + 1] & 0xFF) << 8)

    def close(self):
        """Close the database file, if one is open, and drop the caches."""
        if self.__f is not None:
            self.__f.close()
        self.vectorIndex = None
        self.contentBuff = None